diff options
Diffstat (limited to 'tools/testing/selftests/net')
454 files changed, 33977 insertions, 4450 deletions
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 28a715a8ef2b..97ad4d551d44 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -4,11 +4,10 @@ bind_timewait bind_wildcard busy_poller cmsg_sender -diag_uid epoll_busy_poll fin_ack_lat -gro hwtstamp_config +icmp_rfc4884 io_uring_zerocopy_tx ioam6_parser ip_defrag @@ -16,11 +15,12 @@ ip_local_port_range ipsec ipv6_flowlabel ipv6_flowlabel_mgr +ipv6_fragmentation log.txt -msg_oob msg_zerocopy netlink-dumps nettest +proc_net_pktgen psock_fanout psock_snd psock_tpacket @@ -33,23 +33,24 @@ reuseport_bpf_numa reuseport_dualstack rxtimestamp sctp_hello -scm_pidfd -scm_rights sk_bind_sendto_listen sk_connect_zero_addr sk_so_peek_off +skf_net_off socket so_incoming_cpu so_netns_cookie so_txtime +so_rcv_listener stress_reuseport_listen tap tcp_fastopen_backup_key tcp_inq tcp_mmap +tcp_port_share +tfo timestamping tls -toeplitz tools tun txring_overwrite @@ -57,4 +58,3 @@ txtimestamp udpgso udpgso_bench_rx udpgso_bench_tx -unix_connect diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 73ee88d6b043..a275ed584026 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -1,112 +1,214 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for net selftests -CFLAGS += -Wall -Wl,--no-as-needed -O2 -g +CFLAGS += -Wall -Wl,--no-as-needed -O2 -g CFLAGS += -I../../../../usr/include/ $(KHDR_INCLUDES) # Additional include paths needed by kselftest.h CFLAGS += -I../ -TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh \ - rtnetlink.sh xfrm_policy.sh test_blackhole_dev.sh -TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh -TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh -TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh -TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh ipv6_flowlabel.sh -TEST_PROGS += tcp_fastopen_backup_key.sh fcnal-test.sh l2tp.sh traceroute.sh -TEST_PROGS += fin_ack_lat.sh fib_nexthop_multiprefix.sh fib_nexthops.sh fib_nexthop_nongw.sh -TEST_PROGS += altnames.sh icmp.sh icmp_redirect.sh ip6_gre_headroom.sh -TEST_PROGS += route_localnet.sh -TEST_PROGS += reuseaddr_ports_exhausted.sh -TEST_PROGS += txtimestamp.sh -TEST_PROGS += vrf-xfrm-tests.sh -TEST_PROGS += rxtimestamp.sh -TEST_PROGS += drop_monitor_tests.sh -TEST_PROGS += vrf_route_leaking.sh -TEST_PROGS += bareudp.sh -TEST_PROGS += amt.sh -TEST_PROGS += unicast_extensions.sh -TEST_PROGS += udpgro_fwd.sh -TEST_PROGS += udpgro_frglist.sh -TEST_PROGS += veth.sh -TEST_PROGS += ioam6.sh -TEST_PROGS += gro.sh -TEST_PROGS += gre_gso.sh -TEST_PROGS += cmsg_so_mark.sh -TEST_PROGS += cmsg_so_priority.sh -TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh -TEST_PROGS += netns-name.sh -TEST_PROGS += nl_netdev.py -TEST_PROGS += srv6_end_dt46_l3vpn_test.sh -TEST_PROGS += srv6_end_dt4_l3vpn_test.sh -TEST_PROGS += srv6_end_dt6_l3vpn_test.sh -TEST_PROGS += srv6_hencap_red_l3vpn_test.sh -TEST_PROGS += srv6_hl2encap_red_l2vpn_test.sh -TEST_PROGS += srv6_end_next_csid_l3vpn_test.sh -TEST_PROGS += srv6_end_x_next_csid_l3vpn_test.sh -TEST_PROGS += srv6_end_flavors_test.sh -TEST_PROGS += srv6_end_dx4_netfilter_test.sh -TEST_PROGS += srv6_end_dx6_netfilter_test.sh -TEST_PROGS += vrf_strict_mode_test.sh -TEST_PROGS += arp_ndisc_evict_nocarrier.sh -TEST_PROGS += ndisc_unsolicited_na_test.sh -TEST_PROGS += arp_ndisc_untracked_subnets.sh -TEST_PROGS += stress_reuseport_listen.sh -TEST_PROGS += l2_tos_ttl_inherit.sh -TEST_PROGS += bind_bhash.sh -TEST_PROGS += ip_local_port_range.sh -TEST_PROGS += rps_default_mask.sh -TEST_PROGS += big_tcp.sh -TEST_PROGS += netns-sysctl.sh -TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh xfrm_policy_add_speed.sh -TEST_GEN_FILES = socket nettest -TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any -TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite -TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag -TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr so_netns_cookie -TEST_GEN_FILES += tcp_fastopen_backup_key -TEST_GEN_FILES += fin_ack_lat -TEST_GEN_FILES += reuseaddr_ports_exhausted -TEST_GEN_FILES += hwtstamp_config rxtimestamp timestamping txtimestamp -TEST_GEN_FILES += ipsec -TEST_GEN_FILES += ioam6_parser -TEST_GEN_FILES += gro -TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa -TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls tun tap epoll_busy_poll -TEST_GEN_FILES += toeplitz -TEST_GEN_FILES += cmsg_sender -TEST_GEN_FILES += stress_reuseport_listen -TEST_PROGS += test_vxlan_vnifiltering.sh -TEST_GEN_FILES += io_uring_zerocopy_tx -TEST_PROGS += io_uring_zerocopy_tx.sh -TEST_GEN_FILES += bind_bhash -TEST_GEN_PROGS += sk_bind_sendto_listen -TEST_GEN_PROGS += sk_connect_zero_addr -TEST_GEN_PROGS += sk_so_peek_off -TEST_PROGS += test_ingress_egress_chaining.sh -TEST_GEN_PROGS += so_incoming_cpu -TEST_PROGS += sctp_vrf.sh -TEST_GEN_FILES += sctp_hello -TEST_GEN_FILES += ip_local_port_range -TEST_GEN_PROGS += bind_wildcard -TEST_GEN_PROGS += bind_timewait -TEST_PROGS += test_vxlan_mdb.sh -TEST_PROGS += test_bridge_neigh_suppress.sh -TEST_PROGS += test_vxlan_nolocalbypass.sh -TEST_PROGS += test_bridge_backup_port.sh -TEST_PROGS += fdb_flush.sh fdb_notify.sh -TEST_PROGS += fq_band_pktlimit.sh -TEST_PROGS += vlan_hw_filter.sh -TEST_PROGS += vlan_bridge_binding.sh -TEST_PROGS += bpf_offload.py -TEST_PROGS += ipv6_route_update_soft_lockup.sh -TEST_PROGS += busy_poll_test.sh +TEST_PROGS := \ + altnames.sh \ + amt.sh \ + arp_ndisc_evict_nocarrier.sh \ + arp_ndisc_untracked_subnets.sh \ + bareudp.sh \ + big_tcp.sh \ + bind_bhash.sh \ + bpf_offload.py \ + bridge_stp_mode.sh \ + bridge_vlan_dump.sh \ + broadcast_ether_dst.sh \ + broadcast_pmtu.sh \ + busy_poll_test.sh \ + cmsg_ip.sh \ + cmsg_so_mark.sh \ + cmsg_so_priority.sh \ + cmsg_time.sh \ + double_udp_encap.sh \ + drop_monitor_tests.sh \ + fcnal-ipv4.sh \ + fcnal-ipv6.sh \ + fcnal-other.sh \ + fdb_flush.sh \ + fdb_notify.sh \ + fib-onlink-tests.sh \ + fib_nexthop_multiprefix.sh \ + fib_nexthop_nongw.sh \ + fib_nexthops.sh \ + fib_rule_tests.sh \ + fib_tests.sh \ + fin_ack_lat.sh \ + fq_band_pktlimit.sh \ + gre_gso.sh \ + gre_ipv6_lladdr.sh \ + icmp.sh \ + icmp_redirect.sh \ + io_uring_zerocopy_tx.sh \ + ioam6.sh \ + ip6_gre_headroom.sh \ + ip6_tunnel.sh \ + ip_defrag.sh \ + ip_local_port_range.sh \ + ipv6_flowlabel.sh \ + ipv6_force_forwarding.sh \ + ipv6_route_update_soft_lockup.sh \ + ipvtap_test.sh \ + l2_tos_ttl_inherit.sh \ + l2tp.sh \ + link_netns.py \ + lwt_dst_cache_ref_loop.sh \ + macvlan_mcast_shared_mac.sh \ + msg_zerocopy.sh \ + nat6to4.sh \ + ndisc_unsolicited_na_test.sh \ + netdev-l2addr.sh \ + netdevice.sh \ + netns-name.sh \ + netns-sysctl.sh \ + nk_qlease.py \ + nl_netdev.py \ + nl_nlctrl.py \ + pmtu.sh \ + psock_snd.sh \ + reuseaddr_ports_exhausted.sh \ + reuseport_addr_any.sh \ + route_hint.sh \ + route_localnet.sh \ + rps_default_mask.sh \ + rtnetlink.py \ + rtnetlink.sh \ + rtnetlink_notification.sh \ + run_afpackettests \ + run_netsocktests \ + rxtimestamp.sh \ + sctp_vrf.sh \ + skf_net_off.sh \ + so_txtime.sh \ + srv6_end_dt46_l3vpn_test.sh \ + srv6_end_dt4_l3vpn_test.sh \ + srv6_end_dt6_l3vpn_test.sh \ + srv6_end_dx4_netfilter_test.sh \ + srv6_end_dx6_netfilter_test.sh \ + srv6_end_flavors_test.sh \ + srv6_end_next_csid_l3vpn_test.sh \ + srv6_end_x_next_csid_l3vpn_test.sh \ + srv6_hencap_red_l3vpn_test.sh \ + srv6_hl2encap_red_l2vpn_test.sh \ + srv6_iptunnel_cache.sh \ + stress_reuseport_listen.sh \ + tcp_fastopen_backup_key.sh \ + test_bpf.sh \ + test_bridge_backup_port.sh \ + test_bridge_neigh_suppress.sh \ + test_ingress_egress_chaining.sh \ + test_neigh.sh \ + test_so_rcv.sh \ + test_vxlan_fdb_changelink.sh \ + test_vxlan_mdb.sh \ + test_vxlan_nh.sh \ + test_vxlan_nolocalbypass.sh \ + test_vxlan_under_vrf.sh \ + test_vxlan_vnifiltering.sh \ + tfo_passive.sh \ + traceroute.sh \ + txtimestamp.sh \ + udpgro.sh \ + udpgro_bench.sh \ + udpgro_frglist.sh \ + udpgro_fwd.sh \ + udpgso.sh \ + udpgso_bench.sh \ + unicast_extensions.sh \ + veth.sh \ + vlan_bridge_binding.sh \ + vlan_hw_filter.sh \ + vrf-xfrm-tests.sh \ + vrf_route_leaking.sh \ + vrf_strict_mode_test.sh \ + xfrm_policy.sh \ + xfrm_state.sh \ +# end of TEST_PROGS + +TEST_PROGS_EXTENDED := \ + xfrm_policy_add_speed.sh \ +# end of TEST_PROGS_EXTENDED + +TEST_GEN_FILES := \ + bind_bhash \ + cmsg_sender \ + fin_ack_lat \ + hwtstamp_config \ + io_uring_zerocopy_tx \ + ioam6_parser \ + ip_defrag \ + ip_local_port_range \ + ipsec \ + ipv6_flowlabel \ + ipv6_flowlabel_mgr \ + msg_zerocopy \ + nettest \ + psock_fanout \ + psock_snd \ + psock_tpacket \ + reuseaddr_ports_exhausted \ + reuseport_addr_any \ + rxtimestamp \ + sctp_hello \ + skf_net_off \ + so_netns_cookie \ + so_rcv_listener \ + so_txtime \ + socket \ + stress_reuseport_listen \ + tcp_fastopen_backup_key \ + tcp_inq \ + tcp_mmap \ + tfo \ + timestamping \ + txring_overwrite \ + txtimestamp \ + udpgso \ + udpgso_bench_rx \ + udpgso_bench_tx \ +# end of TEST_GEN_FILES + +TEST_GEN_PROGS := \ + bind_timewait \ + bind_wildcard \ + epoll_busy_poll \ + icmp_rfc4884 \ + ipv6_fragmentation \ + proc_net_pktgen \ + reuseaddr_conflict \ + reuseport_bpf \ + reuseport_bpf_cpu \ + reuseport_bpf_numa \ + reuseport_dualstack \ + sk_bind_sendto_listen \ + sk_connect_zero_addr \ + sk_so_peek_off \ + so_incoming_cpu \ + tap \ + tcp_port_share \ + tls \ +# end of TEST_GEN_PROGS + +TEST_FILES := \ + fcnal-test.sh \ + in_netns.sh \ + lib.sh \ + settings \ +# end of TEST_FILES # YNL files, must be before "include ..lib.mk" -YNL_GEN_FILES := busy_poller netlink-dumps -TEST_GEN_FILES += $(YNL_GEN_FILES) +YNL_GEN_FILES := busy_poller +YNL_GEN_PROGS := \ + netlink-dumps \ + tun \ +# end of YNL_GEN_PROGS -TEST_FILES := settings -TEST_FILES += in_netns.sh lib.sh net_helper.sh setup_loopback.sh setup_veth.sh +TEST_GEN_FILES += $(YNL_GEN_FILES) +TEST_GEN_PROGS += $(YNL_GEN_PROGS) TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c)) @@ -115,7 +217,14 @@ TEST_INCLUDES := forwarding/lib.sh include ../lib.mk # YNL build -YNL_GENS := netdev +YNL_GENS := \ + netdev \ + rt-addr \ + rt-link \ + rt-neigh \ + rt-route \ +# end of YNL_GENS + include ynl.mk $(OUTPUT)/epoll_busy_poll: LDLIBS += -lcap diff --git a/tools/testing/selftests/net/af_unix/.gitignore b/tools/testing/selftests/net/af_unix/.gitignore new file mode 100644 index 000000000000..240b26740c9e --- /dev/null +++ b/tools/testing/selftests/net/af_unix/.gitignore @@ -0,0 +1,8 @@ +diag_uid +msg_oob +scm_inq +scm_pidfd +scm_rights +so_peek_off +unix_connect +unix_connreset diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile index 50584479540b..4c0375e28bbe 100644 --- a/tools/testing/selftests/net/af_unix/Makefile +++ b/tools/testing/selftests/net/af_unix/Makefile @@ -1,4 +1,19 @@ -CFLAGS += $(KHDR_INCLUDES) -TEST_GEN_PROGS := diag_uid msg_oob scm_pidfd scm_rights unix_connect +top_srcdir := ../../../../.. +include $(top_srcdir)/scripts/Makefile.compiler + +cc-option = $(call __cc-option, $(CC),,$(1),$(2)) + +CFLAGS += $(KHDR_INCLUDES) -Wall $(call cc-option,-Wflex-array-member-not-at-end) + +TEST_GEN_PROGS := \ + diag_uid \ + msg_oob \ + scm_inq \ + scm_pidfd \ + scm_rights \ + so_peek_off \ + unix_connect \ + unix_connreset \ +# end of TEST_GEN_PROGS include ../../lib.mk diff --git a/tools/testing/selftests/net/af_unix/config b/tools/testing/selftests/net/af_unix/config index 37368567768c..b5429c15a53c 100644 --- a/tools/testing/selftests/net/af_unix/config +++ b/tools/testing/selftests/net/af_unix/config @@ -1,3 +1,3 @@ -CONFIG_UNIX=y CONFIG_AF_UNIX_OOB=y +CONFIG_UNIX=y CONFIG_UNIX_DIAG=m diff --git a/tools/testing/selftests/net/af_unix/diag_uid.c b/tools/testing/selftests/net/af_unix/diag_uid.c index 79a3dd75590e..da7d50cedee6 100644 --- a/tools/testing/selftests/net/af_unix/diag_uid.c +++ b/tools/testing/selftests/net/af_unix/diag_uid.c @@ -14,7 +14,7 @@ #include <sys/types.h> #include <sys/un.h> -#include "../../kselftest_harness.h" +#include "kselftest_harness.h" FIXTURE(diag_uid) { diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c index 3ed3882a93b8..1b499d56656c 100644 --- a/tools/testing/selftests/net/af_unix/msg_oob.c +++ b/tools/testing/selftests/net/af_unix/msg_oob.c @@ -11,7 +11,7 @@ #include <sys/signalfd.h> #include <sys/socket.h> -#include "../../kselftest_harness.h" +#include "kselftest_harness.h" #define BUF_SZ 32 @@ -210,7 +210,7 @@ static void __sendpair(struct __test_metadata *_metadata, static void __recvpair(struct __test_metadata *_metadata, FIXTURE_DATA(msg_oob) *self, const char *expected_buf, int expected_len, - int buf_len, int flags) + int buf_len, int flags, bool is_sender) { int i, ret[2], recv_errno[2], expected_errno = 0; char recv_buf[2][BUF_SZ] = {}; @@ -221,7 +221,9 @@ static void __recvpair(struct __test_metadata *_metadata, errno = 0; for (i = 0; i < 2; i++) { - ret[i] = recv(self->fd[i * 2 + 1], recv_buf[i], buf_len, flags); + int index = is_sender ? i * 2 : i * 2 + 1; + + ret[i] = recv(self->fd[index], recv_buf[i], buf_len, flags); recv_errno[i] = errno; } @@ -308,6 +310,20 @@ static void __siocatmarkpair(struct __test_metadata *_metadata, ASSERT_EQ(answ[0], answ[1]); } +static void __resetpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self, + const FIXTURE_VARIANT(msg_oob) *variant, + bool reset) +{ + int i; + + for (i = 0; i < 2; i++) + close(self->fd[i * 2 + 1]); + + __recvpair(_metadata, self, "", reset ? -ECONNRESET : 0, 1, + variant->peek ? MSG_PEEK : 0, true); +} + #define sendpair(buf, len, flags) \ __sendpair(_metadata, self, buf, len, flags) @@ -316,9 +332,10 @@ static void __siocatmarkpair(struct __test_metadata *_metadata, if (variant->peek) \ __recvpair(_metadata, self, \ expected_buf, expected_len, \ - buf_len, (flags) | MSG_PEEK); \ + buf_len, (flags) | MSG_PEEK, false); \ __recvpair(_metadata, self, \ - expected_buf, expected_len, buf_len, flags); \ + expected_buf, expected_len, \ + buf_len, flags, false); \ } while (0) #define epollpair(oob_remaining) \ @@ -330,6 +347,9 @@ static void __siocatmarkpair(struct __test_metadata *_metadata, #define setinlinepair() \ __setinlinepair(_metadata, self) +#define resetpair(reset) \ + __resetpair(_metadata, self, variant, reset) + #define tcp_incompliant \ for (self->tcp_compliant = false; \ self->tcp_compliant == false; \ @@ -344,6 +364,21 @@ TEST_F(msg_oob, non_oob) recvpair("", -EINVAL, 1, MSG_OOB); epollpair(false); siocatmarkpair(false); + + resetpair(true); +} + +TEST_F(msg_oob, non_oob_no_reset) +{ + sendpair("x", 1, 0); + epollpair(false); + siocatmarkpair(false); + + recvpair("x", 1, 1, 0); + epollpair(false); + siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, oob) @@ -355,6 +390,19 @@ TEST_F(msg_oob, oob) recvpair("x", 1, 1, MSG_OOB); epollpair(false); siocatmarkpair(true); + + tcp_incompliant { + resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. */ + } +} + +TEST_F(msg_oob, oob_reset) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + resetpair(true); } TEST_F(msg_oob, oob_drop) @@ -370,6 +418,8 @@ TEST_F(msg_oob, oob_drop) recvpair("", -EINVAL, 1, MSG_OOB); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, oob_ahead) @@ -385,6 +435,10 @@ TEST_F(msg_oob, oob_ahead) recvpair("hell", 4, 4, 0); epollpair(false); siocatmarkpair(true); + + tcp_incompliant { + resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. */ + } } TEST_F(msg_oob, oob_break) @@ -403,6 +457,8 @@ TEST_F(msg_oob, oob_break) recvpair("", -EAGAIN, 1, 0); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, oob_ahead_break) @@ -426,6 +482,8 @@ TEST_F(msg_oob, oob_ahead_break) recvpair("world", 5, 5, 0); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, oob_break_drop) @@ -449,6 +507,8 @@ TEST_F(msg_oob, oob_break_drop) recvpair("", -EINVAL, 1, MSG_OOB); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, ex_oob_break) @@ -476,6 +536,8 @@ TEST_F(msg_oob, ex_oob_break) recvpair("ld", 2, 2, 0); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, ex_oob_drop) @@ -498,6 +560,8 @@ TEST_F(msg_oob, ex_oob_drop) epollpair(false); siocatmarkpair(true); } + + resetpair(false); } TEST_F(msg_oob, ex_oob_drop_2) @@ -523,6 +587,8 @@ TEST_F(msg_oob, ex_oob_drop_2) epollpair(false); siocatmarkpair(true); } + + resetpair(false); } TEST_F(msg_oob, ex_oob_oob) @@ -546,6 +612,54 @@ TEST_F(msg_oob, ex_oob_oob) recvpair("", -EINVAL, 1, MSG_OOB); epollpair(false); siocatmarkpair(false); + + resetpair(false); +} + +TEST_F(msg_oob, ex_oob_ex_oob) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("x", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + sendpair("y", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("y", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + tcp_incompliant { + resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. */ + } +} + +TEST_F(msg_oob, ex_oob_ex_oob_oob) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("x", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + sendpair("y", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("y", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + sendpair("z", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); } TEST_F(msg_oob, ex_oob_ahead_break) @@ -576,6 +690,10 @@ TEST_F(msg_oob, ex_oob_ahead_break) recvpair("d", 1, 1, MSG_OOB); epollpair(false); siocatmarkpair(true); + + tcp_incompliant { + resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. */ + } } TEST_F(msg_oob, ex_oob_siocatmark) @@ -595,6 +713,8 @@ TEST_F(msg_oob, ex_oob_siocatmark) recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. */ epollpair(true); siocatmarkpair(false); + + resetpair(true); } TEST_F(msg_oob, inline_oob) @@ -612,6 +732,8 @@ TEST_F(msg_oob, inline_oob) recvpair("x", 1, 1, 0); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, inline_oob_break) @@ -633,6 +755,8 @@ TEST_F(msg_oob, inline_oob_break) recvpair("o", 1, 1, 0); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, inline_oob_ahead_break) @@ -661,6 +785,8 @@ TEST_F(msg_oob, inline_oob_ahead_break) epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, inline_ex_oob_break) @@ -686,6 +812,8 @@ TEST_F(msg_oob, inline_ex_oob_break) recvpair("rld", 3, 3, 0); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, inline_ex_oob_no_drop) @@ -707,6 +835,8 @@ TEST_F(msg_oob, inline_ex_oob_no_drop) recvpair("y", 1, 1, 0); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, inline_ex_oob_drop) @@ -731,6 +861,8 @@ TEST_F(msg_oob, inline_ex_oob_drop) epollpair(false); siocatmarkpair(false); } + + resetpair(false); } TEST_F(msg_oob, inline_ex_oob_siocatmark) @@ -752,6 +884,8 @@ TEST_F(msg_oob, inline_ex_oob_siocatmark) recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. */ epollpair(true); siocatmarkpair(false); + + resetpair(true); } TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/af_unix/scm_inq.c b/tools/testing/selftests/net/af_unix/scm_inq.c new file mode 100644 index 000000000000..3a86be9bda17 --- /dev/null +++ b/tools/testing/selftests/net/af_unix/scm_inq.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2025 Google LLC */ + +#include <linux/sockios.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/types.h> + +#include "kselftest_harness.h" + +#define NR_CHUNKS 100 +#define MSG_LEN 256 + +FIXTURE(scm_inq) +{ + int fd[2]; +}; + +FIXTURE_VARIANT(scm_inq) +{ + int type; +}; + +FIXTURE_VARIANT_ADD(scm_inq, stream) +{ + .type = SOCK_STREAM, +}; + +FIXTURE_VARIANT_ADD(scm_inq, dgram) +{ + .type = SOCK_DGRAM, +}; + +FIXTURE_VARIANT_ADD(scm_inq, seqpacket) +{ + .type = SOCK_SEQPACKET, +}; + +FIXTURE_SETUP(scm_inq) +{ + int err; + + err = socketpair(AF_UNIX, variant->type | SOCK_NONBLOCK, 0, self->fd); + ASSERT_EQ(0, err); +} + +FIXTURE_TEARDOWN(scm_inq) +{ + close(self->fd[0]); + close(self->fd[1]); +} + +static void send_chunks(struct __test_metadata *_metadata, + FIXTURE_DATA(scm_inq) *self) +{ + char buf[MSG_LEN] = {}; + int i, ret; + + for (i = 0; i < NR_CHUNKS; i++) { + ret = send(self->fd[0], buf, sizeof(buf), 0); + ASSERT_EQ(sizeof(buf), ret); + } +} + +static void recv_chunks(struct __test_metadata *_metadata, + FIXTURE_DATA(scm_inq) *self) +{ + char cmsg_buf[CMSG_SPACE(sizeof(int))]; + struct msghdr msg = {}; + struct iovec iov = {}; + struct cmsghdr *cmsg; + char buf[MSG_LEN]; + int i, ret; + int inq; + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = cmsg_buf; + msg.msg_controllen = sizeof(cmsg_buf); + + iov.iov_base = buf; + iov.iov_len = sizeof(buf); + + for (i = 0; i < NR_CHUNKS; i++) { + memset(buf, 0, sizeof(buf)); + memset(cmsg_buf, 0, sizeof(cmsg_buf)); + + ret = recvmsg(self->fd[1], &msg, 0); + ASSERT_EQ(MSG_LEN, ret); + + cmsg = CMSG_FIRSTHDR(&msg); + ASSERT_NE(NULL, cmsg); + ASSERT_EQ(CMSG_LEN(sizeof(int)), cmsg->cmsg_len); + ASSERT_EQ(SOL_SOCKET, cmsg->cmsg_level); + ASSERT_EQ(SCM_INQ, cmsg->cmsg_type); + + ret = ioctl(self->fd[1], SIOCINQ, &inq); + ASSERT_EQ(0, ret); + ASSERT_EQ(*(int *)CMSG_DATA(cmsg), inq); + } +} + +TEST_F(scm_inq, basic) +{ + int err, inq; + + err = setsockopt(self->fd[1], SOL_SOCKET, SO_INQ, &(int){1}, sizeof(int)); + if (variant->type != SOCK_STREAM) { + ASSERT_EQ(-ENOPROTOOPT, -errno); + return; + } + + ASSERT_EQ(0, err); + + err = ioctl(self->fd[1], SIOCINQ, &inq); + ASSERT_EQ(0, err); + ASSERT_EQ(0, inq); + + send_chunks(_metadata, self); + recv_chunks(_metadata, self); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/af_unix/scm_pidfd.c b/tools/testing/selftests/net/af_unix/scm_pidfd.c index 7e534594167e..2c18b92a2603 100644 --- a/tools/testing/selftests/net/af_unix/scm_pidfd.c +++ b/tools/testing/selftests/net/af_unix/scm_pidfd.c @@ -15,7 +15,8 @@ #include <sys/types.h> #include <sys/wait.h> -#include "../../kselftest_harness.h" +#include "../../pidfd/pidfd.h" +#include "kselftest_harness.h" #define clean_errno() (errno == 0 ? "None" : strerror(errno)) #define log_err(MSG, ...) \ @@ -26,6 +27,8 @@ #define SCM_PIDFD 0x04 #endif +#define CHILD_EXIT_CODE_OK 123 + static void child_die() { exit(1); @@ -126,16 +129,64 @@ out: return result; } +struct cmsg_data { + struct ucred *ucred; + int *pidfd; +}; + +static int parse_cmsg(struct msghdr *msg, struct cmsg_data *res) +{ + struct cmsghdr *cmsg; + + if (msg->msg_flags & (MSG_TRUNC | MSG_CTRUNC)) { + log_err("recvmsg: truncated"); + return 1; + } + + for (cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL; + cmsg = CMSG_NXTHDR(msg, cmsg)) { + if (cmsg->cmsg_level == SOL_SOCKET && + cmsg->cmsg_type == SCM_PIDFD) { + if (cmsg->cmsg_len < sizeof(*res->pidfd)) { + log_err("CMSG parse: SCM_PIDFD wrong len"); + return 1; + } + + res->pidfd = (void *)CMSG_DATA(cmsg); + } + + if (cmsg->cmsg_level == SOL_SOCKET && + cmsg->cmsg_type == SCM_CREDENTIALS) { + if (cmsg->cmsg_len < sizeof(*res->ucred)) { + log_err("CMSG parse: SCM_CREDENTIALS wrong len"); + return 1; + } + + res->ucred = (void *)CMSG_DATA(cmsg); + } + } + + if (!res->pidfd) { + log_err("CMSG parse: SCM_PIDFD not found"); + return 1; + } + + if (!res->ucred) { + log_err("CMSG parse: SCM_CREDENTIALS not found"); + return 1; + } + + return 0; +} + static int cmsg_check(int fd) { struct msghdr msg = { 0 }; - struct cmsghdr *cmsg; + struct cmsg_data res; struct iovec iov; - struct ucred *ucred = NULL; int data = 0; char control[CMSG_SPACE(sizeof(struct ucred)) + CMSG_SPACE(sizeof(int))] = { 0 }; - int *pidfd = NULL; pid_t parent_pid; int err; @@ -158,53 +209,98 @@ static int cmsg_check(int fd) return 1; } - for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; - cmsg = CMSG_NXTHDR(&msg, cmsg)) { - if (cmsg->cmsg_level == SOL_SOCKET && - cmsg->cmsg_type == SCM_PIDFD) { - if (cmsg->cmsg_len < sizeof(*pidfd)) { - log_err("CMSG parse: SCM_PIDFD wrong len"); - return 1; - } + /* send(pfd, "x", sizeof(char), 0) */ + if (data != 'x') { + log_err("recvmsg: data corruption"); + return 1; + } - pidfd = (void *)CMSG_DATA(cmsg); - } + if (parse_cmsg(&msg, &res)) { + log_err("CMSG parse: parse_cmsg() failed"); + return 1; + } - if (cmsg->cmsg_level == SOL_SOCKET && - cmsg->cmsg_type == SCM_CREDENTIALS) { - if (cmsg->cmsg_len < sizeof(*ucred)) { - log_err("CMSG parse: SCM_CREDENTIALS wrong len"); - return 1; - } + /* pidfd from SCM_PIDFD should point to the parent process PID */ + parent_pid = + get_pid_from_fdinfo_file(*res.pidfd, "Pid:", sizeof("Pid:") - 1); + if (parent_pid != getppid()) { + log_err("wrong SCM_PIDFD %d != %d", parent_pid, getppid()); + close(*res.pidfd); + return 1; + } - ucred = (void *)CMSG_DATA(cmsg); - } + close(*res.pidfd); + return 0; +} + +static int cmsg_check_dead(int fd, int expected_pid) +{ + int err; + struct msghdr msg = { 0 }; + struct cmsg_data res; + struct iovec iov; + int data = 0; + char control[CMSG_SPACE(sizeof(struct ucred)) + + CMSG_SPACE(sizeof(int))] = { 0 }; + struct pidfd_info info = { + .mask = PIDFD_INFO_EXIT, + }; + + iov.iov_base = &data; + iov.iov_len = sizeof(data); + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + err = recvmsg(fd, &msg, 0); + if (err < 0) { + log_err("recvmsg"); + return 1; } - /* send(pfd, "x", sizeof(char), 0) */ - if (data != 'x') { + if (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) { + log_err("recvmsg: truncated"); + return 1; + } + + /* send(cfd, "y", sizeof(char), 0) */ + if (data != 'y') { log_err("recvmsg: data corruption"); return 1; } - if (!pidfd) { - log_err("CMSG parse: SCM_PIDFD not found"); + if (parse_cmsg(&msg, &res)) { + log_err("CMSG parse: parse_cmsg() failed"); return 1; } - if (!ucred) { - log_err("CMSG parse: SCM_CREDENTIALS not found"); + /* + * pidfd from SCM_PIDFD should point to the client_pid. + * Let's read exit information and check if it's what + * we expect to see. + */ + if (ioctl(*res.pidfd, PIDFD_GET_INFO, &info)) { + log_err("%s: ioctl(PIDFD_GET_INFO) failed", __func__); + close(*res.pidfd); return 1; } - /* pidfd from SCM_PIDFD should point to the parent process PID */ - parent_pid = - get_pid_from_fdinfo_file(*pidfd, "Pid:", sizeof("Pid:") - 1); - if (parent_pid != getppid()) { - log_err("wrong SCM_PIDFD %d != %d", parent_pid, getppid()); + if (!(info.mask & PIDFD_INFO_EXIT)) { + log_err("%s: No exit information from ioctl(PIDFD_GET_INFO)", __func__); + close(*res.pidfd); + return 1; + } + + err = WIFEXITED(info.exit_code) ? WEXITSTATUS(info.exit_code) : 1; + if (err != CHILD_EXIT_CODE_OK) { + log_err("%s: wrong exit_code %d != %d", __func__, err, CHILD_EXIT_CODE_OK); + close(*res.pidfd); return 1; } + close(*res.pidfd); return 0; } @@ -291,6 +387,24 @@ static void fill_sockaddr(struct sock_addr *addr, bool abstract) memcpy(sun_path_buf, addr->sock_name, strlen(addr->sock_name)); } +static int sk_enable_cred_pass(int sk) +{ + int on = 0; + + on = 1; + if (setsockopt(sk, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on))) { + log_err("Failed to set SO_PASSCRED"); + return 1; + } + + if (setsockopt(sk, SOL_SOCKET, SO_PASSPIDFD, &on, sizeof(on))) { + log_err("Failed to set SO_PASSPIDFD"); + return 1; + } + + return 0; +} + static void client(FIXTURE_DATA(scm_pidfd) *self, const FIXTURE_VARIANT(scm_pidfd) *variant) { @@ -299,7 +413,6 @@ static void client(FIXTURE_DATA(scm_pidfd) *self, struct ucred peer_cred; int peer_pidfd; pid_t peer_pid; - int on = 0; cfd = socket(AF_UNIX, variant->type, 0); if (cfd < 0) { @@ -322,14 +435,8 @@ static void client(FIXTURE_DATA(scm_pidfd) *self, child_die(); } - on = 1; - if (setsockopt(cfd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on))) { - log_err("Failed to set SO_PASSCRED"); - child_die(); - } - - if (setsockopt(cfd, SOL_SOCKET, SO_PASSPIDFD, &on, sizeof(on))) { - log_err("Failed to set SO_PASSPIDFD"); + if (sk_enable_cred_pass(cfd)) { + log_err("sk_enable_cred_pass() failed"); child_die(); } @@ -340,6 +447,12 @@ static void client(FIXTURE_DATA(scm_pidfd) *self, child_die(); } + /* send something to the parent so it can receive SCM_PIDFD too and validate it */ + if (send(cfd, "y", sizeof(char), 0) == -1) { + log_err("Failed to send(cfd, \"y\", sizeof(char), 0)"); + child_die(); + } + /* skip further for SOCK_DGRAM as it's not applicable */ if (variant->type == SOCK_DGRAM) return; @@ -398,7 +511,13 @@ TEST_F(scm_pidfd, test) close(self->server); close(self->startup_pipe[0]); client(self, variant); - exit(0); + + /* + * It's a bit unusual, but in case of success we return non-zero + * exit code (CHILD_EXIT_CODE_OK) and then we expect to read it + * from ioctl(PIDFD_GET_INFO) in cmsg_check_dead(). + */ + exit(CHILD_EXIT_CODE_OK); } close(self->startup_pipe[1]); @@ -421,9 +540,17 @@ TEST_F(scm_pidfd, test) ASSERT_NE(-1, err); } - close(pfd); waitpid(self->client_pid, &child_status, 0); - ASSERT_EQ(0, WIFEXITED(child_status) ? WEXITSTATUS(child_status) : 1); + /* see comment before exit(CHILD_EXIT_CODE_OK) */ + ASSERT_EQ(CHILD_EXIT_CODE_OK, WIFEXITED(child_status) ? WEXITSTATUS(child_status) : 1); + + err = sk_enable_cred_pass(pfd); + ASSERT_EQ(0, err); + + err = cmsg_check_dead(pfd, self->client_pid); + ASSERT_EQ(0, err); + + close(pfd); } TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/af_unix/scm_rights.c b/tools/testing/selftests/net/af_unix/scm_rights.c index d66336256580..d82a79c21c17 100644 --- a/tools/testing/selftests/net/af_unix/scm_rights.c +++ b/tools/testing/selftests/net/af_unix/scm_rights.c @@ -10,7 +10,7 @@ #include <sys/socket.h> #include <sys/un.h> -#include "../../kselftest_harness.h" +#include "kselftest_harness.h" FIXTURE(scm_rights) { @@ -23,6 +23,7 @@ FIXTURE_VARIANT(scm_rights) int type; int flags; bool test_listener; + bool disabled; }; FIXTURE_VARIANT_ADD(scm_rights, dgram) @@ -31,6 +32,16 @@ FIXTURE_VARIANT_ADD(scm_rights, dgram) .type = SOCK_DGRAM, .flags = 0, .test_listener = false, + .disabled = false, +}; + +FIXTURE_VARIANT_ADD(scm_rights, dgram_disabled) +{ + .name = "UNIX ", + .type = SOCK_DGRAM, + .flags = 0, + .test_listener = false, + .disabled = true, }; FIXTURE_VARIANT_ADD(scm_rights, stream) @@ -39,6 +50,16 @@ FIXTURE_VARIANT_ADD(scm_rights, stream) .type = SOCK_STREAM, .flags = 0, .test_listener = false, + .disabled = false, +}; + +FIXTURE_VARIANT_ADD(scm_rights, stream_disabled) +{ + .name = "UNIX-STREAM ", + .type = SOCK_STREAM, + .flags = 0, + .test_listener = false, + .disabled = true, }; FIXTURE_VARIANT_ADD(scm_rights, stream_oob) @@ -47,6 +68,16 @@ FIXTURE_VARIANT_ADD(scm_rights, stream_oob) .type = SOCK_STREAM, .flags = MSG_OOB, .test_listener = false, + .disabled = false, +}; + +FIXTURE_VARIANT_ADD(scm_rights, stream_oob_disabled) +{ + .name = "UNIX-STREAM ", + .type = SOCK_STREAM, + .flags = MSG_OOB, + .test_listener = false, + .disabled = true, }; FIXTURE_VARIANT_ADD(scm_rights, stream_listener) @@ -55,6 +86,16 @@ FIXTURE_VARIANT_ADD(scm_rights, stream_listener) .type = SOCK_STREAM, .flags = 0, .test_listener = true, + .disabled = false, +}; + +FIXTURE_VARIANT_ADD(scm_rights, stream_listener_disabled) +{ + .name = "UNIX-STREAM ", + .type = SOCK_STREAM, + .flags = 0, + .test_listener = true, + .disabled = true, }; FIXTURE_VARIANT_ADD(scm_rights, stream_listener_oob) @@ -63,6 +104,16 @@ FIXTURE_VARIANT_ADD(scm_rights, stream_listener_oob) .type = SOCK_STREAM, .flags = MSG_OOB, .test_listener = true, + .disabled = false, +}; + +FIXTURE_VARIANT_ADD(scm_rights, stream_listener_oob_disabled) +{ + .name = "UNIX-STREAM ", + .type = SOCK_STREAM, + .flags = MSG_OOB, + .test_listener = true, + .disabled = true, }; static int count_sockets(struct __test_metadata *_metadata, @@ -105,6 +156,9 @@ FIXTURE_SETUP(scm_rights) ret = unshare(CLONE_NEWNET); ASSERT_EQ(0, ret); + if (variant->disabled) + return; + ret = count_sockets(_metadata, variant); ASSERT_EQ(0, ret); } @@ -113,6 +167,9 @@ FIXTURE_TEARDOWN(scm_rights) { int ret; + if (variant->disabled) + return; + sleep(1); ret = count_sockets(_metadata, variant); @@ -121,6 +178,7 @@ FIXTURE_TEARDOWN(scm_rights) static void create_listeners(struct __test_metadata *_metadata, FIXTURE_DATA(scm_rights) *self, + const FIXTURE_VARIANT(scm_rights) *variant, int n) { struct sockaddr_un addr = { @@ -140,6 +198,12 @@ static void create_listeners(struct __test_metadata *_metadata, ret = listen(self->fd[i], -1); ASSERT_EQ(0, ret); + if (variant->disabled) { + ret = setsockopt(self->fd[i], SOL_SOCKET, SO_PASSRIGHTS, + &(int){0}, sizeof(int)); + ASSERT_EQ(0, ret); + } + addrlen = sizeof(addr); ret = getsockname(self->fd[i], (struct sockaddr *)&addr, &addrlen); ASSERT_EQ(0, ret); @@ -164,6 +228,12 @@ static void create_socketpairs(struct __test_metadata *_metadata, for (i = 0; i < n * 2; i += 2) { ret = socketpair(AF_UNIX, variant->type, 0, self->fd + i); ASSERT_EQ(0, ret); + + if (variant->disabled) { + ret = setsockopt(self->fd[i], SOL_SOCKET, SO_PASSRIGHTS, + &(int){0}, sizeof(int)); + ASSERT_EQ(0, ret); + } } } @@ -175,7 +245,7 @@ static void __create_sockets(struct __test_metadata *_metadata, ASSERT_LE(n * 2, sizeof(self->fd) / sizeof(self->fd[0])); if (variant->test_listener) - create_listeners(_metadata, self, n); + create_listeners(_metadata, self, variant, n); else create_socketpairs(_metadata, self, variant, n); } @@ -201,20 +271,11 @@ void __send_fd(struct __test_metadata *_metadata, { #define MSG "x" #define MSGLEN 1 - struct { - struct cmsghdr cmsghdr; - int fd[2]; - } cmsg = { - .cmsghdr = { - .cmsg_len = CMSG_LEN(sizeof(cmsg.fd)), - .cmsg_level = SOL_SOCKET, - .cmsg_type = SCM_RIGHTS, - }, - .fd = { - self->fd[inflight * 2], - self->fd[inflight * 2], - }, + int fds[2] = { + self->fd[inflight * 2], + self->fd[inflight * 2], }; + char cmsg_buf[CMSG_SPACE(sizeof(fds))]; struct iovec iov = { .iov_base = MSG, .iov_len = MSGLEN, @@ -224,13 +285,26 @@ void __send_fd(struct __test_metadata *_metadata, .msg_namelen = 0, .msg_iov = &iov, .msg_iovlen = 1, - .msg_control = &cmsg, - .msg_controllen = CMSG_SPACE(sizeof(cmsg.fd)), + .msg_control = cmsg_buf, + .msg_controllen = sizeof(cmsg_buf), }; + struct cmsghdr *cmsg; int ret; + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(sizeof(fds)); + memcpy(CMSG_DATA(cmsg), fds, sizeof(fds)); + ret = sendmsg(self->fd[receiver * 2 + 1], &msg, variant->flags); - ASSERT_EQ(MSGLEN, ret); + + if (variant->disabled) { + ASSERT_EQ(-1, ret); + ASSERT_EQ(-EPERM, -errno); + } else { + ASSERT_EQ(MSGLEN, ret); + } } #define create_sockets(n) \ diff --git a/tools/testing/selftests/net/af_unix/so_peek_off.c b/tools/testing/selftests/net/af_unix/so_peek_off.c new file mode 100644 index 000000000000..f6466a717f49 --- /dev/null +++ b/tools/testing/selftests/net/af_unix/so_peek_off.c @@ -0,0 +1,208 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2025 Google LLC */ + +#include <stdlib.h> +#include <unistd.h> + +#include <sys/socket.h> + +#include "../../kselftest_harness.h" + +FIXTURE(so_peek_off) +{ + int fd[2]; /* 0: sender, 1: receiver */ +}; + +FIXTURE_VARIANT(so_peek_off) +{ + int type; +}; + +FIXTURE_VARIANT_ADD(so_peek_off, stream) +{ + .type = SOCK_STREAM, +}; + +FIXTURE_VARIANT_ADD(so_peek_off, dgram) +{ + .type = SOCK_DGRAM, +}; + +FIXTURE_VARIANT_ADD(so_peek_off, seqpacket) +{ + .type = SOCK_SEQPACKET, +}; + +FIXTURE_SETUP(so_peek_off) +{ + struct timeval timeout = { + .tv_sec = 5, + .tv_usec = 0, + }; + int ret; + + ret = socketpair(AF_UNIX, variant->type, 0, self->fd); + ASSERT_EQ(0, ret); + + ret = setsockopt(self->fd[1], SOL_SOCKET, SO_RCVTIMEO_NEW, + &timeout, sizeof(timeout)); + ASSERT_EQ(0, ret); + + ret = setsockopt(self->fd[1], SOL_SOCKET, SO_PEEK_OFF, + &(int){0}, sizeof(int)); + ASSERT_EQ(0, ret); +} + +FIXTURE_TEARDOWN(so_peek_off) +{ + close_range(self->fd[0], self->fd[1], 0); +} + +#define sendeq(fd, str, flags) \ + do { \ + int bytes, len = strlen(str); \ + \ + bytes = send(fd, str, len, flags); \ + ASSERT_EQ(len, bytes); \ + } while (0) + +#define recveq(fd, str, buflen, flags) \ + do { \ + char buf[(buflen) + 1] = {}; \ + int bytes; \ + \ + bytes = recv(fd, buf, buflen, flags); \ + ASSERT_NE(-1, bytes); \ + ASSERT_STREQ(str, buf); \ + } while (0) + +#define peekoffeq(fd, expected) \ + do { \ + socklen_t optlen = sizeof(int); \ + int off = -1; \ + int ret; \ + \ + ret = getsockopt(fd, SOL_SOCKET, SO_PEEK_OFF, \ + &off, &optlen); \ + ASSERT_EQ(0, ret); \ + ASSERT_EQ((socklen_t)sizeof(off), optlen); \ + ASSERT_EQ(expected, off); \ + } while (0) + +#define async \ + for (pid_t pid = (pid = fork(), \ + pid < 0 ? \ + __TH_LOG("Failed to start async {}"), \ + _metadata->exit_code = KSFT_FAIL, \ + __bail(1, _metadata), \ + 0xdead : \ + pid); \ + !pid; exit(0)) + +TEST_F(so_peek_off, single_chunk) +{ + sendeq(self->fd[0], "aaaabbbb", 0); + + recveq(self->fd[1], "aaaa", 4, MSG_PEEK); + peekoffeq(self->fd[1], 4); + recveq(self->fd[1], "bbbb", 100, MSG_PEEK); + peekoffeq(self->fd[1], 8); + + recveq(self->fd[1], "aaaabbbb", 8, 0); + peekoffeq(self->fd[1], 0); +} + +TEST_F(so_peek_off, two_chunks) +{ + sendeq(self->fd[0], "aaaa", 0); + sendeq(self->fd[0], "bbbb", 0); + + recveq(self->fd[1], "aaaa", 4, MSG_PEEK); + peekoffeq(self->fd[1], 4); + recveq(self->fd[1], "bbbb", 100, MSG_PEEK); + peekoffeq(self->fd[1], 8); + + recveq(self->fd[1], "aaaa", 4, 0); + recveq(self->fd[1], "bbbb", 4, 0); + peekoffeq(self->fd[1], 0); +} + +TEST_F(so_peek_off, two_chunks_blocking) +{ + async { + usleep(1000); + sendeq(self->fd[0], "aaaa", 0); + } + + recveq(self->fd[1], "aaaa", 4, MSG_PEEK); + peekoffeq(self->fd[1], 4); + + async { + usleep(1000); + sendeq(self->fd[0], "bbbb", 0); + } + + /* goto again; -> goto redo; in unix_stream_read_generic(). */ + recveq(self->fd[1], "bbbb", 100, MSG_PEEK); + peekoffeq(self->fd[1], 8); + + recveq(self->fd[1], "aaaa", 4, 0); + recveq(self->fd[1], "bbbb", 4, 0); + peekoffeq(self->fd[1], 0); +} + +TEST_F(so_peek_off, two_chunks_overlap) +{ + sendeq(self->fd[0], "aaaa", 0); + recveq(self->fd[1], "aa", 2, MSG_PEEK); + peekoffeq(self->fd[1], 2); + + sendeq(self->fd[0], "bbbb", 0); + + if (variant->type == SOCK_STREAM) { + /* SOCK_STREAM tries to fill the buffer. */ + recveq(self->fd[1], "aabb", 4, MSG_PEEK); + peekoffeq(self->fd[1], 6); + recveq(self->fd[1], "bb", 100, MSG_PEEK); + peekoffeq(self->fd[1], 8); + } else { + /* SOCK_DGRAM and SOCK_SEQPACKET returns at the skb boundary. */ + recveq(self->fd[1], "aa", 100, MSG_PEEK); + peekoffeq(self->fd[1], 4); + recveq(self->fd[1], "bbbb", 100, MSG_PEEK); + peekoffeq(self->fd[1], 8); + } + + recveq(self->fd[1], "aaaa", 4, 0); + recveq(self->fd[1], "bbbb", 4, 0); + peekoffeq(self->fd[1], 0); +} + +TEST_F(so_peek_off, two_chunks_overlap_blocking) +{ + async { + usleep(1000); + sendeq(self->fd[0], "aaaa", 0); + } + + recveq(self->fd[1], "aa", 2, MSG_PEEK); + peekoffeq(self->fd[1], 2); + + async { + usleep(1000); + sendeq(self->fd[0], "bbbb", 0); + } + + /* Even SOCK_STREAM does not wait if at least one byte is read. */ + recveq(self->fd[1], "aa", 100, MSG_PEEK); + peekoffeq(self->fd[1], 4); + + recveq(self->fd[1], "bbbb", 100, MSG_PEEK); + peekoffeq(self->fd[1], 8); + + recveq(self->fd[1], "aaaa", 4, 0); + recveq(self->fd[1], "bbbb", 4, 0); + peekoffeq(self->fd[1], 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/af_unix/unix_connect.c b/tools/testing/selftests/net/af_unix/unix_connect.c index d799fd8f5c7c..870ca96fa8ea 100644 --- a/tools/testing/selftests/net/af_unix/unix_connect.c +++ b/tools/testing/selftests/net/af_unix/unix_connect.c @@ -10,7 +10,7 @@ #include <sys/socket.h> #include <sys/un.h> -#include "../../kselftest_harness.h" +#include "kselftest_harness.h" FIXTURE(unix_connect) { diff --git a/tools/testing/selftests/net/af_unix/unix_connreset.c b/tools/testing/selftests/net/af_unix/unix_connreset.c new file mode 100644 index 000000000000..08c1de8f5a98 --- /dev/null +++ b/tools/testing/selftests/net/af_unix/unix_connreset.c @@ -0,0 +1,180 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Selftest for AF_UNIX socket close and ECONNRESET behaviour. + * + * This test verifies: + * 1. SOCK_STREAM returns EOF when the peer closes normally. + * 2. SOCK_STREAM returns ECONNRESET if peer closes with unread data. + * 3. SOCK_SEQPACKET returns EOF when the peer closes normally. + * 4. SOCK_SEQPACKET returns ECONNRESET if the peer closes with unread data. + * 5. SOCK_DGRAM does not return ECONNRESET when the peer closes. + * + * These tests document the intended Linux behaviour. + * + */ + +#define _GNU_SOURCE +#include <string.h> +#include <fcntl.h> +#include <unistd.h> +#include <errno.h> +#include <sys/socket.h> +#include <sys/un.h> +#include "../../kselftest_harness.h" + +#define SOCK_PATH "/tmp/af_unix_connreset.sock" + +static void remove_socket_file(void) +{ + unlink(SOCK_PATH); +} + +FIXTURE(unix_sock) +{ + int server; + int client; + int child; +}; + +FIXTURE_VARIANT(unix_sock) +{ + int socket_type; + const char *name; +}; + +FIXTURE_VARIANT_ADD(unix_sock, stream) { + .socket_type = SOCK_STREAM, + .name = "SOCK_STREAM", +}; + +FIXTURE_VARIANT_ADD(unix_sock, dgram) { + .socket_type = SOCK_DGRAM, + .name = "SOCK_DGRAM", +}; + +FIXTURE_VARIANT_ADD(unix_sock, seqpacket) { + .socket_type = SOCK_SEQPACKET, + .name = "SOCK_SEQPACKET", +}; + +FIXTURE_SETUP(unix_sock) +{ + struct sockaddr_un addr = {}; + int err; + + addr.sun_family = AF_UNIX; + strcpy(addr.sun_path, SOCK_PATH); + remove_socket_file(); + + self->server = socket(AF_UNIX, variant->socket_type, 0); + ASSERT_LT(-1, self->server); + + err = bind(self->server, (struct sockaddr *)&addr, sizeof(addr)); + ASSERT_EQ(0, err); + + if (variant->socket_type == SOCK_STREAM || + variant->socket_type == SOCK_SEQPACKET) { + err = listen(self->server, 1); + ASSERT_EQ(0, err); + } + + self->client = socket(AF_UNIX, variant->socket_type | SOCK_NONBLOCK, 0); + ASSERT_LT(-1, self->client); + + err = connect(self->client, (struct sockaddr *)&addr, sizeof(addr)); + ASSERT_EQ(0, err); +} + +FIXTURE_TEARDOWN(unix_sock) +{ + if (variant->socket_type == SOCK_STREAM || + variant->socket_type == SOCK_SEQPACKET) + close(self->child); + + close(self->client); + close(self->server); + remove_socket_file(); +} + +/* Test 1: peer closes normally */ +TEST_F(unix_sock, eof) +{ + char buf[16] = {}; + ssize_t n; + + if (variant->socket_type == SOCK_STREAM || + variant->socket_type == SOCK_SEQPACKET) { + self->child = accept(self->server, NULL, NULL); + ASSERT_LT(-1, self->child); + + close(self->child); + } else { + close(self->server); + } + + n = recv(self->client, buf, sizeof(buf), 0); + + if (variant->socket_type == SOCK_STREAM || + variant->socket_type == SOCK_SEQPACKET) { + ASSERT_EQ(0, n); + } else { + ASSERT_EQ(-1, n); + ASSERT_EQ(EAGAIN, errno); + } +} + +/* Test 2: peer closes with unread data */ +TEST_F(unix_sock, reset_unread_behavior) +{ + char buf[16] = {}; + ssize_t n; + + /* Send data that will remain unread */ + send(self->client, "hello", 5, 0); + + if (variant->socket_type == SOCK_DGRAM) { + /* No real connection, just close the server */ + close(self->server); + } else { + self->child = accept(self->server, NULL, NULL); + ASSERT_LT(-1, self->child); + + /* Peer closes before client reads */ + close(self->child); + } + + n = recv(self->client, buf, sizeof(buf), 0); + ASSERT_EQ(-1, n); + + if (variant->socket_type == SOCK_STREAM || + variant->socket_type == SOCK_SEQPACKET) { + ASSERT_EQ(ECONNRESET, errno); + } else { + ASSERT_EQ(EAGAIN, errno); + } +} + +/* Test 3: closing unaccepted (embryo) server socket should reset client. */ +TEST_F(unix_sock, reset_closed_embryo) +{ + char buf[16] = {}; + ssize_t n; + + if (variant->socket_type == SOCK_DGRAM) { + snprintf(_metadata->results->reason, + sizeof(_metadata->results->reason), + "Test only applies to SOCK_STREAM and SOCK_SEQPACKET"); + exit(KSFT_XFAIL); + } + + /* Close server without accept()ing */ + close(self->server); + + n = recv(self->client, buf, sizeof(buf), 0); + + ASSERT_EQ(-1, n); + ASSERT_EQ(ECONNRESET, errno); +} + +TEST_HARNESS_MAIN + diff --git a/tools/testing/selftests/net/amt.sh b/tools/testing/selftests/net/amt.sh index d458b45c775b..663744305e52 100755 --- a/tools/testing/selftests/net/amt.sh +++ b/tools/testing/selftests/net/amt.sh @@ -73,6 +73,8 @@ # +------------------------+ #============================================================================== +source lib.sh + readonly LISTENER=$(mktemp -u listener-XXXXXXXX) readonly GATEWAY=$(mktemp -u gateway-XXXXXXXX) readonly RELAY=$(mktemp -u relay-XXXXXXXX) @@ -194,15 +196,21 @@ test_remote_ip() send_mcast_torture4() { - ip netns exec "${SOURCE}" bash -c \ - 'cat /dev/urandom | head -c 1G | nc -w 1 -u 239.0.0.1 4001' + for i in `seq 10`; do + ip netns exec "${SOURCE}" bash -c \ + 'cat /dev/urandom | head -c 100M | nc -w 1 -u 239.0.0.1 4001' + echo -n "." + done } send_mcast_torture6() { - ip netns exec "${SOURCE}" bash -c \ - 'cat /dev/urandom | head -c 1G | nc -w 1 -u ff0e::5:6 6001' + for i in `seq 10`; do + ip netns exec "${SOURCE}" bash -c \ + 'cat /dev/urandom | head -c 100M | nc -w 1 -u ff0e::5:6 6001' + echo -n "." + done } check_features() @@ -240,14 +248,15 @@ test_ipv6_forward() send_mcast4() { - sleep 2 + sleep 5 + wait_local_port_listen ${LISTENER} 4000 udp ip netns exec "${SOURCE}" bash -c \ 'printf "%s %128s" 172.17.0.2 | nc -w 1 -u 239.0.0.1 4000' & } send_mcast6() { - sleep 2 + wait_local_port_listen ${LISTENER} 6000 udp ip netns exec "${SOURCE}" bash -c \ 'printf "%s %128s" 2001:db8:3::2 | nc -w 1 -u ff0e::5:6 6000' & } @@ -278,10 +287,12 @@ wait $pid || err=$? if [ $err -eq 1 ]; then ERR=1 fi +printf "TEST: %-50s" "IPv4 amt traffic forwarding torture" send_mcast_torture4 -printf "TEST: %-60s [ OK ]\n" "IPv4 amt traffic forwarding torture" +printf " [ OK ]\n" +printf "TEST: %-50s" "IPv6 amt traffic forwarding torture" send_mcast_torture6 -printf "TEST: %-60s [ OK ]\n" "IPv6 amt traffic forwarding torture" +printf " [ OK ]\n" sleep 5 if [ "${ERR}" -eq 1 ]; then echo "Some tests failed." >&2 diff --git a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh index 92eb880c52f2..00758f00efbf 100755 --- a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh +++ b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh @@ -75,7 +75,7 @@ setup_v4() { ip neigh get $V4_ADDR1 dev veth0 >/dev/null 2>&1 if [ $? -ne 0 ]; then cleanup_v4 - echo "failed" + echo "failed; is the system using MACAddressPolicy=persistent ?" exit 1 fi diff --git a/tools/testing/selftests/net/bareudp.sh b/tools/testing/selftests/net/bareudp.sh index f366cadbc5e8..d9e5b967f815 100755 --- a/tools/testing/selftests/net/bareudp.sh +++ b/tools/testing/selftests/net/bareudp.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # Test various bareudp tunnel configurations. @@ -106,26 +106,16 @@ # | | # +-----------------------------------------------------------------------+ +. ./lib.sh + ERR=4 # Return 4 by default, which is the SKIP code for kselftest PING6="ping" PAUSE_ON_FAIL="no" -readonly NS0=$(mktemp -u ns0-XXXXXXXX) -readonly NS1=$(mktemp -u ns1-XXXXXXXX) -readonly NS2=$(mktemp -u ns2-XXXXXXXX) -readonly NS3=$(mktemp -u ns3-XXXXXXXX) - # Exit the script after having removed the network namespaces it created -# -# Parameters: -# -# * The list of network namespaces to delete before exiting. -# exit_cleanup() { - for ns in "$@"; do - ip netns delete "${ns}" 2>/dev/null || true - done + cleanup_all_ns if [ "${ERR}" -eq 4 ]; then echo "Error: Setting up the testing environment failed." >&2 @@ -140,17 +130,7 @@ exit_cleanup() # namespaces created by this script are deleted. create_namespaces() { - ip netns add "${NS0}" || exit_cleanup - ip netns add "${NS1}" || exit_cleanup "${NS0}" - ip netns add "${NS2}" || exit_cleanup "${NS0}" "${NS1}" - ip netns add "${NS3}" || exit_cleanup "${NS0}" "${NS1}" "${NS2}" -} - -# The trap function handler -# -exit_cleanup_all() -{ - exit_cleanup "${NS0}" "${NS1}" "${NS2}" "${NS3}" + setup_ns NS0 NS1 NS2 NS3 || exit_cleanup } # Configure a network interface using a host route @@ -188,10 +168,6 @@ iface_config() # setup_underlay() { - for ns in "${NS0}" "${NS1}" "${NS2}" "${NS3}"; do - ip -netns "${ns}" link set dev lo up - done; - ip link add name veth01 netns "${NS0}" type veth peer name veth10 netns "${NS1}" ip link add name veth12 netns "${NS1}" type veth peer name veth21 netns "${NS2}" ip link add name veth23 netns "${NS2}" type veth peer name veth32 netns "${NS3}" @@ -234,14 +210,6 @@ setup_overlay_ipv4() ip netns exec "${NS2}" sysctl -qw net.ipv4.ip_forward=1 ip -netns "${NS1}" route add 192.0.2.100/32 via 192.0.2.10 ip -netns "${NS2}" route add 192.0.2.103/32 via 192.0.2.33 - - # The intermediate namespaces don't have routes for the reverse path, - # as it will be handled by tc. So we need to ensure that rp_filter is - # not going to block the traffic. - ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.all.rp_filter=0 - ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.all.rp_filter=0 - ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.default.rp_filter=0 - ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.default.rp_filter=0 } setup_overlay_ipv6() @@ -521,13 +489,10 @@ done check_features -# Create namespaces before setting up the exit trap. -# Otherwise, exit_cleanup_all() could delete namespaces that were not created -# by this script. -create_namespaces - set -e -trap exit_cleanup_all EXIT +trap exit_cleanup EXIT + +create_namespaces setup_underlay setup_overlay_ipv4 diff --git a/tools/testing/selftests/net/bench/Makefile b/tools/testing/selftests/net/bench/Makefile new file mode 100644 index 000000000000..2546c45e42f7 --- /dev/null +++ b/tools/testing/selftests/net/bench/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 + +TEST_GEN_MODS_DIR := page_pool + +TEST_PROGS += test_bench_page_pool.sh + +include ../../lib.mk diff --git a/tools/testing/selftests/net/bench/page_pool/Makefile b/tools/testing/selftests/net/bench/page_pool/Makefile new file mode 100644 index 000000000000..0549a16ba275 --- /dev/null +++ b/tools/testing/selftests/net/bench/page_pool/Makefile @@ -0,0 +1,17 @@ +BENCH_PAGE_POOL_SIMPLE_TEST_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST))))) +KDIR ?= /lib/modules/$(shell uname -r)/build + +ifeq ($(V),1) +Q = +else +Q = @ +endif + +obj-m += bench_page_pool.o +bench_page_pool-y += bench_page_pool_simple.o time_bench.o + +all: + +$(Q)make -C $(KDIR) M=$(BENCH_PAGE_POOL_SIMPLE_TEST_DIR) modules + +clean: + +$(Q)make -C $(KDIR) M=$(BENCH_PAGE_POOL_SIMPLE_TEST_DIR) clean diff --git a/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c b/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c new file mode 100644 index 000000000000..cb6468adbda4 --- /dev/null +++ b/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c @@ -0,0 +1,267 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Benchmark module for page_pool. + * + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/interrupt.h> +#include <linux/limits.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <net/page_pool/helpers.h> + +#include "time_bench.h" + +static int verbose = 1; +#define MY_POOL_SIZE 1024 + +/* Makes tests selectable. Useful for perf-record to analyze a single test. + * Hint: Bash shells support writing binary number like: $((2#101010) + * + * # modprobe bench_page_pool_simple run_flags=$((2#100)) + */ +static unsigned long run_flags = 0xFFFFFFFF; +module_param(run_flags, ulong, 0); +MODULE_PARM_DESC(run_flags, "Limit which bench test that runs"); + +/* Count the bit number from the enum */ +enum benchmark_bit { + bit_run_bench_baseline, + bit_run_bench_no_softirq01, + bit_run_bench_no_softirq02, + bit_run_bench_no_softirq03, +}; + +#define bit(b) (1 << (b)) +#define enabled(b) ((run_flags & (bit(b)))) + +/* notice time_bench is limited to U32_MAX nr loops */ +static unsigned long loops = 10000000; +module_param(loops, ulong, 0); +MODULE_PARM_DESC(loops, "Specify loops bench will run"); + +/* Timing at the nanosec level, we need to know the overhead + * introduced by the for loop itself + */ +static int time_bench_for_loop(struct time_bench_record *rec, void *data) +{ + uint64_t loops_cnt = 0; + int i; + + time_bench_start(rec); + /** Loop to measure **/ + for (i = 0; i < rec->loops; i++) { + loops_cnt++; + barrier(); /* avoid compiler to optimize this loop */ + } + time_bench_stop(rec, loops_cnt); + return loops_cnt; +} + +static int time_bench_atomic_inc(struct time_bench_record *rec, void *data) +{ + uint64_t loops_cnt = 0; + atomic_t cnt; + int i; + + atomic_set(&cnt, 0); + + time_bench_start(rec); + /** Loop to measure **/ + for (i = 0; i < rec->loops; i++) { + atomic_inc(&cnt); + barrier(); /* avoid compiler to optimize this loop */ + } + loops_cnt = atomic_read(&cnt); + time_bench_stop(rec, loops_cnt); + return loops_cnt; +} + +/* The ptr_ping in page_pool uses a spinlock. We need to know the minimum + * overhead of taking+releasing a spinlock, to know the cycles that can be saved + * by e.g. amortizing this via bulking. + */ +static int time_bench_lock(struct time_bench_record *rec, void *data) +{ + uint64_t loops_cnt = 0; + spinlock_t lock; + int i; + + spin_lock_init(&lock); + + time_bench_start(rec); + /** Loop to measure **/ + for (i = 0; i < rec->loops; i++) { + spin_lock(&lock); + loops_cnt++; + barrier(); /* avoid compiler to optimize this loop */ + spin_unlock(&lock); + } + time_bench_stop(rec, loops_cnt); + return loops_cnt; +} + +/* Helper for filling some page's into ptr_ring */ +static void pp_fill_ptr_ring(struct page_pool *pp, int elems) +{ + /* GFP_ATOMIC needed when under run softirq */ + gfp_t gfp_mask = GFP_ATOMIC; + struct page **array; + int i; + + array = kcalloc(elems, sizeof(struct page *), gfp_mask); + + for (i = 0; i < elems; i++) + array[i] = page_pool_alloc_pages(pp, gfp_mask); + for (i = 0; i < elems; i++) + page_pool_put_page(pp, array[i], -1, false); + + kfree(array); +} + +enum test_type { type_fast_path, type_ptr_ring, type_page_allocator }; + +/* Depends on compile optimizing this function */ +static int time_bench_page_pool(struct time_bench_record *rec, void *data, + enum test_type type, const char *func) +{ + uint64_t loops_cnt = 0; + gfp_t gfp_mask = GFP_ATOMIC; /* GFP_ATOMIC is not really needed */ + int i, err; + + struct page_pool *pp; + struct page *page; + + struct page_pool_params pp_params = { + .order = 0, + .flags = 0, + .pool_size = MY_POOL_SIZE, + .nid = NUMA_NO_NODE, + .dev = NULL, /* Only use for DMA mapping */ + .dma_dir = DMA_BIDIRECTIONAL, + }; + + pp = page_pool_create(&pp_params); + if (IS_ERR(pp)) { + err = PTR_ERR(pp); + pr_warn("%s: Error(%d) creating page_pool\n", func, err); + goto out; + } + pp_fill_ptr_ring(pp, 64); + + if (in_serving_softirq()) + pr_warn("%s(): in_serving_softirq fast-path\n", func); + else + pr_warn("%s(): Cannot use page_pool fast-path\n", func); + + time_bench_start(rec); + /** Loop to measure **/ + for (i = 0; i < rec->loops; i++) { + /* Common fast-path alloc that depend on in_serving_softirq() */ + page = page_pool_alloc_pages(pp, gfp_mask); + if (!page) + break; + loops_cnt++; + barrier(); /* avoid compiler to optimize this loop */ + + /* The benchmarks purpose it to test different return paths. + * Compiler should inline optimize other function calls out + */ + if (type == type_fast_path) { + /* Fast-path recycling e.g. XDP_DROP use-case */ + page_pool_recycle_direct(pp, page); + + } else if (type == type_ptr_ring) { + /* Normal return path */ + page_pool_put_page(pp, page, -1, false); + + } else if (type == type_page_allocator) { + /* Test if not pages are recycled, but instead + * returned back into systems page allocator + */ + get_page(page); /* cause no-recycling */ + page_pool_put_page(pp, page, -1, false); + put_page(page); + } else { + BUILD_BUG(); + } + } + time_bench_stop(rec, loops_cnt); +out: + page_pool_destroy(pp); + return loops_cnt; +} + +static int time_bench_page_pool01_fast_path(struct time_bench_record *rec, + void *data) +{ + return time_bench_page_pool(rec, data, type_fast_path, __func__); +} + +static int time_bench_page_pool02_ptr_ring(struct time_bench_record *rec, + void *data) +{ + return time_bench_page_pool(rec, data, type_ptr_ring, __func__); +} + +static int time_bench_page_pool03_slow(struct time_bench_record *rec, + void *data) +{ + return time_bench_page_pool(rec, data, type_page_allocator, __func__); +} + +static int run_benchmark_tests(void) +{ + uint32_t nr_loops = loops; + + /* Baseline tests */ + if (enabled(bit_run_bench_baseline)) { + time_bench_loop(nr_loops * 10, 0, "for_loop", NULL, + time_bench_for_loop); + time_bench_loop(nr_loops * 10, 0, "atomic_inc", NULL, + time_bench_atomic_inc); + time_bench_loop(nr_loops, 0, "lock", NULL, time_bench_lock); + } + + /* This test cannot activate correct code path, due to no-softirq ctx */ + if (enabled(bit_run_bench_no_softirq01)) + time_bench_loop(nr_loops, 0, "no-softirq-page_pool01", NULL, + time_bench_page_pool01_fast_path); + if (enabled(bit_run_bench_no_softirq02)) + time_bench_loop(nr_loops, 0, "no-softirq-page_pool02", NULL, + time_bench_page_pool02_ptr_ring); + if (enabled(bit_run_bench_no_softirq03)) + time_bench_loop(nr_loops, 0, "no-softirq-page_pool03", NULL, + time_bench_page_pool03_slow); + + return 0; +} + +static int __init bench_page_pool_simple_module_init(void) +{ + if (verbose) + pr_info("Loaded\n"); + + if (loops > U32_MAX) { + pr_err("Module param loops(%lu) exceeded U32_MAX(%u)\n", loops, + U32_MAX); + return -ECHRNG; + } + + run_benchmark_tests(); + + return 0; +} +module_init(bench_page_pool_simple_module_init); + +static void __exit bench_page_pool_simple_module_exit(void) +{ + if (verbose) + pr_info("Unloaded\n"); +} +module_exit(bench_page_pool_simple_module_exit); + +MODULE_DESCRIPTION("Benchmark of page_pool simple cases"); +MODULE_AUTHOR("Jesper Dangaard Brouer <netoptimizer@brouer.com>"); +MODULE_LICENSE("GPL"); diff --git a/tools/testing/selftests/net/bench/page_pool/time_bench.c b/tools/testing/selftests/net/bench/page_pool/time_bench.c new file mode 100644 index 000000000000..073bb36ec5f2 --- /dev/null +++ b/tools/testing/selftests/net/bench/page_pool/time_bench.c @@ -0,0 +1,394 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Benchmarking code execution time inside the kernel + * + * Copyright (C) 2014, Red Hat, Inc., Jesper Dangaard Brouer + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/time.h> + +#include <linux/perf_event.h> /* perf_event_create_kernel_counter() */ + +/* For concurrency testing */ +#include <linux/completion.h> +#include <linux/sched.h> +#include <linux/workqueue.h> +#include <linux/kthread.h> + +#include "time_bench.h" + +static int verbose = 1; + +/** TSC (Time-Stamp Counter) based ** + * See: linux/time_bench.h + * tsc_start_clock() and tsc_stop_clock() + */ + +/** Wall-clock based ** + */ + +/** PMU (Performance Monitor Unit) based ** + */ +#define PERF_FORMAT \ + (PERF_FORMAT_GROUP | PERF_FORMAT_ID | PERF_FORMAT_TOTAL_TIME_ENABLED | \ + PERF_FORMAT_TOTAL_TIME_RUNNING) + +struct raw_perf_event { + uint64_t config; /* event */ + uint64_t config1; /* umask */ + struct perf_event *save; + char *desc; +}; + +/* if HT is enable a maximum of 4 events (5 if one is instructions + * retired can be specified, if HT is disabled a maximum of 8 (9 if + * one is instructions retired) can be specified. + * + * From Table 19-1. Architectural Performance Events + * Architectures Software Developer’s Manual Volume 3: System Programming + * Guide + */ +struct raw_perf_event perf_events[] = { + { 0x3c, 0x00, NULL, "Unhalted CPU Cycles" }, + { 0xc0, 0x00, NULL, "Instruction Retired" } +}; + +#define NUM_EVTS (ARRAY_SIZE(perf_events)) + +/* WARNING: PMU config is currently broken! + */ +bool time_bench_PMU_config(bool enable) +{ + int i; + struct perf_event_attr perf_conf; + struct perf_event *perf_event; + int cpu; + + preempt_disable(); + cpu = smp_processor_id(); + pr_info("DEBUG: cpu:%d\n", cpu); + preempt_enable(); + + memset(&perf_conf, 0, sizeof(struct perf_event_attr)); + perf_conf.type = PERF_TYPE_RAW; + perf_conf.size = sizeof(struct perf_event_attr); + perf_conf.read_format = PERF_FORMAT; + perf_conf.pinned = 1; + perf_conf.exclude_user = 1; /* No userspace events */ + perf_conf.exclude_kernel = 0; /* Only kernel events */ + + for (i = 0; i < NUM_EVTS; i++) { + perf_conf.disabled = enable; + //perf_conf.disabled = (i == 0) ? 1 : 0; + perf_conf.config = perf_events[i].config; + perf_conf.config1 = perf_events[i].config1; + if (verbose) + pr_info("%s() enable PMU counter: %s\n", + __func__, perf_events[i].desc); + perf_event = perf_event_create_kernel_counter(&perf_conf, cpu, + NULL /* task */, + NULL /* overflow_handler*/, + NULL /* context */); + if (perf_event) { + perf_events[i].save = perf_event; + pr_info("%s():DEBUG perf_event success\n", __func__); + + perf_event_enable(perf_event); + } else { + pr_info("%s():DEBUG perf_event is NULL\n", __func__); + } + } + + return true; +} + +/** Generic functions ** + */ + +/* Calculate stats, store results in record */ +bool time_bench_calc_stats(struct time_bench_record *rec) +{ +#define NANOSEC_PER_SEC 1000000000 /* 10^9 */ + uint64_t ns_per_call_tmp_rem = 0; + uint32_t ns_per_call_remainder = 0; + uint64_t pmc_ipc_tmp_rem = 0; + uint32_t pmc_ipc_remainder = 0; + uint32_t pmc_ipc_div = 0; + uint32_t invoked_cnt_precision = 0; + uint32_t invoked_cnt = 0; /* 32-bit due to div_u64_rem() */ + + if (rec->flags & TIME_BENCH_LOOP) { + if (rec->invoked_cnt < 1000) { + pr_err("ERR: need more(>1000) loops(%llu) for timing\n", + rec->invoked_cnt); + return false; + } + if (rec->invoked_cnt > ((1ULL << 32) - 1)) { + /* div_u64_rem() can only support div with 32bit*/ + pr_err("ERR: Invoke cnt(%llu) too big overflow 32bit\n", + rec->invoked_cnt); + return false; + } + invoked_cnt = (uint32_t)rec->invoked_cnt; + } + + /* TSC (Time-Stamp Counter) records */ + if (rec->flags & TIME_BENCH_TSC) { + rec->tsc_interval = rec->tsc_stop - rec->tsc_start; + if (rec->tsc_interval == 0) { + pr_err("ABORT: timing took ZERO TSC time\n"); + return false; + } + /* Calculate stats */ + if (rec->flags & TIME_BENCH_LOOP) + rec->tsc_cycles = rec->tsc_interval / invoked_cnt; + else + rec->tsc_cycles = rec->tsc_interval; + } + + /* Wall-clock time calc */ + if (rec->flags & TIME_BENCH_WALLCLOCK) { + rec->time_start = rec->ts_start.tv_nsec + + (NANOSEC_PER_SEC * rec->ts_start.tv_sec); + rec->time_stop = rec->ts_stop.tv_nsec + + (NANOSEC_PER_SEC * rec->ts_stop.tv_sec); + rec->time_interval = rec->time_stop - rec->time_start; + if (rec->time_interval == 0) { + pr_err("ABORT: timing took ZERO wallclock time\n"); + return false; + } + /* Calculate stats */ + /*** Division in kernel it tricky ***/ + /* Orig: time_sec = (time_interval / NANOSEC_PER_SEC); */ + /* remainder only correct because NANOSEC_PER_SEC is 10^9 */ + rec->time_sec = div_u64_rem(rec->time_interval, NANOSEC_PER_SEC, + &rec->time_sec_remainder); + //TODO: use existing struct timespec records instead of div? + + if (rec->flags & TIME_BENCH_LOOP) { + /*** Division in kernel it tricky ***/ + /* Orig: ns = ((double)time_interval / invoked_cnt); */ + /* First get quotient */ + rec->ns_per_call_quotient = + div_u64_rem(rec->time_interval, invoked_cnt, + &ns_per_call_remainder); + /* Now get decimals .xxx precision (incorrect roundup)*/ + ns_per_call_tmp_rem = ns_per_call_remainder; + invoked_cnt_precision = invoked_cnt / 1000; + if (invoked_cnt_precision > 0) { + rec->ns_per_call_decimal = + div_u64_rem(ns_per_call_tmp_rem, + invoked_cnt_precision, + &ns_per_call_remainder); + } + } + } + + /* Performance Monitor Unit (PMU) counters */ + if (rec->flags & TIME_BENCH_PMU) { + //FIXME: Overflow handling??? + rec->pmc_inst = rec->pmc_inst_stop - rec->pmc_inst_start; + rec->pmc_clk = rec->pmc_clk_stop - rec->pmc_clk_start; + + /* Calc Instruction Per Cycle (IPC) */ + /* First get quotient */ + rec->pmc_ipc_quotient = div_u64_rem(rec->pmc_inst, rec->pmc_clk, + &pmc_ipc_remainder); + /* Now get decimals .xxx precision (incorrect roundup)*/ + pmc_ipc_tmp_rem = pmc_ipc_remainder; + pmc_ipc_div = rec->pmc_clk / 1000; + if (pmc_ipc_div > 0) { + rec->pmc_ipc_decimal = div_u64_rem(pmc_ipc_tmp_rem, + pmc_ipc_div, + &pmc_ipc_remainder); + } + } + + return true; +} + +/* Generic function for invoking a loop function and calculating + * execution time stats. The function being called/timed is assumed + * to perform a tight loop, and update the timing record struct. + */ +bool time_bench_loop(uint32_t loops, int step, char *txt, void *data, + int (*func)(struct time_bench_record *record, void *data)) +{ + struct time_bench_record rec; + + /* Setup record */ + memset(&rec, 0, sizeof(rec)); /* zero func might not update all */ + rec.version_abi = 1; + rec.loops = loops; + rec.step = step; + rec.flags = (TIME_BENCH_LOOP | TIME_BENCH_TSC | TIME_BENCH_WALLCLOCK); + + /*** Loop function being timed ***/ + if (!func(&rec, data)) { + pr_err("ABORT: function being timed failed\n"); + return false; + } + + if (rec.invoked_cnt < loops) + pr_warn("WARNING: Invoke count(%llu) smaller than loops(%d)\n", + rec.invoked_cnt, loops); + + /* Calculate stats */ + time_bench_calc_stats(&rec); + + pr_info("Type:%s Per elem: %llu cycles(tsc) %llu.%03llu ns (step:%d) - (measurement period time:%llu.%09u sec time_interval:%llu) - (invoke count:%llu tsc_interval:%llu)\n", + txt, rec.tsc_cycles, rec.ns_per_call_quotient, + rec.ns_per_call_decimal, rec.step, rec.time_sec, + rec.time_sec_remainder, rec.time_interval, rec.invoked_cnt, + rec.tsc_interval); + if (rec.flags & TIME_BENCH_PMU) + pr_info("Type:%s PMU inst/clock%llu/%llu = %llu.%03llu IPC (inst per cycle)\n", + txt, rec.pmc_inst, rec.pmc_clk, rec.pmc_ipc_quotient, + rec.pmc_ipc_decimal); + return true; +} + +/* Function getting invoked by kthread */ +static int invoke_test_on_cpu_func(void *private) +{ + struct time_bench_cpu *cpu = private; + struct time_bench_sync *sync = cpu->sync; + cpumask_t newmask = CPU_MASK_NONE; + void *data = cpu->data; + + /* Restrict CPU */ + cpumask_set_cpu(cpu->rec.cpu, &newmask); + set_cpus_allowed_ptr(current, &newmask); + + /* Synchronize start of concurrency test */ + atomic_inc(&sync->nr_tests_running); + wait_for_completion(&sync->start_event); + + /* Start benchmark function */ + if (!cpu->bench_func(&cpu->rec, data)) { + pr_err("ERROR: function being timed failed on CPU:%d(%d)\n", + cpu->rec.cpu, smp_processor_id()); + } else { + if (verbose) + pr_info("SUCCESS: ran on CPU:%d(%d)\n", cpu->rec.cpu, + smp_processor_id()); + } + cpu->did_bench_run = true; + + /* End test */ + atomic_dec(&sync->nr_tests_running); + /* Wait for kthread_stop() telling us to stop */ + while (!kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + } + __set_current_state(TASK_RUNNING); + return 0; +} + +void time_bench_print_stats_cpumask(const char *desc, + struct time_bench_cpu *cpu_tasks, + const struct cpumask *mask) +{ + uint64_t average = 0; + int cpu; + int step = 0; + struct sum { + uint64_t tsc_cycles; + int records; + } sum = { 0 }; + + /* Get stats */ + for_each_cpu(cpu, mask) { + struct time_bench_cpu *c = &cpu_tasks[cpu]; + struct time_bench_record *rec = &c->rec; + + /* Calculate stats */ + time_bench_calc_stats(rec); + + pr_info("Type:%s CPU(%d) %llu cycles(tsc) %llu.%03llu ns (step:%d) - (measurement period time:%llu.%09u sec time_interval:%llu) - (invoke count:%llu tsc_interval:%llu)\n", + desc, cpu, rec->tsc_cycles, rec->ns_per_call_quotient, + rec->ns_per_call_decimal, rec->step, rec->time_sec, + rec->time_sec_remainder, rec->time_interval, + rec->invoked_cnt, rec->tsc_interval); + + /* Collect average */ + sum.records++; + sum.tsc_cycles += rec->tsc_cycles; + step = rec->step; + } + + if (sum.records) /* avoid div-by-zero */ + average = sum.tsc_cycles / sum.records; + pr_info("Sum Type:%s Average: %llu cycles(tsc) CPUs:%d step:%d\n", desc, + average, sum.records, step); +} + +void time_bench_run_concurrent(uint32_t loops, int step, void *data, + const struct cpumask *mask, /* Support masking outsome CPUs*/ + struct time_bench_sync *sync, + struct time_bench_cpu *cpu_tasks, + int (*func)(struct time_bench_record *record, void *data)) +{ + int cpu, running = 0; + + if (verbose) // DEBUG + pr_warn("%s() Started on CPU:%d\n", __func__, + smp_processor_id()); + + /* Reset sync conditions */ + atomic_set(&sync->nr_tests_running, 0); + init_completion(&sync->start_event); + + /* Spawn off jobs on all CPUs */ + for_each_cpu(cpu, mask) { + struct time_bench_cpu *c = &cpu_tasks[cpu]; + + running++; + c->sync = sync; /* Send sync variable along */ + c->data = data; /* Send opaque along */ + + /* Init benchmark record */ + memset(&c->rec, 0, sizeof(struct time_bench_record)); + c->rec.version_abi = 1; + c->rec.loops = loops; + c->rec.step = step; + c->rec.flags = (TIME_BENCH_LOOP | TIME_BENCH_TSC | + TIME_BENCH_WALLCLOCK); + c->rec.cpu = cpu; + c->bench_func = func; + c->task = kthread_run(invoke_test_on_cpu_func, c, + "time_bench%d", cpu); + if (IS_ERR(c->task)) { + pr_err("%s(): Failed to start test func\n", __func__); + return; /* Argh, what about cleanup?! */ + } + } + + /* Wait until all processes are running */ + while (atomic_read(&sync->nr_tests_running) < running) { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(10); + } + /* Kick off all CPU concurrently on completion event */ + complete_all(&sync->start_event); + + /* Wait for CPUs to finish */ + while (atomic_read(&sync->nr_tests_running)) { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(10); + } + + /* Stop the kthreads */ + for_each_cpu(cpu, mask) { + struct time_bench_cpu *c = &cpu_tasks[cpu]; + + kthread_stop(c->task); + } + + if (verbose) // DEBUG - happens often, finish on another CPU + pr_warn("%s() Finished on CPU:%d\n", __func__, + smp_processor_id()); +} diff --git a/tools/testing/selftests/net/bench/page_pool/time_bench.h b/tools/testing/selftests/net/bench/page_pool/time_bench.h new file mode 100644 index 000000000000..e113fcf341dc --- /dev/null +++ b/tools/testing/selftests/net/bench/page_pool/time_bench.h @@ -0,0 +1,238 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Benchmarking code execution time inside the kernel + * + * Copyright (C) 2014, Red Hat, Inc., Jesper Dangaard Brouer + * for licensing details see kernel-base/COPYING + */ +#ifndef _LINUX_TIME_BENCH_H +#define _LINUX_TIME_BENCH_H + +/* Main structure used for recording a benchmark run */ +struct time_bench_record { + uint32_t version_abi; + uint32_t loops; /* Requested loop invocations */ + uint32_t step; /* option for e.g. bulk invocations */ + + uint32_t flags; /* Measurements types enabled */ +#define TIME_BENCH_LOOP BIT(0) +#define TIME_BENCH_TSC BIT(1) +#define TIME_BENCH_WALLCLOCK BIT(2) +#define TIME_BENCH_PMU BIT(3) + + uint32_t cpu; /* Used when embedded in time_bench_cpu */ + + /* Records */ + uint64_t invoked_cnt; /* Returned actual invocations */ + uint64_t tsc_start; + uint64_t tsc_stop; + struct timespec64 ts_start; + struct timespec64 ts_stop; + /* PMU counters for instruction and cycles + * instructions counter including pipelined instructions + */ + uint64_t pmc_inst_start; + uint64_t pmc_inst_stop; + /* CPU unhalted clock counter */ + uint64_t pmc_clk_start; + uint64_t pmc_clk_stop; + + /* Result records */ + uint64_t tsc_interval; + uint64_t time_start, time_stop, time_interval; /* in nanosec */ + uint64_t pmc_inst, pmc_clk; + + /* Derived result records */ + uint64_t tsc_cycles; // +decimal? + uint64_t ns_per_call_quotient, ns_per_call_decimal; + uint64_t time_sec; + uint32_t time_sec_remainder; + uint64_t pmc_ipc_quotient, pmc_ipc_decimal; /* inst per cycle */ +}; + +/* For synchronizing parallel CPUs to run concurrently */ +struct time_bench_sync { + atomic_t nr_tests_running; + struct completion start_event; +}; + +/* Keep track of CPUs executing our bench function. + * + * Embed a time_bench_record for storing info per cpu + */ +struct time_bench_cpu { + struct time_bench_record rec; + struct time_bench_sync *sync; /* back ptr */ + struct task_struct *task; + /* "data" opaque could have been placed in time_bench_sync, + * but to avoid any false sharing, place it per CPU + */ + void *data; + /* Support masking outsome CPUs, mark if it ran */ + bool did_bench_run; + /* int cpu; // note CPU stored in time_bench_record */ + int (*bench_func)(struct time_bench_record *record, void *data); +}; + +/* + * Below TSC assembler code is not compatible with other archs, and + * can also fail on guests if cpu-flags are not correct. + * + * The way TSC reading is used, many iterations, does not require as + * high accuracy as described below (in Intel Doc #324264). + * + * Considering changing to use get_cycles() (#include <asm/timex.h>). + */ + +/** TSC (Time-Stamp Counter) based ** + * Recommend reading, to understand details of reading TSC accurately: + * Intel Doc #324264, "How to Benchmark Code Execution Times on Intel" + * + * Consider getting exclusive ownership of CPU by using: + * unsigned long flags; + * preempt_disable(); + * raw_local_irq_save(flags); + * _your_code_ + * raw_local_irq_restore(flags); + * preempt_enable(); + * + * Clobbered registers: "%rax", "%rbx", "%rcx", "%rdx" + * RDTSC only change "%rax" and "%rdx" but + * CPUID clears the high 32-bits of all (rax/rbx/rcx/rdx) + */ +static __always_inline uint64_t tsc_start_clock(void) +{ + /* See: Intel Doc #324264 */ + unsigned int hi, lo; + + asm volatile("CPUID\n\t" + "RDTSC\n\t" + "mov %%edx, %0\n\t" + "mov %%eax, %1\n\t" + : "=r"(hi), "=r"(lo)::"%rax", "%rbx", "%rcx", "%rdx"); + //FIXME: on 32bit use clobbered %eax + %edx + return ((uint64_t)lo) | (((uint64_t)hi) << 32); +} + +static __always_inline uint64_t tsc_stop_clock(void) +{ + /* See: Intel Doc #324264 */ + unsigned int hi, lo; + + asm volatile("RDTSCP\n\t" + "mov %%edx, %0\n\t" + "mov %%eax, %1\n\t" + "CPUID\n\t" + : "=r"(hi), "=r"(lo)::"%rax", "%rbx", "%rcx", "%rdx"); + return ((uint64_t)lo) | (((uint64_t)hi) << 32); +} + +/** Wall-clock based ** + * + * use: getnstimeofday() + * getnstimeofday(&rec->ts_start); + * getnstimeofday(&rec->ts_stop); + * + * API changed see: Documentation/core-api/timekeeping.rst + * https://www.kernel.org/doc/html/latest/core-api/timekeeping.html#c.getnstimeofday + * + * We should instead use: ktime_get_real_ts64() is a direct + * replacement, but consider using monotonic time (ktime_get_ts64()) + * and/or a ktime_t based interface (ktime_get()/ktime_get_real()). + */ + +/** PMU (Performance Monitor Unit) based ** + * + * Needed for calculating: Instructions Per Cycle (IPC) + * - The IPC number tell how efficient the CPU pipelining were + */ +//lookup: perf_event_create_kernel_counter() + +bool time_bench_PMU_config(bool enable); + +/* Raw reading via rdpmc() using fixed counters + * + * From: https://github.com/andikleen/simple-pmu + */ +enum { + FIXED_SELECT = (1U << 30), /* == 0x40000000 */ + FIXED_INST_RETIRED_ANY = 0, + FIXED_CPU_CLK_UNHALTED_CORE = 1, + FIXED_CPU_CLK_UNHALTED_REF = 2, +}; + +static __always_inline unsigned int long long p_rdpmc(unsigned int in) +{ + unsigned int d, a; + + asm volatile("rdpmc" : "=d"(d), "=a"(a) : "c"(in) : "memory"); + return ((unsigned long long)d << 32) | a; +} + +/* These PMU counter needs to be enabled, but I don't have the + * configure code implemented. My current hack is running: + * sudo perf stat -e cycles:k -e instructions:k insmod lib/ring_queue_test.ko + */ +/* Reading all pipelined instruction */ +static __always_inline unsigned long long pmc_inst(void) +{ + return p_rdpmc(FIXED_SELECT | FIXED_INST_RETIRED_ANY); +} + +/* Reading CPU clock cycles */ +static __always_inline unsigned long long pmc_clk(void) +{ + return p_rdpmc(FIXED_SELECT | FIXED_CPU_CLK_UNHALTED_CORE); +} + +/* Raw reading via MSR rdmsr() is likely wrong + * FIXME: How can I know which raw MSR registers are conf for what? + */ +#define MSR_IA32_PCM0 0x400000C1 /* PERFCTR0 */ +#define MSR_IA32_PCM1 0x400000C2 /* PERFCTR1 */ +#define MSR_IA32_PCM2 0x400000C3 +static inline uint64_t msr_inst(unsigned long long *msr_result) +{ + return rdmsrq_safe(MSR_IA32_PCM0, msr_result); +} + +/** Generic functions ** + */ +bool time_bench_loop(uint32_t loops, int step, char *txt, void *data, + int (*func)(struct time_bench_record *rec, void *data)); +bool time_bench_calc_stats(struct time_bench_record *rec); + +void time_bench_run_concurrent(uint32_t loops, int step, void *data, + const struct cpumask *mask, /* Support masking outsome CPUs*/ + struct time_bench_sync *sync, struct time_bench_cpu *cpu_tasks, + int (*func)(struct time_bench_record *record, void *data)); +void time_bench_print_stats_cpumask(const char *desc, + struct time_bench_cpu *cpu_tasks, + const struct cpumask *mask); + +//FIXME: use rec->flags to select measurement, should be MACRO +static __always_inline void time_bench_start(struct time_bench_record *rec) +{ + //getnstimeofday(&rec->ts_start); + ktime_get_real_ts64(&rec->ts_start); + if (rec->flags & TIME_BENCH_PMU) { + rec->pmc_inst_start = pmc_inst(); + rec->pmc_clk_start = pmc_clk(); + } + rec->tsc_start = tsc_start_clock(); +} + +static __always_inline void time_bench_stop(struct time_bench_record *rec, + uint64_t invoked_cnt) +{ + rec->tsc_stop = tsc_stop_clock(); + if (rec->flags & TIME_BENCH_PMU) { + rec->pmc_inst_stop = pmc_inst(); + rec->pmc_clk_stop = pmc_clk(); + } + //getnstimeofday(&rec->ts_stop); + ktime_get_real_ts64(&rec->ts_stop); + rec->invoked_cnt = invoked_cnt; +} + +#endif /* _LINUX_TIME_BENCH_H */ diff --git a/tools/testing/selftests/net/bench/test_bench_page_pool.sh b/tools/testing/selftests/net/bench/test_bench_page_pool.sh new file mode 100755 index 000000000000..7b8b18cfedce --- /dev/null +++ b/tools/testing/selftests/net/bench/test_bench_page_pool.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# + +set -e + +DRIVER="./page_pool/bench_page_pool.ko" +result="" + +function run_test() +{ + rmmod "bench_page_pool.ko" || true + insmod $DRIVER > /dev/null 2>&1 + result=$(dmesg | tail -10) + echo "$result" + + echo + echo "Fast path results:" + echo "${result}" | grep -o -E "no-softirq-page_pool01 Per elem: ([0-9]+) cycles\(tsc\) ([0-9]+\.[0-9]+) ns" + + echo + echo "ptr_ring results:" + echo "${result}" | grep -o -E "no-softirq-page_pool02 Per elem: ([0-9]+) cycles\(tsc\) ([0-9]+\.[0-9]+) ns" + + echo + echo "slow path results:" + echo "${result}" | grep -o -E "no-softirq-page_pool03 Per elem: ([0-9]+) cycles\(tsc\) ([0-9]+\.[0-9]+) ns" +} + +run_test + +exit 0 diff --git a/tools/testing/selftests/net/bind_bhash.c b/tools/testing/selftests/net/bind_bhash.c index 57ff67a3751e..da04b0b19b73 100644 --- a/tools/testing/selftests/net/bind_bhash.c +++ b/tools/testing/selftests/net/bind_bhash.c @@ -75,7 +75,7 @@ static void *setup(void *arg) int *array = (int *)arg; for (i = 0; i < MAX_CONNECTIONS; i++) { - sock_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, setup_addr); + sock_fd = bind_socket(SO_REUSEPORT, setup_addr); if (sock_fd < 0) { ret = sock_fd; pthread_exit(&ret); @@ -103,7 +103,7 @@ int main(int argc, const char *argv[]) setup_addr = use_v6 ? setup_addr_v6 : setup_addr_v4; - listener_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, setup_addr); + listener_fd = bind_socket(SO_REUSEPORT, setup_addr); if (listen(listener_fd, 100) < 0) { perror("listen failed"); return -1; diff --git a/tools/testing/selftests/net/bind_timewait.c b/tools/testing/selftests/net/bind_timewait.c index cb9fdf51ea59..40126f9b901e 100644 --- a/tools/testing/selftests/net/bind_timewait.c +++ b/tools/testing/selftests/net/bind_timewait.c @@ -4,7 +4,7 @@ #include <sys/socket.h> #include <netinet/in.h> -#include "../kselftest_harness.h" +#include "kselftest_harness.h" FIXTURE(bind_timewait) { diff --git a/tools/testing/selftests/net/bind_wildcard.c b/tools/testing/selftests/net/bind_wildcard.c index b7b54d646b93..7d11548b2c61 100644 --- a/tools/testing/selftests/net/bind_wildcard.c +++ b/tools/testing/selftests/net/bind_wildcard.c @@ -4,7 +4,7 @@ #include <sys/socket.h> #include <netinet/in.h> -#include "../kselftest_harness.h" +#include "kselftest_harness.h" static const __u32 in4addr_any = INADDR_ANY; static const __u32 in4addr_loopback = INADDR_LOOPBACK; diff --git a/tools/testing/selftests/net/bpf_offload.py b/tools/testing/selftests/net/bpf_offload.py index fd0d959914e4..c856d266c8f3 100755 --- a/tools/testing/selftests/net/bpf_offload.py +++ b/tools/testing/selftests/net/bpf_offload.py @@ -184,8 +184,8 @@ def bpftool_prog_list(expected=None, ns="", exclude_orphaned=True): progs = [ p for p in progs if not p['orphaned'] ] if expected is not None: if len(progs) != expected: - fail(True, "%d BPF programs loaded, expected %d" % - (len(progs), expected)) + fail(True, "%d BPF programs loaded, expected %d\nLoaded Progs:\n%s" % + (len(progs), expected, pp.pformat(progs))) return progs def bpftool_map_list(expected=None, ns=""): @@ -207,9 +207,11 @@ def bpftool_prog_list_wait(expected=0, n_retry=20): raise Exception("Time out waiting for program counts to stabilize want %d, have %d" % (expected, nprogs)) def bpftool_map_list_wait(expected=0, n_retry=20, ns=""): + nmaps = None for i in range(n_retry): maps = bpftool_map_list(ns=ns) - if len(maps) == expected: + nmaps = len(maps) + if nmaps == expected: return maps time.sleep(0.05) raise Exception("Time out waiting for map counts to stabilize want %d, have %d" % (expected, nmaps)) @@ -710,6 +712,7 @@ _, base_maps = bpftool("map") base_map_names = [ 'pid_iter.rodata', # created on each bpftool invocation 'libbpf_det_bind', # created on each bpftool invocation + 'libbpf_global', ] # Check netdevsim diff --git a/tools/testing/selftests/net/bridge_stp_mode.sh b/tools/testing/selftests/net/bridge_stp_mode.sh new file mode 100755 index 000000000000..0c81fd029d79 --- /dev/null +++ b/tools/testing/selftests/net/bridge_stp_mode.sh @@ -0,0 +1,288 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# shellcheck disable=SC2034,SC2154,SC2317,SC2329 +# +# Test for bridge STP mode selection (IFLA_BR_STP_MODE). +# +# Verifies that: +# - stp_mode defaults to auto on new bridges +# - stp_mode can be toggled between user, kernel, and auto +# - stp_mode change is rejected while STP is active (-EBUSY) +# - stp_mode user in a netns yields userspace STP (stp_state=2) +# - stp_mode kernel forces kernel STP (stp_state=1) +# - stp_mode auto preserves traditional fallback to kernel STP +# - stp_mode and stp_state can be set atomically in one message +# - stp_mode persists across STP disable/enable cycles + +source lib.sh + +require_command jq + +ALL_TESTS=" + test_default_auto + test_set_modes + test_reject_change_while_stp_active + test_idempotent_mode_while_stp_active + test_user_mode_in_netns + test_kernel_mode + test_auto_mode + test_atomic_mode_and_state + test_mode_persistence +" + +bridge_info_get() +{ + ip -n "$NS1" -d -j link show "$1" | \ + jq -r ".[0].linkinfo.info_data.$2" +} + +check_stp_mode() +{ + local br=$1; shift + local expected=$1; shift + local msg=$1; shift + local val + + val=$(bridge_info_get "$br" stp_mode) + [ "$val" = "$expected" ] + check_err $? "$msg: expected $expected, got $val" +} + +check_stp_state() +{ + local br=$1; shift + local expected=$1; shift + local msg=$1; shift + local val + + val=$(bridge_info_get "$br" stp_state) + [ "$val" = "$expected" ] + check_err $? "$msg: expected $expected, got $val" +} + +# Create a bridge in NS1, bring it up, and defer its deletion. +bridge_create() +{ + ip -n "$NS1" link add "$1" type bridge + ip -n "$NS1" link set "$1" up + defer ip -n "$NS1" link del "$1" +} + +setup_prepare() +{ + setup_ns NS1 +} + +cleanup() +{ + defer_scopes_cleanup + cleanup_all_ns +} + +# Check that stp_mode defaults to auto when creating a bridge. +test_default_auto() +{ + RET=0 + + ip -n "$NS1" link add br-test type bridge + defer ip -n "$NS1" link del br-test + + check_stp_mode br-test auto "stp_mode default" + + log_test "stp_mode defaults to auto" +} + +# Test setting stp_mode to user, kernel, and back to auto. +test_set_modes() +{ + RET=0 + + ip -n "$NS1" link add br-test type bridge + defer ip -n "$NS1" link del br-test + + ip -n "$NS1" link set dev br-test type bridge stp_mode user + check_err $? "Failed to set stp_mode to user" + check_stp_mode br-test user "after set user" + + ip -n "$NS1" link set dev br-test type bridge stp_mode kernel + check_err $? "Failed to set stp_mode to kernel" + check_stp_mode br-test kernel "after set kernel" + + ip -n "$NS1" link set dev br-test type bridge stp_mode auto + check_err $? "Failed to set stp_mode to auto" + check_stp_mode br-test auto "after set auto" + + log_test "stp_mode set user/kernel/auto" +} + +# Verify that stp_mode cannot be changed while STP is active. +test_reject_change_while_stp_active() +{ + RET=0 + + bridge_create br-test + + ip -n "$NS1" link set dev br-test type bridge stp_mode kernel + check_err $? "Failed to set stp_mode to kernel" + + ip -n "$NS1" link set dev br-test type bridge stp_state 1 + check_err $? "Failed to enable STP" + + # Changing stp_mode while STP is active should fail. + ip -n "$NS1" link set dev br-test type bridge stp_mode auto 2>/dev/null + check_fail $? "Changing stp_mode should fail while STP is active" + + check_stp_mode br-test kernel "mode unchanged after rejected change" + + # Disable STP, then change should succeed. + ip -n "$NS1" link set dev br-test type bridge stp_state 0 + check_err $? "Failed to disable STP" + + ip -n "$NS1" link set dev br-test type bridge stp_mode auto + check_err $? "Changing stp_mode should succeed after STP is disabled" + + log_test "reject stp_mode change while STP is active" +} + +# Verify that re-setting the same stp_mode while STP is active succeeds. +test_idempotent_mode_while_stp_active() +{ + RET=0 + + bridge_create br-test + + ip -n "$NS1" link set dev br-test type bridge stp_mode user stp_state 1 + check_err $? "Failed to enable STP with user mode" + + # Re-setting the same mode while STP is active should succeed. + ip -n "$NS1" link set dev br-test type bridge stp_mode user + check_err $? "Idempotent stp_mode set should succeed while STP is active" + + check_stp_state br-test 2 "stp_state after idempotent set" + + # Changing mode while disabling STP in the same message should succeed. + ip -n "$NS1" link set dev br-test type bridge stp_mode auto stp_state 0 + check_err $? "Mode change with simultaneous STP disable should succeed" + + check_stp_mode br-test auto "mode changed after disable+change" + check_stp_state br-test 0 "stp_state after disable+change" + + log_test "idempotent and simultaneous mode change while STP active" +} + +# Test that stp_mode user in a non-init netns yields userspace STP +# (stp_state == 2). This is the key use case: userspace STP without +# needing /sbin/bridge-stp or being in init_net. +test_user_mode_in_netns() +{ + RET=0 + + bridge_create br-test + + ip -n "$NS1" link set dev br-test type bridge stp_mode user + check_err $? "Failed to set stp_mode to user" + + ip -n "$NS1" link set dev br-test type bridge stp_state 1 + check_err $? "Failed to enable STP" + + check_stp_state br-test 2 "stp_state with user mode" + + log_test "stp_mode user in netns yields userspace STP" +} + +# Test that stp_mode kernel forces kernel STP (stp_state == 1) +# regardless of whether /sbin/bridge-stp exists. +test_kernel_mode() +{ + RET=0 + + bridge_create br-test + + ip -n "$NS1" link set dev br-test type bridge stp_mode kernel + check_err $? "Failed to set stp_mode to kernel" + + ip -n "$NS1" link set dev br-test type bridge stp_state 1 + check_err $? "Failed to enable STP" + + check_stp_state br-test 1 "stp_state with kernel mode" + + log_test "stp_mode kernel forces kernel STP" +} + +# Test that stp_mode auto preserves traditional behavior: in a netns +# (non-init_net), bridge-stp is not called and STP falls back to +# kernel mode (stp_state == 1). +test_auto_mode() +{ + RET=0 + + bridge_create br-test + + # Auto mode is the default; enable STP in a netns. + ip -n "$NS1" link set dev br-test type bridge stp_state 1 + check_err $? "Failed to enable STP" + + # In a netns with auto mode, bridge-stp is skipped (init_net only), + # so STP should fall back to kernel mode (stp_state == 1). + check_stp_state br-test 1 "stp_state with auto mode in netns" + + log_test "stp_mode auto preserves traditional behavior" +} + +# Test that stp_mode and stp_state can be set in a single netlink +# message. This is the intended atomic usage pattern. +test_atomic_mode_and_state() +{ + RET=0 + + bridge_create br-test + + # Set both stp_mode and stp_state in one command. + ip -n "$NS1" link set dev br-test type bridge stp_mode user stp_state 1 + check_err $? "Failed to set stp_mode user and stp_state 1 atomically" + + check_stp_state br-test 2 "stp_state after atomic set" + + log_test "atomic stp_mode user + stp_state 1 in single message" +} + +# Test that stp_mode persists across STP disable/enable cycles. +test_mode_persistence() +{ + RET=0 + + bridge_create br-test + + # Set user mode and enable STP. + ip -n "$NS1" link set dev br-test type bridge stp_mode user + ip -n "$NS1" link set dev br-test type bridge stp_state 1 + check_err $? "Failed to enable STP with user mode" + + # Disable STP. + ip -n "$NS1" link set dev br-test type bridge stp_state 0 + check_err $? "Failed to disable STP" + + # Verify mode is still user. + check_stp_mode br-test user "stp_mode after STP disable" + + # Re-enable STP -- should use user mode again. + ip -n "$NS1" link set dev br-test type bridge stp_state 1 + check_err $? "Failed to re-enable STP" + + check_stp_state br-test 2 "stp_state after re-enable" + + log_test "stp_mode persists across STP disable/enable cycles" +} + +# Check iproute2 support before setting up resources. +if ! ip link add type bridge help 2>&1 | grep -q "stp_mode"; then + echo "SKIP: iproute2 too old, missing stp_mode support" + exit "$ksft_skip" +fi + +trap cleanup EXIT + +setup_prepare +tests_run + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/net/bridge_vlan_dump.sh b/tools/testing/selftests/net/bridge_vlan_dump.sh new file mode 100755 index 000000000000..ad66731d2a6f --- /dev/null +++ b/tools/testing/selftests/net/bridge_vlan_dump.sh @@ -0,0 +1,204 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test bridge VLAN range grouping. VLANs are collapsed into a range entry in +# the dump if they have the same per-VLAN options. These tests verify that +# VLANs with different per-VLAN option values are not grouped together. + +# shellcheck disable=SC1091,SC2034,SC2154,SC2317 +source lib.sh + +ALL_TESTS=" + vlan_range_neigh_suppress + vlan_range_mcast_max_groups + vlan_range_mcast_n_groups + vlan_range_mcast_enabled +" + +setup_prepare() +{ + setup_ns NS + defer cleanup_all_ns + + ip -n "$NS" link add name br0 type bridge vlan_filtering 1 \ + vlan_default_pvid 0 mcast_snooping 1 mcast_vlan_snooping 1 + ip -n "$NS" link set dev br0 up + + ip -n "$NS" link add name dummy0 type dummy + ip -n "$NS" link set dev dummy0 master br0 + ip -n "$NS" link set dev dummy0 up +} + +vlan_range_neigh_suppress() +{ + RET=0 + + # Add two new consecutive VLANs for range grouping test + bridge -n "$NS" vlan add vid 10 dev dummy0 + defer bridge -n "$NS" vlan del vid 10 dev dummy0 + + bridge -n "$NS" vlan add vid 11 dev dummy0 + defer bridge -n "$NS" vlan del vid 11 dev dummy0 + + # Configure different neigh_suppress values and verify no range grouping + bridge -n "$NS" vlan set vid 10 dev dummy0 neigh_suppress on + check_err $? "Failed to set neigh_suppress for VLAN 10" + + bridge -n "$NS" vlan set vid 11 dev dummy0 neigh_suppress off + check_err $? "Failed to set neigh_suppress for VLAN 11" + + # Verify VLANs are not shown as a range, but individual entries exist + bridge -n "$NS" -d vlan show dev dummy0 | grep -q "10-11" + check_fail $? "VLANs with different neigh_suppress incorrectly grouped" + + bridge -n "$NS" -d vlan show dev dummy0 | grep -Eq "^\S+\s+10$|^\s+10$" + check_err $? "VLAN 10 individual entry not found" + + bridge -n "$NS" -d vlan show dev dummy0 | grep -Eq "^\S+\s+11$|^\s+11$" + check_err $? "VLAN 11 individual entry not found" + + # Configure same neigh_suppress value and verify range grouping + bridge -n "$NS" vlan set vid 11 dev dummy0 neigh_suppress on + check_err $? "Failed to set neigh_suppress for VLAN 11" + + bridge -n "$NS" -d vlan show dev dummy0 | grep -q "10-11" + check_err $? "VLANs with same neigh_suppress not grouped" + + log_test "VLAN range grouping with neigh_suppress" +} + +vlan_range_mcast_max_groups() +{ + RET=0 + + # Add two new consecutive VLANs for range grouping test + bridge -n "$NS" vlan add vid 10 dev dummy0 + defer bridge -n "$NS" vlan del vid 10 dev dummy0 + + bridge -n "$NS" vlan add vid 11 dev dummy0 + defer bridge -n "$NS" vlan del vid 11 dev dummy0 + + # Configure different mcast_max_groups values and verify no range grouping + bridge -n "$NS" vlan set vid 10 dev dummy0 mcast_max_groups 100 + check_err $? "Failed to set mcast_max_groups for VLAN 10" + + bridge -n "$NS" vlan set vid 11 dev dummy0 mcast_max_groups 200 + check_err $? "Failed to set mcast_max_groups for VLAN 11" + + # Verify VLANs are not shown as a range, but individual entries exist + bridge -n "$NS" -d vlan show dev dummy0 | grep -q "10-11" + check_fail $? "VLANs with different mcast_max_groups incorrectly grouped" + + bridge -n "$NS" -d vlan show dev dummy0 | grep -Eq "^\S+\s+10$|^\s+10$" + check_err $? "VLAN 10 individual entry not found" + + bridge -n "$NS" -d vlan show dev dummy0 | grep -Eq "^\S+\s+11$|^\s+11$" + check_err $? "VLAN 11 individual entry not found" + + # Configure same mcast_max_groups value and verify range grouping + bridge -n "$NS" vlan set vid 11 dev dummy0 mcast_max_groups 100 + check_err $? "Failed to set mcast_max_groups for VLAN 11" + + bridge -n "$NS" -d vlan show dev dummy0 | grep -q "10-11" + check_err $? "VLANs with same mcast_max_groups not grouped" + + log_test "VLAN range grouping with mcast_max_groups" +} + +vlan_range_mcast_n_groups() +{ + RET=0 + + # Add two new consecutive VLANs for range grouping test + bridge -n "$NS" vlan add vid 10 dev dummy0 + defer bridge -n "$NS" vlan del vid 10 dev dummy0 + + bridge -n "$NS" vlan add vid 11 dev dummy0 + defer bridge -n "$NS" vlan del vid 11 dev dummy0 + + # Add different numbers of multicast groups to each VLAN + bridge -n "$NS" mdb add dev br0 port dummy0 grp 239.1.1.1 vid 10 + check_err $? "Failed to add mdb entry to VLAN 10" + defer bridge -n "$NS" mdb del dev br0 port dummy0 grp 239.1.1.1 vid 10 + + bridge -n "$NS" mdb add dev br0 port dummy0 grp 239.1.1.2 vid 10 + check_err $? "Failed to add second mdb entry to VLAN 10" + defer bridge -n "$NS" mdb del dev br0 port dummy0 grp 239.1.1.2 vid 10 + + bridge -n "$NS" mdb add dev br0 port dummy0 grp 239.1.1.1 vid 11 + check_err $? "Failed to add mdb entry to VLAN 11" + defer bridge -n "$NS" mdb del dev br0 port dummy0 grp 239.1.1.1 vid 11 + + # Verify VLANs are not shown as a range due to different mcast_n_groups + bridge -n "$NS" -d vlan show dev dummy0 | grep -q "10-11" + check_fail $? "VLANs with different mcast_n_groups incorrectly grouped" + + bridge -n "$NS" -d vlan show dev dummy0 | grep -Eq "^\S+\s+10$|^\s+10$" + check_err $? "VLAN 10 individual entry not found" + + bridge -n "$NS" -d vlan show dev dummy0 | grep -Eq "^\S+\s+11$|^\s+11$" + check_err $? "VLAN 11 individual entry not found" + + # Add another group to VLAN 11 to match VLAN 10's count + bridge -n "$NS" mdb add dev br0 port dummy0 grp 239.1.1.2 vid 11 + check_err $? "Failed to add second mdb entry to VLAN 11" + defer bridge -n "$NS" mdb del dev br0 port dummy0 grp 239.1.1.2 vid 11 + + bridge -n "$NS" -d vlan show dev dummy0 | grep -q "10-11" + check_err $? "VLANs with same mcast_n_groups not grouped" + + log_test "VLAN range grouping with mcast_n_groups" +} + +vlan_range_mcast_enabled() +{ + RET=0 + + # Add two new consecutive VLANs for range grouping test + bridge -n "$NS" vlan add vid 10 dev br0 self + defer bridge -n "$NS" vlan del vid 10 dev br0 self + + bridge -n "$NS" vlan add vid 11 dev br0 self + defer bridge -n "$NS" vlan del vid 11 dev br0 self + + bridge -n "$NS" vlan add vid 10 dev dummy0 + defer bridge -n "$NS" vlan del vid 10 dev dummy0 + + bridge -n "$NS" vlan add vid 11 dev dummy0 + defer bridge -n "$NS" vlan del vid 11 dev dummy0 + + # Configure different mcast_snooping for bridge VLANs + # Port VLANs inherit BR_VLFLAG_MCAST_ENABLED from bridge VLANs + bridge -n "$NS" vlan global set dev br0 vid 10 mcast_snooping 1 + bridge -n "$NS" vlan global set dev br0 vid 11 mcast_snooping 0 + + # Verify port VLANs are not grouped due to different mcast_enabled + bridge -n "$NS" -d vlan show dev dummy0 | grep -q "10-11" + check_fail $? "VLANs with different mcast_enabled incorrectly grouped" + + bridge -n "$NS" -d vlan show dev dummy0 | grep -Eq "^\S+\s+10$|^\s+10$" + check_err $? "VLAN 10 individual entry not found" + + bridge -n "$NS" -d vlan show dev dummy0 | grep -Eq "^\S+\s+11$|^\s+11$" + check_err $? "VLAN 11 individual entry not found" + + # Configure same mcast_snooping and verify range grouping + bridge -n "$NS" vlan global set dev br0 vid 11 mcast_snooping 1 + + bridge -n "$NS" -d vlan show dev dummy0 | grep -q "10-11" + check_err $? "VLANs with same mcast_enabled not grouped" + + log_test "VLAN range grouping with mcast_enabled" +} + +# Verify the newest tested option is supported +if ! bridge vlan help 2>&1 | grep -q "neigh_suppress"; then + echo "SKIP: iproute2 too old, missing per-VLAN neighbor suppression support" + exit "$ksft_skip" +fi + +trap defer_scopes_cleanup EXIT +setup_prepare +tests_run + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/net/broadcast_ether_dst.sh b/tools/testing/selftests/net/broadcast_ether_dst.sh new file mode 100755 index 000000000000..334a7eca8a80 --- /dev/null +++ b/tools/testing/selftests/net/broadcast_ether_dst.sh @@ -0,0 +1,83 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Author: Brett A C Sheffield <bacs@librecast.net> +# Author: Oscar Maes <oscmaes92@gmail.com> +# +# Ensure destination ethernet field is correctly set for +# broadcast packets + +source lib.sh + +CLIENT_IP4="192.168.0.1" +GW_IP4="192.168.0.2" + +setup() { + setup_ns CLIENT_NS SERVER_NS + + ip -net "${SERVER_NS}" link add link1 type veth \ + peer name link0 netns "${CLIENT_NS}" + + ip -net "${CLIENT_NS}" link set link0 up + ip -net "${CLIENT_NS}" addr add "${CLIENT_IP4}"/24 dev link0 + + ip -net "${SERVER_NS}" link set link1 up + + ip -net "${CLIENT_NS}" route add default via "${GW_IP4}" + ip netns exec "${CLIENT_NS}" arp -s "${GW_IP4}" 00:11:22:33:44:55 +} + +cleanup() { + rm -f "${CAPFILE}" "${OUTPUT}" + ip -net "${SERVER_NS}" link del link1 + cleanup_ns "${CLIENT_NS}" "${SERVER_NS}" +} + +test_broadcast_ether_dst() { + local rc=0 + CAPFILE=$(mktemp -u cap.XXXXXXXXXX) + OUTPUT=$(mktemp -u out.XXXXXXXXXX) + + echo "Testing ethernet broadcast destination" + + # start tcpdump listening for icmp + # tcpdump will exit after receiving a single packet + # timeout will kill tcpdump if it is still running after 2s + timeout 2s ip netns exec "${CLIENT_NS}" \ + tcpdump -i link0 -c 1 -w "${CAPFILE}" icmp &> "${OUTPUT}" & + pid=$! + slowwait 1 grep -qs "listening" "${OUTPUT}" + + # send broadcast ping + ip netns exec "${CLIENT_NS}" \ + ping -W0.01 -c1 -b 255.255.255.255 &> /dev/null + + # wait for tcpdump for exit after receiving packet + wait "${pid}" + + # compare ethernet destination field to ff:ff:ff:ff:ff:ff + ether_dst=$(tcpdump -r "${CAPFILE}" -tnne 2>/dev/null | \ + awk '{sub(/,/,"",$3); print $3}') + if [[ "${ether_dst}" == "ff:ff:ff:ff:ff:ff" ]]; then + echo "[ OK ]" + rc="${ksft_pass}" + else + echo "[FAIL] expected dst ether addr to be ff:ff:ff:ff:ff:ff," \ + "got ${ether_dst}" + rc="${ksft_fail}" + fi + + return "${rc}" +} + +if [ ! -x "$(command -v tcpdump)" ]; then + echo "SKIP: Could not run test without tcpdump tool" + exit "${ksft_skip}" +fi + +trap cleanup EXIT + +setup +test_broadcast_ether_dst + +exit $? diff --git a/tools/testing/selftests/net/broadcast_pmtu.sh b/tools/testing/selftests/net/broadcast_pmtu.sh new file mode 100755 index 000000000000..726eb5d25839 --- /dev/null +++ b/tools/testing/selftests/net/broadcast_pmtu.sh @@ -0,0 +1,47 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Ensures broadcast route MTU is respected + +CLIENT_NS=$(mktemp -u client-XXXXXXXX) +CLIENT_IP4="192.168.0.1/24" +CLIENT_BROADCAST_ADDRESS="192.168.0.255" + +SERVER_NS=$(mktemp -u server-XXXXXXXX) +SERVER_IP4="192.168.0.2/24" + +setup() { + ip netns add "${CLIENT_NS}" + ip netns add "${SERVER_NS}" + + ip -net "${SERVER_NS}" link add link1 type veth peer name link0 netns "${CLIENT_NS}" + + ip -net "${CLIENT_NS}" link set link0 up + ip -net "${CLIENT_NS}" link set link0 mtu 9000 + ip -net "${CLIENT_NS}" addr add "${CLIENT_IP4}" dev link0 + + ip -net "${SERVER_NS}" link set link1 up + ip -net "${SERVER_NS}" link set link1 mtu 1500 + ip -net "${SERVER_NS}" addr add "${SERVER_IP4}" dev link1 + + read -r -a CLIENT_BROADCAST_ENTRY <<< "$(ip -net "${CLIENT_NS}" route show table local type broadcast)" + ip -net "${CLIENT_NS}" route del "${CLIENT_BROADCAST_ENTRY[@]}" + ip -net "${CLIENT_NS}" route add "${CLIENT_BROADCAST_ENTRY[@]}" mtu 1500 + + ip net exec "${SERVER_NS}" sysctl -wq net.ipv4.icmp_echo_ignore_broadcasts=0 +} + +cleanup() { + ip -net "${SERVER_NS}" link del link1 + ip netns del "${CLIENT_NS}" + ip netns del "${SERVER_NS}" +} + +trap cleanup EXIT + +setup && + echo "Testing for broadcast route MTU" && + ip net exec "${CLIENT_NS}" ping -f -M want -q -c 1 -s 8000 -w 1 -b "${CLIENT_BROADCAST_ADDRESS}" > /dev/null 2>&1 + +exit $? + diff --git a/tools/testing/selftests/net/busy_poll_test.sh b/tools/testing/selftests/net/busy_poll_test.sh index 7db292ec4884..5ec1c85c1623 100755 --- a/tools/testing/selftests/net/busy_poll_test.sh +++ b/tools/testing/selftests/net/busy_poll_test.sh @@ -1,6 +1,6 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -source net_helper.sh +source lib.sh NSIM_SV_ID=$((256 + RANDOM % 256)) NSIM_SV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_SV_ID @@ -27,6 +27,8 @@ NAPI_DEFER_HARD_IRQS=100 GRO_FLUSH_TIMEOUT=50000 SUSPEND_TIMEOUT=20000000 +NAPI_THREADED_MODE_BUSY_POLL=2 + setup_ns() { set -e @@ -62,6 +64,9 @@ cleanup_ns() test_busypoll() { suspend_value=${1:-0} + napi_threaded_value=${2:-0} + prefer_busy_poll_value=${3:-$PREFER_BUSY_POLL} + tmp_file=$(mktemp) out_file=$(mktemp) @@ -73,10 +78,11 @@ test_busypoll() -b${SERVER_IP} \ -m${MAX_EVENTS} \ -u${BUSY_POLL_USECS} \ - -P${PREFER_BUSY_POLL} \ + -P${prefer_busy_poll_value} \ -g${BUSY_POLL_BUDGET} \ -i${NSIM_SV_IFIDX} \ -s${suspend_value} \ + -t${napi_threaded_value} \ -o${out_file}& wait_local_port_listen nssv ${SERVER_PORT} tcp @@ -109,6 +115,15 @@ test_busypoll_with_suspend() return $? } +test_busypoll_with_napi_threaded() +{ + # Only enable napi threaded poll. Set suspend timeout and prefer busy + # poll to 0. + test_busypoll 0 ${NAPI_THREADED_MODE_BUSY_POLL} 0 + + return $? +} + ### ### Code start ### @@ -154,6 +169,13 @@ if [ $? -ne 0 ]; then exit 1 fi +test_busypoll_with_napi_threaded +if [ $? -ne 0 ]; then + echo "test_busypoll_with_napi_threaded failed" + cleanup_ns + exit 1 +fi + echo "$NSIM_SV_FD:$NSIM_SV_IFIDX" > $NSIM_DEV_SYS_UNLINK echo $NSIM_CL_ID > $NSIM_DEV_SYS_DEL diff --git a/tools/testing/selftests/net/busy_poller.c b/tools/testing/selftests/net/busy_poller.c index 04c7ff577bb8..3a81f9c94795 100644 --- a/tools/testing/selftests/net/busy_poller.c +++ b/tools/testing/selftests/net/busy_poller.c @@ -65,15 +65,16 @@ static uint32_t cfg_busy_poll_usecs; static uint16_t cfg_busy_poll_budget; static uint8_t cfg_prefer_busy_poll; -/* IRQ params */ +/* NAPI params */ static uint32_t cfg_defer_hard_irqs; static uint64_t cfg_gro_flush_timeout; static uint64_t cfg_irq_suspend_timeout; +static enum netdev_napi_threaded cfg_napi_threaded_poll = NETDEV_NAPI_THREADED_DISABLED; static void usage(const char *filepath) { error(1, 0, - "Usage: %s -p<port> -b<addr> -m<max_events> -u<busy_poll_usecs> -P<prefer_busy_poll> -g<busy_poll_budget> -o<outfile> -d<defer_hard_irqs> -r<gro_flush_timeout> -s<irq_suspend_timeout> -i<ifindex>", + "Usage: %s -p<port> -b<addr> -m<max_events> -u<busy_poll_usecs> -P<prefer_busy_poll> -g<busy_poll_budget> -o<outfile> -d<defer_hard_irqs> -r<gro_flush_timeout> -s<irq_suspend_timeout> -t<napi_threaded_poll> -i<ifindex>", filepath); } @@ -86,7 +87,7 @@ static void parse_opts(int argc, char **argv) if (argc <= 1) usage(argv[0]); - while ((c = getopt(argc, argv, "p:m:b:u:P:g:o:d:r:s:i:")) != -1) { + while ((c = getopt(argc, argv, "p:m:b:u:P:g:o:d:r:s:i:t:")) != -1) { /* most options take integer values, except o and b, so reduce * code duplication a bit for the common case by calling * strtoull here and leave bounds checking and casting per @@ -168,6 +169,12 @@ static void parse_opts(int argc, char **argv) cfg_ifindex = (int)tmp; break; + case 't': + if (tmp > 2) + error(1, ERANGE, "napi threaded poll value must be 0-2"); + + cfg_napi_threaded_poll = (enum netdev_napi_threaded)tmp; + break; } } @@ -247,6 +254,9 @@ static void setup_queue(void) netdev_napi_set_req_set_irq_suspend_timeout(set_req, cfg_irq_suspend_timeout); + if (cfg_napi_threaded_poll) + netdev_napi_set_req_set_threaded(set_req, cfg_napi_threaded_poll); + if (netdev_napi_set(ys, set_req)) error(1, 0, "can't set NAPI params: %s\n", yerr.msg); diff --git a/tools/testing/selftests/net/can/.gitignore b/tools/testing/selftests/net/can/.gitignore new file mode 100644 index 000000000000..764a53fc837f --- /dev/null +++ b/tools/testing/selftests/net/can/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +test_raw_filter diff --git a/tools/testing/selftests/net/can/Makefile b/tools/testing/selftests/net/can/Makefile new file mode 100644 index 000000000000..5b82e60a03e7 --- /dev/null +++ b/tools/testing/selftests/net/can/Makefile @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 + +top_srcdir = ../../../../.. + +CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) + +TEST_PROGS := test_raw_filter.sh + +TEST_GEN_FILES := test_raw_filter + +include ../../lib.mk diff --git a/tools/testing/selftests/net/can/config b/tools/testing/selftests/net/can/config new file mode 100644 index 000000000000..188f79796670 --- /dev/null +++ b/tools/testing/selftests/net/can/config @@ -0,0 +1,3 @@ +CONFIG_CAN=m +CONFIG_CAN_DEV=m +CONFIG_CAN_VCAN=m diff --git a/tools/testing/selftests/net/can/test_raw_filter.c b/tools/testing/selftests/net/can/test_raw_filter.c new file mode 100644 index 000000000000..bb8ae8854273 --- /dev/null +++ b/tools/testing/selftests/net/can/test_raw_filter.c @@ -0,0 +1,405 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) +/* + * Copyright (c) 2011 Volkswagen Group Electronic Research + * All rights reserved. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> + +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/ioctl.h> +#include <sys/time.h> +#include <net/if.h> +#include <linux/if.h> + +#include <linux/can.h> +#include <linux/can/raw.h> + +#include "kselftest_harness.h" + +#define ID 0x123 + +char CANIF[IFNAMSIZ]; + +static int send_can_frames(int sock, int testcase) +{ + struct can_frame frame; + + frame.can_dlc = 1; + frame.data[0] = testcase; + + frame.can_id = ID; + if (write(sock, &frame, sizeof(frame)) < 0) + goto write_err; + + frame.can_id = (ID | CAN_RTR_FLAG); + if (write(sock, &frame, sizeof(frame)) < 0) + goto write_err; + + frame.can_id = (ID | CAN_EFF_FLAG); + if (write(sock, &frame, sizeof(frame)) < 0) + goto write_err; + + frame.can_id = (ID | CAN_EFF_FLAG | CAN_RTR_FLAG); + if (write(sock, &frame, sizeof(frame)) < 0) + goto write_err; + + return 0; + +write_err: + perror("write"); + return 1; +} + +FIXTURE(can_filters) { + int sock; +}; + +FIXTURE_SETUP(can_filters) +{ + struct sockaddr_can addr; + struct ifreq ifr; + int recv_own_msgs = 1; + int s, ret; + + s = socket(PF_CAN, SOCK_RAW, CAN_RAW); + ASSERT_GE(s, 0) + TH_LOG("failed to create CAN_RAW socket: %d", errno); + + strncpy(ifr.ifr_name, CANIF, sizeof(ifr.ifr_name)); + ret = ioctl(s, SIOCGIFINDEX, &ifr); + ASSERT_GE(ret, 0) + TH_LOG("failed SIOCGIFINDEX: %d", errno); + + addr.can_family = AF_CAN; + addr.can_ifindex = ifr.ifr_ifindex; + + setsockopt(s, SOL_CAN_RAW, CAN_RAW_RECV_OWN_MSGS, + &recv_own_msgs, sizeof(recv_own_msgs)); + + ret = bind(s, (struct sockaddr *)&addr, sizeof(addr)); + ASSERT_EQ(ret, 0) + TH_LOG("failed bind socket: %d", errno); + + self->sock = s; +} + +FIXTURE_TEARDOWN(can_filters) +{ + close(self->sock); +} + +FIXTURE_VARIANT(can_filters) { + int testcase; + canid_t id; + canid_t mask; + int exp_num_rx; + canid_t exp_flags[]; +}; + +/* Receive all frames when filtering for the ID in standard frame format */ +FIXTURE_VARIANT_ADD(can_filters, base) { + .testcase = 1, + .id = ID, + .mask = CAN_SFF_MASK, + .exp_num_rx = 4, + .exp_flags = { + 0, + CAN_RTR_FLAG, + CAN_EFF_FLAG, + CAN_EFF_FLAG | CAN_RTR_FLAG, + }, +}; + +/* Ignore EFF flag in filter ID if not covered by filter mask */ +FIXTURE_VARIANT_ADD(can_filters, base_eff) { + .testcase = 2, + .id = ID | CAN_EFF_FLAG, + .mask = CAN_SFF_MASK, + .exp_num_rx = 4, + .exp_flags = { + 0, + CAN_RTR_FLAG, + CAN_EFF_FLAG, + CAN_EFF_FLAG | CAN_RTR_FLAG, + }, +}; + +/* Ignore RTR flag in filter ID if not covered by filter mask */ +FIXTURE_VARIANT_ADD(can_filters, base_rtr) { + .testcase = 3, + .id = ID | CAN_RTR_FLAG, + .mask = CAN_SFF_MASK, + .exp_num_rx = 4, + .exp_flags = { + 0, + CAN_RTR_FLAG, + CAN_EFF_FLAG, + CAN_EFF_FLAG | CAN_RTR_FLAG, + }, +}; + +/* Ignore EFF and RTR flags in filter ID if not covered by filter mask */ +FIXTURE_VARIANT_ADD(can_filters, base_effrtr) { + .testcase = 4, + .id = ID | CAN_EFF_FLAG | CAN_RTR_FLAG, + .mask = CAN_SFF_MASK, + .exp_num_rx = 4, + .exp_flags = { + 0, + CAN_RTR_FLAG, + CAN_EFF_FLAG, + CAN_EFF_FLAG | CAN_RTR_FLAG, + }, +}; + +/* Receive only SFF frames when expecting no EFF flag */ +FIXTURE_VARIANT_ADD(can_filters, filter_eff) { + .testcase = 5, + .id = ID, + .mask = CAN_SFF_MASK | CAN_EFF_FLAG, + .exp_num_rx = 2, + .exp_flags = { + 0, + CAN_RTR_FLAG, + }, +}; + +/* Receive only EFF frames when filter id and filter mask include EFF flag */ +FIXTURE_VARIANT_ADD(can_filters, filter_eff_eff) { + .testcase = 6, + .id = ID | CAN_EFF_FLAG, + .mask = CAN_SFF_MASK | CAN_EFF_FLAG, + .exp_num_rx = 2, + .exp_flags = { + CAN_EFF_FLAG, + CAN_EFF_FLAG | CAN_RTR_FLAG, + }, +}; + +/* Receive only SFF frames when expecting no EFF flag, ignoring RTR flag */ +FIXTURE_VARIANT_ADD(can_filters, filter_eff_rtr) { + .testcase = 7, + .id = ID | CAN_RTR_FLAG, + .mask = CAN_SFF_MASK | CAN_EFF_FLAG, + .exp_num_rx = 2, + .exp_flags = { + 0, + CAN_RTR_FLAG, + }, +}; + +/* Receive only EFF frames when filter id and filter mask include EFF flag, + * ignoring RTR flag + */ +FIXTURE_VARIANT_ADD(can_filters, filter_eff_effrtr) { + .testcase = 8, + .id = ID | CAN_EFF_FLAG | CAN_RTR_FLAG, + .mask = CAN_SFF_MASK | CAN_EFF_FLAG, + .exp_num_rx = 2, + .exp_flags = { + CAN_EFF_FLAG, + CAN_EFF_FLAG | CAN_RTR_FLAG, + }, +}; + +/* Receive no remote frames when filtering for no RTR flag */ +FIXTURE_VARIANT_ADD(can_filters, filter_rtr) { + .testcase = 9, + .id = ID, + .mask = CAN_SFF_MASK | CAN_RTR_FLAG, + .exp_num_rx = 2, + .exp_flags = { + 0, + CAN_EFF_FLAG, + }, +}; + +/* Receive no remote frames when filtering for no RTR flag, ignoring EFF flag */ +FIXTURE_VARIANT_ADD(can_filters, filter_rtr_eff) { + .testcase = 10, + .id = ID | CAN_EFF_FLAG, + .mask = CAN_SFF_MASK | CAN_RTR_FLAG, + .exp_num_rx = 2, + .exp_flags = { + 0, + CAN_EFF_FLAG, + }, +}; + +/* Receive only remote frames when filter includes RTR flag */ +FIXTURE_VARIANT_ADD(can_filters, filter_rtr_rtr) { + .testcase = 11, + .id = ID | CAN_RTR_FLAG, + .mask = CAN_SFF_MASK | CAN_RTR_FLAG, + .exp_num_rx = 2, + .exp_flags = { + CAN_RTR_FLAG, + CAN_EFF_FLAG | CAN_RTR_FLAG, + }, +}; + +/* Receive only remote frames when filter includes RTR flag, ignoring EFF + * flag + */ +FIXTURE_VARIANT_ADD(can_filters, filter_rtr_effrtr) { + .testcase = 12, + .id = ID | CAN_EFF_FLAG | CAN_RTR_FLAG, + .mask = CAN_SFF_MASK | CAN_RTR_FLAG, + .exp_num_rx = 2, + .exp_flags = { + CAN_RTR_FLAG, + CAN_EFF_FLAG | CAN_RTR_FLAG, + }, +}; + +/* Receive only SFF data frame when filtering for no flags */ +FIXTURE_VARIANT_ADD(can_filters, filter_effrtr) { + .testcase = 13, + .id = ID, + .mask = CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG, + .exp_num_rx = 1, + .exp_flags = { + 0, + }, +}; + +/* Receive only EFF data frame when filtering for EFF but no RTR flag */ +FIXTURE_VARIANT_ADD(can_filters, filter_effrtr_eff) { + .testcase = 14, + .id = ID | CAN_EFF_FLAG, + .mask = CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG, + .exp_num_rx = 1, + .exp_flags = { + CAN_EFF_FLAG, + }, +}; + +/* Receive only SFF remote frame when filtering for RTR but no EFF flag */ +FIXTURE_VARIANT_ADD(can_filters, filter_effrtr_rtr) { + .testcase = 15, + .id = ID | CAN_RTR_FLAG, + .mask = CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG, + .exp_num_rx = 1, + .exp_flags = { + CAN_RTR_FLAG, + }, +}; + +/* Receive only EFF remote frame when filtering for EFF and RTR flag */ +FIXTURE_VARIANT_ADD(can_filters, filter_effrtr_effrtr) { + .testcase = 16, + .id = ID | CAN_EFF_FLAG | CAN_RTR_FLAG, + .mask = CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG, + .exp_num_rx = 1, + .exp_flags = { + CAN_EFF_FLAG | CAN_RTR_FLAG, + }, +}; + +/* Receive only SFF data frame when filtering for no EFF flag and no RTR flag + * but based on EFF mask + */ +FIXTURE_VARIANT_ADD(can_filters, eff) { + .testcase = 17, + .id = ID, + .mask = CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG, + .exp_num_rx = 1, + .exp_flags = { + 0, + }, +}; + +/* Receive only EFF data frame when filtering for EFF flag and no RTR flag but + * based on EFF mask + */ +FIXTURE_VARIANT_ADD(can_filters, eff_eff) { + .testcase = 18, + .id = ID | CAN_EFF_FLAG, + .mask = CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG, + .exp_num_rx = 1, + .exp_flags = { + CAN_EFF_FLAG, + }, +}; + +/* This test verifies that the raw CAN filters work, by checking if only frames + * with the expected set of flags are received. For each test case, the given + * filter (id and mask) is added and four CAN frames are sent with every + * combination of set/unset EFF/RTR flags. + */ +TEST_F(can_filters, test_filter) +{ + struct can_filter rfilter; + int ret; + + rfilter.can_id = variant->id; + rfilter.can_mask = variant->mask; + setsockopt(self->sock, SOL_CAN_RAW, CAN_RAW_FILTER, + &rfilter, sizeof(rfilter)); + + TH_LOG("filters: can_id = 0x%08X can_mask = 0x%08X", + rfilter.can_id, rfilter.can_mask); + + ret = send_can_frames(self->sock, variant->testcase); + ASSERT_EQ(ret, 0) + TH_LOG("failed to send CAN frames"); + + for (int i = 0; i <= variant->exp_num_rx; i++) { + struct can_frame frame; + struct timeval tv = { + .tv_sec = 0, + .tv_usec = 50000, /* 50ms timeout */ + }; + fd_set rdfs; + + FD_ZERO(&rdfs); + FD_SET(self->sock, &rdfs); + + ret = select(self->sock + 1, &rdfs, NULL, NULL, &tv); + ASSERT_GE(ret, 0) + TH_LOG("failed select for frame %d, err: %d)", i, errno); + + ret = FD_ISSET(self->sock, &rdfs); + if (i == variant->exp_num_rx) { + ASSERT_EQ(ret, 0) + TH_LOG("too many frames received"); + } else { + ASSERT_NE(ret, 0) + TH_LOG("too few frames received"); + + ret = read(self->sock, &frame, sizeof(frame)); + ASSERT_GE(ret, 0) + TH_LOG("failed to read frame %d, err: %d", i, errno); + + TH_LOG("rx: can_id = 0x%08X rx = %d", frame.can_id, i); + + ASSERT_EQ(ID, frame.can_id & CAN_SFF_MASK) + TH_LOG("received wrong can_id"); + ASSERT_EQ(variant->testcase, frame.data[0]) + TH_LOG("received wrong test case"); + + ASSERT_EQ(frame.can_id & ~CAN_ERR_MASK, + variant->exp_flags[i]) + TH_LOG("received unexpected flags"); + } + } +} + +int main(int argc, char **argv) +{ + char *ifname = getenv("CANIF"); + + if (!ifname) { + printf("CANIF environment variable must contain the test interface\n"); + return KSFT_FAIL; + } + + strncpy(CANIF, ifname, sizeof(CANIF) - 1); + + return test_harness_run(argc, argv); +} diff --git a/tools/testing/selftests/net/can/test_raw_filter.sh b/tools/testing/selftests/net/can/test_raw_filter.sh new file mode 100755 index 000000000000..276d6c06ac95 --- /dev/null +++ b/tools/testing/selftests/net/can/test_raw_filter.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + test_raw_filter +" + +net_dir=$(dirname $0)/.. +source $net_dir/lib.sh + +export CANIF=${CANIF:-"vcan0"} +BITRATE=${BITRATE:-500000} + +setup() +{ + if [[ $CANIF == vcan* ]]; then + ip link add name $CANIF type vcan || exit $ksft_skip + else + ip link set dev $CANIF type can bitrate $BITRATE || exit $ksft_skip + fi + ip link set dev $CANIF up + pwd +} + +cleanup() +{ + ip link set dev $CANIF down + if [[ $CANIF == vcan* ]]; then + ip link delete $CANIF + fi +} + +test_raw_filter() +{ + ./test_raw_filter + check_err $? + log_test "test_raw_filter" +} + +trap cleanup EXIT +setup + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/cmsg_ip.sh b/tools/testing/selftests/net/cmsg_ip.sh new file mode 100755 index 000000000000..b55680e081ad --- /dev/null +++ b/tools/testing/selftests/net/cmsg_ip.sh @@ -0,0 +1,187 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +IP4=172.16.0.1/24 +TGT4=172.16.0.2 +IP6=2001:db8:1::1/64 +TGT6=2001:db8:1::2 +TMPF=$(mktemp --suffix ".pcap") + +cleanup() +{ + rm -f $TMPF + cleanup_ns $NS +} + +trap cleanup EXIT + +tcpdump -h | grep immediate-mode >> /dev/null +if [ $? -ne 0 ]; then + echo "SKIP - tcpdump with --immediate-mode option required" + exit $ksft_skip +fi + +# Namespaces +setup_ns NS +NSEXE="ip netns exec $NS" + +$NSEXE sysctl -w net.ipv4.ping_group_range='0 2147483647' > /dev/null + +# Connectivity +ip -netns $NS link add type dummy +ip -netns $NS link set dev dummy0 up +ip -netns $NS addr add $IP4 dev dummy0 +ip -netns $NS addr add $IP6 dev dummy0 + +# Test +BAD=0 +TOTAL=0 + +check_result() { + ((TOTAL++)) + if [ $1 -ne $2 ]; then + echo " Case $3 returned $1, expected $2" + ((BAD++)) + fi +} + +# IPV6_DONTFRAG +for ovr in setsock cmsg both diff; do + for df in 0 1; do + for p in u U i r; do + [ $p == "u" ] && prot=UDP + [ $p == "U" ] && prot=UDP + [ $p == "i" ] && prot=ICMP + [ $p == "r" ] && prot=RAW + + [ $ovr == "setsock" ] && m="-F $df" + [ $ovr == "cmsg" ] && m="-f $df" + [ $ovr == "both" ] && m="-F $df -f $df" + [ $ovr == "diff" ] && m="-F $((1 - df)) -f $df" + + $NSEXE ./cmsg_sender -s -S 2000 -6 -p $p $m $TGT6 1234 + check_result $? $df "DONTFRAG $prot $ovr" + done + done +done + +# IP_TOS + IPV6_TCLASS + +test_dscp() { + local -r IPVER=$1 + local -r TGT=$2 + local -r MATCH=$3 + + local -r TOS=0x10 + local -r TOS2=0x20 + local -r ECN=0x3 + + ip $IPVER -netns $NS rule add tos $TOS lookup 300 + ip $IPVER -netns $NS route add table 300 prohibit any + + for ovr in setsock cmsg both diff; do + for p in u U i r; do + [ $p == "u" ] && prot=UDP + [ $p == "U" ] && prot=UDP + [ $p == "i" ] && prot=ICMP + [ $p == "r" ] && prot=RAW + + [ $ovr == "setsock" ] && m="-C" + [ $ovr == "cmsg" ] && m="-c" + [ $ovr == "both" ] && m="-C $((TOS2)) -c" + [ $ovr == "diff" ] && m="-C $((TOS )) -c" + + $NSEXE nohup tcpdump --immediate-mode -p -ni dummy0 -w $TMPF -c 4 2> /dev/null & + BG=$! + sleep 0.05 + + $NSEXE ./cmsg_sender $IPVER -p $p $m $((TOS2)) $TGT 1234 + check_result $? 0 "$MATCH $prot $ovr - pass" + + while [ -d /proc/$BG ]; do + $NSEXE ./cmsg_sender $IPVER -p $p $m $((TOS2)) $TGT 1234 + done + + tcpdump -r $TMPF -v 2>&1 | grep "$MATCH $TOS2" >> /dev/null + check_result $? 0 "$MATCH $prot $ovr - packet data" + rm $TMPF + + [ $ovr == "both" ] && m="-C $((TOS )) -c" + [ $ovr == "diff" ] && m="-C $((TOS2)) -c" + + # Match prohibit rule: expect failure + $NSEXE ./cmsg_sender $IPVER -p $p $m $((TOS)) -s $TGT 1234 + check_result $? 1 "$MATCH $prot $ovr - rejection" + + # Match prohibit rule: IPv4 masks ECN: expect failure + if [[ "$IPVER" == "-4" ]]; then + $NSEXE ./cmsg_sender $IPVER -p $p $m "$((TOS | ECN))" -s $TGT 1234 + check_result $? 1 "$MATCH $prot $ovr - rejection (ECN)" + fi + done + done +} + +test_dscp -4 $TGT4 tos +test_dscp -6 $TGT6 class + +# IP_TTL + IPV6_HOPLIMIT +test_ttl_hoplimit() { + local -r IPVER=$1 + local -r TGT=$2 + local -r MATCH=$3 + + local -r LIM=4 + + for ovr in setsock cmsg both diff; do + for p in u U i r; do + [ $p == "u" ] && prot=UDP + [ $p == "U" ] && prot=UDP + [ $p == "i" ] && prot=ICMP + [ $p == "r" ] && prot=RAW + + [ $ovr == "setsock" ] && m="-L" + [ $ovr == "cmsg" ] && m="-l" + [ $ovr == "both" ] && m="-L $LIM -l" + [ $ovr == "diff" ] && m="-L $((LIM + 1)) -l" + + $NSEXE nohup tcpdump --immediate-mode -p -ni dummy0 -w $TMPF -c 4 2> /dev/null & + BG=$! + sleep 0.05 + + $NSEXE ./cmsg_sender $IPVER -p $p $m $LIM $TGT 1234 + check_result $? 0 "$MATCH $prot $ovr - pass" + + while [ -d /proc/$BG ]; do + $NSEXE ./cmsg_sender $IPVER -p $p $m $LIM $TGT 1234 + done + + tcpdump -r $TMPF -v 2>&1 | grep "$MATCH $LIM[^0-9]" >> /dev/null + check_result $? 0 "$MATCH $prot $ovr - packet data" + rm $TMPF + done + done +} + +test_ttl_hoplimit -4 $TGT4 ttl +test_ttl_hoplimit -6 $TGT6 hlim + +# IPV6 exthdr +for p in u U i r; do + # Very basic "does it crash" test + for h in h d r; do + $NSEXE ./cmsg_sender -p $p -6 -H $h $TGT6 1234 + check_result $? 0 "ExtHdr $prot $ovr - pass" + done +done + +# Summary +if [ $BAD -ne 0 ]; then + echo "FAIL - $BAD/$TOTAL cases failed" + exit 1 +else + echo "OK" + exit 0 +fi diff --git a/tools/testing/selftests/net/cmsg_ipv6.sh b/tools/testing/selftests/net/cmsg_ipv6.sh deleted file mode 100755 index 8bc23fb4c82b..000000000000 --- a/tools/testing/selftests/net/cmsg_ipv6.sh +++ /dev/null @@ -1,154 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -source lib.sh - -IP6=2001:db8:1::1/64 -TGT6=2001:db8:1::2 -TMPF=$(mktemp --suffix ".pcap") - -cleanup() -{ - rm -f $TMPF - cleanup_ns $NS -} - -trap cleanup EXIT - -tcpdump -h | grep immediate-mode >> /dev/null -if [ $? -ne 0 ]; then - echo "SKIP - tcpdump with --immediate-mode option required" - exit $ksft_skip -fi - -# Namespaces -setup_ns NS -NSEXE="ip netns exec $NS" - -$NSEXE sysctl -w net.ipv4.ping_group_range='0 2147483647' > /dev/null - -# Connectivity -ip -netns $NS link add type dummy -ip -netns $NS link set dev dummy0 up -ip -netns $NS addr add $IP6 dev dummy0 - -# Test -BAD=0 -TOTAL=0 - -check_result() { - ((TOTAL++)) - if [ $1 -ne $2 ]; then - echo " Case $3 returned $1, expected $2" - ((BAD++)) - fi -} - -# IPV6_DONTFRAG -for ovr in setsock cmsg both diff; do - for df in 0 1; do - for p in u i r; do - [ $p == "u" ] && prot=UDP - [ $p == "i" ] && prot=ICMP - [ $p == "r" ] && prot=RAW - - [ $ovr == "setsock" ] && m="-F $df" - [ $ovr == "cmsg" ] && m="-f $df" - [ $ovr == "both" ] && m="-F $df -f $df" - [ $ovr == "diff" ] && m="-F $((1 - df)) -f $df" - - $NSEXE ./cmsg_sender -s -S 2000 -6 -p $p $m $TGT6 1234 - check_result $? $df "DONTFRAG $prot $ovr" - done - done -done - -# IPV6_TCLASS -TOS=0x10 -TOS2=0x20 - -ip -6 -netns $NS rule add tos $TOS lookup 300 -ip -6 -netns $NS route add table 300 prohibit any - -for ovr in setsock cmsg both diff; do - for p in u i r; do - [ $p == "u" ] && prot=UDP - [ $p == "i" ] && prot=ICMP - [ $p == "r" ] && prot=RAW - - [ $ovr == "setsock" ] && m="-C" - [ $ovr == "cmsg" ] && m="-c" - [ $ovr == "both" ] && m="-C $((TOS2)) -c" - [ $ovr == "diff" ] && m="-C $((TOS )) -c" - - $NSEXE nohup tcpdump --immediate-mode -p -ni dummy0 -w $TMPF -c 4 2> /dev/null & - BG=$! - sleep 0.05 - - $NSEXE ./cmsg_sender -6 -p $p $m $((TOS2)) $TGT6 1234 - check_result $? 0 "TCLASS $prot $ovr - pass" - - while [ -d /proc/$BG ]; do - $NSEXE ./cmsg_sender -6 -p $p $m $((TOS2)) $TGT6 1234 - done - - tcpdump -r $TMPF -v 2>&1 | grep "class $TOS2" >> /dev/null - check_result $? 0 "TCLASS $prot $ovr - packet data" - rm $TMPF - - [ $ovr == "both" ] && m="-C $((TOS )) -c" - [ $ovr == "diff" ] && m="-C $((TOS2)) -c" - - $NSEXE ./cmsg_sender -6 -p $p $m $((TOS)) -s $TGT6 1234 - check_result $? 1 "TCLASS $prot $ovr - rejection" - done -done - -# IPV6_HOPLIMIT -LIM=4 - -for ovr in setsock cmsg both diff; do - for p in u i r; do - [ $p == "u" ] && prot=UDP - [ $p == "i" ] && prot=ICMP - [ $p == "r" ] && prot=RAW - - [ $ovr == "setsock" ] && m="-L" - [ $ovr == "cmsg" ] && m="-l" - [ $ovr == "both" ] && m="-L $LIM -l" - [ $ovr == "diff" ] && m="-L $((LIM + 1)) -l" - - $NSEXE nohup tcpdump --immediate-mode -p -ni dummy0 -w $TMPF -c 4 2> /dev/null & - BG=$! - sleep 0.05 - - $NSEXE ./cmsg_sender -6 -p $p $m $LIM $TGT6 1234 - check_result $? 0 "HOPLIMIT $prot $ovr - pass" - - while [ -d /proc/$BG ]; do - $NSEXE ./cmsg_sender -6 -p $p $m $LIM $TGT6 1234 - done - - tcpdump -r $TMPF -v 2>&1 | grep "hlim $LIM[^0-9]" >> /dev/null - check_result $? 0 "HOPLIMIT $prot $ovr - packet data" - rm $TMPF - done -done - -# IPV6 exthdr -for p in u i r; do - # Very basic "does it crash" test - for h in h d r; do - $NSEXE ./cmsg_sender -p $p -6 -H $h $TGT6 1234 - check_result $? 0 "ExtHdr $prot $ovr - pass" - done -done - -# Summary -if [ $BAD -ne 0 ]; then - echo "FAIL - $BAD/$TOTAL cases failed" - exit 1 -else - echo "OK" - exit 0 -fi diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c index bc314382e4e1..67a72b1a2f3d 100644 --- a/tools/testing/selftests/net/cmsg_sender.c +++ b/tools/testing/selftests/net/cmsg_sender.c @@ -16,7 +16,7 @@ #include <linux/udp.h> #include <sys/socket.h> -#include "../kselftest.h" +#include "kselftest.h" enum { ERN_SUCCESS = 0, @@ -33,6 +33,7 @@ enum { ERN_RECVERR, ERN_CMSG_RD, ERN_CMSG_RCV, + ERN_SEND_MORE, }; struct option_cmsg_u32 { @@ -46,6 +47,7 @@ struct options { const char *service; unsigned int size; unsigned int num_pkt; + bool msg_more; struct { unsigned int mark; unsigned int dontfrag; @@ -72,7 +74,7 @@ struct options { struct option_cmsg_u32 tclass; struct option_cmsg_u32 hlimit; struct option_cmsg_u32 exthdr; - } v6; + } cmsg; } opt = { .size = 13, .num_pkt = 1, @@ -94,7 +96,8 @@ static void __attribute__((noreturn)) cs_usage(const char *bin) "\t\t-S send() size\n" "\t\t-4/-6 Force IPv4 / IPv6 only\n" "\t\t-p prot Socket protocol\n" - "\t\t (u = UDP (default); i = ICMP; r = RAW)\n" + "\t\t (u = UDP (default); i = ICMP; r = RAW;\n" + "\t\t U = UDP with MSG_MORE)\n" "\n" "\t\t-m val Set SO_MARK with given value\n" "\t\t-M val Set SO_MARK via setsockopt\n" @@ -104,13 +107,13 @@ static void __attribute__((noreturn)) cs_usage(const char *bin) "\t\t-t Enable time stamp reporting\n" "\t\t-f val Set don't fragment via cmsg\n" "\t\t-F val Set don't fragment via setsockopt\n" - "\t\t-c val Set TCLASS via cmsg\n" - "\t\t-C val Set TCLASS via setsockopt\n" - "\t\t-l val Set HOPLIMIT via cmsg\n" - "\t\t-L val Set HOPLIMIT via setsockopt\n" + "\t\t-c val Set TOS/TCLASS via cmsg\n" + "\t\t-C val Set TOS/TCLASS via setsockopt\n" + "\t\t-l val Set TTL/HOPLIMIT via cmsg\n" + "\t\t-L val Set TTL/HOPLIMIT via setsockopt\n" "\t\t-H type Add an IPv6 header option\n" - "\t\t (h = HOP; d = DST; r = RTDST)" - ""); + "\t\t (h = HOP; d = DST; r = RTDST)\n" + "\n"); exit(ERN_HELP); } @@ -133,8 +136,11 @@ static void cs_parse_args(int argc, char *argv[]) opt.sock.family = AF_INET6; break; case 'p': - if (*optarg == 'u' || *optarg == 'U') { + if (*optarg == 'u') { opt.sock.proto = IPPROTO_UDP; + } else if (*optarg == 'U') { + opt.sock.proto = IPPROTO_UDP; + opt.msg_more = true; } else if (*optarg == 'i' || *optarg == 'I') { opt.sock.proto = IPPROTO_ICMP; } else if (*optarg == 'r') { @@ -169,37 +175,37 @@ static void cs_parse_args(int argc, char *argv[]) opt.ts.ena = true; break; case 'f': - opt.v6.dontfrag.ena = true; - opt.v6.dontfrag.val = atoi(optarg); + opt.cmsg.dontfrag.ena = true; + opt.cmsg.dontfrag.val = atoi(optarg); break; case 'F': opt.sockopt.dontfrag = atoi(optarg); break; case 'c': - opt.v6.tclass.ena = true; - opt.v6.tclass.val = atoi(optarg); + opt.cmsg.tclass.ena = true; + opt.cmsg.tclass.val = atoi(optarg); break; case 'C': opt.sockopt.tclass = atoi(optarg); break; case 'l': - opt.v6.hlimit.ena = true; - opt.v6.hlimit.val = atoi(optarg); + opt.cmsg.hlimit.ena = true; + opt.cmsg.hlimit.val = atoi(optarg); break; case 'L': opt.sockopt.hlimit = atoi(optarg); break; case 'H': - opt.v6.exthdr.ena = true; + opt.cmsg.exthdr.ena = true; switch (optarg[0]) { case 'h': - opt.v6.exthdr.val = IPV6_HOPOPTS; + opt.cmsg.exthdr.val = IPV6_HOPOPTS; break; case 'd': - opt.v6.exthdr.val = IPV6_DSTOPTS; + opt.cmsg.exthdr.val = IPV6_DSTOPTS; break; case 'r': - opt.v6.exthdr.val = IPV6_RTHDRDSTOPTS; + opt.cmsg.exthdr.val = IPV6_RTHDRDSTOPTS; break; default: printf("Error: hdr type: %s\n", optarg); @@ -261,12 +267,20 @@ cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz) SOL_SOCKET, SO_MARK, &opt.mark); ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, SOL_SOCKET, SO_PRIORITY, &opt.priority); - ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, - SOL_IPV6, IPV6_DONTFRAG, &opt.v6.dontfrag); - ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, - SOL_IPV6, IPV6_TCLASS, &opt.v6.tclass); - ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, - SOL_IPV6, IPV6_HOPLIMIT, &opt.v6.hlimit); + + if (opt.sock.family == AF_INET) { + ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, + SOL_IP, IP_TOS, &opt.cmsg.tclass); + ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, + SOL_IP, IP_TTL, &opt.cmsg.hlimit); + } else { + ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, + SOL_IPV6, IPV6_DONTFRAG, &opt.cmsg.dontfrag); + ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, + SOL_IPV6, IPV6_TCLASS, &opt.cmsg.tclass); + ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, + SOL_IPV6, IPV6_HOPLIMIT, &opt.cmsg.hlimit); + } if (opt.txtime.ena) { __u64 txtime; @@ -297,14 +311,14 @@ cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz) *(__u32 *)CMSG_DATA(cmsg) = SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE; } - if (opt.v6.exthdr.ena) { + if (opt.cmsg.exthdr.ena) { cmsg = (struct cmsghdr *)(cbuf + cmsg_len); cmsg_len += CMSG_SPACE(8); if (cbuf_sz < cmsg_len) error(ERN_CMSG_WR, EFAULT, "cmsg buffer too small"); cmsg->cmsg_level = SOL_IPV6; - cmsg->cmsg_type = opt.v6.exthdr.val; + cmsg->cmsg_type = opt.cmsg.exthdr.val; cmsg->cmsg_len = CMSG_LEN(8); *(__u64 *)CMSG_DATA(cmsg) = 0; } @@ -405,23 +419,35 @@ static void ca_set_sockopts(int fd) setsockopt(fd, SOL_SOCKET, SO_MARK, &opt.sockopt.mark, sizeof(opt.sockopt.mark))) error(ERN_SOCKOPT, errno, "setsockopt SO_MARK"); - if (opt.sockopt.dontfrag && - setsockopt(fd, SOL_IPV6, IPV6_DONTFRAG, - &opt.sockopt.dontfrag, sizeof(opt.sockopt.dontfrag))) - error(ERN_SOCKOPT, errno, "setsockopt IPV6_DONTFRAG"); - if (opt.sockopt.tclass && - setsockopt(fd, SOL_IPV6, IPV6_TCLASS, - &opt.sockopt.tclass, sizeof(opt.sockopt.tclass))) - error(ERN_SOCKOPT, errno, "setsockopt IPV6_TCLASS"); - if (opt.sockopt.hlimit && - setsockopt(fd, SOL_IPV6, IPV6_UNICAST_HOPS, - &opt.sockopt.hlimit, sizeof(opt.sockopt.hlimit))) - error(ERN_SOCKOPT, errno, "setsockopt IPV6_HOPLIMIT"); if (opt.sockopt.priority && setsockopt(fd, SOL_SOCKET, SO_PRIORITY, &opt.sockopt.priority, sizeof(opt.sockopt.priority))) error(ERN_SOCKOPT, errno, "setsockopt SO_PRIORITY"); + if (opt.sock.family == AF_INET) { + if (opt.sockopt.tclass && + setsockopt(fd, SOL_IP, IP_TOS, + &opt.sockopt.tclass, sizeof(opt.sockopt.tclass))) + error(ERN_SOCKOPT, errno, "setsockopt IP_TOS"); + if (opt.sockopt.hlimit && + setsockopt(fd, SOL_IP, IP_TTL, + &opt.sockopt.hlimit, sizeof(opt.sockopt.hlimit))) + error(ERN_SOCKOPT, errno, "setsockopt IP_TTL"); + } else { + if (opt.sockopt.dontfrag && + setsockopt(fd, SOL_IPV6, IPV6_DONTFRAG, + &opt.sockopt.dontfrag, sizeof(opt.sockopt.dontfrag))) + error(ERN_SOCKOPT, errno, "setsockopt IPV6_DONTFRAG"); + if (opt.sockopt.tclass && + setsockopt(fd, SOL_IPV6, IPV6_TCLASS, + &opt.sockopt.tclass, sizeof(opt.sockopt.tclass))) + error(ERN_SOCKOPT, errno, "setsockopt IPV6_TCLASS"); + if (opt.sockopt.hlimit && + setsockopt(fd, SOL_IPV6, IPV6_UNICAST_HOPS, + &opt.sockopt.hlimit, sizeof(opt.sockopt.hlimit))) + error(ERN_SOCKOPT, errno, "setsockopt IPV6_HOPLIMIT"); + } + if (opt.txtime.ena) { struct sock_txtime so_txtime = { .clockid = CLOCK_MONOTONIC, @@ -465,7 +491,8 @@ int main(int argc, char *argv[]) if (err) { fprintf(stderr, "Can't resolve address [%s]:%s\n", opt.host, opt.service); - return ERN_SOCK_CREATE; + err = ERN_SOCK_CREATE; + goto err_free_buff; } if (ai->ai_family == AF_INET6 && opt.sock.proto == IPPROTO_ICMP) @@ -474,8 +501,8 @@ int main(int argc, char *argv[]) fd = socket(ai->ai_family, opt.sock.type, opt.sock.proto); if (fd < 0) { fprintf(stderr, "Can't open socket: %s\n", strerror(errno)); - freeaddrinfo(ai); - return ERN_RESOLVE; + err = ERN_RESOLVE; + goto err_free_info; } if (opt.sock.proto == IPPROTO_ICMP) { @@ -511,7 +538,7 @@ int main(int argc, char *argv[]) cs_write_cmsg(fd, &msg, cbuf, sizeof(cbuf)); for (i = 0; i < opt.num_pkt; i++) { - err = sendmsg(fd, &msg, 0); + err = sendmsg(fd, &msg, opt.msg_more ? MSG_MORE : 0); if (err < 0) { if (!opt.silent_send) fprintf(stderr, "send failed: %s\n", strerror(errno)); @@ -522,6 +549,14 @@ int main(int argc, char *argv[]) err = ERN_SEND_SHORT; goto err_out; } + if (opt.msg_more) { + err = write(fd, NULL, 0); + if (err < 0) { + fprintf(stderr, "send more: %s\n", strerror(errno)); + err = ERN_SEND_MORE; + goto err_out; + } + } } err = ERN_SUCCESS; @@ -540,6 +575,9 @@ int main(int argc, char *argv[]) err_out: close(fd); +err_free_info: freeaddrinfo(ai); +err_free_buff: + free(buf); return err; } diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 5b9baf708950..2a390cae41bf 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -1,109 +1,135 @@ -CONFIG_USER_NS=y -CONFIG_NET_NS=y +CONFIG_AMT=m +CONFIG_BAREUDP=m CONFIG_BONDING=m CONFIG_BPF_SYSCALL=y -CONFIG_TEST_BPF=m -CONFIG_NUMA=y -CONFIG_RPS=y -CONFIG_SYSFS=y -CONFIG_PROC_SYSCTL=y -CONFIG_NET_VRF=y -CONFIG_NET_L3_MASTER_DEV=y -CONFIG_IPV6=y -CONFIG_IPV6_MULTIPLE_TABLES=y -CONFIG_VETH=y -CONFIG_NET_IPVTI=y -CONFIG_IPV6_VTI=y -CONFIG_DUMMY=y -CONFIG_BRIDGE_VLAN_FILTERING=y CONFIG_BRIDGE=y +CONFIG_BRIDGE_VLAN_FILTERING=y +CONFIG_CAN=m +CONFIG_CAN_DEV=m +CONFIG_CAN_VXCAN=m +CONFIG_CRYPTO_ARIA=y CONFIG_CRYPTO_CHACHA20POLY1305=m -CONFIG_VLAN_8021Q=y +CONFIG_CRYPTO_SHA1=y +CONFIG_CRYPTO_SM4_GENERIC=y +CONFIG_DEBUG_INFO_BTF=y +CONFIG_DEBUG_INFO_BTF_MODULES=n +CONFIG_DUMMY=y CONFIG_GENEVE=m CONFIG_IFB=y CONFIG_INET_DIAG=y CONFIG_INET_ESP=y CONFIG_INET_ESP_OFFLOAD=y -CONFIG_NET_FOU=y -CONFIG_NET_FOU_IP_TUNNELS=y -CONFIG_NETFILTER=y -CONFIG_NETFILTER_ADVANCED=y -CONFIG_NF_CONNTRACK=m -CONFIG_IPV6_MROUTE=y -CONFIG_IPV6_SIT=y -CONFIG_IP_DCCP=m -CONFIG_NF_NAT=m +CONFIG_IP6_NF_FILTER=m CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP_NF_IPTABLES=m +CONFIG_IP6_NF_IPTABLES_LEGACY=m +CONFIG_IP6_NF_MANGLE=m +CONFIG_IP6_NF_MATCH_RPFILTER=m CONFIG_IP6_NF_NAT=m CONFIG_IP6_NF_RAW=m +CONFIG_IP6_NF_TARGET_REJECT=m +CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_IPTABLES_LEGACY=m +CONFIG_IP_NF_MANGLE=m +CONFIG_IP_NF_MATCH_RPFILTER=m CONFIG_IP_NF_NAT=m CONFIG_IP_NF_RAW=m +CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IP_NF_TARGET_TTL=m +CONFIG_IP_SCTP=m +CONFIG_IPV6=y CONFIG_IPV6_GRE=m +CONFIG_IPV6_ILA=m +CONFIG_IPV6_IOAM6_LWTUNNEL=y +CONFIG_IPV6_MROUTE=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_RPL_LWTUNNEL=y CONFIG_IPV6_SEG6_LWTUNNEL=y +CONFIG_IPV6_SIT=y +CONFIG_IPV6_VTI=y +CONFIG_IPVLAN=m +CONFIG_IPVTAP=m +CONFIG_KALLSYMS=y +CONFIG_L2TP=m CONFIG_L2TP_ETH=m CONFIG_L2TP_IP=m -CONFIG_L2TP=m CONFIG_L2TP_V3=y CONFIG_MACSEC=m CONFIG_MACVLAN=y CONFIG_MACVTAP=y CONFIG_MPLS=y +CONFIG_MPLS_IPTUNNEL=m +CONFIG_MPLS_ROUTING=m CONFIG_MPTCP=y -CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_IPV6=y -CONFIG_NF_TABLES_IPV4=y -CONFIG_NFT_NAT=m -CONFIG_NETFILTER_XT_MATCH_LENGTH=m CONFIG_NET_ACT_CSUM=m CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_GACT=m +CONFIG_NET_ACT_MIRRED=m CONFIG_NET_ACT_PEDIT=m +CONFIG_NET_ACT_TUNNEL_KEY=m CONFIG_NET_CLS_BASIC=m CONFIG_NET_CLS_BPF=m +CONFIG_NET_CLS_FLOWER=m CONFIG_NET_CLS_MATCHALL=m CONFIG_NET_CLS_U32=m -CONFIG_NET_IPGRE_DEMUX=m +CONFIG_NETDEVSIM=m +CONFIG_NET_DROP_MONITOR=m +CONFIG_NETFILTER=y +CONFIG_NETFILTER_ADVANCED=y +CONFIG_NETFILTER_XTABLES_LEGACY=y +CONFIG_NETFILTER_XT_MATCH_BPF=m +CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NETFILTER_XT_MATCH_POLICY=m +CONFIG_NETFILTER_XT_NAT=m +CONFIG_NETFILTER_XT_TARGET_HL=m +CONFIG_NET_FOU=y +CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_NET_IPGRE=m +CONFIG_NET_IPGRE_DEMUX=m CONFIG_NET_IPIP=y +CONFIG_NET_IPVTI=y +CONFIG_NETKIT=y +CONFIG_NET_L3_MASTER_DEV=y +CONFIG_NET_NS=y +CONFIG_NET_PKTGEN=m +CONFIG_NET_SCH_ETF=m +CONFIG_NET_SCH_FQ=m CONFIG_NET_SCH_FQ_CODEL=m CONFIG_NET_SCH_HTB=m -CONFIG_NET_SCH_FQ=m -CONFIG_NET_SCH_ETF=m +CONFIG_NET_SCH_INGRESS=m CONFIG_NET_SCH_NETEM=y CONFIG_NET_SCH_PRIO=m -CONFIG_NFT_COMPAT=m +CONFIG_NET_VRF=y +CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_OVS=y CONFIG_NF_FLOW_TABLE=m +CONFIG_NF_NAT=m +CONFIG_NF_TABLES=m +CONFIG_NF_TABLES_IPV4=y +CONFIG_NF_TABLES_IPV6=y +CONFIG_NFT_COMPAT=m +CONFIG_NFT_NAT=m +CONFIG_NUMA=y CONFIG_OPENVSWITCH=m CONFIG_OPENVSWITCH_GENEVE=m CONFIG_OPENVSWITCH_GRE=m CONFIG_OPENVSWITCH_VXLAN=m +CONFIG_PROC_SYSCTL=y CONFIG_PSAMPLE=m +CONFIG_RPS=y +CONFIG_SYSFS=y +CONFIG_TAP=m CONFIG_TCP_MD5SIG=y CONFIG_TEST_BLACKHOLE_DEV=m -CONFIG_KALLSYMS=y +CONFIG_TEST_BPF=m CONFIG_TLS=m CONFIG_TRACEPOINTS=y -CONFIG_NET_DROP_MONITOR=m -CONFIG_NETDEVSIM=m -CONFIG_MPLS_ROUTING=m -CONFIG_MPLS_IPTUNNEL=m -CONFIG_NET_SCH_INGRESS=m -CONFIG_NET_CLS_FLOWER=m -CONFIG_NET_ACT_TUNNEL_KEY=m -CONFIG_NET_ACT_MIRRED=m -CONFIG_BAREUDP=m -CONFIG_IPV6_IOAM6_LWTUNNEL=y -CONFIG_CRYPTO_SM4_GENERIC=y -CONFIG_AMT=m CONFIG_TUN=y +CONFIG_USER_NS=y +CONFIG_VETH=y +CONFIG_VLAN_8021Q=y CONFIG_VXLAN=m -CONFIG_IP_SCTP=m -CONFIG_NETFILTER_XT_MATCH_POLICY=m -CONFIG_CRYPTO_ARIA=y CONFIG_XFRM_INTERFACE=m CONFIG_XFRM_USER=m -CONFIG_IP_NF_MATCH_RPFILTER=m -CONFIG_IP6_NF_MATCH_RPFILTER=m diff --git a/tools/testing/selftests/net/double_udp_encap.sh b/tools/testing/selftests/net/double_udp_encap.sh new file mode 100755 index 000000000000..9aaf97cdf141 --- /dev/null +++ b/tools/testing/selftests/net/double_udp_encap.sh @@ -0,0 +1,393 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +# shellcheck disable=SC2155 # prefer RO variable over return value from cmd +readonly CLI="$(dirname "$(readlink -f "$0")")/../../../net/ynl/pyynl/cli.py" + +readonly SRC=1 +readonly DST=2 + +readonly NET_V4=192.168.1. +readonly NET_V6=2001:db8:: +readonly OL1_NET_V4=172.16.1. +readonly OL1_NET_V6=2001:db8:1:: +readonly OL2_NET_V4=172.16.2. +readonly OL2_NET_V6=2001:db8:2:: + +trap cleanup_all_ns EXIT + +# shellcheck disable=SC2329 # can't figure out usage trough a variable +is_ipv6() { + if [[ $1 =~ .*:.* ]]; then + return 0 + fi + return 1 +} + +# shellcheck disable=SC2329 # can't figure out usage trough a variable +create_gnv_endpoint() { + local -r netns=$1 + local -r bm_rem_addr=$2 + local -r gnv_dev=$3 + local -r gnv_id=$4 + local opts=$5 + local gnv_json + local rem + + if is_ipv6 "$bm_rem_addr"; then + rem=remote6 + else + rem=remote + fi + + # add ynl opt separator, if needed + [ -n "$opts" ] && opts=", $opts" + + gnv_json="{ \"id\": $gnv_id, \"$rem\": \"$bm_rem_addr\"$opts }" + ip netns exec "$netns" "$CLI" --family rt-link --create --excl \ + --do newlink --json "{\"ifname\": \"$gnv_dev\", + \"linkinfo\": {\"kind\":\"geneve\", + \"data\": $gnv_json } }" > /dev/null + ip -n "$netns" link set dev "$gnv_dev" up +} + +# shellcheck disable=SC2329 # can't figure out usage trough a variable +create_vxlan_endpoint() { + local -r netns=$1 + local -r bm_rem_addr=$2 + local -r vxlan_dev=$3 + local -r vxlan_id=$4 + local -r opts_str=$5 + local oldifs + local -a opts + local opt + + # convert the arguments from yaml format + oldifs=$IFS + IFS=',' + for opt in $opts_str; do + local pattern='"port":' + + [ -n "$opt" ] || continue + + opts+=("${opt/$pattern*/dstport}" "${opt/$pattern/}") + done + IFS=$oldifs + [ ${#opts[@]} -gt 0 ] || opts+=("dstport" "4789") + + ip -n "$netns" link add "$vxlan_dev" type vxlan id "$vxlan_id" \ + remote "$bm_rem_addr" "${opts[@]}" + ip -n "$netns" link set dev "$vxlan_dev" up +} + +create_ns() { + local nested_opt='"port":6082' + local create_endpoint + local options="$1" + local feature + local dev + local id + local ns + + RET=0 + + # +-------------+ +-------------+ + # | NS_SRC | | NS_NST_DST | + # | | | | + # | gnv_nst1 | | gnv_nst2 | + # | + | | + | + # | | | | | | + # | + | | + | + # | gnv1 | | gnv2 | + # | + | | + | + # | | | | | | + # | + veth1 +--------+ veth2 + | + # | | | | + # +-------------+ +-------------+ + + setup_ns NS_SRC NS_DST + + # concatenate caller provided options and default one + [ -n "$2" ] && nested_opt="$nested_opt,$2" + + ip link add name "veth$SRC" netns "$NS_SRC" type veth \ + peer name "veth$DST" netns "$NS_DST" + case "$ENCAP" in + vxlan) + create_endpoint=create_vxlan_endpoint + dev=vx + ;; + geneve) + create_endpoint=create_gnv_endpoint + dev=gnv + ;; + esac + + id=1 + for ns in "${NS_LIST[@]}"; do + ip -n "$ns" link set dev "veth$id" up + + # ensure the sender can do large write just after 3whs + ip netns exec "$ns" \ + sysctl -qw net.ipv4.tcp_wmem="4096 4194304 4194304" + + # note that 3 - $SRC == $DST and 3 - $DST == $SRC + if [ $FAMILY = "4" ]; then + ip -n "$ns" addr add dev "veth$id" "$NET_V4$id/24" + $create_endpoint "$ns" "$NET_V4$((3 - id))" \ + "$dev$id" 4 "$options" + ip -n "$ns" addr add dev "$dev$id" "$OL1_NET_V4$id/24" + + # nested tunnel devices + # pmtu can't be propagated to upper layer devices; + # need manual adjust + $create_endpoint "$ns" "$OL1_NET_V4$((3 - id))" \ + "$dev"_nst"$id" 40 "$nested_opt" + ip -n "$ns" addr add dev "$dev"_nst"$id" \ + "$OL2_NET_V4$id/24" + ip -n "$ns" link set dev "$dev"_nst"$id" mtu 1392 + else + ip -n "$ns" addr add dev "veth$id" "$NET_V6$id/64" \ + nodad + $create_endpoint "$ns" "$NET_V6$((3 - id))" \ + "$dev"6"$id" 6 "$options" + ip -n "$ns" addr add dev "$dev"6"$id" \ + "$OL1_NET_V6$id/64" nodad + + $create_endpoint "$ns" "$OL1_NET_V6$((3 - id))" \ + "$dev"6_nst"$id" 60 "$nested_opt" + ip -n "$ns" addr add dev "$dev"6_nst"$id" \ + "$OL2_NET_V6$id/64" nodad + ip -n "$ns" link set dev "$dev"6_nst"$id" mtu 1352 + fi + id=$((id+1)) + done + + # enable GRO heuristic on the veth peer and ensure UDP L4 over tunnel is + # actually segmented + for feature in tso tx-udp_tnl-segmentation; do + ip netns exec "$NS_SRC" ethtool -K "veth$SRC" \ + "$feature" off 2>/dev/null + done +} + +create_ns_gso() { + local dev + + create_ns "$@" + if [ "$ENCAP" = "geneve" ]; then + dev=gnv + else + dev=vx + fi + [ "$FAMILY" = "6" ] && dev="$dev"6 + ip netns exec "$NS_SRC" ethtool -K "$dev$SRC" \ + tx-gso-partial on \ + tx-udp_tnl-segmentation on \ + tx-udp_tnl-csum-segmentation on +} + +create_ns_gso_gro() { + create_ns_gso "$@" + ip netns exec "$NS_DST" ethtool -K "veth$DST" gro on + ip netns exec "$NS_SRC" ethtool -K "veth$SRC" tx off >/dev/null 2>&1 +} + +run_test() { + local -r dst=$NET$DST + local -r msg=$1 + local -r total_size=$2 + local -r encappkts=$3 + local inner_proto_offset=0 + local inner_maclen=14 + local rx_family="-4" + local ipt=iptables + local bpf_filter + local -a rx_args + local wire_pkts + local rcvpkts + local encl=8 + local dport + local pkts + local snd + + if [ $FAMILY = "6" ]; then + ipt=ip6tables + else + # rx program does not support '-6' and implies ipv6 usage by + # default + rx_args=("$rx_family") + fi + + # The received can only check fixed size packet + pkts=$((total_size / GSO_SIZE)) + if [ -n "$4" ]; then + wire_pkts=$4 + elif [ $((total_size % GSO_SIZE)) -eq 0 ]; then + wire_pkts=1 + rx_args+=("-l" "$GSO_SIZE") + else + wire_pkts=2 + pkts=$((pkts + 1)) + fi + + if [ "$ENCAP" = "geneve" ]; then + dport=6081 + else + dport=4789 + fi + + # Either: + # - IPv4, nested tunnel carries UDP over IPv4, with dport 6082, + # innermost is TCP over IPv4 on port 8000 + # - IPv6, nested tunnel carries UDP over IPv6, with dport 6082, + # innermost is TCP over IPv6 on port 8000 + # The nested tunnel port is 6082 and the nested encap len is 8 + # regardless of the encap type (no geneve opts). + # In inherit protocol mode there is no nested mac hdr and the nested + # l3 protocol type field belongs to the geneve hdr. + [ "$USE_HINT" = true ] && encl=16 + [ "$INHERIT" = true ] && inner_maclen=0 + [ "$INHERIT" = true ] && inner_proto_offset=-4 + local inner=$((inner_maclen+encl)) + local proto=$((inner_maclen+encl+inner_proto_offset)) + bpf_filter=$(nfbpf_compile "(ip && + ip[$((40+encl))] == 0x08 && ip[$((41+encl))] == 0x00 && + ip[$((51+encl))] == 0x11 && + ip[$((64+encl))] == 0x17 && ip[$((65+encl))] == 0xc2 && + ip[$((76+proto))] == 0x08 && ip[$((77+proto))] == 0x00 && + ip[$((87+inner))] == 0x6 && + ip[$((100+inner))] == 0x1f && ip[$((101+inner))] == 0x40) || + (ip6 && + ip6[$((60+encl))] == 0x86 && ip6[$((61+encl))] == 0xdd && + ip6[$((68+encl))] == 0x11 && + ip6[$((104+encl))] == 0x17 && ip6[$((105+encl))] == 0xc2 && + ip6[$((116+proto))] == 0x86 && ip6[$((117+proto))] == 0xdd && + ip6[$((124+inner))] == 0x6 && + ip6[$((160+inner))] == 0x1f && ip6[$((161+inner))] == 0x40)") + + # ignore shorts packet, to avoid arp/mld induced noise + ip netns exec "$NS_SRC" "$ipt" -A OUTPUT -p udp --dport "$dport" \ + -m length --length 600:65535 -m bpf --bytecode "$bpf_filter" + ip netns exec "$NS_DST" "$ipt" -A INPUT -p udp --dport "$dport" \ + -m length --length 600:65535 -m bpf --bytecode "$bpf_filter" + ip netns exec "$NS_DST" ./udpgso_bench_rx -C 2000 -t -R 100 \ + -n "$pkts" "${rx_args[@]}" & + local pid=$! + wait_local_port_listen "$NS_DST" 8000 tcp + ip netns exec "$NS_SRC" ./udpgso_bench_tx -"$FAMILY" -t -M 1 \ + -s "$total_size" -D "$dst" + local ret=$? + check_err "$ret" "client failure exit code $ret" + wait "$pid" + ret=$? + check_err "$ret" "sever failure exit code $ret" + + snd=$(ip netns exec "$NS_SRC" "$ipt"-save -c | + grep "dport $dport" | sed -e 's/\[//' -e 's/:.*//') + + [ "$snd" = "$wire_pkts" ] + # shellcheck disable=SC2319 # known false positive + check_err $? "send $snd packets on the lowest link, expected $wire_pkts" + + rcvpkts=$(ip netns exec "$NS_DST" "$ipt"-save -c | \ + grep "dport $dport" | sed -e 's/\[//' -e 's/:.*//') + + [ "$rcvpkts" = "$encappkts" ] + check_err $? "received $rcvpkts $ENCAP packets, expected $encappkts" + log_test "$msg" +} + +run_tests() { + for FAMILY in 4 6; do + NET=$OL2_NET_V4 + GSO_SIZE=1340 # 1392 - 20 - 32 + + if [ $FAMILY = 6 ]; then + NET=$OL2_NET_V6 + GSO_SIZE=1280 # 1352 - 40 - 32 + fi + + echo "IPv$FAMILY" + + unset USE_HINT + unset INHERIT + + # "geneve" must be last encap in list, so that later + # test cases will run on it + for ENCAP in "vxlan" "geneve"; do + create_ns + run_test "No GSO - $ENCAP" $((GSO_SIZE * 4)) 4 4 + cleanup_all_ns + + create_ns_gso + run_test "GSO without GRO - $ENCAP" $((GSO_SIZE * 4)) \ + 4 1 + cleanup_all_ns + + # IPv4 only test + [ $FAMILY = "4" ] || continue + create_ns_gso + ip netns exec "$NS_SRC" \ + sysctl -qw net.ipv4.ip_no_pmtu_disc=1 + run_test "GSO disable due to no fixedid - $ENCAP" \ + $((GSO_SIZE * 4)) 4 4 + cleanup_all_ns + done + + # GRO tests imply/require geneve encap, the only one providing + # GRO hints + create_ns_gso_gro + run_test "double tunnel GRO, no hints" $((GSO_SIZE * 4)) 4 + cleanup_all_ns + + # hint option is expected for all the following tests in the RX + # path + USE_HINT=true + create_ns_gso_gro \ + '"gro-hint":1,"udp-zero-csum6-tx":1,"udp-zero-csum6-rx":1' \ + '"udp-zero-csum6-tx":1,"udp-zero-csum6-rx":1' + run_test "double tunnel GRO" $((GSO_SIZE * 4)) 1 + cleanup_all_ns + + create_ns_gso_gro '"gro-hint":1,"udp-csum":1' '"udp-csum":1' + run_test "double tunnel GRO - csum complete" $((GSO_SIZE * 4))\ + 1 + cleanup_all_ns + + create_ns_gso_gro '"gro-hint":1' \ + '"udp-csum":0,"udp-zero-csum6-tx":1,"udp-zero-csum6-rx":1' + run_test "double tunnel GRO - no nested csum" \ + $((GSO_SIZE * 4)) 1 + cleanup_all_ns + + create_ns_gso_gro \ + '"gro-hint":1,"udp-zero-csum6-tx":1,"udp-zero-csum6-rx":1' \ + '"udp-csum":1' + run_test "double tunnel GRO - nested csum, outer 0-csum, skip"\ + $((GSO_SIZE * 4)) 4 + cleanup_all_ns + + INHERIT=true + create_ns_gso_gro '"gro-hint":1,"udp-csum":1' \ + '"udp-csum":1,"inner-proto-inherit":1' + run_test "double tunnel GRO - nested inherit proto" \ + $((GSO_SIZE * 4)) 1 + cleanup_all_ns + unset INHERIT + + create_ns_gso_gro '"gro-hint":1' + run_test "double tunnel GRO - short last pkt" \ + $((GSO_SIZE * 4 + GSO_SIZE / 2)) 2 + cleanup_all_ns + done +} + +require_command nfbpf_compile +require_command jq + +# tcp retransmisions will break the accounting +xfail_on_slow run_tests +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/net/epoll_busy_poll.c b/tools/testing/selftests/net/epoll_busy_poll.c index 16e457c2f877..adf8dd0b5e0b 100644 --- a/tools/testing/selftests/net/epoll_busy_poll.c +++ b/tools/testing/selftests/net/epoll_busy_poll.c @@ -23,7 +23,7 @@ #include <sys/ioctl.h> #include <sys/socket.h> -#include "../kselftest_harness.h" +#include "kselftest_harness.h" /* if the headers haven't been updated, we need to define some things */ #if !defined(EPOLL_IOC_TYPE) diff --git a/tools/testing/selftests/net/fcnal-ipv4.sh b/tools/testing/selftests/net/fcnal-ipv4.sh new file mode 100755 index 000000000000..82f9c867c3e8 --- /dev/null +++ b/tools/testing/selftests/net/fcnal-ipv4.sh @@ -0,0 +1,2 @@ +#!/bin/sh +./fcnal-test.sh -t ipv4 diff --git a/tools/testing/selftests/net/fcnal-ipv6.sh b/tools/testing/selftests/net/fcnal-ipv6.sh new file mode 100755 index 000000000000..ab1fc7aa3caf --- /dev/null +++ b/tools/testing/selftests/net/fcnal-ipv6.sh @@ -0,0 +1,2 @@ +#!/bin/sh +./fcnal-test.sh -t ipv6 diff --git a/tools/testing/selftests/net/fcnal-other.sh b/tools/testing/selftests/net/fcnal-other.sh new file mode 100755 index 000000000000..a840cf80b32e --- /dev/null +++ b/tools/testing/selftests/net/fcnal-other.sh @@ -0,0 +1,2 @@ +#!/bin/sh +./fcnal-test.sh -t other diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 899dbad0104b..890c3f8e51bb 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -189,7 +189,7 @@ show_hint() kill_procs() { killall nettest ping ping6 >/dev/null 2>&1 - sleep 1 + slowwait 2 sh -c 'test -z "$(pgrep '"'^(nettest|ping|ping6)$'"')"' } set_ping_group() @@ -424,6 +424,8 @@ create_ns() ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1 ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.forwarding=1 ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.forwarding=1 + ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.accept_dad=0 + ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.accept_dad=0 } # create veth pair to connect namespaces and apply addresses. @@ -875,7 +877,7 @@ ipv4_tcp_md5_novrf() # basic use case log_start run_cmd nettest -s -M ${MD5_PW} -m ${NSB_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: Single address config" @@ -883,7 +885,7 @@ ipv4_tcp_md5_novrf() log_start show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: Server no config, client uses password" @@ -891,7 +893,7 @@ ipv4_tcp_md5_novrf() log_start show_hint "Should timeout since client uses wrong password" run_cmd nettest -s -M ${MD5_PW} -m ${NSB_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: Client uses wrong password" @@ -899,7 +901,7 @@ ipv4_tcp_md5_novrf() log_start show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -s -M ${MD5_PW} -m ${NSB_LO_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: Client address does not match address configured with password" @@ -910,7 +912,7 @@ ipv4_tcp_md5_novrf() # client in prefix log_start run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: Prefix config" @@ -918,7 +920,7 @@ ipv4_tcp_md5_novrf() log_start show_hint "Should timeout since client uses wrong password" run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: Prefix config, client uses wrong password" @@ -926,7 +928,7 @@ ipv4_tcp_md5_novrf() log_start show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -c ${NSB_LO_IP} -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: Prefix config, client address not in configured prefix" } @@ -943,7 +945,7 @@ ipv4_tcp_md5() # basic use case log_start run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Single address config" @@ -951,7 +953,7 @@ ipv4_tcp_md5() log_start show_hint "Should timeout since server does not have MD5 auth" run_cmd nettest -s -I ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Server no config, client uses password" @@ -959,7 +961,7 @@ ipv4_tcp_md5() log_start show_hint "Should timeout since client uses wrong password" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Client uses wrong password" @@ -967,7 +969,7 @@ ipv4_tcp_md5() log_start show_hint "Should timeout since server config differs from client" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_LO_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Client address does not match address configured with password" @@ -978,7 +980,7 @@ ipv4_tcp_md5() # client in prefix log_start run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Prefix config" @@ -986,7 +988,7 @@ ipv4_tcp_md5() log_start show_hint "Should timeout since client uses wrong password" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Prefix config, client uses wrong password" @@ -994,7 +996,7 @@ ipv4_tcp_md5() log_start show_hint "Should timeout since client address is outside of prefix" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -c ${NSB_LO_IP} -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Prefix config, client address not in configured prefix" @@ -1005,14 +1007,14 @@ ipv4_tcp_md5() log_start run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF" log_start run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF" @@ -1020,7 +1022,7 @@ ipv4_tcp_md5() show_hint "Should timeout since client in default VRF uses VRF password" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF with VRF pw" @@ -1028,21 +1030,21 @@ ipv4_tcp_md5() show_hint "Should timeout since client in VRF uses default VRF password" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF with default VRF pw" log_start run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF" log_start run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF" @@ -1050,7 +1052,7 @@ ipv4_tcp_md5() show_hint "Should timeout since client in default VRF uses VRF password" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF with VRF pw" @@ -1058,7 +1060,7 @@ ipv4_tcp_md5() show_hint "Should timeout since client in VRF uses default VRF password" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF with default VRF pw" @@ -1082,14 +1084,14 @@ test_ipv4_md5_vrf__vrf_server__no_bind_ifindex() log_start show_hint "Simulates applications using VRF without TCP_MD5SIG_FLAG_IFINDEX" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: VRF-bound server, unbound key accepts connection" log_start show_hint "Binding both the socket and the key is not required but it works" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: VRF-bound server, bound key accepts connection" } @@ -1103,25 +1105,25 @@ test_ipv4_md5_vrf__global_server__bind_ifindex0() log_start run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Global server, Key bound to ifindex=0 rejects VRF connection" log_start run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Global server, key bound to ifindex=0 accepts non-VRF connection" log_start run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Global server, key not bound to ifindex accepts VRF connection" log_start run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Global server, key not bound to ifindex accepts non-VRF connection" @@ -1193,7 +1195,7 @@ ipv4_tcp_novrf() do log_start run_cmd nettest -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "Global server" done @@ -1201,7 +1203,7 @@ ipv4_tcp_novrf() a=${NSA_IP} log_start run_cmd nettest -s -I ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "Device server" @@ -1221,13 +1223,13 @@ ipv4_tcp_novrf() do log_start run_cmd_nsb nettest -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -r ${a} -0 ${NSA_IP} log_test_addr ${a} $? 0 "Client" log_start run_cmd_nsb nettest -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 0 "Client, device bind" @@ -1249,7 +1251,7 @@ ipv4_tcp_novrf() do log_start run_cmd nettest -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -0 ${a} -1 ${a} log_test_addr ${a} $? 0 "Global server, local connection" done @@ -1257,7 +1259,7 @@ ipv4_tcp_novrf() a=${NSA_IP} log_start run_cmd nettest -s -I ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -0 ${a} log_test_addr ${a} $? 0 "Device server, unbound client, local connection" @@ -1266,7 +1268,7 @@ ipv4_tcp_novrf() log_start show_hint "Should fail 'Connection refused' since addresses on loopback are out of device scope" run_cmd nettest -s -I ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} log_test_addr ${a} $? 1 "Device server, unbound client, local connection" done @@ -1274,7 +1276,7 @@ ipv4_tcp_novrf() a=${NSA_IP} log_start run_cmd nettest -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -0 ${a} -d ${NSA_DEV} log_test_addr ${a} $? 0 "Global server, device client, local connection" @@ -1283,7 +1285,7 @@ ipv4_tcp_novrf() log_start show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope" run_cmd nettest -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 1 "Global server, device client, local connection" done @@ -1291,7 +1293,7 @@ ipv4_tcp_novrf() a=${NSA_IP} log_start run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -d ${NSA_DEV} -r ${a} -0 ${a} log_test_addr ${a} $? 0 "Device server, device client, local connection" @@ -1323,19 +1325,19 @@ ipv4_tcp_vrf() log_start show_hint "Should fail 'Connection refused' since global server with VRF is disabled" run_cmd nettest -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 1 "Global server" log_start run_cmd nettest -s -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "VRF server" log_start run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "Device server" @@ -1352,7 +1354,7 @@ ipv4_tcp_vrf() log_start show_hint "Should fail 'Connection refused' since global server with VRF is disabled" run_cmd nettest -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 1 "Global server, local connection" @@ -1374,14 +1376,14 @@ ipv4_tcp_vrf() log_start show_hint "client socket should be bound to VRF" run_cmd nettest -s -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "Global server" log_start show_hint "client socket should be bound to VRF" run_cmd nettest -s -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "VRF server" @@ -1396,7 +1398,7 @@ ipv4_tcp_vrf() log_start show_hint "client socket should be bound to device" run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "Device server" @@ -1406,7 +1408,7 @@ ipv4_tcp_vrf() log_start show_hint "Should fail 'Connection refused' since client is not bound to VRF" run_cmd nettest -s -I ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} log_test_addr ${a} $? 1 "Global server, local connection" done @@ -1418,13 +1420,13 @@ ipv4_tcp_vrf() do log_start run_cmd_nsb nettest -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -r ${a} -d ${VRF} log_test_addr ${a} $? 0 "Client, VRF bind" log_start run_cmd_nsb nettest -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 0 "Client, device bind" @@ -1443,7 +1445,7 @@ ipv4_tcp_vrf() do log_start run_cmd nettest -s -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -d ${VRF} -0 ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local connection" done @@ -1451,26 +1453,26 @@ ipv4_tcp_vrf() a=${NSA_IP} log_start run_cmd nettest -s -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -d ${NSA_DEV} -0 ${a} log_test_addr ${a} $? 0 "VRF server, device client, local connection" log_start show_hint "Should fail 'No route to host' since client is out of VRF scope" run_cmd nettest -s -I ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} log_test_addr ${a} $? 1 "VRF server, unbound client, local connection" log_start run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -d ${VRF} -0 ${a} log_test_addr ${a} $? 0 "Device server, VRF client, local connection" log_start run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -d ${NSA_DEV} -0 ${a} log_test_addr ${a} $? 0 "Device server, device client, local connection" } @@ -1509,7 +1511,7 @@ ipv4_udp_novrf() do log_start run_cmd nettest -D -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "Global server" @@ -1522,7 +1524,7 @@ ipv4_udp_novrf() a=${NSA_IP} log_start run_cmd nettest -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "Device server" @@ -1533,31 +1535,31 @@ ipv4_udp_novrf() do log_start run_cmd_nsb nettest -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -D -r ${a} -0 ${NSA_IP} log_test_addr ${a} $? 0 "Client" log_start run_cmd_nsb nettest -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -D -r ${a} -d ${NSA_DEV} -0 ${NSA_IP} log_test_addr ${a} $? 0 "Client, device bind" log_start run_cmd_nsb nettest -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -D -r ${a} -d ${NSA_DEV} -C -0 ${NSA_IP} log_test_addr ${a} $? 0 "Client, device send via cmsg" log_start run_cmd_nsb nettest -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S -0 ${NSA_IP} log_test_addr ${a} $? 0 "Client, device bind via IP_UNICAST_IF" log_start run_cmd_nsb nettest -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S -0 ${NSA_IP} -U log_test_addr ${a} $? 0 "Client, device bind via IP_UNICAST_IF, with connect()" @@ -1580,7 +1582,7 @@ ipv4_udp_novrf() do log_start run_cmd nettest -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -r ${a} -0 ${a} -1 ${a} log_test_addr ${a} $? 0 "Global server, local connection" done @@ -1588,7 +1590,7 @@ ipv4_udp_novrf() a=${NSA_IP} log_start run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -r ${a} log_test_addr ${a} $? 0 "Device server, unbound client, local connection" @@ -1597,7 +1599,7 @@ ipv4_udp_novrf() log_start show_hint "Should fail 'Connection refused' since address is out of device scope" run_cmd nettest -s -D -I ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -r ${a} log_test_addr ${a} $? 1 "Device server, unbound client, local connection" done @@ -1605,25 +1607,25 @@ ipv4_udp_novrf() a=${NSA_IP} log_start run_cmd nettest -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Global server, device client, local connection" log_start run_cmd nettest -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -C -r ${a} log_test_addr ${a} $? 0 "Global server, device send via cmsg, local connection" log_start run_cmd nettest -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -S -r ${a} log_test_addr ${a} $? 0 "Global server, device client via IP_UNICAST_IF, local connection" log_start run_cmd nettest -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -S -r ${a} -U log_test_addr ${a} $? 0 "Global server, device client via IP_UNICAST_IF, local connection, with connect()" @@ -1636,28 +1638,28 @@ ipv4_udp_novrf() log_start show_hint "Should fail since addresses on loopback are out of device scope" run_cmd nettest -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 2 "Global server, device client, local connection" log_start show_hint "Should fail since addresses on loopback are out of device scope" run_cmd nettest -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -r ${a} -d ${NSA_DEV} -C log_test_addr ${a} $? 1 "Global server, device send via cmsg, local connection" log_start show_hint "Should fail since addresses on loopback are out of device scope" run_cmd nettest -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection" log_start show_hint "Should fail since addresses on loopback are out of device scope" run_cmd nettest -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S -U log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection, with connect()" @@ -1667,7 +1669,7 @@ ipv4_udp_novrf() a=${NSA_IP} log_start run_cmd nettest -D -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -r ${a} -0 ${a} log_test_addr ${a} $? 0 "Device server, device client, local conn" @@ -1709,19 +1711,19 @@ ipv4_udp_vrf() log_start show_hint "Fails because ingress is in a VRF and global server is disabled" run_cmd nettest -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 1 "Global server" log_start run_cmd nettest -D -I ${VRF} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "VRF server" log_start run_cmd nettest -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "Enslaved device server" @@ -1733,7 +1735,7 @@ ipv4_udp_vrf() log_start show_hint "Should fail 'Connection refused' since global server is out of scope" run_cmd nettest -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 1 "Global server, VRF client, local connection" done @@ -1741,26 +1743,26 @@ ipv4_udp_vrf() a=${NSA_IP} log_start run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" log_start run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "VRF server, enslaved device client, local connection" a=${NSA_IP} log_start run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn" log_start run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn" @@ -1775,19 +1777,19 @@ ipv4_udp_vrf() do log_start run_cmd nettest -D -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "Global server" log_start run_cmd nettest -D -I ${VRF} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "VRF server" log_start run_cmd nettest -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "Enslaved device server" @@ -1802,13 +1804,13 @@ ipv4_udp_vrf() # log_start run_cmd_nsb nettest -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -d ${VRF} -D -r ${NSB_IP} -1 ${NSA_IP} log_test $? 0 "VRF client" log_start run_cmd_nsb nettest -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -d ${NSA_DEV} -D -r ${NSB_IP} -1 ${NSA_IP} log_test $? 0 "Enslaved device client" @@ -1829,31 +1831,31 @@ ipv4_udp_vrf() a=${NSA_IP} log_start run_cmd nettest -D -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Global server, VRF client, local conn" log_start run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" log_start run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "VRF server, device client, local conn" log_start run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn" log_start run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn" @@ -1861,7 +1863,7 @@ ipv4_udp_vrf() do log_start run_cmd nettest -D -s -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Global server, VRF client, local conn" done @@ -1870,7 +1872,7 @@ ipv4_udp_vrf() do log_start run_cmd nettest -s -D -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" done @@ -2093,7 +2095,7 @@ ipv4_rt() do log_start run_cmd nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest ${varg} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -2107,7 +2109,7 @@ ipv4_rt() do log_start run_cmd nettest ${varg} -s -I ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest ${varg} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -2120,7 +2122,7 @@ ipv4_rt() a=${NSA_IP} log_start run_cmd nettest ${varg} -s -I ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest ${varg} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -2134,7 +2136,7 @@ ipv4_rt() # log_start run_cmd_nsb nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest ${varg} -d ${VRF} -r ${NSB_IP} & sleep 3 run_cmd ip link del ${VRF} @@ -2145,7 +2147,7 @@ ipv4_rt() log_start run_cmd_nsb nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest ${varg} -d ${NSA_DEV} -r ${NSB_IP} & sleep 3 run_cmd ip link del ${VRF} @@ -2161,7 +2163,7 @@ ipv4_rt() do log_start run_cmd nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${VRF} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -2175,7 +2177,7 @@ ipv4_rt() do log_start run_cmd nettest ${varg} -I ${VRF} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${VRF} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -2189,7 +2191,7 @@ ipv4_rt() log_start run_cmd nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -2200,7 +2202,7 @@ ipv4_rt() log_start run_cmd nettest ${varg} -I ${VRF} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -2211,7 +2213,7 @@ ipv4_rt() log_start run_cmd nettest ${varg} -I ${NSA_DEV} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -2325,6 +2327,13 @@ ipv6_ping_novrf() log_test_addr ${a} $? 2 "ping local, device bind" done + for a in ${NSA_LO_IP6} ${NSA_LINKIP6}%${NSA_DEV} ${NSA_IP6} + do + log_start + run_cmd ${ping6} -c1 -w1 -I ::1 ${a} + log_test_addr ${a} $? 0 "ping local, from localhost" + done + # # ip rule blocks address # @@ -2561,7 +2570,7 @@ ipv6_tcp_md5_novrf() # basic use case log_start run_cmd nettest -6 -s -M ${MD5_PW} -m ${NSB_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: Single address config" @@ -2569,7 +2578,7 @@ ipv6_tcp_md5_novrf() log_start show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: Server no config, client uses password" @@ -2577,7 +2586,7 @@ ipv6_tcp_md5_novrf() log_start show_hint "Should timeout since client uses wrong password" run_cmd nettest -6 -s -M ${MD5_PW} -m ${NSB_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: Client uses wrong password" @@ -2585,7 +2594,7 @@ ipv6_tcp_md5_novrf() log_start show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -6 -s -M ${MD5_PW} -m ${NSB_LO_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: Client address does not match address configured with password" @@ -2596,7 +2605,7 @@ ipv6_tcp_md5_novrf() # client in prefix log_start run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: Prefix config" @@ -2604,7 +2613,7 @@ ipv6_tcp_md5_novrf() log_start show_hint "Should timeout since client uses wrong password" run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: Prefix config, client uses wrong password" @@ -2612,7 +2621,7 @@ ipv6_tcp_md5_novrf() log_start show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -c ${NSB_LO_IP6} -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: Prefix config, client address not in configured prefix" } @@ -2629,7 +2638,7 @@ ipv6_tcp_md5() # basic use case log_start run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Single address config" @@ -2637,7 +2646,7 @@ ipv6_tcp_md5() log_start show_hint "Should timeout since server does not have MD5 auth" run_cmd nettest -6 -s -I ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Server no config, client uses password" @@ -2645,7 +2654,7 @@ ipv6_tcp_md5() log_start show_hint "Should timeout since client uses wrong password" run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Client uses wrong password" @@ -2653,7 +2662,7 @@ ipv6_tcp_md5() log_start show_hint "Should timeout since server config differs from client" run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_LO_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Client address does not match address configured with password" @@ -2664,7 +2673,7 @@ ipv6_tcp_md5() # client in prefix log_start run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Prefix config" @@ -2672,7 +2681,7 @@ ipv6_tcp_md5() log_start show_hint "Should timeout since client uses wrong password" run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Prefix config, client uses wrong password" @@ -2680,7 +2689,7 @@ ipv6_tcp_md5() log_start show_hint "Should timeout since client address is outside of prefix" run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -c ${NSB_LO_IP6} -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Prefix config, client address not in configured prefix" @@ -2691,14 +2700,14 @@ ipv6_tcp_md5() log_start run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF" log_start run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF" @@ -2706,7 +2715,7 @@ ipv6_tcp_md5() show_hint "Should timeout since client in default VRF uses VRF password" run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF with VRF pw" @@ -2714,21 +2723,21 @@ ipv6_tcp_md5() show_hint "Should timeout since client in VRF uses default VRF password" run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF with default VRF pw" log_start run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF" log_start run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF" @@ -2736,7 +2745,7 @@ ipv6_tcp_md5() show_hint "Should timeout since client in default VRF uses VRF password" run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF with VRF pw" @@ -2744,7 +2753,7 @@ ipv6_tcp_md5() show_hint "Should timeout since client in VRF uses default VRF password" run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF with default VRF pw" @@ -2772,7 +2781,7 @@ ipv6_tcp_novrf() do log_start run_cmd nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "Global server" done @@ -2793,7 +2802,7 @@ ipv6_tcp_novrf() do log_start run_cmd_nsb nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -6 -r ${a} log_test_addr ${a} $? 0 "Client" done @@ -2802,7 +2811,7 @@ ipv6_tcp_novrf() do log_start run_cmd_nsb nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -6 -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 0 "Client, device bind" done @@ -2822,7 +2831,7 @@ ipv6_tcp_novrf() do log_start run_cmd nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} log_test_addr ${a} $? 0 "Global server, local connection" done @@ -2830,7 +2839,7 @@ ipv6_tcp_novrf() a=${NSA_IP6} log_start run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} -0 ${a} log_test_addr ${a} $? 0 "Device server, unbound client, local connection" @@ -2839,7 +2848,7 @@ ipv6_tcp_novrf() log_start show_hint "Should fail 'Connection refused' since addresses on loopback are out of device scope" run_cmd nettest -6 -s -I ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} log_test_addr ${a} $? 1 "Device server, unbound client, local connection" done @@ -2847,7 +2856,7 @@ ipv6_tcp_novrf() a=${NSA_IP6} log_start run_cmd nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} -d ${NSA_DEV} -0 ${a} log_test_addr ${a} $? 0 "Global server, device client, local connection" @@ -2856,7 +2865,7 @@ ipv6_tcp_novrf() log_start show_hint "Should fail 'Connection refused' since addresses on loopback are out of device scope" run_cmd nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 1 "Global server, device client, local connection" done @@ -2865,7 +2874,7 @@ ipv6_tcp_novrf() do log_start run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Device server, device client, local conn" done @@ -2898,7 +2907,7 @@ ipv6_tcp_vrf() log_start show_hint "Should fail 'Connection refused' since global server with VRF is disabled" run_cmd nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 1 "Global server" done @@ -2907,7 +2916,7 @@ ipv6_tcp_vrf() do log_start run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "VRF server" done @@ -2916,7 +2925,7 @@ ipv6_tcp_vrf() a=${NSA_LINKIP6}%${NSB_DEV} log_start run_cmd nettest -6 -s -I ${VRF} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "VRF server" @@ -2924,7 +2933,7 @@ ipv6_tcp_vrf() do log_start run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "Device server" done @@ -2943,7 +2952,7 @@ ipv6_tcp_vrf() log_start show_hint "Should fail 'Connection refused' since global server with VRF is disabled" run_cmd nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 1 "Global server, local connection" @@ -2964,7 +2973,7 @@ ipv6_tcp_vrf() do log_start run_cmd nettest -6 -s -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "Global server" done @@ -2973,7 +2982,7 @@ ipv6_tcp_vrf() do log_start run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "VRF server" done @@ -2982,13 +2991,13 @@ ipv6_tcp_vrf() a=${NSA_LINKIP6}%${NSB_DEV} log_start run_cmd nettest -6 -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "Global server" log_start run_cmd nettest -6 -s -I ${VRF} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "VRF server" @@ -2996,7 +3005,7 @@ ipv6_tcp_vrf() do log_start run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "Device server" done @@ -3016,7 +3025,7 @@ ipv6_tcp_vrf() log_start show_hint "Fails 'Connection refused' since client is not in VRF" run_cmd nettest -6 -s -I ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} log_test_addr ${a} $? 1 "Global server, local connection" done @@ -3029,7 +3038,7 @@ ipv6_tcp_vrf() do log_start run_cmd_nsb nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -6 -r ${a} -d ${VRF} log_test_addr ${a} $? 0 "Client, VRF bind" done @@ -3038,7 +3047,7 @@ ipv6_tcp_vrf() log_start show_hint "Fails since VRF device does not allow linklocal addresses" run_cmd_nsb nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -6 -r ${a} -d ${VRF} log_test_addr ${a} $? 1 "Client, VRF bind" @@ -3046,7 +3055,7 @@ ipv6_tcp_vrf() do log_start run_cmd_nsb nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -6 -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 0 "Client, device bind" done @@ -3071,7 +3080,7 @@ ipv6_tcp_vrf() do log_start run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} -d ${VRF} -0 ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local connection" done @@ -3079,7 +3088,7 @@ ipv6_tcp_vrf() a=${NSA_IP6} log_start run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} -d ${NSA_DEV} -0 ${a} log_test_addr ${a} $? 0 "VRF server, device client, local connection" @@ -3087,13 +3096,13 @@ ipv6_tcp_vrf() log_start show_hint "Should fail since unbound client is out of VRF scope" run_cmd nettest -6 -s -I ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} log_test_addr ${a} $? 1 "VRF server, unbound client, local connection" log_start run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} -d ${VRF} -0 ${a} log_test_addr ${a} $? 0 "Device server, VRF client, local connection" @@ -3101,7 +3110,7 @@ ipv6_tcp_vrf() do log_start run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} -d ${NSA_DEV} -0 ${a} log_test_addr ${a} $? 0 "Device server, device client, local connection" done @@ -3141,13 +3150,13 @@ ipv6_udp_novrf() do log_start run_cmd nettest -6 -D -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Global server" log_start run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Device server" done @@ -3155,7 +3164,7 @@ ipv6_udp_novrf() a=${NSA_LO_IP6} log_start run_cmd nettest -6 -D -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Global server" @@ -3165,7 +3174,7 @@ ipv6_udp_novrf() #log_start #show_hint "Should fail since loopback address is out of scope" #run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - #sleep 1 + wait_local_port_listen ${NSA} 12345 udp #run_cmd_nsb nettest -6 -D -r ${a} #log_test_addr ${a} $? 1 "Device server" @@ -3185,25 +3194,25 @@ ipv6_udp_novrf() do log_start run_cmd_nsb nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -6 -D -r ${a} -0 ${NSA_IP6} log_test_addr ${a} $? 0 "Client" log_start run_cmd_nsb nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -0 ${NSA_IP6} log_test_addr ${a} $? 0 "Client, device bind" log_start run_cmd_nsb nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -C -0 ${NSA_IP6} log_test_addr ${a} $? 0 "Client, device send via cmsg" log_start run_cmd_nsb nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -S -0 ${NSA_IP6} log_test_addr ${a} $? 0 "Client, device bind via IPV6_UNICAST_IF" @@ -3225,7 +3234,7 @@ ipv6_udp_novrf() do log_start run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -r ${a} -0 ${a} -1 ${a} log_test_addr ${a} $? 0 "Global server, local connection" done @@ -3233,7 +3242,7 @@ ipv6_udp_novrf() a=${NSA_IP6} log_start run_cmd nettest -6 -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Device server, unbound client, local connection" @@ -3242,7 +3251,7 @@ ipv6_udp_novrf() log_start show_hint "Should fail 'Connection refused' since address is out of device scope" run_cmd nettest -6 -s -D -I ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -r ${a} log_test_addr ${a} $? 1 "Device server, local connection" done @@ -3250,19 +3259,19 @@ ipv6_udp_novrf() a=${NSA_IP6} log_start run_cmd nettest -6 -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Global server, device client, local connection" log_start run_cmd nettest -6 -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -C -r ${a} log_test_addr ${a} $? 0 "Global server, device send via cmsg, local connection" log_start run_cmd nettest -6 -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -S -r ${a} log_test_addr ${a} $? 0 "Global server, device client via IPV6_UNICAST_IF, local connection" @@ -3271,28 +3280,28 @@ ipv6_udp_novrf() log_start show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope" run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 1 "Global server, device client, local connection" log_start show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope" run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -C log_test_addr ${a} $? 1 "Global server, device send via cmsg, local connection" log_start show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope" run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -S log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection" log_start show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope" run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -S -U log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection, with connect()" done @@ -3300,7 +3309,7 @@ ipv6_udp_novrf() a=${NSA_IP6} log_start run_cmd nettest -6 -D -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} -0 ${a} log_test_addr ${a} $? 0 "Device server, device client, local conn" @@ -3314,7 +3323,7 @@ ipv6_udp_novrf() run_cmd_nsb ip -6 ro add ${NSA_IP6}/128 dev ${NSB_DEV} log_start run_cmd nettest -6 -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${NSA_IP6} log_test $? 0 "UDP in - LLA to GUA" @@ -3338,7 +3347,7 @@ ipv6_udp_vrf() log_start show_hint "Should fail 'Connection refused' since global server is disabled" run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 1 "Global server" done @@ -3347,7 +3356,7 @@ ipv6_udp_vrf() do log_start run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "VRF server" done @@ -3356,7 +3365,7 @@ ipv6_udp_vrf() do log_start run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Enslaved device server" done @@ -3378,7 +3387,7 @@ ipv6_udp_vrf() log_start show_hint "Should fail 'Connection refused' since global server is disabled" run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 1 "Global server, VRF client, local conn" done @@ -3387,7 +3396,7 @@ ipv6_udp_vrf() do log_start run_cmd nettest -6 -D -I ${VRF} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" done @@ -3396,25 +3405,25 @@ ipv6_udp_vrf() log_start show_hint "Should fail 'Connection refused' since global server is disabled" run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 1 "Global server, device client, local conn" log_start run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "VRF server, device client, local conn" log_start run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn" log_start run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn" @@ -3429,7 +3438,7 @@ ipv6_udp_vrf() do log_start run_cmd nettest -6 -D -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Global server" done @@ -3438,7 +3447,7 @@ ipv6_udp_vrf() do log_start run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "VRF server" done @@ -3447,7 +3456,7 @@ ipv6_udp_vrf() do log_start run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Enslaved device server" done @@ -3465,7 +3474,7 @@ ipv6_udp_vrf() # log_start run_cmd_nsb nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${NSB_IP6} log_test $? 0 "VRF client" @@ -3476,7 +3485,7 @@ ipv6_udp_vrf() log_start run_cmd_nsb nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${NSB_IP6} log_test $? 0 "Enslaved device client" @@ -3491,13 +3500,13 @@ ipv6_udp_vrf() a=${NSA_IP6} log_start run_cmd nettest -6 -D -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Global server, VRF client, local conn" #log_start run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" @@ -3505,13 +3514,13 @@ ipv6_udp_vrf() a=${VRF_IP6} log_start run_cmd nettest -6 -D -s -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Global server, VRF client, local conn" log_start run_cmd nettest -6 -D -I ${VRF} -s -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" @@ -3527,25 +3536,25 @@ ipv6_udp_vrf() a=${NSA_IP6} log_start run_cmd nettest -6 -D -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Global server, device client, local conn" log_start run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "VRF server, device client, local conn" log_start run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Device server, VRF client, local conn" log_start run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Device server, device client, local conn" @@ -3557,7 +3566,7 @@ ipv6_udp_vrf() # link local addresses log_start run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -d ${NSB_DEV} -r ${NSA_LINKIP6} log_test $? 0 "Global server, linklocal IP" @@ -3568,7 +3577,7 @@ ipv6_udp_vrf() log_start run_cmd_nsb nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${NSB_LINKIP6} log_test $? 0 "Enslaved device client, linklocal IP" @@ -3579,7 +3588,7 @@ ipv6_udp_vrf() log_start run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${NSA_LINKIP6} log_test $? 0 "Enslaved device client, local conn - linklocal IP" @@ -3592,7 +3601,7 @@ ipv6_udp_vrf() run_cmd_nsb ip -6 ro add ${NSA_IP6}/128 dev ${NSB_DEV} log_start run_cmd nettest -6 -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${NSA_IP6} log_test $? 0 "UDP in - LLA to GUA" @@ -3667,7 +3676,7 @@ ipv6_addr_bind_novrf() # when it really should not a=${NSA_LO_IP6} log_start - show_hint "Tecnically should fail since address is not on device but kernel allows" + show_hint "Technically should fail since address is not on device but kernel allows" run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b log_test_addr ${a} $? 0 "TCP socket bind to out of scope local address" } @@ -3724,7 +3733,7 @@ ipv6_addr_bind_vrf() # passes when it really should not a=${VRF_IP6} log_start - show_hint "Tecnically should fail since address is not on device but kernel allows" + show_hint "Technically should fail since address is not on device but kernel allows" run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b log_test_addr ${a} $? 0 "TCP socket bind to VRF address with device bind" @@ -3771,7 +3780,7 @@ ipv6_rt() do log_start run_cmd nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest ${varg} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -3785,7 +3794,7 @@ ipv6_rt() do log_start run_cmd nettest ${varg} -I ${VRF} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest ${varg} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -3799,7 +3808,7 @@ ipv6_rt() do log_start run_cmd nettest ${varg} -I ${NSA_DEV} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest ${varg} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -3814,7 +3823,7 @@ ipv6_rt() # log_start run_cmd_nsb nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest ${varg} -d ${VRF} -r ${NSB_IP6} & sleep 3 run_cmd ip link del ${VRF} @@ -3825,7 +3834,7 @@ ipv6_rt() log_start run_cmd_nsb nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest ${varg} -d ${NSA_DEV} -r ${NSB_IP6} & sleep 3 run_cmd ip link del ${VRF} @@ -3842,7 +3851,7 @@ ipv6_rt() do log_start run_cmd nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${VRF} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -3856,7 +3865,7 @@ ipv6_rt() do log_start run_cmd nettest ${varg} -I ${VRF} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${VRF} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -3869,7 +3878,7 @@ ipv6_rt() a=${NSA_IP6} log_start run_cmd nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -3880,7 +3889,7 @@ ipv6_rt() log_start run_cmd nettest ${varg} -I ${VRF} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -3891,7 +3900,7 @@ ipv6_rt() log_start run_cmd nettest ${varg} -I ${NSA_DEV} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -3950,7 +3959,7 @@ netfilter_tcp_reset() do log_start run_cmd nettest -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 1 "Global server, reject with TCP-reset on Rx" done @@ -3968,7 +3977,7 @@ netfilter_icmp() do log_start run_cmd nettest ${arg} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest ${arg} -r ${a} log_test_addr ${a} $? 1 "Global ${stype} server, Rx reject icmp-port-unreach" done @@ -4007,7 +4016,7 @@ netfilter_tcp6_reset() do log_start run_cmd nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 1 "Global server, reject with TCP-reset on Rx" done @@ -4025,7 +4034,7 @@ netfilter_icmp6() do log_start run_cmd nettest -6 -s ${arg} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 ${arg} -r ${a} log_test_addr ${a} $? 1 "Global ${stype} server, Rx reject icmp-port-unreach" done @@ -4221,12 +4230,12 @@ use_case_snat_on_vrf() run_cmd ip6tables -t nat -A POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP6} -o ${VRF} run_cmd_nsb nettest -s -l ${NSB_IP} -p ${port} & - sleep 1 + wait_local_port_listen ${NSB} ${port} tcp run_cmd nettest -d ${VRF} -r ${NSB_IP} -p ${port} log_test $? 0 "IPv4 TCP connection over VRF with SNAT" run_cmd_nsb nettest -6 -s -l ${NSB_IP6} -p ${port} & - sleep 1 + wait_local_port_listen ${NSB} ${port} tcp run_cmd nettest -6 -d ${VRF} -r ${NSB_IP6} -p ${port} log_test $? 0 "IPv6 TCP connection over VRF with SNAT" @@ -4272,6 +4281,7 @@ EOF TESTS_IPV4="ipv4_ping ipv4_tcp ipv4_udp ipv4_bind ipv4_runtime ipv4_netfilter" TESTS_IPV6="ipv6_ping ipv6_tcp ipv6_udp ipv6_bind ipv6_runtime ipv6_netfilter" TESTS_OTHER="use_cases" +# note: each TEST_ group needs a dedicated runner, e.g. fcnal-ipv4.sh PAUSE_ON_FAIL=no PAUSE=no @@ -4302,6 +4312,8 @@ elif [ "$TESTS" = "ipv4" ]; then TESTS="$TESTS_IPV4" elif [ "$TESTS" = "ipv6" ]; then TESTS="$TESTS_IPV6" +elif [ "$TESTS" = "other" ]; then + TESTS="$TESTS_OTHER" fi check_gen_prog "nettest" diff --git a/tools/testing/selftests/net/fdb_flush.sh b/tools/testing/selftests/net/fdb_flush.sh index d5e3abb8658c..9931a1e36e3d 100755 --- a/tools/testing/selftests/net/fdb_flush.sh +++ b/tools/testing/selftests/net/fdb_flush.sh @@ -583,7 +583,7 @@ vxlan_test_flush_by_remote_attributes() $IP link del dev vx10 $IP link add name vx10 type vxlan dstport "$VXPORT" external - # For multicat FDB entries, the VXLAN driver stores a linked list of + # For multicast FDB entries, the VXLAN driver stores a linked list of # remotes for a given key. Verify that only the expected remotes are # flushed. multicast_fdb_entries_add diff --git a/tools/testing/selftests/net/fdb_notify.sh b/tools/testing/selftests/net/fdb_notify.sh index c159230c9b62..0b8a2465dd04 100755 --- a/tools/testing/selftests/net/fdb_notify.sh +++ b/tools/testing/selftests/net/fdb_notify.sh @@ -40,16 +40,16 @@ do_test_dup() test_dup_bridge() { - ip_link_add br up type bridge vlan_filtering 1 + adf_ip_link_add br up type bridge vlan_filtering 1 do_test_dup add "bridge" dev br self do_test_dup del "bridge" dev br self } test_dup_vxlan_self() { - ip_link_add br up type bridge vlan_filtering 1 - ip_link_add vx up type vxlan id 2000 dstport 4789 - ip_link_set_master vx br + adf_ip_link_add br up type bridge vlan_filtering 1 + adf_ip_link_add vx up type vxlan id 2000 dstport 4789 + adf_ip_link_set_master vx br do_test_dup add "vxlan" dev vx self dst 192.0.2.1 do_test_dup del "vxlan" dev vx self dst 192.0.2.1 @@ -57,9 +57,9 @@ test_dup_vxlan_self() test_dup_vxlan_master() { - ip_link_add br up type bridge vlan_filtering 1 - ip_link_add vx up type vxlan id 2000 dstport 4789 - ip_link_set_master vx br + adf_ip_link_add br up type bridge vlan_filtering 1 + adf_ip_link_add vx up type vxlan id 2000 dstport 4789 + adf_ip_link_set_master vx br do_test_dup add "vxlan master" dev vx master do_test_dup del "vxlan master" dev vx master @@ -67,8 +67,8 @@ test_dup_vxlan_master() test_dup_macvlan_self() { - ip_link_add dd up type dummy - ip_link_add mv up link dd type macvlan mode passthru + adf_ip_link_add dd up type dummy + adf_ip_link_add mv up link dd type macvlan mode passthru do_test_dup add "macvlan self" dev mv self do_test_dup del "macvlan self" dev mv self @@ -76,10 +76,10 @@ test_dup_macvlan_self() test_dup_macvlan_master() { - ip_link_add br up type bridge vlan_filtering 1 - ip_link_add dd up type dummy - ip_link_add mv up link dd type macvlan mode passthru - ip_link_set_master mv br + adf_ip_link_add br up type bridge vlan_filtering 1 + adf_ip_link_add dd up type dummy + adf_ip_link_add mv up link dd type macvlan mode passthru + adf_ip_link_set_master mv br do_test_dup add "macvlan master" dev mv self do_test_dup del "macvlan master" dev mv self diff --git a/tools/testing/selftests/net/fib-onlink-tests.sh b/tools/testing/selftests/net/fib-onlink-tests.sh index ec2d6ceb1f08..e0d45292a298 100755 --- a/tools/testing/selftests/net/fib-onlink-tests.sh +++ b/tools/testing/selftests/net/fib-onlink-tests.sh @@ -72,7 +72,8 @@ declare -A TEST_NET4IN6IN6 TEST_NET4IN6[1]=10.1.1.254 TEST_NET4IN6[2]=10.2.1.254 -# mcast address +# mcast addresses +MCAST4=233.252.0.1 MCAST6=ff02::1 VRF=lisa @@ -120,7 +121,7 @@ log_subsection() run_cmd() { - local cmd="$*" + local cmd="$1" local out local rc @@ -145,7 +146,7 @@ get_linklocal() local pfx local addr - addr=$(${pfx} ip -6 -br addr show dev ${dev} | \ + addr=$(${pfx} ${IP} -6 -br addr show dev ${dev} | \ awk '{ for (i = 3; i <= NF; ++i) { if ($i ~ /^fe80/) @@ -173,58 +174,48 @@ setup() set -e - # create namespace - setup_ns PEER_NS + # create namespaces + setup_ns ns1 + IP="ip -netns $ns1" + setup_ns ns2 # add vrf table - ip li add ${VRF} type vrf table ${VRF_TABLE} - ip li set ${VRF} up - ip ro add table ${VRF_TABLE} unreachable default metric 8192 - ip -6 ro add table ${VRF_TABLE} unreachable default metric 8192 + ${IP} li add ${VRF} type vrf table ${VRF_TABLE} + ${IP} li set ${VRF} up + ${IP} ro add table ${VRF_TABLE} unreachable default metric 8192 + ${IP} -6 ro add table ${VRF_TABLE} unreachable default metric 8192 # create test interfaces - ip li add ${NETIFS[p1]} type veth peer name ${NETIFS[p2]} - ip li add ${NETIFS[p3]} type veth peer name ${NETIFS[p4]} - ip li add ${NETIFS[p5]} type veth peer name ${NETIFS[p6]} - ip li add ${NETIFS[p7]} type veth peer name ${NETIFS[p8]} + ${IP} li add ${NETIFS[p1]} type veth peer name ${NETIFS[p2]} + ${IP} li add ${NETIFS[p3]} type veth peer name ${NETIFS[p4]} + ${IP} li add ${NETIFS[p5]} type veth peer name ${NETIFS[p6]} + ${IP} li add ${NETIFS[p7]} type veth peer name ${NETIFS[p8]} # enslave vrf interfaces for n in 5 7; do - ip li set ${NETIFS[p${n}]} vrf ${VRF} + ${IP} li set ${NETIFS[p${n}]} vrf ${VRF} done # add addresses for n in 1 3 5 7; do - ip li set ${NETIFS[p${n}]} up - ip addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]} - ip addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} nodad + ${IP} li set ${NETIFS[p${n}]} up + ${IP} addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]} + ${IP} addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} nodad done # move peer interfaces to namespace and add addresses for n in 2 4 6 8; do - ip li set ${NETIFS[p${n}]} netns ${PEER_NS} up - ip -netns ${PEER_NS} addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]} - ip -netns ${PEER_NS} addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} nodad + ${IP} li set ${NETIFS[p${n}]} netns ${ns2} up + ip -netns $ns2 addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]} + ip -netns $ns2 addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} nodad done - ip -6 ro add default via ${V6ADDRS[p3]/::[0-9]/::64} - ip -6 ro add table ${VRF_TABLE} default via ${V6ADDRS[p7]/::[0-9]/::64} + ${IP} -6 ro add default via ${V6ADDRS[p3]/::[0-9]/::64} + ${IP} -6 ro add table ${VRF_TABLE} default via ${V6ADDRS[p7]/::[0-9]/::64} set +e } -cleanup() -{ - # make sure we start from a clean slate - cleanup_ns ${PEER_NS} 2>/dev/null - for n in 1 3 5 7; do - ip link del ${NETIFS[p${n}]} 2>/dev/null - done - ip link del ${VRF} 2>/dev/null - ip ro flush table ${VRF_TABLE} - ip -6 ro flush table ${VRF_TABLE} -} - ################################################################################ # IPv4 tests # @@ -241,7 +232,7 @@ run_ip() # dev arg may be empty [ -n "${dev}" ] && dev="dev ${dev}" - run_cmd ip ro add table "${table}" "${prefix}"/32 via "${gw}" "${dev}" onlink + run_cmd "${IP} ro add table ${table} ${prefix}/32 via ${gw} ${dev} onlink" log_test $? ${exp_rc} "${desc}" } @@ -257,8 +248,8 @@ run_ip_mpath() # dev arg may be empty [ -n "${dev}" ] && dev="dev ${dev}" - run_cmd ip ro add table "${table}" "${prefix}"/32 \ - nexthop via ${nh1} nexthop via ${nh2} + run_cmd "${IP} ro add table ${table} ${prefix}/32 \ + nexthop via ${nh1} nexthop via ${nh2}" log_test $? ${exp_rc} "${desc}" } @@ -270,11 +261,15 @@ valid_onlink_ipv4() run_ip 254 ${TEST_NET4[1]}.1 ${CONGW[1]} ${NETIFS[p1]} 0 "unicast connected" run_ip 254 ${TEST_NET4[1]}.2 ${RECGW4[1]} ${NETIFS[p1]} 0 "unicast recursive" + run_ip 254 ${TEST_NET4[1]}.9 ${CONGW[1]} ${NETIFS[p3]} 0 \ + "nexthop device mismatch" log_subsection "VRF ${VRF}" run_ip ${VRF_TABLE} ${TEST_NET4[2]}.1 ${CONGW[3]} ${NETIFS[p5]} 0 "unicast connected" run_ip ${VRF_TABLE} ${TEST_NET4[2]}.2 ${RECGW4[2]} ${NETIFS[p5]} 0 "unicast recursive" + run_ip ${VRF_TABLE} ${TEST_NET4[2]}.10 ${CONGW[3]} ${NETIFS[p7]} 0 \ + "nexthop device mismatch" log_subsection "VRF device, PBR table" @@ -310,17 +305,15 @@ invalid_onlink_ipv4() { run_ip 254 ${TEST_NET4[1]}.11 ${V4ADDRS[p1]} ${NETIFS[p1]} 2 \ "Invalid gw - local unicast address" + run_ip 254 ${TEST_NET4[1]}.12 ${MCAST4} ${NETIFS[p1]} 2 \ + "Invalid gw - multicast address" run_ip ${VRF_TABLE} ${TEST_NET4[2]}.11 ${V4ADDRS[p5]} ${NETIFS[p5]} 2 \ "Invalid gw - local unicast address, VRF" + run_ip ${VRF_TABLE} ${TEST_NET4[2]}.12 ${MCAST4} ${NETIFS[p5]} 2 \ + "Invalid gw - multicast address, VRF" run_ip 254 ${TEST_NET4[1]}.101 ${V4ADDRS[p1]} "" 2 "No nexthop device given" - - run_ip 254 ${TEST_NET4[1]}.102 ${V4ADDRS[p3]} ${NETIFS[p1]} 2 \ - "Gateway resolves to wrong nexthop device" - - run_ip ${VRF_TABLE} ${TEST_NET4[2]}.103 ${V4ADDRS[p7]} ${NETIFS[p5]} 2 \ - "Gateway resolves to wrong nexthop device - VRF" } ################################################################################ @@ -339,7 +332,7 @@ run_ip6() # dev arg may be empty [ -n "${dev}" ] && dev="dev ${dev}" - run_cmd ip -6 ro add table "${table}" "${prefix}"/128 via "${gw}" "${dev}" onlink + run_cmd "${IP} -6 ro add table ${table} ${prefix}/128 via ${gw} ${dev} onlink" log_test $? ${exp_rc} "${desc}" } @@ -353,8 +346,8 @@ run_ip6_mpath() local exp_rc="$6" local desc="$7" - run_cmd ip -6 ro add table "${table}" "${prefix}"/128 "${opts}" \ - nexthop via ${nh1} nexthop via ${nh2} + run_cmd "${IP} -6 ro add table ${table} ${prefix}/128 ${opts} \ + nexthop via ${nh1} nexthop via ${nh2}" log_test $? ${exp_rc} "${desc}" } @@ -367,12 +360,16 @@ valid_onlink_ipv6() run_ip6 254 ${TEST_NET6[1]}::1 ${V6ADDRS[p1]/::*}::64 ${NETIFS[p1]} 0 "unicast connected" run_ip6 254 ${TEST_NET6[1]}::2 ${RECGW6[1]} ${NETIFS[p1]} 0 "unicast recursive" run_ip6 254 ${TEST_NET6[1]}::3 ::ffff:${TEST_NET4IN6[1]} ${NETIFS[p1]} 0 "v4-mapped" + run_ip6 254 ${TEST_NET6[1]}::a ${V6ADDRS[p1]/::*}::64 ${NETIFS[p3]} 0 \ + "nexthop device mismatch" log_subsection "VRF ${VRF}" run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::1 ${V6ADDRS[p5]/::*}::64 ${NETIFS[p5]} 0 "unicast connected" run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::2 ${RECGW6[2]} ${NETIFS[p5]} 0 "unicast recursive" run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::3 ::ffff:${TEST_NET4IN6[2]} ${NETIFS[p5]} 0 "v4-mapped" + run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::b ${V6ADDRS[p5]/::*}::64 \ + ${NETIFS[p7]} 0 "nexthop device mismatch" log_subsection "VRF device, PBR table" @@ -438,13 +435,6 @@ invalid_onlink_ipv6() run_ip6 254 ${TEST_NET6[1]}::101 ${V6ADDRS[p1]} "" 2 \ "No nexthop device given" - - # default VRF validation is done against LOCAL table - # run_ip6 254 ${TEST_NET6[1]}::102 ${V6ADDRS[p3]/::[0-9]/::64} ${NETIFS[p1]} 2 \ - # "Gateway resolves to wrong nexthop device" - - run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::103 ${V6ADDRS[p7]/::[0-9]/::64} ${NETIFS[p5]} 2 \ - "Gateway resolves to wrong nexthop device - VRF" } run_onlink_tests() @@ -491,10 +481,9 @@ do esac done -cleanup setup run_onlink_tests -cleanup +cleanup_ns ${ns1} ${ns2} if [ "$TESTS" != "none" ]; then printf "\nTests passed: %3d\n" ${nsuccess} diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index 77c83d9508d3..6eb7f95e70e1 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -76,11 +76,13 @@ log_test() printf "TEST: %-60s [ OK ]\n" "${msg}" nsuccess=$((nsuccess+1)) else - ret=1 - nfail=$((nfail+1)) if [[ $rc -eq $ksft_skip ]]; then + [[ $ret -eq 0 ]] && ret=$ksft_skip + nskip=$((nskip+1)) printf "TEST: %-60s [SKIP]\n" "${msg}" else + ret=1 + nfail=$((nfail+1)) printf "TEST: %-60s [FAIL]\n" "${msg}" fi @@ -465,8 +467,8 @@ ipv6_fdb_grp_fcnal() log_test $? 0 "Get Fdb nexthop group by id" # fdb nexthop group can only contain fdb nexthops - run_cmd "$IP nexthop add id 63 via 2001:db8:91::4" - run_cmd "$IP nexthop add id 64 via 2001:db8:91::5" + run_cmd "$IP nexthop add id 63 via 2001:db8:91::4 dev veth1" + run_cmd "$IP nexthop add id 64 via 2001:db8:91::5 dev veth1" run_cmd "$IP nexthop add id 103 group 63/64 fdb" log_test $? 2 "Fdb Nexthop group with non-fdb nexthops" @@ -492,6 +494,26 @@ ipv6_fdb_grp_fcnal() run_cmd "$IP nexthop add id 69 encap mpls 101 via 2001:db8:91::8 dev veth1 fdb" log_test $? 2 "Fdb Nexthop with encap" + # Replace FDB nexthop to non-FDB and vice versa + run_cmd "$IP nexthop add id 70 via 2001:db8:91::2 fdb" + run_cmd "$IP nexthop replace id 70 via 2001:db8:91::2 dev veth1" + log_test $? 0 "Replace FDB nexthop to non-FDB nexthop" + run_cmd "$IP nexthop replace id 70 via 2001:db8:91::2 fdb" + log_test $? 0 "Replace non-FDB nexthop to FDB nexthop" + + # Replace FDB nexthop address while in a group + run_cmd "$IP nexthop add id 71 group 70 fdb" + run_cmd "$IP nexthop replace id 70 via 2001:db8:91::3 fdb" + log_test $? 0 "Replace FDB nexthop address while in a group" + + # Cannot replace FDB nexthop to non-FDB and vice versa while in a group + run_cmd "$IP nexthop replace id 70 via 2001:db8:91::2 dev veth1" + log_test $? 2 "Replace FDB nexthop to non-FDB nexthop while in a group" + run_cmd "$IP nexthop add id 72 via 2001:db8:91::2 dev veth1" + run_cmd "$IP nexthop add id 73 group 72" + run_cmd "$IP nexthop replace id 72 via 2001:db8:91::2 fdb" + log_test $? 2 "Replace non-FDB nexthop to FDB nexthop while in a group" + run_cmd "$IP link add name vx10 type vxlan id 1010 local 2001:db8:91::9 remote 2001:db8:91::10 dstport 4789 nolearning noudpcsum tos inherit ttl 100" run_cmd "$BRIDGE fdb add 02:02:00:00:00:13 dev vx10 nhid 102 self" log_test $? 0 "Fdb mac add with nexthop group" @@ -545,15 +567,15 @@ ipv4_fdb_grp_fcnal() log_test $? 0 "Get Fdb nexthop group by id" # fdb nexthop group can only contain fdb nexthops - run_cmd "$IP nexthop add id 14 via 172.16.1.2" - run_cmd "$IP nexthop add id 15 via 172.16.1.3" + run_cmd "$IP nexthop add id 14 via 172.16.1.2 dev veth1" + run_cmd "$IP nexthop add id 15 via 172.16.1.3 dev veth1" run_cmd "$IP nexthop add id 103 group 14/15 fdb" log_test $? 2 "Fdb Nexthop group with non-fdb nexthops" # Non fdb nexthop group can not contain fdb nexthops run_cmd "$IP nexthop add id 16 via 172.16.1.2 fdb" run_cmd "$IP nexthop add id 17 via 172.16.1.3 fdb" - run_cmd "$IP nexthop add id 104 group 14/15" + run_cmd "$IP nexthop add id 104 group 16/17" log_test $? 2 "Non-Fdb Nexthop group with fdb nexthops" # fdb nexthop cannot have blackhole @@ -572,6 +594,26 @@ ipv4_fdb_grp_fcnal() run_cmd "$IP nexthop add id 17 encap mpls 101 via 172.16.1.2 dev veth1 fdb" log_test $? 2 "Fdb Nexthop with encap" + # Replace FDB nexthop to non-FDB and vice versa + run_cmd "$IP nexthop add id 18 via 172.16.1.2 fdb" + run_cmd "$IP nexthop replace id 18 via 172.16.1.2 dev veth1" + log_test $? 0 "Replace FDB nexthop to non-FDB nexthop" + run_cmd "$IP nexthop replace id 18 via 172.16.1.2 fdb" + log_test $? 0 "Replace non-FDB nexthop to FDB nexthop" + + # Replace FDB nexthop address while in a group + run_cmd "$IP nexthop add id 19 group 18 fdb" + run_cmd "$IP nexthop replace id 18 via 172.16.1.3 fdb" + log_test $? 0 "Replace FDB nexthop address while in a group" + + # Cannot replace FDB nexthop to non-FDB and vice versa while in a group + run_cmd "$IP nexthop replace id 18 via 172.16.1.2 dev veth1" + log_test $? 2 "Replace FDB nexthop to non-FDB nexthop while in a group" + run_cmd "$IP nexthop add id 20 via 172.16.1.2 dev veth1" + run_cmd "$IP nexthop add id 21 group 20" + run_cmd "$IP nexthop replace id 20 via 172.16.1.2 fdb" + log_test $? 2 "Replace non-FDB nexthop to FDB nexthop while in a group" + run_cmd "$IP link add name vx10 type vxlan id 1010 local 10.0.0.1 remote 10.0.0.2 dstport 4789 nolearning noudpcsum tos inherit ttl 100" run_cmd "$BRIDGE fdb add 02:02:00:00:00:13 dev vx10 nhid 102 self" log_test $? 0 "Fdb mac add with nexthop group" @@ -580,7 +622,7 @@ ipv4_fdb_grp_fcnal() run_cmd "$BRIDGE fdb add 02:02:00:00:00:14 dev vx10 nhid 12 self" log_test $? 255 "Fdb mac add with nexthop" - run_cmd "$IP ro add 172.16.0.0/22 nhid 15" + run_cmd "$IP ro add 172.16.0.0/22 nhid 16" log_test $? 2 "Route add with fdb nexthop" run_cmd "$IP ro add 172.16.0.0/22 nhid 103" @@ -741,7 +783,7 @@ ipv6_fcnal() run_cmd "$IP nexthop add id 52 via 2001:db8:92::3" log_test $? 2 "Create nexthop - gw only" - # gw is not reachable throught given dev + # gw is not reachable through given dev run_cmd "$IP nexthop add id 53 via 2001:db8:3::3 dev veth1" log_test $? 2 "Create nexthop - invalid gw+dev combination" @@ -758,6 +800,14 @@ ipv6_fcnal() set +e check_nexthop "dev veth1" "" log_test $? 0 "Nexthops removed on admin down" + + # error routes should be deleted when their nexthop is deleted + run_cmd "$IP li set dev veth1 up" + run_cmd "$IP -6 nexthop add id 58 dev veth1" + run_cmd "$IP ro add blackhole 2001:db8:101::1/128 nhid 58" + run_cmd "$IP nexthop del id 58" + check_route6 "2001:db8:101::1" "" + log_test $? 0 "Error route removed on nexthop deletion" } ipv6_grp_refs() @@ -1417,6 +1467,13 @@ ipv4_fcnal() run_cmd "$IP ro del 172.16.102.0/24" log_test $? 0 "Delete route when not specifying nexthop attributes" + + # error routes should be deleted when their nexthop is deleted + run_cmd "$IP nexthop add id 23 dev veth1" + run_cmd "$IP ro add blackhole 172.16.102.100/32 nhid 23" + run_cmd "$IP nexthop del id 23" + check_route "172.16.102.100" "" + log_test $? 0 "Error route removed on nexthop deletion" } ipv4_grp_fcnal() @@ -1615,6 +1672,17 @@ ipv4_withv6_fcnal() run_cmd "$IP ro replace 172.16.101.1/32 via inet6 2001:db8:50::1 dev veth1" log_test $? 2 "IPv4 route with invalid IPv6 gateway" + + # Test IPv4 route with loopback IPv6 nexthop + # Regression test: loopback IPv6 nexthop was misclassified as reject + # route, skipping nhc_pcpu_rth_output allocation, causing panic when + # an IPv4 route references it and triggers __mkroute_output(). + run_cmd "$IP -6 nexthop add id 20 dev lo" + run_cmd "$IP ro add 172.20.20.0/24 nhid 20" + run_cmd "ip netns exec $me ping -c1 -W1 172.20.20.1" + log_test $? 1 "IPv4 route with loopback IPv6 nexthop (no crash)" + run_cmd "$IP ro del 172.20.20.0/24" + run_cmd "$IP nexthop del id 20" } ipv4_fcnal_runtime() @@ -2528,6 +2596,7 @@ done if [ "$TESTS" != "none" ]; then printf "\nTests passed: %3d\n" ${nsuccess} printf "Tests failed: %3d\n" ${nfail} + printf "Tests skipped: %2d\n" ${nskip} fi exit $ret diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index 847936363a12..5fbdd2a0b537 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -256,6 +256,24 @@ fib_rule6_test() fib_rule6_test_match_n_redirect "$match" "$match" \ "$getnomatch" "sport and dport redirect to table" \ "sport and dport no redirect to table" + + match="sport 100-200 dport 300-400" + getmatch="sport 100 dport 400" + getnomatch="sport 100 dport 401" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" \ + "sport and dport range redirect to table" \ + "sport and dport range no redirect to table" + fi + + ip rule help 2>&1 | grep sport | grep -q MASK + if [ $? -eq 0 ]; then + match="sport 0x0f00/0xff00 dport 0x000f/0x00ff" + getmatch="sport 0x0f11 dport 0x220f" + getnomatch="sport 0x1f11 dport 0x221f" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "sport and dport masked redirect to table" \ + "sport and dport masked no redirect to table" fi fib_check_iproute_support "ipproto" "ipproto" @@ -292,6 +310,25 @@ fib_rule6_test() "iif dscp no redirect to table" fi + ip rule help 2>&1 | grep -q "DSCP\[/MASK\]" + if [ $? -eq 0 ]; then + match="dscp 0x0f/0x0f" + tosmatch=$(printf 0x"%x" $((0x1f << 2))) + tosnomatch=$(printf 0x"%x" $((0x1e << 2))) + getmatch="tos $tosmatch" + getnomatch="tos $tosnomatch" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "dscp masked redirect to table" \ + "dscp masked no redirect to table" + + match="dscp 0x0f/0x0f" + getmatch="from $SRC_IP6 iif $DEV tos $tosmatch" + getnomatch="from $SRC_IP6 iif $DEV tos $tosnomatch" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "iif dscp masked redirect to table" \ + "iif dscp masked no redirect to table" + fi + fib_check_iproute_support "flowlabel" "flowlabel" if [ $? -eq 0 ]; then match="flowlabel 0xfffff" @@ -322,6 +359,23 @@ fib_rule6_test() "$getnomatch" "iif flowlabel masked redirect to table" \ "iif flowlabel masked no redirect to table" fi + + $IP link show dev $DEV | grep -q vrf0 + if [ $? -eq 0 ]; then + match="oif vrf0" + getmatch="oif $DEV" + getnomatch="oif lo" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "VRF oif redirect to table" \ + "VRF oif no redirect to table" + + match="from $SRC_IP6 iif vrf0" + getmatch="from $SRC_IP6 iif $DEV" + getnomatch="from $SRC_IP6 iif lo" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "VRF iif redirect to table" \ + "VRF iif no redirect to table" + fi } fib_rule6_vrf_test() @@ -462,10 +516,7 @@ fib_rule4_test() fib_rule4_test_match_n_redirect "$match" "$match" "$getnomatch" \ "oif redirect to table" "oif no redirect to table" - # Enable forwarding and disable rp_filter as all the addresses are in - # the same subnet and egress device == ingress device. ip netns exec $testns sysctl -qw net.ipv4.ip_forward=1 - ip netns exec $testns sysctl -qw net.ipv4.conf.$DEV.rp_filter=0 match="from $SRC_IP iif $DEV" getnomatch="from $SRC_IP iif lo" fib_rule4_test_match_n_redirect "$match" "$match" "$getnomatch" \ @@ -525,6 +576,24 @@ fib_rule4_test() fib_rule4_test_match_n_redirect "$match" "$match" \ "$getnomatch" "sport and dport redirect to table" \ "sport and dport no redirect to table" + + match="sport 100-200 dport 300-400" + getmatch="sport 100 dport 400" + getnomatch="sport 100 dport 401" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" \ + "sport and dport range redirect to table" \ + "sport and dport range no redirect to table" + fi + + ip rule help 2>&1 | grep sport | grep -q MASK + if [ $? -eq 0 ]; then + match="sport 0x0f00/0xff00 dport 0x000f/0x00ff" + getmatch="sport 0x0f11 dport 0x220f" + getnomatch="sport 0x1f11 dport 0x221f" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "sport and dport masked redirect to table" \ + "sport and dport masked no redirect to table" fi fib_check_iproute_support "ipproto" "ipproto" @@ -561,6 +630,42 @@ fib_rule4_test() "$getnomatch" "iif dscp redirect to table" \ "iif dscp no redirect to table" fi + + ip rule help 2>&1 | grep -q "DSCP\[/MASK\]" + if [ $? -eq 0 ]; then + match="dscp 0x0f/0x0f" + tosmatch=$(printf 0x"%x" $((0x1f << 2))) + tosnomatch=$(printf 0x"%x" $((0x1e << 2))) + getmatch="tos $tosmatch" + getnomatch="tos $tosnomatch" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "dscp masked redirect to table" \ + "dscp masked no redirect to table" + + match="dscp 0x0f/0x0f" + getmatch="from $SRC_IP iif $DEV tos $tosmatch" + getnomatch="from $SRC_IP iif $DEV tos $tosnomatch" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "iif dscp masked redirect to table" \ + "iif dscp masked no redirect to table" + fi + + $IP link show dev $DEV | grep -q vrf0 + if [ $? -eq 0 ]; then + match="oif vrf0" + getmatch="oif $DEV" + getnomatch="oif lo" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "VRF oif redirect to table" \ + "VRF oif no redirect to table" + + match="from $SRC_IP iif vrf0" + getmatch="from $SRC_IP iif $DEV" + getnomatch="from $SRC_IP iif lo" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "VRF iif redirect to table" \ + "VRF iif no redirect to table" + fi } fib_rule4_vrf_test() diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 3ea6f886a210..af64f93bb2e1 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -11,7 +11,8 @@ TESTS="unregister down carrier nexthop suppress ipv6_notify ipv4_notify \ ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics \ ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr \ ipv6_del_addr ipv4_mangle ipv6_mangle ipv4_bcast_neigh fib6_gc_test \ - ipv4_mpath_list ipv6_mpath_list" + ipv4_mpath_list ipv6_mpath_list ipv4_mpath_balance ipv6_mpath_balance \ + ipv4_mpath_balance_preferred fib6_ra_to_static" VERBOSE=0 PAUSE_ON_FAIL=no @@ -544,7 +545,7 @@ fib4_nexthop() fib6_nexthop() { local lldummy=$(get_linklocal dummy0) - local llv1=$(get_linklocal dummy0) + local llv1=$(get_linklocal veth1) if [ -z "$lldummy" ]; then echo "Failed to get linklocal address for dummy0" @@ -867,6 +868,64 @@ fib6_gc_test() check_rt_num 5 $($IP -6 route list |grep -v expires|grep 2001:20::|wc -l) log_test $ret 0 "ipv6 route garbage collection (replace with permanent)" + # Delete dummy_10 and remove all routes + $IP link del dev dummy_10 + + # rd6 is required for the next test. (ipv6toolkit) + if [ ! -x "$(command -v rd6)" ]; then + echo "SKIP: rd6 not found." + set +e + cleanup &> /dev/null + return + fi + + setup_ns ns2 + $IP link add veth1 type veth peer veth2 netns $ns2 + $IP link set veth1 up + ip -netns $ns2 link set veth2 up + $IP addr add fe80:dead::1/64 dev veth1 + ip -netns $ns2 addr add fe80:dead::2/64 dev veth2 + + # Add NTF_ROUTER neighbour to prevent rt6_age_examine_exception() + # from removing not-yet-expired exceptions. + ip -netns $ns2 link set veth2 address 00:11:22:33:44:55 + $IP neigh add fe80:dead::3 lladdr 00:11:22:33:44:55 dev veth1 router + + $NS_EXEC sysctl -wq net.ipv6.conf.veth1.accept_redirects=1 + $NS_EXEC sysctl -wq net.ipv6.conf.veth1.forwarding=0 + + # Temporary routes + for i in $(seq 1 5); do + # Expire route after $EXPIRE seconds + $IP -6 route add 2001:10::$i \ + via fe80:dead::2 dev veth1 expires $EXPIRE + + ip netns exec $ns2 rd6 -i veth2 \ + -s fe80:dead::2 -d fe80:dead::1 \ + -r 2001:10::$i -t fe80:dead::3 -p ICMP6 + done + + check_rt_num 5 $($IP -6 route list | grep expires | grep 2001:10:: | wc -l) + + # Promote to permanent routes by "prepend" (w/o NLM_F_EXCL and NLM_F_REPLACE) + for i in $(seq 1 5); do + # -EEXIST, but the temporary route becomes the permanent route. + $IP -6 route append 2001:10::$i \ + via fe80:dead::2 dev veth1 2>/dev/null || true + done + + check_rt_num 5 $($IP -6 route list | grep -v expires | grep 2001:10:: | wc -l) + check_rt_num 5 $($IP -6 route list cache | grep 2001:10:: | wc -l) + + # Trigger GC instead of waiting $GC_WAIT_TIME. + # rt6_nh_dump_exceptions() just skips expired exceptions. + $NS_EXEC sysctl -wq net.ipv6.route.flush=1 + check_rt_num 0 $($IP -6 route list cache | grep 2001:10:: | wc -l) + log_test $ret 0 "ipv6 route garbage collection (promote to permanent routes)" + + $IP neigh del fe80:dead::3 lladdr 00:11:22:33:44:55 dev veth1 router + $IP link del veth1 + # ra6 is required for the next test. (ipv6toolkit) if [ ! -x "$(command -v ra6)" ]; then echo "SKIP: ra6 not found." @@ -875,9 +934,6 @@ fib6_gc_test() return fi - # Delete dummy_10 and remove all routes - $IP link del dev dummy_10 - # Create a pair of veth devices to send a RA message from one # device to another. $IP link add veth1 type veth peer name veth2 @@ -1085,6 +1141,35 @@ route_setup() set +e } +forwarding_cleanup() +{ + cleanup_ns $ns3 + + route_cleanup +} + +# extend route_setup with an ns3 reachable through ns2 over both devices +forwarding_setup() +{ + forwarding_cleanup + + route_setup + + setup_ns ns3 + + ip link add veth5 netns $ns3 type veth peer name veth6 netns $ns2 + ip -netns $ns3 link set veth5 up + ip -netns $ns2 link set veth6 up + + ip -netns $ns3 -4 addr add dev veth5 172.16.105.1/24 + ip -netns $ns2 -4 addr add dev veth6 172.16.105.2/24 + ip -netns $ns3 -4 route add 172.16.100.0/22 via 172.16.105.2 + + ip -netns $ns3 -6 addr add dev veth5 2001:db8:105::1/64 nodad + ip -netns $ns2 -6 addr add dev veth6 2001:db8:105::2/64 nodad + ip -netns $ns3 -6 route add 2001:db8:101::/33 via 2001:db8:105::2 +} + # assumption is that basic add of a single path route works # otherwise just adding an address on an interface is broken ipv6_rt_add() @@ -1447,6 +1532,85 @@ ipv6_route_metrics_test() route_cleanup } +fib6_ra_to_static() +{ + setup + + echo + echo "Fib6 route promotion from RA-learned to static test" + set -e + + # ra6 is required for the test. (ipv6toolkit) + if [ ! -x "$(command -v ra6)" ]; then + echo "SKIP: ra6 not found." + set +e + cleanup &> /dev/null + return + fi + + # Create a pair of veth devices to send a RA message from one + # device to another. + $IP link add veth1 type veth peer name veth2 + $IP link set dev veth1 up + $IP link set dev veth2 up + $IP -6 address add 2001:10::1/64 dev veth1 nodad + $IP -6 address add 2001:10::2/64 dev veth2 nodad + + # Make veth1 ready to receive RA messages. + $NS_EXEC sysctl -wq net.ipv6.conf.veth1.accept_ra=2 + + # Send a RA message with a prefix from veth2. + $NS_EXEC ra6 -i veth2 -d 2001:10::1 -P 2001:12::/64\#LA\#120\#60 + + # Wait for the RA message. + sleep 1 + + # systemd may mess up the test. Make sure that + # systemd-networkd.service and systemd-networkd.socket are stopped. + check_rt_num_clean 2 $($IP -6 route list|grep expires|wc -l) || return + + # Configure static address on the same prefix + $IP -6 address add 2001:12::dead/64 dev veth1 nodad + + # On-link route won't expire anymore, default route still owned by RA + check_rt_num 1 $($IP -6 route list |grep expires|wc -l) + + # Send a second RA message with a prefix from veth2. + $NS_EXEC ra6 -i veth2 -d 2001:10::1 -P 2001:12::/64\#LA\#120\#60 + sleep 1 + + # Expire is not back, on-link route is still static + check_rt_num 1 $($IP -6 route list |grep expires|wc -l) + + $IP -6 address del 2001:12::dead/64 dev veth1 nodad + + # Expire is back, on-link route is now owned by RA again + check_rt_num 2 $($IP -6 route list |grep expires|wc -l) + + log_test $ret 0 "ipv6 promote RA route to static" + + # Prepare for RA route with gateway + $NS_EXEC sysctl -wq net.ipv6.conf.veth1.accept_ra_rt_info_max_plen=64 + + # Add initial route to cause ECMP merging + $IP -6 route add 2001:12::/64 via fe80::dead:beef dev veth1 + + $NS_EXEC ra6 -i veth2 -d 2001:10::1 -R 2001:12::/64#1#120 + + # Routes are not merged as RA routes are not elegible for ECMP + check_rt_num 2 "$($IP -6 route list | grep -c "2001:12::/64 via")" + + $IP -6 route append 2001:12::/64 via fe80::dead:feeb dev veth1 + + check_rt_num 2 "$($IP -6 route list | grep -c "nexthop via")" + + log_test "$ret" 0 "ipv6 RA route with nexthop do not merge into ECMP with static" + + set +e + + cleanup &> /dev/null +} + # add route for a prefix, flushing any existing routes first # expected to be the first step of a test add_route() @@ -2531,9 +2695,6 @@ ipv4_mpath_list_test() run_cmd "ip -n $ns2 route add 203.0.113.0/24 nexthop via 172.16.201.2 nexthop via 172.16.202.2" run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.fib_multipath_hash_policy=1" - run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.veth2.rp_filter=0" - run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0" - run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.default.rp_filter=0" set +e local dmac=$(ip -n $ns2 -j link show dev veth2 | jq -r '.[]["address"]') @@ -2600,6 +2761,160 @@ ipv6_mpath_list_test() route_cleanup } +tc_set_flower_counter__saddr_syn() { + tc_set_flower_counter $1 $2 $3 "src_ip $4 ip_proto tcp tcp_flags 0x2" +} + +ip_mpath_balance_dep_check() +{ + if [ ! -x "$(command -v socat)" ]; then + echo "socat command not found. Skipping test" + return 1 + fi + + if [ ! -x "$(command -v jq)" ]; then + echo "jq command not found. Skipping test" + return 1 + fi +} + +ip_mpath_balance() { + local -r ipver=$1 + local -r daddr=$2 + local -r num_conn=20 + + for i in $(seq 1 $num_conn); do + ip netns exec $ns3 socat $ipver TCP-LISTEN:8000 STDIO >/dev/null & + sleep 0.02 + echo -n a | ip netns exec $ns1 socat $ipver STDIO TCP:$daddr:8000 + done + + local -r syn0="$(tc_get_flower_counter $ns1 veth1)" + local -r syn1="$(tc_get_flower_counter $ns1 veth3)" + local -r syns=$((syn0+syn1)) + + [ "$VERBOSE" = "1" ] && echo "multipath: syns seen: ($syn0,$syn1)" + + [[ $syns -ge $num_conn ]] && [[ $syn0 -gt 0 ]] && [[ $syn1 -gt 0 ]] +} + +ipv4_mpath_balance_test() +{ + echo + echo "IPv4 multipath load balance test" + + ip_mpath_balance_dep_check || return 1 + forwarding_setup + + $IP route add 172.16.105.1 \ + nexthop via 172.16.101.2 \ + nexthop via 172.16.103.2 + + ip netns exec $ns1 \ + sysctl -q -w net.ipv4.fib_multipath_hash_policy=1 + + tc_set_flower_counter__saddr_syn $ns1 4 veth1 172.16.101.1 + tc_set_flower_counter__saddr_syn $ns1 4 veth3 172.16.103.1 + + ip_mpath_balance -4 172.16.105.1 + + log_test $? 0 "IPv4 multipath loadbalance" + + forwarding_cleanup +} + +get_route_dev_src() +{ + local pfx="$1" + local src="$2" + local out + + if out=$($IP -j route get "$pfx" from "$src" | jq -re ".[0].dev"); then + echo "$out" + fi +} + +ipv4_mpath_preferred() +{ + local src_ip=$1 + local pref_dev=$2 + local dev routes + local route0=0 + local route1=0 + local pref_route=0 + num_routes=254 + + for i in $(seq 1 $num_routes) ; do + dev=$(get_route_dev_src 172.16.105.$i $src_ip) + if [ "$dev" = "$pref_dev" ]; then + pref_route=$((pref_route+1)) + elif [ "$dev" = "veth1" ]; then + route0=$((route0+1)) + elif [ "$dev" = "veth3" ]; then + route1=$((route1+1)) + fi + done + + routes=$((route0+route1)) + + [ "$VERBOSE" = "1" ] && echo "multipath: routes seen: ($route0,$route1,$pref_route)" + + if [ x"$pref_dev" = x"" ]; then + [[ $routes -ge $num_routes ]] && [[ $route0 -gt 0 ]] && [[ $route1 -gt 0 ]] + else + [[ $pref_route -ge $num_routes ]] + fi + +} + +ipv4_mpath_balance_preferred_test() +{ + echo + echo "IPv4 multipath load balance preferred route" + + forwarding_setup + + $IP route add 172.16.105.0/24 \ + nexthop via 172.16.101.2 \ + nexthop via 172.16.103.2 + + ipv4_mpath_preferred 172.16.101.1 veth1 + log_test $? 0 "IPv4 multipath loadbalance from veth1" + + ipv4_mpath_preferred 172.16.103.1 veth3 + log_test $? 0 "IPv4 multipath loadbalance from veth3" + + ipv4_mpath_preferred 198.51.100.1 + log_test $? 0 "IPv4 multipath loadbalance from dummy" + + forwarding_cleanup +} + +ipv6_mpath_balance_test() +{ + echo + echo "IPv6 multipath load balance test" + + ip_mpath_balance_dep_check || return 1 + forwarding_setup + + $IP route add 2001:db8:105::1\ + nexthop via 2001:db8:101::2 \ + nexthop via 2001:db8:103::2 + + ip netns exec $ns1 \ + sysctl -q -w net.ipv6.fib_multipath_hash_policy=1 + + tc_set_flower_counter__saddr_syn $ns1 6 veth1 2001:db8:101::1 + tc_set_flower_counter__saddr_syn $ns1 6 veth3 2001:db8:103::1 + + ip_mpath_balance -6 "[2001:db8:105::1]" + + log_test $? 0 "IPv6 multipath loadbalance" + + forwarding_cleanup +} + ################################################################################ # usage @@ -2683,6 +2998,10 @@ do fib6_gc_test|ipv6_gc) fib6_gc_test;; ipv4_mpath_list) ipv4_mpath_list_test;; ipv6_mpath_list) ipv6_mpath_list_test;; + ipv4_mpath_balance) ipv4_mpath_balance_test;; + ipv6_mpath_balance) ipv6_mpath_balance_test;; + ipv4_mpath_balance_preferred) ipv4_mpath_balance_preferred_test;; + fib6_ra_to_static) fib6_ra_to_static;; help) echo "Test names: $TESTS"; exit 0;; esac diff --git a/tools/testing/selftests/net/forwarding/.gitignore b/tools/testing/selftests/net/forwarding/.gitignore index 2dea317f12e7..418ff96c52ef 100644 --- a/tools/testing/selftests/net/forwarding/.gitignore +++ b/tools/testing/selftests/net/forwarding/.gitignore @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only forwarding.config +ipmr diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index 00bde7b6f39e..bbaf4d937dd8 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -1,6 +1,9 @@ # SPDX-License-Identifier: GPL-2.0+ OR MIT -TEST_PROGS = bridge_fdb_learning_limit.sh \ +TEST_PROGS := \ + bridge_activity_notify.sh \ + bridge_fdb_learning_limit.sh \ + bridge_fdb_local_vlan_0.sh \ bridge_igmp.sh \ bridge_locked_port.sh \ bridge_mdb.sh \ @@ -18,64 +21,64 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \ gre_custom_multipath_hash.sh \ gre_inner_v4_multipath.sh \ gre_inner_v6_multipath.sh \ - gre_multipath_nh_res.sh \ - gre_multipath_nh.sh \ gre_multipath.sh \ + gre_multipath_nh.sh \ + gre_multipath_nh_res.sh \ ip6_forward_instats_vrf.sh \ ip6gre_custom_multipath_hash.sh \ + ip6gre_flat.sh \ ip6gre_flat_key.sh \ ip6gre_flat_keys.sh \ - ip6gre_flat.sh \ + ip6gre_hier.sh \ ip6gre_hier_key.sh \ ip6gre_hier_keys.sh \ - ip6gre_hier.sh \ ip6gre_inner_v4_multipath.sh \ ip6gre_inner_v6_multipath.sh \ + ipip_flat_gre.sh \ ipip_flat_gre_key.sh \ ipip_flat_gre_keys.sh \ - ipip_flat_gre.sh \ + ipip_hier_gre.sh \ ipip_hier_gre_key.sh \ ipip_hier_gre_keys.sh \ - ipip_hier_gre.sh \ lib_sh_test.sh \ local_termination.sh \ min_max_mtu.sh \ + mirror_gre.sh \ mirror_gre_bound.sh \ mirror_gre_bridge_1d.sh \ mirror_gre_bridge_1d_vlan.sh \ - mirror_gre_bridge_1q_lag.sh \ mirror_gre_bridge_1q.sh \ + mirror_gre_bridge_1q_lag.sh \ mirror_gre_changes.sh \ mirror_gre_flower.sh \ mirror_gre_lag_lacp.sh \ mirror_gre_neigh.sh \ mirror_gre_nh.sh \ - mirror_gre.sh \ - mirror_gre_vlan_bridge_1q.sh \ mirror_gre_vlan.sh \ + mirror_gre_vlan_bridge_1q.sh \ mirror_vlan.sh \ no_forwarding.sh \ pedit_dsfield.sh \ pedit_ip.sh \ pedit_l4port.sh \ - q_in_vni_ipv6.sh \ q_in_vni.sh \ + q_in_vni_ipv6.sh \ + router.sh \ router_bridge.sh \ router_bridge_1d.sh \ router_bridge_1d_lag.sh \ router_bridge_lag.sh \ + router_bridge_pvid_vlan_upper.sh \ router_bridge_vlan.sh \ router_bridge_vlan_upper.sh \ - router_bridge_pvid_vlan_upper.sh \ router_bridge_vlan_upper_pvid.sh \ router_broadcast.sh \ - router_mpath_nh_res.sh \ router_mpath_nh.sh \ + router_mpath_nh_res.sh \ router_mpath_seed.sh \ router_multicast.sh \ router_multipath.sh \ router_nh.sh \ - router.sh \ router_vid_1.sh \ sch_ets.sh \ sch_red.sh \ @@ -85,31 +88,34 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \ skbedit_priority.sh \ tc_actions.sh \ tc_chains.sh \ - tc_flower_router.sh \ tc_flower.sh \ - tc_flower_l2_miss.sh \ tc_flower_cfm.sh \ + tc_flower_l2_miss.sh \ tc_flower_port_range.sh \ + tc_flower_router.sh \ tc_mpls_l2vpn.sh \ tc_police.sh \ tc_shblocks.sh \ tc_tunnel_key.sh \ tc_vlan_modify.sh \ - vxlan_asymmetric_ipv6.sh \ vxlan_asymmetric.sh \ + vxlan_asymmetric_ipv6.sh \ + vxlan_bridge_1d.sh \ vxlan_bridge_1d_ipv6.sh \ - vxlan_bridge_1d_port_8472_ipv6.sh \ vxlan_bridge_1d_port_8472.sh \ - vxlan_bridge_1d.sh \ + vxlan_bridge_1d_port_8472_ipv6.sh \ + vxlan_bridge_1q.sh \ vxlan_bridge_1q_ipv6.sh \ - vxlan_bridge_1q_port_8472_ipv6.sh \ + vxlan_bridge_1q_mc_ul.sh \ vxlan_bridge_1q_port_8472.sh \ - vxlan_bridge_1q.sh \ + vxlan_bridge_1q_port_8472_ipv6.sh \ vxlan_reserved.sh \ + vxlan_symmetric.sh \ vxlan_symmetric_ipv6.sh \ - vxlan_symmetric.sh +# end of TEST_PROGS -TEST_FILES := devlink_lib.sh \ +TEST_FILES := \ + devlink_lib.sh \ fib_offload_lib.sh \ forwarding.config.sample \ ip6gre_lib.sh \ @@ -124,10 +130,16 @@ TEST_FILES := devlink_lib.sh \ sch_ets_tests.sh \ sch_tbf_core.sh \ sch_tbf_etsprio.sh \ - tc_common.sh + tc_common.sh \ +# end of TEST_FILES + +TEST_GEN_PROGS := \ + ipmr +# end of TEST_GEN_PROGS TEST_INCLUDES := \ + $(wildcard ../lib/sh/*.sh) \ ../lib.sh \ - $(wildcard ../lib/sh/*.sh) +# end of TEST_INCLUDES include ../../lib.mk diff --git a/tools/testing/selftests/net/forwarding/README b/tools/testing/selftests/net/forwarding/README index a652429bfd53..392a5a91ed37 100644 --- a/tools/testing/selftests/net/forwarding/README +++ b/tools/testing/selftests/net/forwarding/README @@ -6,7 +6,7 @@ to easily create and test complex environments. Unfortunately, these namespaces can not be used with actual switching ASICs, as their ports can not be migrated to other network namespaces -(dev->netns_local) and most of them probably do not support the +(dev->netns_immutable) and most of them probably do not support the L1-separation provided by namespaces. However, a similar kind of flexibility can be achieved by using VRFs and @@ -57,6 +57,21 @@ o Code shall be checked using ShellCheck [1] prior to submission. 1. https://www.shellcheck.net/ +Cleanups +-------- + +o lib.sh brings in defer.sh (by way of ../lib.sh) by default. Consider + making use of the defer primitive to schedule automatic cleanups. This + makes it harder to forget to remove a temporary netdevice, kill a running + process or perform other cleanup when the test script is interrupted. + +o When adding a helper that dirties the environment, but schedules all + necessary cleanups through defer, consider prefixing it adf_ for + consistency with lib.sh and ../lib.sh helpers. This serves as an + immediately visible bit of documentation about the helper API. + +o Definitely do the above for any new code in lib.sh, if practical. + Customization ============= diff --git a/tools/testing/selftests/net/forwarding/bridge_activity_notify.sh b/tools/testing/selftests/net/forwarding/bridge_activity_notify.sh new file mode 100755 index 000000000000..522a5b1b046c --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_activity_notify.sh @@ -0,0 +1,170 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------+ +------------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | 192.0.2.1/28 | | 192.0.2.2/28 | +# | + $h1 | | + $h2 | +# +----|------------------+ +----|-------------------+ +# | | +# +----|--------------------------------------------------|-------------------+ +# | SW | | | +# | +--|--------------------------------------------------|-----------------+ | +# | | + $swp1 BR1 (802.1d) + $swp2 | | +# | | | | +# | +-----------------------------------------------------------------------+ | +# +---------------------------------------------------------------------------+ + +ALL_TESTS=" + new_inactive_test + existing_active_test + norefresh_test +" + +NUM_NETIFS=4 +source lib.sh + +h1_create() +{ + adf_simple_if_init "$h1" 192.0.2.1/28 +} + +h2_create() +{ + adf_simple_if_init "$h2" 192.0.2.2/28 +} + +switch_create() +{ + adf_ip_link_add br1 type bridge vlan_filtering 0 mcast_snooping 0 \ + ageing_time "$LOW_AGEING_TIME" + adf_ip_link_set_up br1 + + adf_ip_link_set_master "$swp1" br1 + adf_ip_link_set_up "$swp1" + + adf_ip_link_set_master "$swp2" br1 + adf_ip_link_set_up "$swp2" +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + adf_vrf_prepare + + h1_create + h2_create + switch_create +} + +fdb_active_wait() +{ + local mac=$1; shift + + bridge -d fdb get "$mac" br br1 | grep -q -v "inactive" +} + +fdb_inactive_wait() +{ + local mac=$1; shift + + bridge -d fdb get "$mac" br br1 | grep -q "inactive" +} + +new_inactive_test() +{ + local mac="00:11:22:33:44:55" + + # Add a new FDB entry as static and inactive and check that it + # becomes active upon traffic. + RET=0 + + bridge fdb add "$mac" dev "$swp1" master static activity_notify inactive + bridge -d fdb get "$mac" br br1 | grep -q "inactive" + check_err $? "FDB entry not present as \"inactive\" when should" + + $MZ "$h1" -c 1 -p 64 -a "$mac" -b bcast -t ip -q + + busywait "$BUSYWAIT_TIMEOUT" fdb_active_wait "$mac" + check_err $? "FDB entry present as \"inactive\" when should not" + + log_test "Transition from inactive to active" + + bridge fdb del "$mac" dev "$swp1" master +} + +existing_active_test() +{ + local mac="00:11:22:33:44:55" + local ageing_time + + # Enable activity notifications on an existing dynamic FDB entry and + # check that it becomes inactive after the ageing time passed. + RET=0 + + bridge fdb add "$mac" dev "$swp1" master dynamic + bridge fdb replace "$mac" dev "$swp1" master static activity_notify norefresh + + bridge -d fdb get "$mac" br br1 | grep -q "activity_notify" + check_err $? "FDB entry not present as \"activity_notify\" when should" + + bridge -d fdb get "$mac" br br1 | grep -q "inactive" + check_fail $? "FDB entry present as \"inactive\" when should not" + + ageing_time=$(bridge_ageing_time_get br1) + slowwait $((ageing_time * 2)) fdb_inactive_wait "$mac" + check_err $? "FDB entry not present as \"inactive\" when should" + + log_test "Transition from active to inactive" + + bridge fdb del "$mac" dev "$swp1" master +} + +norefresh_test() +{ + local mac="00:11:22:33:44:55" + local updated_time + + # Check that the "updated" time is reset when replacing an FDB entry + # without the "norefresh" keyword and that it is not reset when + # replacing with the "norefresh" keyword. + RET=0 + + bridge fdb add "$mac" dev "$swp1" master static + sleep 1 + + bridge fdb replace "$mac" dev "$swp1" master static activity_notify + updated_time=$(bridge -d -s -j fdb get "$mac" br br1 | jq '.[]["updated"]') + if [[ $updated_time -ne 0 ]]; then + check_err 1 "\"updated\" time was not reset when should" + fi + + sleep 1 + bridge fdb replace "$mac" dev "$swp1" master static norefresh + updated_time=$(bridge -d -s -j fdb get "$mac" br br1 | jq '.[]["updated"]') + if [[ $updated_time -eq 0 ]]; then + check_err 1 "\"updated\" time was reset when should not" + fi + + log_test "Resetting of \"updated\" time" + + bridge fdb del "$mac" dev "$swp1" master +} + +if ! bridge fdb help 2>&1 | grep -q "activity_notify"; then + echo "SKIP: iproute2 too old, missing bridge FDB activity notification control" + exit "$ksft_skip" +fi + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/net/forwarding/bridge_fdb_local_vlan_0.sh b/tools/testing/selftests/net/forwarding/bridge_fdb_local_vlan_0.sh new file mode 100755 index 000000000000..694de8ba97e4 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_fdb_local_vlan_0.sh @@ -0,0 +1,387 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------+ +-----------------------+ +-----------------------+ +# | H1 (vrf) | | H2 (vrf) | | H3 (vrf) | +# | + $h1 | | + $h2 | | + $h3 | +# | | 192.0.2.1/28 | | | 192.0.2.2/28 | | | 192.0.2.18/28 | +# | | 2001:db8:1::1/64 | | | 2001:db8:1::2/64 | | | 2001:db8:2::2/64 | +# | | | | | | | | | +# +----|------------------+ +----|------------------+ +----|------------------+ +# | | | +# +----|-------------------------|-------------------------|------------------+ +# | +--|-------------------------|------------------+ | | +# | | + $swp1 + $swp2 | + $swp3 | +# | | | 192.0.2.17/28 | +# | | BR1 (802.1q) | 2001:db8:2::1/64 | +# | | 192.0.2.3/28 | | +# | | 2001:db8:1::3/64 | | +# | +-----------------------------------------------+ SW | +# +---------------------------------------------------------------------------+ +# +#shellcheck disable=SC2317 # SC doesn't see our uses of functions. +#shellcheck disable=SC2034 # ... and global variables + +ALL_TESTS=" + test_d_no_sharing + test_d_sharing + test_q_no_sharing + test_q_sharing + test_addr_set +" + +NUM_NETIFS=6 +source lib.sh + +pMAC=00:11:22:33:44:55 +bMAC=00:11:22:33:44:66 +mMAC=00:11:22:33:44:77 +xMAC=00:11:22:33:44:88 + +host_create() +{ + local h=$1; shift + local ipv4=$1; shift + local ipv6=$1; shift + + adf_simple_if_init "$h" "$ipv4" "$ipv6" + adf_ip_route_add vrf "v$h" 192.0.2.16/28 nexthop via 192.0.2.3 + adf_ip_route_add vrf "v$h" 2001:db8:2::/64 nexthop via 2001:db8:1::3 +} + +h3_create() +{ + adf_simple_if_init "$h3" 192.0.2.18/28 2001:db8:2::2/64 + adf_ip_route_add vrf "v$h3" 192.0.2.0/28 nexthop via 192.0.2.17 + adf_ip_route_add vrf "v$h3" 2001:db8:1::/64 nexthop via 2001:db8:2::1 + + tc qdisc add dev "$h3" clsact + defer tc qdisc del dev "$h3" clsact + + tc filter add dev "$h3" ingress proto ip pref 104 \ + flower skip_hw ip_proto udp dst_port 4096 \ + action pass + defer tc filter del dev "$h3" ingress proto ip pref 104 + + tc qdisc add dev "$h2" clsact + defer tc qdisc del dev "$h2" clsact + + tc filter add dev "$h2" ingress proto ip pref 104 \ + flower skip_hw ip_proto udp dst_port 4096 \ + action pass + defer tc filter del dev "$h2" ingress proto ip pref 104 +} + +switch_create() +{ + adf_ip_link_set_up "$swp1" + + adf_ip_link_set_up "$swp2" + + adf_ip_addr_add "$swp3" 192.0.2.17/28 + adf_ip_addr_add "$swp3" 2001:db8:2::1/64 + adf_ip_link_set_up "$swp3" +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + adf_vrf_prepare + adf_forwarding_enable + + host_create "$h1" 192.0.2.1/28 2001:db8:1::1/64 + host_create "$h2" 192.0.2.2/28 2001:db8:1::2/64 + h3_create + + switch_create +} + +adf_bridge_configure() +{ + local dev + + adf_ip_addr_add br 192.0.2.3/28 + adf_ip_addr_add br 2001:db8:1::3/64 + + adf_bridge_vlan_add dev br vid 1 pvid untagged self + adf_bridge_vlan_add dev br vid 2 self + adf_bridge_vlan_add dev br vid 3 self + + for dev in "$swp1" "$swp2"; do + adf_ip_link_set_master "$dev" br + adf_bridge_vlan_add dev "$dev" vid 1 pvid untagged + adf_bridge_vlan_add dev "$dev" vid 2 + adf_bridge_vlan_add dev "$dev" vid 3 + done +} + +adf_bridge_create() +{ + local mac + + adf_ip_link_add br up type bridge vlan_default_pvid 0 "$@" + mac=$(mac_get br) + adf_bridge_configure + adf_ip_link_set_addr br "$mac" +} + +check_fdb_local_vlan_0_support() +{ + if adf_ip_link_add XXbr up type bridge vlan_filtering 1 \ + fdb_local_vlan_0 1 &>/dev/null; then + return 0 + fi + + log_test_skip "FDB sharing" \ + "iproute 2 or the kernel do not support fdb_local_vlan_0" +} + +check_mac_presence() +{ + local should_fail=$1; shift + local dev=$1; shift + local vlan=$1; shift + local mac + + mac=$(mac_get "$dev") + + if ((vlan == 0)); then + vlan=null + fi + + bridge -j fdb show dev "$dev" | + jq -e --arg mac "$mac" --argjson vlan "$vlan" \ + '.[] | select(.mac == $mac) | select(.vlan == $vlan)' > /dev/null + check_err_fail "$should_fail" $? "FDB dev $dev vid $vlan addr $mac exists" +} + +do_sharing_test() +{ + local should_fail=$1; shift + local what=$1; shift + local dev + + RET=0 + + for dev in "$swp1" "$swp2" br; do + check_mac_presence 0 "$dev" 0 + check_mac_presence "$should_fail" "$dev" 1 + check_mac_presence "$should_fail" "$dev" 2 + check_mac_presence "$should_fail" "$dev" 3 + done + + log_test "$what" +} + +do_end_to_end_test() +{ + local mac=$1; shift + local what=$1; shift + local probe_dev=${1-$h3}; shift + local expect=${1-10}; shift + + local t0 + local t1 + local dd + + RET=0 + + # In mausezahn, use $dev MAC as the destination MAC. In the MAC sharing + # context, that will cause an FDB miss on VLAN 1 and prompt a second + # lookup in VLAN 0. + + t0=$(tc_rule_stats_get "$probe_dev" 104 ingress) + + $MZ "$h1" -c 10 -p 64 -a own -b "$mac" \ + -A 192.0.2.1 -B 192.0.2.18 -t udp "dp=4096,sp=2048" -q + sleep 1 + + t1=$(tc_rule_stats_get "$probe_dev" 104 ingress) + dd=$((t1 - t0)) + + ((dd == expect)) + check_err $? "Expected $expect packets on $probe_dev got $dd" + + log_test "$what" +} + +do_tests() +{ + local should_fail=$1; shift + local what=$1; shift + local swp1_mac + local br_mac + + swp1_mac=$(mac_get "$swp1") + br_mac=$(mac_get br) + + do_sharing_test "$should_fail" "$what" + do_end_to_end_test "$swp1_mac" "$what: end to end, $swp1 MAC" + do_end_to_end_test "$br_mac" "$what: end to end, br MAC" +} + +bridge_standard() +{ + local vlan_filtering=$1; shift + + if ((vlan_filtering)); then + echo 802.1q + else + echo 802.1d + fi +} + +nonexistent_fdb_test() +{ + local vlan_filtering=$1; shift + local standard + + standard=$(bridge_standard "$vlan_filtering") + + # We expect flooding, so $h2 should get the traffic. + do_end_to_end_test "$xMAC" "$standard: Nonexistent FDB" "$h2" +} + +misleading_fdb_test() +{ + local vlan_filtering=$1; shift + local standard + + standard=$(bridge_standard "$vlan_filtering") + + defer_scope_push + # Add an FDB entry on VLAN 0. The lookup on VLAN-aware bridge + # shouldn't pick this up even with fdb_local_vlan_0 enabled, so + # the traffic should be flooded. This all holds on + # vlan_filtering bridge, on non-vlan_filtering one the FDB entry + # is expected to be found as usual, no flooding takes place. + # + # Adding only on VLAN 0 is a bit tricky, because bridge is + # trying to be nice and interprets the request as if the FDB + # should be added on each VLAN. + + bridge fdb add "$mMAC" dev "$swp1" master + bridge fdb del "$mMAC" dev "$swp1" vlan 1 master + bridge fdb del "$mMAC" dev "$swp1" vlan 2 master + bridge fdb del "$mMAC" dev "$swp1" vlan 3 master + + local expect=$((vlan_filtering ? 10 : 0)) + do_end_to_end_test "$mMAC" \ + "$standard: Lookup of non-local MAC on VLAN 0" \ + "$h2" "$expect" + defer_scope_pop +} + +change_mac() +{ + local dev=$1; shift + local mac=$1; shift + local cur_mac + + cur_mac=$(mac_get "$dev") + + log_info "Change $dev MAC $cur_mac -> $mac" + adf_ip_link_set_addr "$dev" "$mac" + defer log_info "Change $dev MAC back" +} + +do_test_no_sharing() +{ + local vlan_filtering=$1; shift + local standard + + standard=$(bridge_standard "$vlan_filtering") + + adf_bridge_create vlan_filtering "$vlan_filtering" + setup_wait + + do_tests 0 "$standard, no FDB sharing" + + change_mac "$swp1" "$pMAC" + change_mac br "$bMAC" + + do_tests 0 "$standard, no FDB sharing after MAC change" + + in_defer_scope check_fdb_local_vlan_0_support || return + + log_info "Set fdb_local_vlan_0=1" + ip link set dev br type bridge fdb_local_vlan_0 1 + + do_tests 1 "$standard, fdb sharing after toggle" +} + +do_test_sharing() +{ + local vlan_filtering=$1; shift + local standard + + standard=$(bridge_standard "$vlan_filtering") + + in_defer_scope check_fdb_local_vlan_0_support || return + + adf_bridge_create vlan_filtering "$vlan_filtering" fdb_local_vlan_0 1 + setup_wait + + do_tests 1 "$standard, FDB sharing" + + nonexistent_fdb_test "$vlan_filtering" + misleading_fdb_test "$vlan_filtering" + + change_mac "$swp1" "$pMAC" + change_mac br "$bMAC" + + do_tests 1 "$standard, FDB sharing after MAC change" + + log_info "Set fdb_local_vlan_0=0" + ip link set dev br type bridge fdb_local_vlan_0 0 + + do_tests 0 "$standard, No FDB sharing after toggle" +} + +test_d_no_sharing() +{ + do_test_no_sharing 0 +} + +test_d_sharing() +{ + do_test_sharing 0 +} + +test_q_no_sharing() +{ + do_test_no_sharing 1 +} + +test_q_sharing() +{ + do_test_sharing 1 +} + +adf_addr_set_bridge_create() +{ + adf_ip_link_add br up type bridge vlan_filtering 0 + adf_ip_link_set_addr br "$(mac_get br)" + adf_bridge_configure +} + +test_addr_set() +{ + adf_addr_set_bridge_create + setup_wait + + do_end_to_end_test "$(mac_get br)" "NET_ADDR_SET: end to end, br MAC" +} + +trap cleanup EXIT + +setup_prepare +tests_run diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh index e6a3e04fd83f..d4e7dd659354 100755 --- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh +++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh @@ -1,10 +1,24 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \ - v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test \ - v3exc_is_exclude_test v3exc_to_exclude_test v3inc_block_test v3exc_block_test \ - v3exc_timeout_test v3star_ex_auto_add_test" +ALL_TESTS=" + v2reportleave_test + v3include_test + v3inc_allow_test + v3inc_is_include_test + v3inc_is_exclude_test + v3inc_to_exclude_test + v3exc_allow_test + v3exc_is_include_test + v3exc_is_exclude_test + v3exc_to_exclude_test + v3inc_block_test + v3exc_block_test + v3exc_timeout_test + v3star_ex_auto_add_test + v2per_vlan_snooping_port_stp_test + v2per_vlan_snooping_vlan_stp_test +" NUM_NETIFS=4 CHECK_TC="yes" TEST_GROUP="239.10.10.10" @@ -554,6 +568,64 @@ v3star_ex_auto_add_test() v3cleanup $swp2 $TEST_GROUP } +v2per_vlan_snooping_stp_test() +{ + local is_port=$1 + + local msg="port" + [[ $is_port -ne 1 ]] && msg="vlan" + + ip link set br0 up type bridge vlan_filtering 1 \ + mcast_igmp_version 2 \ + mcast_snooping 1 \ + mcast_vlan_snooping 1 \ + mcast_querier 1 \ + mcast_stats_enabled 1 + bridge vlan global set vid 1 dev br0 \ + mcast_snooping 1 \ + mcast_querier 1 \ + mcast_query_interval 100 \ + mcast_startup_query_count 0 + [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 0 + [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 4 + sleep 5 + local tx_s=$(ip -j -p stats show dev $swp1 \ + group xstats_slave subgroup bridge suite mcast \ + | jq '.[]["multicast"]["igmp_queries"]["tx_v2"]') + + [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 3 + [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 3 + sleep 5 + local tx_e=$(ip -j -p stats show dev $swp1 \ + group xstats_slave subgroup bridge suite mcast \ + | jq '.[]["multicast"]["igmp_queries"]["tx_v2"]') + + RET=0 + local tx=$(expr $tx_e - $tx_s) + test $tx -gt 0 + check_err $? "No IGMP queries after STP state becomes forwarding" + log_test "per vlan snooping with $msg stp state change" + + # restore settings + bridge vlan global set vid 1 dev br0 \ + mcast_querier 0 \ + mcast_query_interval 12500 \ + mcast_startup_query_count 2 + ip link set br0 up type bridge vlan_filtering 0 \ + mcast_vlan_snooping 0 \ + mcast_stats_enabled 0 +} + +v2per_vlan_snooping_port_stp_test() +{ + v2per_vlan_snooping_stp_test 1 +} + +v2per_vlan_snooping_vlan_stp_test() +{ + v2per_vlan_snooping_stp_test 0 +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh index d9d587454d20..e86d77946585 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh @@ -28,6 +28,7 @@ ALL_TESTS=" cfg_test fwd_test ctrl_test + disable_test " NUM_NETIFS=4 @@ -64,7 +65,10 @@ h2_destroy() switch_create() { - ip link add name br0 type bridge vlan_filtering 1 vlan_default_pvid 0 \ + local vlan_filtering=$1; shift + + ip link add name br0 type bridge \ + vlan_filtering "$vlan_filtering" vlan_default_pvid 0 \ mcast_snooping 1 mcast_igmp_version 3 mcast_mld_version 2 bridge vlan add vid 10 dev br0 self bridge vlan add vid 20 dev br0 self @@ -118,7 +122,7 @@ setup_prepare() h1_create h2_create - switch_create + switch_create 1 } cleanup() @@ -149,7 +153,7 @@ cfg_test_host_common() check_err $? "Failed to add $name host entry" bridge mdb replace dev br0 port br0 grp $grp $state vid 10 &> /dev/null - check_fail $? "Managed to replace $name host entry" + check_err $? "Failed to replace $name host entry" bridge mdb del dev br0 port br0 grp $grp $state vid 10 bridge mdb get dev br0 grp $grp vid 10 &> /dev/null @@ -1357,6 +1361,98 @@ ctrl_test() ctrl_mldv2_is_in_test } +check_group() +{ + local group=$1; shift + local vid=$1; shift + local should_fail=$1; shift + local when=$1; shift + local -a vidkws + + if ((vid)); then + vidkws=(vid "$vid") + fi + + bridge mdb get dev br0 grp "$group" "${vidkws[@]}" 2>/dev/null | + grep -q "port $swp1" + check_err_fail "$should_fail" $? "$group seen $when snooping disable:" +} + +__disable_test() +{ + local vid=$1; shift + local what=$1; shift + local -a vidkws + + if ((vid)); then + vidkws=(vid "$vid") + fi + + RET=0 + + bridge mdb add dev br0 port "$swp1" grp ff0e::1 permanent \ + "${vidkws[@]}" filter_mode include source_list 2001:db8:1::1 + bridge mdb add dev br0 port "$swp1" grp ff0e::2 permanent \ + "${vidkws[@]}" filter_mode exclude + + bridge mdb add dev br0 port "$swp1" grp ff0e::3 \ + "${vidkws[@]}" filter_mode include source_list 2001:db8:1::2 + bridge mdb add dev br0 port "$swp1" grp ff0e::4 \ + "${vidkws[@]}" filter_mode exclude + + bridge mdb add dev br0 port "$swp1" grp 239.1.1.1 permanent \ + "${vidkws[@]}" filter_mode include source_list 192.0.2.1 + bridge mdb add dev br0 port "$swp1" grp 239.1.1.2 permanent \ + "${vidkws[@]}" filter_mode exclude + + bridge mdb add dev br0 port "$swp1" grp 239.1.1.3 \ + "${vidkws[@]}" filter_mode include source_list 192.0.2.2 + bridge mdb add dev br0 port "$swp1" grp 239.1.1.4 \ + "${vidkws[@]}" filter_mode exclude + + check_group ff0e::1 "$vid" 0 "before" + check_group ff0e::2 "$vid" 0 "before" + check_group ff0e::3 "$vid" 0 "before" + check_group ff0e::4 "$vid" 0 "before" + + check_group 239.1.1.1 "$vid" 0 "before" + check_group 239.1.1.2 "$vid" 0 "before" + check_group 239.1.1.3 "$vid" 0 "before" + check_group 239.1.1.4 "$vid" 0 "before" + + ip link set dev br0 type bridge mcast_snooping 0 + + check_group ff0e::1 "$vid" 0 "after" + check_group ff0e::2 "$vid" 0 "after" + check_group ff0e::3 "$vid" 1 "after" + check_group ff0e::4 "$vid" 1 "after" + + check_group 239.1.1.1 "$vid" 0 "after" + check_group 239.1.1.2 "$vid" 0 "after" + check_group 239.1.1.3 "$vid" 1 "after" + check_group 239.1.1.4 "$vid" 1 "after" + + log_test "$what: Flush after disable" + + ip link set dev br0 type bridge mcast_snooping 1 + sleep 10 +} + +disable_test() +{ + __disable_test 10 802.1q + + switch_destroy + switch_create 0 + setup_wait + + __disable_test 0 802.1d + + switch_destroy + switch_create 1 + setup_wait +} + if ! bridge mdb help 2>&1 | grep -q "flush"; then echo "SKIP: iproute2 too old, missing bridge mdb flush support" exit $ksft_skip diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh b/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh index 3da9d93ab36f..625162fd7e8b 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh @@ -28,6 +28,7 @@ ALL_TESTS=" test_8021d test_8021q test_8021qvs + test_mdb_count_warning " NUM_NETIFS=4 @@ -83,8 +84,6 @@ switch_create_8021q() { local br_flags=$1; shift - log_info "802.1q $br_flags${br_flags:+ }tests" - ip link add name br0 type bridge vlan_filtering 1 vlan_default_pvid 0 \ mcast_snooping 1 $br_flags \ mcast_igmp_version 3 mcast_mld_version 2 @@ -106,6 +105,7 @@ switch_create_8021q() switch_create_8021qvs() { + log_info "802.1q mcast_vlan_snooping 1 tests" switch_create_8021q "mcast_vlan_snooping 1" bridge vlan global set dev br0 vid 10 mcast_igmp_version 3 bridge vlan global set dev br0 vid 10 mcast_mld_version 2 @@ -1272,6 +1272,76 @@ test_8021qvs_toggle_vlan_snooping() test_toggle_vlan_snooping_permanent } +mdb_count_check_warn() +{ + local msg=$1; shift + + dmesg | grep -q "WARNING:.*br_multicast_port_ngroups_dec.*" + check_fail $? "$msg" +} + +test_mdb_count_mcast_vlan_snooping_flush() +{ + RET=0 + + # check if we already have a warning + mdb_count_check_warn "Check MDB entries count warning before test" + + bridge mdb add dev br0 port "$swp1" grp 239.0.0.1 permanent vid 10 + ip link set dev br0 down + ip link set dev br0 type bridge mcast_vlan_snooping 1 + bridge mdb flush dev br0 + + mdb_count_check_warn "Check MDB entries count warning after test" + + ip link set dev br0 type bridge mcast_vlan_snooping 0 + ip link set dev br0 up + + log_test "MDB count warning: mcast_vlan_snooping and MDB flush" +} + +test_mdb_count_mcast_snooping_flush() +{ + RET=0 + + # check if we already have a warning + mdb_count_check_warn "Check MDB entries count warning before test" + + bridge mdb add dev br0 port "$swp1" grp 239.0.0.1 permanent vid 10 + ip link set dev br0 type bridge mcast_snooping 0 + ip link set dev br0 type bridge mcast_vlan_snooping 1 + bridge mdb flush dev br0 + + mdb_count_check_warn "Check MDB entries count warning after test" + + ip link set dev br0 type bridge mcast_vlan_snooping 0 + ip link set dev br0 type bridge mcast_snooping 1 + + log_test "MDB count warning: mcast_snooping and MDB flush" +} + +test_mdb_count_vlan_state_flush() +{ + RET=0 + + # check if we already have a warning + mdb_count_check_warn "Check MDB entries count warning before test" + + bridge mdb add dev br0 port "$swp1" grp 239.0.0.1 permanent vid 10 + ip link set dev br0 down + bridge vlan set vid 10 dev "$swp1" state blocking + ip link set dev br0 type bridge mcast_vlan_snooping 1 + ip link set dev br0 up + bridge mdb flush dev br0 + + mdb_count_check_warn "Check MDB entries count warning after test" + + bridge vlan set vid 10 dev "$swp1" state forwarding + ip link set dev br0 type bridge mcast_vlan_snooping 0 + + log_test "MDB count warning: disabled vlan state and MDB flush" +} + # test groups test_8021d() @@ -1297,6 +1367,7 @@ test_8021q() { # Tests for vlan_filtering 1 mcast_vlan_snooping 0. + log_info "802.1q tests" switch_create_8021q setup_wait @@ -1334,6 +1405,21 @@ test_8021qvs() switch_destroy } +test_mdb_count_warning() +{ + # Tests for mdb_n_entries warning + + log_info "MDB count warning tests" + switch_create_8021q + setup_wait + + test_mdb_count_mcast_vlan_snooping_flush + test_mdb_count_mcast_snooping_flush + test_mdb_count_vlan_state_flush + + switch_destroy +} + if ! bridge link help 2>&1 | grep -q "mcast_max_groups"; then echo "SKIP: iproute2 too old, missing bridge \"mcast_max_groups\" support" exit $ksft_skip diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh index f84ab2e65754..4cacef5a813a 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mld.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh @@ -1,10 +1,23 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \ - mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test \ - mldv2exc_is_exclude_test mldv2exc_to_exclude_test mldv2inc_block_test \ - mldv2exc_block_test mldv2exc_timeout_test mldv2star_ex_auto_add_test" +ALL_TESTS=" + mldv2include_test + mldv2inc_allow_test + mldv2inc_is_include_test + mldv2inc_is_exclude_test + mldv2inc_to_exclude_test + mldv2exc_allow_test + mldv2exc_is_include_test + mldv2exc_is_exclude_test + mldv2exc_to_exclude_test + mldv2inc_block_test + mldv2exc_block_test + mldv2exc_timeout_test + mldv2star_ex_auto_add_test + mldv2per_vlan_snooping_port_stp_test + mldv2per_vlan_snooping_vlan_stp_test +" NUM_NETIFS=4 CHECK_TC="yes" TEST_GROUP="ff02::cc" @@ -554,6 +567,66 @@ mldv2star_ex_auto_add_test() mldv2cleanup $swp2 } +mldv2per_vlan_snooping_stp_test() +{ + local is_port=$1 + + local msg="port" + [[ $is_port -ne 1 ]] && msg="vlan" + + ip link set br0 up type bridge vlan_filtering 1 \ + mcast_mld_version 2 \ + mcast_snooping 1 \ + mcast_vlan_snooping 1 \ + mcast_querier 1 \ + mcast_stats_enabled 1 + bridge vlan global set vid 1 dev br0 \ + mcast_mld_version 2 \ + mcast_snooping 1 \ + mcast_querier 1 \ + mcast_query_interval 100 \ + mcast_startup_query_count 0 + + [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 0 + [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 4 + sleep 5 + local tx_s=$(ip -j -p stats show dev $swp1 \ + group xstats_slave subgroup bridge suite mcast \ + | jq '.[]["multicast"]["mld_queries"]["tx_v2"]') + [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 3 + [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 3 + sleep 5 + local tx_e=$(ip -j -p stats show dev $swp1 \ + group xstats_slave subgroup bridge suite mcast \ + | jq '.[]["multicast"]["mld_queries"]["tx_v2"]') + + RET=0 + local tx=$(expr $tx_e - $tx_s) + test $tx -gt 0 + check_err $? "No MLD queries after STP state becomes forwarding" + log_test "per vlan snooping with $msg stp state change" + + # restore settings + bridge vlan global set vid 1 dev br0 \ + mcast_querier 0 \ + mcast_query_interval 12500 \ + mcast_startup_query_count 2 \ + mcast_mld_version 1 + ip link set br0 up type bridge vlan_filtering 0 \ + mcast_vlan_snooping 0 \ + mcast_stats_enabled 0 +} + +mldv2per_vlan_snooping_port_stp_test() +{ + mldv2per_vlan_snooping_stp_test 1 +} + +mldv2per_vlan_snooping_vlan_stp_test() +{ + mldv2per_vlan_snooping_stp_test 0 +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh index 90f8a244ea90..e59fba366a0a 100755 --- a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh @@ -1,7 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn other_tpid" +ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn other_tpid 8021p drop_untagged" NUM_NETIFS=4 CHECK_TC="yes" source lib.sh @@ -194,6 +194,100 @@ other_tpid() tc qdisc del dev $h2 clsact } +8021p_do() +{ + local should_fail=$1; shift + local mac=de:ad:be:ef:13:37 + + tc filter add dev $h2 ingress protocol all pref 1 handle 101 \ + flower dst_mac $mac action drop + + $MZ -q $h1 -c 1 -b $mac -a own "81:00 00:00 08:00 aa-aa-aa-aa-aa-aa-aa-aa-aa" + sleep 1 + + tc -j -s filter show dev $h2 ingress \ + | jq -e ".[] | select(.options.handle == 101) \ + | select(.options.actions[0].stats.packets == 1)" &> /dev/null + check_err_fail $should_fail $? "802.1p-tagged reception" + + tc filter del dev $h2 ingress pref 1 +} + +8021p() +{ + RET=0 + + tc qdisc add dev $h2 clsact + ip link set $h2 promisc on + + # Test that with the default_pvid, 1, packets tagged with VID 0 are + # accepted. + 8021p_do 0 + + # Test that packets tagged with VID 0 are still accepted after changing + # the default_pvid. + ip link set br0 type bridge vlan_default_pvid 10 + 8021p_do 0 + + log_test "Reception of 802.1p-tagged traffic" + + ip link set $h2 promisc off + tc qdisc del dev $h2 clsact +} + +send_untagged_and_8021p() +{ + ping_do $h1 192.0.2.2 + check_fail $? + + 8021p_do 1 +} + +drop_untagged() +{ + RET=0 + + tc qdisc add dev $h2 clsact + ip link set $h2 promisc on + + # Test that with no PVID, untagged and 802.1p-tagged traffic is + # dropped. + ip link set br0 type bridge vlan_default_pvid 1 + + # First we reconfigure the default_pvid, 1, as a non-PVID VLAN. + bridge vlan add dev $swp1 vid 1 untagged + send_untagged_and_8021p + bridge vlan add dev $swp1 vid 1 pvid untagged + + # Next we try to delete VID 1 altogether + bridge vlan del dev $swp1 vid 1 + send_untagged_and_8021p + bridge vlan add dev $swp1 vid 1 pvid untagged + + # Set up the bridge without a default_pvid, then check that the 8021q + # module, when the bridge port goes down and then up again, does not + # accidentally re-enable untagged packet reception. + ip link set br0 type bridge vlan_default_pvid 0 + ip link set $swp1 down + ip link set $swp1 up + setup_wait + send_untagged_and_8021p + + # Remove swp1 as a bridge port and let it rejoin the bridge while it + # has no default_pvid. + ip link set $swp1 nomaster + ip link set $swp1 master br0 + send_untagged_and_8021p + + # Restore settings + ip link set br0 type bridge vlan_default_pvid 1 + + log_test "Dropping of untagged and 802.1p-tagged traffic with no PVID" + + ip link set $h2 promisc off + tc qdisc del dev $h2 clsact +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh index 72dfbeaf56b9..e8031f68200a 100755 --- a/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh @@ -414,6 +414,7 @@ vlmc_querier_intvl_test() bridge vlan add vid 10 dev br1 self pvid untagged ip link set dev $h1 master br1 ip link set dev br1 up + setup_wait_dev $h1 0 bridge vlan add vid 10 dev $h1 master bridge vlan global set vid 10 dev br1 mcast_snooping 1 mcast_querier 1 sleep 2 diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config index 8d7a1a004b7c..75a6c3d3c1da 100644 --- a/tools/testing/selftests/net/forwarding/config +++ b/tools/testing/selftests/net/forwarding/config @@ -1,23 +1,23 @@ +CONFIG_BPF_SYSCALL=y CONFIG_BRIDGE=m -CONFIG_VLAN_8021Q=m +CONFIG_BRIDGE_IGMP_SNOOPING=y CONFIG_BRIDGE_VLAN_FILTERING=y -CONFIG_NET_L3_MASTER_DEV=y -CONFIG_IPV6_MULTIPLE_TABLES=y -CONFIG_NET_VRF=m -CONFIG_BPF_SYSCALL=y CONFIG_CGROUP_BPF=y CONFIG_DUMMY=m +CONFIG_IP_MROUTE=y +CONFIG_IP_MROUTE_MULTIPLE_TABLES=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y CONFIG_IPV6=y CONFIG_IPV6_GRE=m CONFIG_IPV6_MROUTE=y CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=y +CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_IPV6_PIMSM_V2=y -CONFIG_IP_MROUTE=y -CONFIG_IP_MROUTE_MULTIPLE_TABLES=y -CONFIG_IP_PIMSM_V1=y -CONFIG_IP_PIMSM_V2=y CONFIG_MACVLAN=m +CONFIG_NAMESPACES=y CONFIG_NET_ACT_CT=m +CONFIG_NET_ACT_GACT=m CONFIG_NET_ACT_MIRRED=m CONFIG_NET_ACT_MPLS=m CONFIG_NET_ACT_PEDIT=m @@ -26,29 +26,31 @@ CONFIG_NET_ACT_SAMPLE=m CONFIG_NET_ACT_SKBEDIT=m CONFIG_NET_ACT_TUNNEL_KEY=m CONFIG_NET_ACT_VLAN=m +CONFIG_NET_CLS_BASIC=m CONFIG_NET_CLS_FLOWER=m CONFIG_NET_CLS_MATCHALL=m -CONFIG_NET_CLS_BASIC=m +CONFIG_NET_CLS_U32=m CONFIG_NET_EMATCH=y CONFIG_NET_EMATCH_META=m +CONFIG_NETFILTER=y CONFIG_NET_IPGRE=m CONFIG_NET_IPGRE_DEMUX=m CONFIG_NET_IPIP=m +CONFIG_NET_L3_MASTER_DEV=y +CONFIG_NET_NS=y CONFIG_NET_SCH_ETS=m CONFIG_NET_SCH_INGRESS=m -CONFIG_NET_ACT_GACT=m CONFIG_NET_SCH_PRIO=m CONFIG_NET_SCH_RED=m CONFIG_NET_SCH_TBF=m CONFIG_NET_TC_SKB_EXT=y CONFIG_NET_TEAM=y CONFIG_NET_TEAM_MODE_LOADBALANCE=y -CONFIG_NETFILTER=y +CONFIG_NET_VRF=m CONFIG_NF_CONNTRACK=m CONFIG_NF_FLOW_TABLE=m CONFIG_NF_TABLES=m CONFIG_VETH=m -CONFIG_NAMESPACES=y -CONFIG_NET_NS=y +CONFIG_VLAN_8021Q=m CONFIG_VXLAN=m CONFIG_XFRM_USER=m diff --git a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh index 7d531f7091e6..5dbfab0e23e3 100755 --- a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh +++ b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh @@ -226,7 +226,7 @@ send_flowlabel() # Generate 16384 echo requests, each with a random flow label. ip vrf exec v$h1 sh -c \ "for _ in {1..16384}; do \ - $PING6 2001:db8:4::2 -F 0 -c 1 -q >/dev/null 2>&1; \ + $PING6 -F 0 -c 1 -q 2001:db8:4::2 >/dev/null 2>&1; \ done" } diff --git a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh index dda11a4a9450..b4f17a5bbc61 100755 --- a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh +++ b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh @@ -321,7 +321,7 @@ send_flowlabel() # Generate 16384 echo requests, each with a random flow label. ip vrf exec v$h1 sh -c \ "for _ in {1..16384}; do \ - $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1; \ + $PING6 -F 0 -c 1 -q 2001:db8:2::2 >/dev/null 2>&1; \ done" } diff --git a/tools/testing/selftests/net/forwarding/gre_multipath.sh b/tools/testing/selftests/net/forwarding/gre_multipath.sh index 57531c1d884d..ce4ae74843d9 100755 --- a/tools/testing/selftests/net/forwarding/gre_multipath.sh +++ b/tools/testing/selftests/net/forwarding/gre_multipath.sh @@ -65,7 +65,7 @@ source lib.sh h1_create() { - simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64 + simple_if_init $h1 192.0.2.1/28 ip route add vrf v$h1 192.0.2.16/28 via 192.0.2.2 } diff --git a/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh b/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh index 7d5b2b9cc133..c667b81da37f 100755 --- a/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh +++ b/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh @@ -80,7 +80,7 @@ h1_destroy() { ip route del vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2 ip route del vrf v$h1 192.0.2.16/28 via 192.0.2.2 - simple_if_fini $h1 192.0.2.1/28 + simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64 } sw1_create() diff --git a/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh b/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh index 370f9925302d..d04bad58a96a 100755 --- a/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh +++ b/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh @@ -80,7 +80,7 @@ h1_destroy() { ip route del vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2 ip route del vrf v$h1 192.0.2.16/28 via 192.0.2.2 - simple_if_fini $h1 192.0.2.1/28 + simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64 } sw1_create() diff --git a/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh b/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh index 49fa94b53a1c..25036e38043c 100755 --- a/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh +++ b/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh @@ -95,7 +95,7 @@ ipv6_in_too_big_err() # Send too big packets ip vrf exec $vrf_name \ - $PING6 -s 1300 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null + $PING6 -s 1300 -c 1 -w $PING_TIMEOUT 2001:1:2::2 &> /dev/null local t1=$(ipv6_stats_get $rtr1 Ip6InTooBigErrors) test "$((t1 - t0))" -ne 0 @@ -131,7 +131,7 @@ ipv6_in_addr_err() # Disable forwarding temporary while sending the packet sysctl -qw net.ipv6.conf.all.forwarding=0 ip vrf exec $vrf_name \ - $PING6 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null + $PING6 -c 1 -w $PING_TIMEOUT 2001:1:2::2 &> /dev/null sysctl -qw net.ipv6.conf.all.forwarding=1 local t1=$(ipv6_stats_get $rtr1 Ip6InAddrErrors) @@ -150,7 +150,7 @@ ipv6_in_discard() # Add a policy to discard ip xfrm policy add dst 2001:1:2::2/128 dir fwd action block ip vrf exec $vrf_name \ - $PING6 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null + $PING6 -c 1 -w $PING_TIMEOUT 2001:1:2::2 &> /dev/null ip xfrm policy del dst 2001:1:2::2/128 dir fwd local t1=$(ipv6_stats_get $rtr1 Ip6InDiscards) diff --git a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh index e28b4a079e52..b24acfa52a3a 100755 --- a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh @@ -323,7 +323,7 @@ send_flowlabel() # Generate 16384 echo requests, each with a random flow label. ip vrf exec v$h1 sh -c \ "for _ in {1..16384}; do \ - $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1; \ + $PING6 -F 0 -c 1 -q 2001:db8:2::2 >/dev/null 2>&1; \ done" } diff --git a/tools/testing/selftests/net/forwarding/ipip_lib.sh b/tools/testing/selftests/net/forwarding/ipip_lib.sh index 01e62c4ac94d..b255646b737a 100644 --- a/tools/testing/selftests/net/forwarding/ipip_lib.sh +++ b/tools/testing/selftests/net/forwarding/ipip_lib.sh @@ -144,7 +144,7 @@ h1_create() { - simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64 + simple_if_init $h1 192.0.2.1/28 ip route add vrf v$h1 192.0.2.16/28 via 192.0.2.2 } diff --git a/tools/testing/selftests/net/forwarding/ipmr.c b/tools/testing/selftests/net/forwarding/ipmr.c new file mode 100644 index 000000000000..df870aad9ead --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ipmr.c @@ -0,0 +1,455 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2026 Google LLC */ + +#include <linux/if.h> +#include <linux/mroute.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> +#include <linux/socket.h> +#include <sched.h> +#include <sys/ioctl.h> +#include <sys/socket.h> + +#include "kselftest_harness.h" + +FIXTURE(ipmr) +{ + int netlink_sk; + int raw_sk; + int veth_ifindex; +}; + +FIXTURE_VARIANT(ipmr) +{ + int family; + int protocol; + int level; + int opts[MRT_MAX - MRT_BASE + 1]; +}; + +FIXTURE_VARIANT_ADD(ipmr, ipv4) +{ + .family = AF_INET, + .protocol = IPPROTO_IGMP, + .level = IPPROTO_IP, + .opts = { + MRT_INIT, + MRT_DONE, + MRT_ADD_VIF, + MRT_DEL_VIF, + MRT_ADD_MFC, + MRT_DEL_MFC, + MRT_VERSION, + MRT_ASSERT, + MRT_PIM, + MRT_TABLE, + MRT_ADD_MFC_PROXY, + MRT_DEL_MFC_PROXY, + MRT_FLUSH, + }, +}; + +struct mfc_attr { + int table; + __u32 origin; + __u32 group; + int ifindex; + bool proxy; +}; + +static struct rtattr *nl_add_rtattr(struct nlmsghdr *nlmsg, struct rtattr *rta, + int type, const void *data, int len) +{ + int unused = 0; + + rta->rta_type = type; + rta->rta_len = RTA_LENGTH(len); + memcpy(RTA_DATA(rta), data, len); + + nlmsg->nlmsg_len += NLMSG_ALIGN(rta->rta_len); + + return RTA_NEXT(rta, unused); +} + +static int nl_sendmsg_mfc(struct __test_metadata *_metadata, FIXTURE_DATA(ipmr) *self, + __u16 nlmsg_type, struct mfc_attr *mfc_attr) +{ + struct { + struct nlmsghdr nlmsg; + struct rtmsg rtm; + char buf[4096]; + } req = { + .nlmsg = { + .nlmsg_len = NLMSG_LENGTH(sizeof(req.rtm)), + /* ipmr does not care about NLM_F_CREATE and NLM_F_EXCL ... */ + .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, + .nlmsg_type = nlmsg_type, + }, + .rtm = { + /* hard requirements in rtm_to_ipmr_mfcc() */ + .rtm_family = RTNL_FAMILY_IPMR, + .rtm_dst_len = 32, + .rtm_type = RTN_MULTICAST, + .rtm_scope = RT_SCOPE_UNIVERSE, + .rtm_protocol = RTPROT_MROUTED, + }, + }; + struct nlmsghdr *nlmsg = &req.nlmsg; + struct nlmsgerr *errmsg; + struct rtattr *rta; + int err; + + rta = (struct rtattr *)&req.buf; + rta = nl_add_rtattr(nlmsg, rta, RTA_TABLE, &mfc_attr->table, sizeof(mfc_attr->table)); + rta = nl_add_rtattr(nlmsg, rta, RTA_SRC, &mfc_attr->origin, sizeof(mfc_attr->origin)); + rta = nl_add_rtattr(nlmsg, rta, RTA_DST, &mfc_attr->group, sizeof(mfc_attr->group)); + if (mfc_attr->ifindex) + rta = nl_add_rtattr(nlmsg, rta, RTA_IIF, &mfc_attr->ifindex, sizeof(mfc_attr->ifindex)); + if (mfc_attr->proxy) + rta = nl_add_rtattr(nlmsg, rta, RTA_PREFSRC, NULL, 0); + + err = send(self->netlink_sk, &req, req.nlmsg.nlmsg_len, 0); + ASSERT_EQ(err, req.nlmsg.nlmsg_len); + + memset(&req, 0, sizeof(req)); + + err = recv(self->netlink_sk, &req, sizeof(req), 0); + ASSERT_TRUE(NLMSG_OK(nlmsg, err)); + ASSERT_EQ(NLMSG_ERROR, nlmsg->nlmsg_type); + + errmsg = (struct nlmsgerr *)NLMSG_DATA(nlmsg); + return errmsg->error; +} + +FIXTURE_SETUP(ipmr) +{ + struct ifreq ifr = { + .ifr_name = "veth0", + }; + int err; + + err = unshare(CLONE_NEWNET); + ASSERT_EQ(0, err); + + self->netlink_sk = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + ASSERT_LE(0, self->netlink_sk); + + self->raw_sk = socket(variant->family, SOCK_RAW, variant->protocol); + ASSERT_LT(0, self->raw_sk); + + err = system("ip link add veth0 type veth peer veth1"); + ASSERT_EQ(0, err); + + err = ioctl(self->raw_sk, SIOCGIFINDEX, &ifr); + ASSERT_EQ(0, err); + + self->veth_ifindex = ifr.ifr_ifindex; +} + +FIXTURE_TEARDOWN(ipmr) +{ + close(self->raw_sk); + close(self->netlink_sk); +} + +TEST_F(ipmr, mrt_init) +{ + int err, val = 0; /* any value is ok, but size must be int for MRT_INIT. */ + + err = setsockopt(self->raw_sk, + variant->level, variant->opts[MRT_INIT - MRT_BASE], + &val, sizeof(val)); + ASSERT_EQ(0, err); + + err = setsockopt(self->raw_sk, + variant->level, variant->opts[MRT_DONE - MRT_BASE], + &val, sizeof(val)); + ASSERT_EQ(0, err); +} + +TEST_F(ipmr, mrt_add_vif_register) +{ + struct vifctl vif = { + .vifc_vifi = 0, + .vifc_flags = VIFF_REGISTER, + }; + int err; + + err = setsockopt(self->raw_sk, + variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE], + &vif, sizeof(vif)); + ASSERT_EQ(0, err); + + err = system("cat /proc/net/ip_mr_vif | grep -q pimreg"); + ASSERT_EQ(0, err); + + err = setsockopt(self->raw_sk, + variant->level, variant->opts[MRT_DEL_VIF - MRT_BASE], + &vif, sizeof(vif)); + ASSERT_EQ(0, err); +} + +TEST_F(ipmr, mrt_del_vif_unreg) +{ + struct vifctl vif = { + .vifc_vifi = 0, + .vifc_flags = VIFF_USE_IFINDEX, + .vifc_lcl_ifindex = self->veth_ifindex, + }; + int err; + + err = setsockopt(self->raw_sk, + variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE], + &vif, sizeof(vif)); + ASSERT_EQ(0, err); + + err = system("cat /proc/net/ip_mr_vif | grep -q veth0"); + ASSERT_EQ(0, err); + + /* VIF is removed along with its device. */ + err = system("ip link del veth0"); + ASSERT_EQ(0, err); + + /* mrt->vif_table[veth_ifindex]->dev is NULL. */ + err = setsockopt(self->raw_sk, + variant->level, variant->opts[MRT_DEL_VIF - MRT_BASE], + &vif, sizeof(vif)); + ASSERT_EQ(-1, err); + ASSERT_EQ(EADDRNOTAVAIL, errno); +} + +TEST_F(ipmr, mrt_del_vif_netns_dismantle) +{ + struct vifctl vif = { + .vifc_vifi = 0, + .vifc_flags = VIFF_USE_IFINDEX, + .vifc_lcl_ifindex = self->veth_ifindex, + }; + int err; + + err = setsockopt(self->raw_sk, + variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE], + &vif, sizeof(vif)); + ASSERT_EQ(0, err); + + /* Let cleanup_net() remove veth0 and VIF. */ +} + +TEST_F(ipmr, mrt_add_mfc) +{ + struct mfcctl mfc = {}; + int err; + + /* MRT_ADD_MFC / MRT_ADD_MFC_PROXY does not need vif to exist (unlike netlink). */ + err = setsockopt(self->raw_sk, + variant->level, variant->opts[MRT_ADD_MFC - MRT_BASE], + &mfc, sizeof(mfc)); + ASSERT_EQ(0, err); + + /* (0.0.0.0 -> 0.0.0.0) */ + err = system("cat /proc/net/ip_mr_cache | grep -q '00000000 00000000' "); + ASSERT_EQ(0, err); + + err = setsockopt(self->raw_sk, + variant->level, variant->opts[MRT_DEL_MFC - MRT_BASE], + &mfc, sizeof(mfc)); +} + +TEST_F(ipmr, mrt_add_mfc_proxy) +{ + struct mfcctl mfc = {}; + int err; + + err = setsockopt(self->raw_sk, + variant->level, variant->opts[MRT_ADD_MFC_PROXY - MRT_BASE], + &mfc, sizeof(mfc)); + ASSERT_EQ(0, err); + + err = system("cat /proc/net/ip_mr_cache | grep -q '00000000 00000000' "); + ASSERT_EQ(0, err); + + err = setsockopt(self->raw_sk, + variant->level, variant->opts[MRT_DEL_MFC_PROXY - MRT_BASE], + &mfc, sizeof(mfc)); +} + +TEST_F(ipmr, mrt_add_mfc_netlink) +{ + struct vifctl vif = { + .vifc_vifi = 0, + .vifc_flags = VIFF_USE_IFINDEX, + .vifc_lcl_ifindex = self->veth_ifindex, + }; + struct mfc_attr mfc_attr = { + .table = RT_TABLE_DEFAULT, + .origin = 0, + .group = 0, + .ifindex = self->veth_ifindex, + .proxy = false, + }; + int err; + + err = setsockopt(self->raw_sk, + variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE], + &vif, sizeof(vif)); + ASSERT_EQ(0, err); + + err = nl_sendmsg_mfc(_metadata, self, RTM_NEWROUTE, &mfc_attr); + ASSERT_EQ(0, err); + + err = system("cat /proc/net/ip_mr_cache | grep -q '00000000 00000000' "); + ASSERT_EQ(0, err); + + err = nl_sendmsg_mfc(_metadata, self, RTM_DELROUTE, &mfc_attr); + ASSERT_EQ(0, err); +} + +TEST_F(ipmr, mrt_add_mfc_netlink_proxy) +{ + struct vifctl vif = { + .vifc_vifi = 0, + .vifc_flags = VIFF_USE_IFINDEX, + .vifc_lcl_ifindex = self->veth_ifindex, + }; + struct mfc_attr mfc_attr = { + .table = RT_TABLE_DEFAULT, + .origin = 0, + .group = 0, + .ifindex = self->veth_ifindex, + .proxy = true, + }; + int err; + + err = setsockopt(self->raw_sk, + variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE], + &vif, sizeof(vif)); + ASSERT_EQ(0, err); + + err = nl_sendmsg_mfc(_metadata, self, RTM_NEWROUTE, &mfc_attr); + ASSERT_EQ(0, err); + + err = system("cat /proc/net/ip_mr_cache | grep -q '00000000 00000000' "); + ASSERT_EQ(0, err); + + err = nl_sendmsg_mfc(_metadata, self, RTM_DELROUTE, &mfc_attr); + ASSERT_EQ(0, err); +} + +TEST_F(ipmr, mrt_add_mfc_netlink_no_vif) +{ + struct mfc_attr mfc_attr = { + .table = RT_TABLE_DEFAULT, + .origin = 0, + .group = 0, + .proxy = false, + }; + int err; + + /* netlink always requires RTA_IIF of an existing vif. */ + mfc_attr.ifindex = 0; + err = nl_sendmsg_mfc(_metadata, self, RTM_NEWROUTE, &mfc_attr); + ASSERT_EQ(-ENFILE, err); + + /* netlink always requires RTA_IIF of an existing vif. */ + mfc_attr.ifindex = self->veth_ifindex; + err = nl_sendmsg_mfc(_metadata, self, RTM_NEWROUTE, &mfc_attr); + ASSERT_EQ(-ENFILE, err); +} + +TEST_F(ipmr, mrt_del_mfc_netlink_netns_dismantle) +{ + struct vifctl vifs[2] = { + { + .vifc_vifi = 0, + .vifc_flags = VIFF_USE_IFINDEX, + .vifc_lcl_ifindex = self->veth_ifindex, + }, + { + .vifc_vifi = 1, + .vifc_flags = VIFF_REGISTER, + } + }; + struct mfc_attr mfc_attr = { + .table = RT_TABLE_DEFAULT, + .origin = 0, + .group = 0, + .ifindex = self->veth_ifindex, + .proxy = false, + }; + int i, err; + + for (i = 0; i < 2; i++) { + /* Create 2 VIFs just to avoid -ENFILE later. */ + err = setsockopt(self->raw_sk, + variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE], + &vifs[i], sizeof(vifs[i])); + ASSERT_EQ(0, err); + } + + /* Create a MFC for mrt->vif_table[0]. */ + err = nl_sendmsg_mfc(_metadata, self, RTM_NEWROUTE, &mfc_attr); + ASSERT_EQ(0, err); + + err = system("cat /proc/net/ip_mr_cache | grep -q '00000000 00000000' "); + ASSERT_EQ(0, err); + + /* Remove mrt->vif_table[0]. */ + err = system("ip link del veth0"); + ASSERT_EQ(0, err); + + /* MFC entry is NOT removed even if the tied VIF is removed... */ + err = system("cat /proc/net/ip_mr_cache | grep -q '00000000 00000000' "); + ASSERT_EQ(0, err); + + /* ... and netlink is not capable of removing such an entry + * because netlink always requires a valid RTA_IIF ... :/ + */ + err = nl_sendmsg_mfc(_metadata, self, RTM_DELROUTE, &mfc_attr); + ASSERT_EQ(-ENODEV, err); + + /* It can be removed by setsockopt(), but let cleanup_net() remove this time. */ +} + +TEST_F(ipmr, mrt_table_flush) +{ + struct vifctl vif = { + .vifc_vifi = 0, + .vifc_flags = VIFF_USE_IFINDEX, + .vifc_lcl_ifindex = self->veth_ifindex, + }; + struct mfc_attr mfc_attr = { + .origin = 0, + .group = 0, + .ifindex = self->veth_ifindex, + .proxy = false, + }; + int table_id = 92; + int err, flags; + + /* Set a random table id rather than RT_TABLE_DEFAULT. + * Note that /proc/net/ip_mr_{vif,cache} only supports RT_TABLE_DEFAULT. + */ + err = setsockopt(self->raw_sk, + variant->level, variant->opts[MRT_TABLE - MRT_BASE], + &table_id, sizeof(table_id)); + ASSERT_EQ(0, err); + + err = setsockopt(self->raw_sk, + variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE], + &vif, sizeof(vif)); + ASSERT_EQ(0, err); + + mfc_attr.table = table_id; + err = nl_sendmsg_mfc(_metadata, self, RTM_NEWROUTE, &mfc_attr); + ASSERT_EQ(0, err); + + /* Flush mrt->vif_table[] and all caches. */ + flags = MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC | + MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC; + err = setsockopt(self->raw_sk, + variant->level, variant->opts[MRT_FLUSH - MRT_BASE], + &flags, sizeof(flags)); + ASSERT_EQ(0, err); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 8de80acf249e..ac8358bcb22c 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -1,5 +1,6 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +#shellcheck disable=SC2034 # SC doesn't see our uses of global variables ############################################################################## # Topology description. p1 looped back to p2, p3 to p4 and so on. @@ -37,6 +38,7 @@ declare -A NETIFS=( : "${TEAMD:=teamd}" : "${MCD:=smcrouted}" : "${MC_CLI:=smcroutectl}" +: "${MCD_TABLE_NAME:=selftests}" # Constants for netdevice bring-up: # Default time in seconds to wait for an interface to come up before giving up @@ -141,6 +143,20 @@ check_tc_version() fi } +check_tc_erspan_support() +{ + local dev=$1; shift + + tc filter add dev $dev ingress pref 1 handle 1 flower \ + erspan_opts 1:0:0:0 &> /dev/null + if [[ $? -ne 0 ]]; then + echo "SKIP: iproute2 too old; tc is missing erspan support" + return $ksft_skip + fi + tc filter del dev $dev ingress pref 1 handle 1 flower \ + erspan_opts 1:0:0:0 &> /dev/null +} + # Old versions of tc don't understand "mpls_uc" check_tc_mpls_support() { @@ -291,16 +307,6 @@ if [[ "$CHECK_TC" = "yes" ]]; then check_tc_version fi -require_command() -{ - local cmd=$1; shift - - if [[ ! -x "$(command -v "$cmd")" ]]; then - echo "SKIP: $cmd not installed" - exit $ksft_skip - fi -} - # IPv6 support was added in v3.0 check_mtools_version() { @@ -335,17 +341,145 @@ fi ############################################################################## # Command line options handling -count=0 +check_env() { + if [[ ! (( -n "$LOCAL_V4" && -n "$REMOTE_V4") || + ( -n "$LOCAL_V6" && -n "$REMOTE_V6" )) ]]; then + echo "SKIP: Invalid environment, missing or inconsistent LOCAL_V4/REMOTE_V4/LOCAL_V6/REMOTE_V6" + echo "Please see tools/testing/selftests/drivers/net/README.rst" + exit "$ksft_skip" + fi + + if [[ -z "$REMOTE_TYPE" ]]; then + echo "SKIP: Invalid environment, missing REMOTE_TYPE" + exit "$ksft_skip" + fi + + if [[ -z "$REMOTE_ARGS" ]]; then + echo "SKIP: Invalid environment, missing REMOTE_ARGS" + exit "$ksft_skip" + fi +} + +__run_on() +{ + local target=$1; shift + local type args + + IFS=':' read -r type args <<< "$target" + + case "$type" in + netns) + # Execute command in network namespace + # args contains the namespace name + ip netns exec "$args" "$@" + ;; + ssh) + # Execute command via SSH args contains user@host + ssh -n "$args" "$@" + ;; + local|*) + # Execute command locally. This is also the fallback + # case for when the interface's target is not found in + # the TARGETS array. + "$@" + ;; + esac +} + +run_on() +{ + local iface=$1; shift + local target="local:" + + if [ "${DRIVER_TEST_CONFORMANT}" = "yes" ]; then + target="${TARGETS[$iface]}" + fi + + __run_on "$target" "$@" +} + +get_ifname_by_ip() +{ + local target=$1; shift + local ip_addr=$1; shift + + __run_on "$target" ip -j addr show to "$ip_addr" | jq -r '.[].ifname' +} + +# Whether the test is conforming to the requirements and usage described in +# drivers/net/README.rst. +: "${DRIVER_TEST_CONFORMANT:=no}" + +declare -A TARGETS + +# Based on DRIVER_TEST_CONFORMANT, decide if to source drivers/net/net.config +# or not. In the "yes" case, the test expects to pass the arguments through the +# variables specified in drivers/net/README.rst file. If not, fallback on +# parsing the script arguments for interface names. +if [ "${DRIVER_TEST_CONFORMANT}" = "yes" ]; then + if [[ -f $net_forwarding_dir/../../drivers/net/net.config ]]; then + source "$net_forwarding_dir/../../drivers/net/net.config" + fi + + if (( NUM_NETIFS > 2)); then + echo "SKIP: DRIVER_TEST_CONFORMANT=yes and NUM_NETIFS is bigger than 2" + exit "$ksft_skip" + fi + + check_env + + # Populate the NETIFS and TARGETS arrays automatically based on the + # environment variables. The TARGETS array is indexed by the network + # interface name keeping track of the target on which the interface + # resides. Values will be strings of the following format - + # <type>:<args>. + # + # TARGETS[eth0]="local:" - meaning that the eth0 interface is + # accessible locally + # TARGETS[eth1]="netns:foo" - eth1 is in the foo netns + # TARGETS[eth2]="ssh:root@10.0.0.2" - eth2 is accessible through + # running the 'ssh root@10.0.0.2' command. -while [[ $# -gt 0 ]]; do - if [[ "$count" -eq "0" ]]; then + unset NETIFS + declare -A NETIFS + + NETIFS[p1]="$NETIF" + TARGETS[$NETIF]="local:" + + # Locate the name of the remote interface + remote_target="$REMOTE_TYPE:$REMOTE_ARGS" + if [[ -v REMOTE_V4 ]]; then + remote_netif=$(get_ifname_by_ip "$remote_target" "$REMOTE_V4") + else + remote_netif=$(get_ifname_by_ip "$remote_target" "$REMOTE_V6") + fi + if [[ ! -n "$remote_netif" ]]; then + echo "SKIP: cannot find remote interface" + exit "$ksft_skip" + fi + + if [[ "$NETIF" == "$remote_netif" ]]; then + echo "SKIP: local and remote interfaces cannot have the same name" + exit "$ksft_skip" + fi + + NETIFS[p2]="$remote_netif" + TARGETS[$remote_netif]="$REMOTE_TYPE:$REMOTE_ARGS" +else + count=0 + # Prime NETIFS from the command line, but retain if none given. + if [[ $# -gt 0 ]]; then unset NETIFS declare -A NETIFS + + while [[ $# -gt 0 ]]; do + count=$((count + 1)) + NETIFS[p$count]="$1" + TARGETS[$1]="local:" + shift + done fi - count=$((count + 1)) - NETIFS[p$count]="$1" - shift -done +fi ############################################################################## # Network interfaces configuration @@ -413,10 +547,11 @@ mac_addr_prepare() dev=${NETIFS[p$i]} new_addr=$(printf "00:01:02:03:04:%02x" $i) - MAC_ADDR_ORIG["$dev"]=$(ip -j link show dev $dev | jq -e '.[].address') + MAC_ADDR_ORIG["$dev"]=$(run_on "$dev" \ + ip -j link show dev "$dev" | jq -e '.[].address') # Strip quotes MAC_ADDR_ORIG["$dev"]=${MAC_ADDR_ORIG["$dev"]//\"/} - ip link set dev $dev address $new_addr + run_on "$dev" ip link set dev "$dev" address $new_addr done } @@ -426,7 +561,8 @@ mac_addr_restore() for ((i = 1; i <= NUM_NETIFS; ++i)); do dev=${NETIFS[p$i]} - ip link set dev $dev address ${MAC_ADDR_ORIG["$dev"]} + run_on "$dev" \ + ip link set dev "$dev" address ${MAC_ADDR_ORIG["$dev"]} done } @@ -439,7 +575,9 @@ if [[ "$STABLE_MAC_ADDRS" = "yes" ]]; then fi for ((i = 1; i <= NUM_NETIFS; ++i)); do - ip link show dev ${NETIFS[p$i]} &> /dev/null + int="${NETIFS[p$i]}" + + run_on "$int" ip link show dev "$int" &> /dev/null if [[ $? -ne 0 ]]; then echo "SKIP: could not find all required interfaces" exit $ksft_skip @@ -522,7 +660,7 @@ setup_wait_dev_with_timeout() local i for ((i = 1; i <= $max_iterations; ++i)); do - ip link show dev $dev up \ + run_on "$dev" ip link show dev "$dev" up \ | grep 'state UP' &> /dev/null if [[ $? -ne 0 ]]; then sleep 1 @@ -535,9 +673,9 @@ setup_wait_dev_with_timeout() return 1 } -setup_wait() +setup_wait_n() { - local num_netifs=${1:-$NUM_NETIFS} + local num_netifs=$1; shift local i for ((i = 1; i <= num_netifs; ++i)); do @@ -548,6 +686,11 @@ setup_wait() sleep $WAIT_TIME } +setup_wait() +{ + setup_wait_n "$NUM_NETIFS" +} + wait_for_dev() { local dev=$1; shift @@ -561,30 +704,6 @@ wait_for_dev() fi } -cmd_jq() -{ - local cmd=$1 - local jq_exp=$2 - local jq_opts=$3 - local ret - local output - - output="$($cmd)" - # it the command fails, return error right away - ret=$? - if [[ $ret -ne 0 ]]; then - return $ret - fi - output=$(echo $output | jq -r $jq_opts "$jq_exp") - ret=$? - if [[ $ret -ne 0 ]]; then - return $ret - fi - echo $output - # return success only in case of non-empty output - [ ! -z "$output" ] -} - pre_cleanup() { if [ "${PAUSE_ON_CLEANUP}" = "yes" ]; then @@ -613,6 +732,12 @@ vrf_cleanup() ip -4 rule del pref 32765 } +adf_vrf_prepare() +{ + vrf_prepare + defer vrf_cleanup +} + __last_tb_id=0 declare -A __TB_IDS @@ -725,6 +850,12 @@ simple_if_fini() vrf_destroy $vrf_name } +adf_simple_if_init() +{ + simple_if_init "$@" + defer simple_if_fini "$@" +} + tunnel_create() { local name=$1; shift @@ -833,8 +964,15 @@ ethtool_std_stats_get() local name=$1; shift local src=$1; shift - ethtool --json -S $dev --groups $grp -- --src $src | \ - jq '.[]."'"$grp"'"."'$name'"' + if [[ "$grp" == "pause" ]]; then + run_on "$dev" ethtool -I --json -a "$dev" --src "$src" | \ + jq --arg name "$name" '.[].statistics[$name]' + return + fi + + run_on "$dev" \ + ethtool --json -S "$dev" --groups "$grp" -- --src "$src" | \ + jq --arg grp "$grp" --arg name "$name" '.[][$grp][$name]' } qdisc_stats_get() @@ -1025,6 +1163,12 @@ forwarding_restore() sysctl_restore net.ipv4.conf.all.forwarding } +adf_forwarding_enable() +{ + forwarding_enable + defer forwarding_restore +} + declare -A MTU_ORIG mtu_set() { @@ -1281,8 +1425,8 @@ ping_do() vrf_name=$(master_name_get $if_name) ip vrf exec $vrf_name \ - $PING $args $dip -c $PING_COUNT -i 0.1 \ - -w $PING_TIMEOUT &> /dev/null + $PING $args -c $PING_COUNT -i 0.1 \ + -w $PING_TIMEOUT $dip &> /dev/null } ping_test() @@ -1312,8 +1456,8 @@ ping6_do() vrf_name=$(master_name_get $if_name) ip vrf exec $vrf_name \ - $PING6 $args $dip -c $PING_COUNT -i 0.1 \ - -w $PING_TIMEOUT &> /dev/null + $PING6 $args -c $PING_COUNT -i 0.1 \ + -w $PING_TIMEOUT $dip &> /dev/null } ping6_test() @@ -1606,12 +1750,17 @@ tcpdump_start() sleep 1 } -tcpdump_stop() +tcpdump_stop_nosleep() { local if_name=$1 local pid=${cappid[$if_name]} $ns_cmd kill "$pid" && wait "$pid" +} + +tcpdump_stop() +{ + tcpdump_stop_nosleep "$1" sleep 1 } @@ -1626,7 +1775,7 @@ tcpdump_show() { local if_name=$1 - tcpdump -e -n -r ${capfile[$if_name]} 2>&1 + tcpdump -e -nn -r ${capfile[$if_name]} 2>&1 } # return 0 if the packet wasn't seen on host2_if or 1 if it was @@ -1767,6 +1916,51 @@ mc_send() msend -g $groups -I $if_name -c 1 > /dev/null 2>&1 } +adf_mcd_start() +{ + local ifs=("$@") + + local table_name="$MCD_TABLE_NAME" + local smcroutedir + local pid + local if + local i + + check_command "$MCD" || return 1 + check_command "$MC_CLI" || return 1 + + smcroutedir=$(mktemp -d) + defer rm -rf "$smcroutedir" + + for ((i = 1; i <= NUM_NETIFS; ++i)); do + echo "phyint ${NETIFS[p$i]} enable" >> \ + "$smcroutedir/$table_name.conf" + done + + for if in "${ifs[@]}"; do + if ! ip_link_has_flag "$if" MULTICAST; then + ip link set dev "$if" multicast on + defer ip link set dev "$if" multicast off + fi + + echo "phyint $if enable" >> \ + "$smcroutedir/$table_name.conf" + done + + "$MCD" -N -I "$table_name" -f "$smcroutedir/$table_name.conf" \ + -P "$smcroutedir/$table_name.pid" + busywait "$BUSYWAIT_TIMEOUT" test -e "$smcroutedir/$table_name.pid" + pid=$(cat "$smcroutedir/$table_name.pid") + defer kill_process "$pid" +} + +mc_cli() +{ + local table_name="$MCD_TABLE_NAME" + + "$MC_CLI" -I "$table_name" "$@" +} + start_ip_monitor() { local mtype=$1; shift diff --git a/tools/testing/selftests/net/forwarding/lib_sh_test.sh b/tools/testing/selftests/net/forwarding/lib_sh_test.sh index ff2accccaf4d..b4eda6c6199e 100755 --- a/tools/testing/selftests/net/forwarding/lib_sh_test.sh +++ b/tools/testing/selftests/net/forwarding/lib_sh_test.sh @@ -30,6 +30,11 @@ tfail() do_test "tfail" false } +tfail2() +{ + do_test "tfail2" false +} + txfail() { FAIL_TO_XFAIL=yes do_test "txfail" false @@ -132,6 +137,8 @@ test_ret() ret_subtest $ksft_fail "tfail" txfail tfail ret_subtest $ksft_xfail "txfail" txfail txfail + + ret_subtest $ksft_fail "tfail2" tfail2 tfail } exit_status_tests_run() diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh index ecd34f364125..15b1a1255a41 100755 --- a/tools/testing/selftests/net/forwarding/local_termination.sh +++ b/tools/testing/selftests/net/forwarding/local_termination.sh @@ -57,21 +57,21 @@ PTP_1588_L2_PDELAY_REQ=" \ PTP_1588_IPV4_SYNC=" \ 01:00:5e:00:01:81 00:00:de:ad:be:ef 08:00 45 00 \ 00 48 0a 9a 40 00 01 11 cb 88 c0 00 02 01 e0 00 \ -01 81 01 3f 01 3f 00 34 a3 c8 00 02 00 2c 00 00 \ +01 81 01 3f 01 3f 00 34 9f 41 00 02 00 2c 00 00 \ 02 00 00 00 00 00 00 00 00 00 00 00 00 00 3e 37 \ 63 ff fe cf 17 0e 00 01 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00" PTP_1588_IPV4_FOLLOW_UP=" 01:00:5e:00:01:81 00:00:de:ad:be:ef 08:00 45 00 \ 00 48 0a 9b 40 00 01 11 cb 87 c0 00 02 01 e0 00 \ -01 81 01 40 01 40 00 34 a3 c8 08 02 00 2c 00 00 \ +01 81 01 40 01 40 00 34 eb 8a 08 02 00 2c 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 3e 37 \ 63 ff fe cf 17 0e 00 01 00 00 02 00 00 00 66 83 \ c6 0f 1d 9a 61 87" PTP_1588_IPV4_PDELAY_REQ=" \ 01:00:5e:00:00:6b 00:00:de:ad:be:ef 08:00 45 00 \ 00 52 35 a9 40 00 01 11 a1 85 c0 00 02 01 e0 00 \ -00 6b 01 3f 01 3f 00 3e a2 bc 02 02 00 36 00 00 \ +00 6b 01 3f 01 3f 00 3e 9a b9 02 02 00 36 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 3e 37 \ 63 ff fe cf 17 0e 00 01 00 01 05 7f 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00" @@ -79,7 +79,7 @@ PTP_1588_IPV6_SYNC=" \ 33:33:00:00:01:81 00:00:de:ad:be:ef 86:dd 60 06 \ 7c 2f 00 36 11 01 20 01 0d b8 00 01 00 00 00 00 \ 00 00 00 00 00 01 ff 0e 00 00 00 00 00 00 00 00 \ -00 00 00 00 01 81 01 3f 01 3f 00 36 2e 92 00 02 \ +00 00 00 00 01 81 01 3f 01 3f 00 36 14 76 00 02 \ 00 2c 00 00 02 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00" @@ -87,7 +87,7 @@ PTP_1588_IPV6_FOLLOW_UP=" \ 33:33:00:00:01:81 00:00:de:ad:be:ef 86:dd 60 0a \ 00 bc 00 36 11 01 20 01 0d b8 00 01 00 00 00 00 \ 00 00 00 00 00 01 ff 0e 00 00 00 00 00 00 00 00 \ -00 00 00 00 01 81 01 40 01 40 00 36 2e 92 08 02 \ +00 00 00 00 01 81 01 40 01 40 00 36 f0 47 08 02 \ 00 2c 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 02 00 \ 00 00 66 83 c6 2a 32 09 bd 74 00 00" @@ -95,11 +95,20 @@ PTP_1588_IPV6_PDELAY_REQ=" \ 33:33:00:00:00:6b 00:00:de:ad:be:ef 86:dd 60 0c \ 5c fd 00 40 11 01 fe 80 00 00 00 00 00 00 3c 37 \ 63 ff fe cf 17 0e ff 02 00 00 00 00 00 00 00 00 \ -00 00 00 00 00 6b 01 3f 01 3f 00 40 b4 54 02 02 \ +00 00 00 00 00 6b 01 3f 01 3f 00 40 89 1f 02 02 \ 00 36 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 3e 37 63 ff fe cf 17 0e 00 01 00 01 05 7f \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00" +LINK_LOCAL_STP_BPDU=" \ +01:80:c2:00:00:00 00:00:de:ad:be:ef 00 26 42 42 03 \ +00 00 00 00 00 80 00 aa bb cc dd ee ff 00 00 00 00 \ +80 00 aa bb cc dd ee ff 80 01 00 00 14 00 02 00 \ +0f 00" +LINK_LOCAL_LLDP=" \ +01:80:c2:00:00:0e 00:00:de:ad:be:ef 88:cc 02 07 04 \ +00 11 22 33 44 55 04 05 05 65 74 68 30 06 02 00 \ +78 00 00" # Disable promisc to ensure we don't receive unknown MAC DA packets export TCPDUMP_EXTRA_FLAGS="-pl" @@ -176,6 +185,8 @@ run_test() local rcv_dmac=$(mac_get $rcv_if_name) local should_receive + setup_wait + tcpdump_start $rcv_if_name mc_route_prepare $send_if_name @@ -211,7 +222,15 @@ run_test() mc_route_destroy $rcv_if_name mc_route_destroy $send_if_name + ip maddress add 01:80:c2:00:00:00 dev $rcv_if_name + send_raw $send_if_name "$LINK_LOCAL_STP_BPDU" + ip maddress del 01:80:c2:00:00:00 dev $rcv_if_name + if [ $skip_ptp = false ]; then + ip maddress add 01:80:c2:00:00:0e dev $rcv_if_name + send_raw $send_if_name "$LINK_LOCAL_LLDP" + ip maddress del 01:80:c2:00:00:0e dev $rcv_if_name + ip maddress add 01:1b:19:00:00:00 dev $rcv_if_name send_raw $send_if_name "$PTP_1588_L2_SYNC" send_raw $send_if_name "$PTP_1588_L2_FOLLOW_UP" @@ -302,41 +321,49 @@ run_test() "$smac > $UNKNOWN_MACV6_MC_ADDR3, ethertype IPv6 (0x86dd)" \ true "$test_name" + check_rcv $rcv_if_name "Link-local STP BPDU" \ + "> 01:80:c2:00:00:00" \ + true "$test_name" + if [ $skip_ptp = false ]; then + check_rcv $rcv_if_name "Link-local LLDP" \ + "> 01:80:c2:00:00:0e" \ + true "$test_name" + check_rcv $rcv_if_name "1588v2 over L2 transport, Sync" \ - "ethertype PTP (0x88f7).* PTPv2.* msg type : sync msg" \ + "ethertype PTP (0x88f7).* PTPv2.* msg type *: sync msg" \ true "$test_name" check_rcv $rcv_if_name "1588v2 over L2 transport, Follow-Up" \ - "ethertype PTP (0x88f7).* PTPv2.* msg type : follow up msg" \ + "ethertype PTP (0x88f7).* PTPv2.* msg type *: follow up msg" \ true "$test_name" check_rcv $rcv_if_name "1588v2 over L2 transport, Peer Delay Request" \ - "ethertype PTP (0x88f7).* PTPv2.* msg type : peer delay req msg" \ + "ethertype PTP (0x88f7).* PTPv2.* msg type *: peer delay req msg" \ true "$test_name" check_rcv $rcv_if_name "1588v2 over IPv4, Sync" \ - "ethertype IPv4 (0x0800).* PTPv2.* msg type : sync msg" \ + "ethertype IPv4 (0x0800).* PTPv2.* msg type *: sync msg" \ true "$test_name" check_rcv $rcv_if_name "1588v2 over IPv4, Follow-Up" \ - "ethertype IPv4 (0x0800).* PTPv2.* msg type : follow up msg" \ + "ethertype IPv4 (0x0800).* PTPv2.* msg type *: follow up msg" \ true "$test_name" check_rcv $rcv_if_name "1588v2 over IPv4, Peer Delay Request" \ - "ethertype IPv4 (0x0800).* PTPv2.* msg type : peer delay req msg" \ + "ethertype IPv4 (0x0800).* PTPv2.* msg type *: peer delay req msg" \ true "$test_name" check_rcv $rcv_if_name "1588v2 over IPv6, Sync" \ - "ethertype IPv6 (0x86dd).* PTPv2.* msg type : sync msg" \ + "ethertype IPv6 (0x86dd).* PTPv2.* msg type *: sync msg" \ true "$test_name" check_rcv $rcv_if_name "1588v2 over IPv6, Follow-Up" \ - "ethertype IPv6 (0x86dd).* PTPv2.* msg type : follow up msg" \ + "ethertype IPv6 (0x86dd).* PTPv2.* msg type *: follow up msg" \ true "$test_name" check_rcv $rcv_if_name "1588v2 over IPv6, Peer Delay Request" \ - "ethertype IPv6 (0x86dd).* PTPv2.* msg type : peer delay req msg" \ + "ethertype IPv6 (0x86dd).* PTPv2.* msg type *: peer delay req msg" \ true "$test_name" fi diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh index a20d22d1df36..8d4ae6c952a1 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh @@ -238,7 +238,7 @@ test_lag_slave() ip neigh flush dev br1 setup_wait_dev $up_dev setup_wait_dev $host_dev - $ARPING -I br1 192.0.2.130 -qfc 1 + $ARPING -I br1 -qfc 1 192.0.2.130 sleep 2 mirror_test vrf-h1 192.0.2.1 192.0.2.18 $host_dev 1 ">= 10" diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh index 1b902cc579f6..a21c771908b3 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh @@ -196,7 +196,7 @@ test_span_gre_forbidden_egress() bridge vlan add dev $swp3 vid 555 # Re-prime FDB - $ARPING -I br1.555 192.0.2.130 -fqc 1 + $ARPING -I br1.555 -fqc 1 192.0.2.130 sleep 1 quick_test_span_gre_dir $tundev @@ -290,7 +290,7 @@ test_span_gre_fdb_roaming() bridge fdb del dev $swp2 $h3mac vlan 555 master 2>/dev/null # Re-prime FDB - $ARPING -I br1.555 192.0.2.130 -fqc 1 + $ARPING -I br1.555 -fqc 1 192.0.2.130 sleep 1 quick_test_span_gre_dir $tundev diff --git a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh index af008fbf2725..eb2d8034de9c 100755 --- a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh +++ b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh @@ -98,12 +98,20 @@ setup_prepare() h1_create h2_create switch_create + + if [ -f /proc/sys/net/bridge/bridge-nf-call-iptables ]; then + sysctl_set net.bridge.bridge-nf-call-iptables 0 + fi } cleanup() { pre_cleanup + if [ -f /proc/sys/net/bridge/bridge-nf-call-iptables ]; then + sysctl_restore net.bridge.bridge-nf-call-iptables + fi + switch_destroy h2_destroy h1_destroy diff --git a/tools/testing/selftests/net/forwarding/pedit_ip.sh b/tools/testing/selftests/net/forwarding/pedit_ip.sh index d14efb2d23b2..9235674627ab 100755 --- a/tools/testing/selftests/net/forwarding/pedit_ip.sh +++ b/tools/testing/selftests/net/forwarding/pedit_ip.sh @@ -91,12 +91,20 @@ setup_prepare() h1_create h2_create switch_create + + if [ -f /proc/sys/net/bridge/bridge-nf-call-iptables ]; then + sysctl_set net.bridge.bridge-nf-call-iptables 0 + fi } cleanup() { pre_cleanup + if [ -f /proc/sys/net/bridge/bridge-nf-call-iptables ]; then + sysctl_restore net.bridge.bridge-nf-call-iptables + fi + switch_destroy h2_destroy h1_destroy diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh index b98ea9449b8b..dfb6646cb97b 100755 --- a/tools/testing/selftests/net/forwarding/router.sh +++ b/tools/testing/selftests/net/forwarding/router.sh @@ -18,6 +18,8 @@ # | 2001:db8:1::1/64 2001:db8:2::1/64 | # | | # +-----------------------------------------------------------------+ +# +#shellcheck disable=SC2034 # SC doesn't see our uses of global variables ALL_TESTS=" ping_ipv4 @@ -27,6 +29,7 @@ ALL_TESTS=" ipv4_sip_equal_dip ipv6_sip_equal_dip ipv4_dip_link_local + ipv4_sip_link_local " NUM_NETIFS=4 @@ -330,6 +333,32 @@ ipv4_dip_link_local() tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower } +ipv4_sip_link_local() +{ + local sip=169.254.1.1 + + RET=0 + + # Disable rpfilter to prevent packets to be dropped because of it. + sysctl_set net.ipv4.conf.all.rp_filter 0 + sysctl_set net.ipv4.conf."$rp1".rp_filter 0 + + tc filter add dev "$rp2" egress protocol ip pref 1 handle 101 \ + flower src_ip "$sip" action pass + + $MZ "$h1" -t udp "sp=54321,dp=12345" -c 5 -d 1msec -b "$rp1mac" \ + -A "$sip" -B 198.51.100.2 -q + + tc_check_packets "dev $rp2 egress" 101 5 + check_err $? "Packets were dropped" + + log_test "IPv4 source IP is link-local" + + tc filter del dev "$rp2" egress protocol ip pref 1 handle 101 flower + sysctl_restore net.ipv4.conf."$rp1".rp_filter + sysctl_restore net.ipv4.conf.all.rp_filter +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/router_multicast.sh b/tools/testing/selftests/net/forwarding/router_multicast.sh index 5a58b1ec8aef..83e52abdbc2e 100755 --- a/tools/testing/selftests/net/forwarding/router_multicast.sh +++ b/tools/testing/selftests/net/forwarding/router_multicast.sh @@ -33,10 +33,6 @@ NUM_NETIFS=6 source lib.sh source tc_common.sh -require_command $MCD -require_command $MC_CLI -table_name=selftests - h1_create() { simple_if_init $h1 198.51.100.2/28 2001:db8:1::2/64 @@ -149,25 +145,6 @@ router_destroy() ip link set dev $rp1 down } -start_mcd() -{ - SMCROUTEDIR="$(mktemp -d)" - - for ((i = 1; i <= $NUM_NETIFS; ++i)); do - echo "phyint ${NETIFS[p$i]} enable" >> \ - $SMCROUTEDIR/$table_name.conf - done - - $MCD -N -I $table_name -f $SMCROUTEDIR/$table_name.conf \ - -P $SMCROUTEDIR/$table_name.pid -} - -kill_mcd() -{ - pkill $MCD - rm -rf $SMCROUTEDIR -} - setup_prepare() { h1=${NETIFS[p1]} @@ -179,7 +156,7 @@ setup_prepare() rp3=${NETIFS[p5]} h3=${NETIFS[p6]} - start_mcd + adf_mcd_start || exit "$EXIT_STATUS" vrf_prepare @@ -206,7 +183,7 @@ cleanup() vrf_cleanup - kill_mcd + defer_scopes_cleanup } create_mcast_sg() @@ -214,9 +191,9 @@ create_mcast_sg() local if_name=$1; shift local s_addr=$1; shift local mcast=$1; shift - local dest_ifs=${@} + local dest_ifs=("${@}") - $MC_CLI -I $table_name add $if_name $s_addr $mcast $dest_ifs + mc_cli add "$if_name" "$s_addr" "$mcast" "${dest_ifs[@]}" } delete_mcast_sg() @@ -224,9 +201,9 @@ delete_mcast_sg() local if_name=$1; shift local s_addr=$1; shift local mcast=$1; shift - local dest_ifs=${@} + local dest_ifs=("${@}") - $MC_CLI -I $table_name remove $if_name $s_addr $mcast $dest_ifs + mc_cli remove "$if_name" "$s_addr" "$mcast" "${dest_ifs[@]}" } mcast_v4() diff --git a/tools/testing/selftests/net/forwarding/sch_ets.sh b/tools/testing/selftests/net/forwarding/sch_ets.sh index 1f6f53e284b5..6269d5e23487 100755 --- a/tools/testing/selftests/net/forwarding/sch_ets.sh +++ b/tools/testing/selftests/net/forwarding/sch_ets.sh @@ -11,6 +11,7 @@ ALL_TESTS=" ets_test_strict ets_test_mixed ets_test_dwrr + ets_test_plug classifier_mode ets_test_strict ets_test_mixed diff --git a/tools/testing/selftests/net/forwarding/sch_ets_core.sh b/tools/testing/selftests/net/forwarding/sch_ets_core.sh index 8f9922c695b0..0453210271dc 100644 --- a/tools/testing/selftests/net/forwarding/sch_ets_core.sh +++ b/tools/testing/selftests/net/forwarding/sch_ets_core.sh @@ -165,8 +165,7 @@ h1_create() { local i; - simple_if_init $h1 - defer simple_if_fini $h1 + adf_simple_if_init $h1 mtu_set $h1 9900 defer mtu_restore $h1 @@ -182,8 +181,7 @@ h2_create() { local i - simple_if_init $h2 - defer simple_if_fini $h2 + adf_simple_if_init $h2 mtu_set $h2 9900 defer mtu_restore $h2 @@ -251,8 +249,7 @@ setup_prepare() put=$swp2 hut=$h2 - vrf_prepare - defer vrf_cleanup + adf_vrf_prepare h1_create h2_create diff --git a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh index 08240d3e3c87..79d837a2868a 100644 --- a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh +++ b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh @@ -224,3 +224,11 @@ ets_test_dwrr() ets_set_dwrr_two_bands xfail_on_slow ets_dwrr_test_01 } + +ets_test_plug() +{ + ets_change_qdisc $put 2 "3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3" "1514 1514" + tc qdisc add dev $put handle 20: parent 10:4 plug + start_traffic_pktsize 100 $h1.10 192.0.2.1 192.0.2.2 00:c1:a0:c1:a0:00 "-c 1" + ets_qdisc_setup $put 2 +} diff --git a/tools/testing/selftests/net/forwarding/sch_red.sh b/tools/testing/selftests/net/forwarding/sch_red.sh index af166662b78a..f2a3d9254642 100755 --- a/tools/testing/selftests/net/forwarding/sch_red.sh +++ b/tools/testing/selftests/net/forwarding/sch_red.sh @@ -52,8 +52,7 @@ PKTSZ=1400 h1_create() { - simple_if_init $h1 192.0.2.1/28 - defer simple_if_fini $h1 192.0.2.1/28 + adf_simple_if_init $h1 192.0.2.1/28 mtu_set $h1 10000 defer mtu_restore $h1 @@ -65,8 +64,7 @@ h1_create() h2_create() { - simple_if_init $h2 192.0.2.2/28 - defer simple_if_fini $h2 192.0.2.2/28 + adf_simple_if_init $h2 192.0.2.2/28 mtu_set $h2 10000 defer mtu_restore $h2 @@ -74,8 +72,7 @@ h2_create() h3_create() { - simple_if_init $h3 192.0.2.3/28 - defer simple_if_fini $h3 192.0.2.3/28 + adf_simple_if_init $h3 192.0.2.3/28 mtu_set $h3 10000 defer mtu_restore $h3 @@ -125,8 +122,7 @@ setup_prepare() h3_mac=$(mac_get $h3) - vrf_prepare - defer vrf_cleanup + adf_vrf_prepare h1_create h2_create diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_core.sh b/tools/testing/selftests/net/forwarding/sch_tbf_core.sh index ec309a5086bc..070c17faa9e4 100644 --- a/tools/testing/selftests/net/forwarding/sch_tbf_core.sh +++ b/tools/testing/selftests/net/forwarding/sch_tbf_core.sh @@ -59,8 +59,7 @@ host_create() local dev=$1; shift local host=$1; shift - simple_if_init $dev - defer simple_if_fini $dev + adf_simple_if_init $dev mtu_set $dev 10000 defer mtu_restore $dev @@ -149,8 +148,7 @@ setup_prepare() h2_mac=$(mac_get $h2) - vrf_prepare - defer vrf_cleanup + adf_vrf_prepare h1_create h2_create diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh index ea89e558672d..86edbc7e2489 100755 --- a/tools/testing/selftests/net/forwarding/tc_actions.sh +++ b/tools/testing/selftests/net/forwarding/tc_actions.sh @@ -223,7 +223,7 @@ mirred_egress_to_ingress_tcp_test() ip_proto icmp \ action drop - ip vrf exec v$h1 ncat --recv-only -w10 -l -p 12345 -o $mirred_e2i_tf2 & + ip vrf exec v$h1 ncat --recv-only -w10 -l -p 12345 > $mirred_e2i_tf2 & local rpid=$! ip vrf exec v$h1 ncat -w1 --send-only 192.0.2.2 12345 <$mirred_e2i_tf1 wait -n $rpid diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh index b1daad19b01e..b58909a93112 100755 --- a/tools/testing/selftests/net/forwarding/tc_flower.sh +++ b/tools/testing/selftests/net/forwarding/tc_flower.sh @@ -6,7 +6,7 @@ ALL_TESTS="match_dst_mac_test match_src_mac_test match_dst_ip_test \ match_ip_tos_test match_indev_test match_ip_ttl_test match_mpls_label_test \ match_mpls_tc_test match_mpls_bos_test match_mpls_ttl_test \ - match_mpls_lse_test" + match_mpls_lse_test match_erspan_opts_test" NUM_NETIFS=2 source tc_common.sh source lib.sh @@ -676,6 +676,56 @@ match_mpls_lse_test() log_test "mpls lse match ($tcflags)" } +match_erspan_opts_test() +{ + RET=0 + + check_tc_erspan_support $h2 || return 0 + + # h1 erspan setup + tunnel_create erspan1 erspan 192.0.2.1 192.0.2.2 dev $h1 seq key 1001 \ + tos C ttl 64 erspan_ver 1 erspan 6789 # ERSPAN Type II + tunnel_create erspan2 erspan 192.0.2.1 192.0.2.2 dev $h1 seq key 1002 \ + tos C ttl 64 erspan_ver 2 erspan_dir egress erspan_hwid 63 \ + # ERSPAN Type III + ip link set dev erspan1 master v$h1 + ip link set dev erspan2 master v$h1 + # h2 erspan setup + ip link add ep-ex type erspan ttl 64 external # To collect tunnel info + ip link set ep-ex up + ip link set dev ep-ex master v$h2 + tc qdisc add dev ep-ex clsact + + # ERSPAN Type II [decap direction] + tc filter add dev ep-ex ingress protocol ip handle 101 flower \ + $tcflags enc_src_ip 192.0.2.1 enc_dst_ip 192.0.2.2 \ + enc_key_id 1001 erspan_opts 1:6789:0:0 \ + action drop + # ERSPAN Type III [decap direction] + tc filter add dev ep-ex ingress protocol ip handle 102 flower \ + $tcflags enc_src_ip 192.0.2.1 enc_dst_ip 192.0.2.2 \ + enc_key_id 1002 erspan_opts 2:0:1:63 action drop + + ep1mac=$(mac_get erspan1) + $MZ erspan1 -c 1 -p 64 -a $ep1mac -b $h2mac -t ip -q + tc_check_packets "dev ep-ex ingress" 101 1 + check_err $? "ERSPAN Type II" + + ep2mac=$(mac_get erspan2) + $MZ erspan2 -c 1 -p 64 -a $ep1mac -b $h2mac -t ip -q + tc_check_packets "dev ep-ex ingress" 102 1 + check_err $? "ERSPAN Type III" + + # h2 erspan cleanup + tc qdisc del dev ep-ex clsact + tunnel_destroy ep-ex + # h1 erspan cleanup + tunnel_destroy erspan2 # ERSPAN Type III + tunnel_destroy erspan1 # ERSPAN Type II + + log_test "erspan_opts match ($tcflags)" +} + setup_prepare() { h1=${NETIFS[p1]} diff --git a/tools/testing/selftests/net/forwarding/tc_taprio.sh b/tools/testing/selftests/net/forwarding/tc_taprio.sh new file mode 100755 index 000000000000..8992aeabfe0b --- /dev/null +++ b/tools/testing/selftests/net/forwarding/tc_taprio.sh @@ -0,0 +1,421 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" \ + test_clock_jump_backward \ + test_taprio_after_ptp \ + test_max_sdu \ + test_clock_jump_backward_forward \ +" +NUM_NETIFS=4 +source tc_common.sh +source lib.sh +source tsn_lib.sh + +require_command python3 + +# The test assumes the usual topology from the README, where h1 is connected to +# swp1, h2 to swp2, and swp1 and swp2 are together in a bridge. +# Additional assumption: h1 and h2 use the same PHC, and so do swp1 and swp2. +# By synchronizing h1 to swp1 via PTP, h2 is also implicitly synchronized to +# swp1 (and both to CLOCK_REALTIME). +h1=${NETIFS[p1]} +swp1=${NETIFS[p2]} +swp2=${NETIFS[p3]} +h2=${NETIFS[p4]} + +UDS_ADDRESS_H1="/var/run/ptp4l_h1" +UDS_ADDRESS_SWP1="/var/run/ptp4l_swp1" + +H1_IPV4="192.0.2.1" +H2_IPV4="192.0.2.2" +H1_IPV6="2001:db8:1::1" +H2_IPV6="2001:db8:1::2" + +# Tunables +NUM_PKTS=100 +STREAM_VID=10 +STREAM_PRIO_1=6 +STREAM_PRIO_2=5 +STREAM_PRIO_3=4 +# PTP uses TC 0 +ALL_GATES=$((1 << 0 | 1 << STREAM_PRIO_1 | 1 << STREAM_PRIO_2)) +# Use a conservative cycle of 10 ms to allow the test to still pass when the +# kernel has some extra overhead like lockdep etc +CYCLE_TIME_NS=10000000 +# Create two Gate Control List entries, one OPEN and one CLOSE, of equal +# durations +GATE_DURATION_NS=$((CYCLE_TIME_NS / 2)) +# Give 2/3 of the cycle time to user space and 1/3 to the kernel +FUDGE_FACTOR=$((CYCLE_TIME_NS / 3)) +# Shift the isochron base time by half the gate time, so that packets are +# always received by swp1 close to the middle of the time slot, to minimize +# inaccuracies due to network sync +SHIFT_TIME_NS=$((GATE_DURATION_NS / 2)) + +path_delay= + +h1_create() +{ + simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h1_destroy() +{ + simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h2_create() +{ + simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64 +} + +h2_destroy() +{ + simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64 +} + +switch_create() +{ + local h2_mac_addr=$(mac_get $h2) + + ip link set $swp1 up + ip link set $swp2 up + + ip link add br0 type bridge vlan_filtering 1 + ip link set $swp1 master br0 + ip link set $swp2 master br0 + ip link set br0 up + + bridge vlan add dev $swp2 vid $STREAM_VID + bridge vlan add dev $swp1 vid $STREAM_VID + bridge fdb add dev $swp2 \ + $h2_mac_addr vlan $STREAM_VID static master +} + +switch_destroy() +{ + ip link del br0 +} + +ptp_setup() +{ + # Set up swp1 as a master PHC for h1, synchronized to the local + # CLOCK_REALTIME. + phc2sys_start $UDS_ADDRESS_SWP1 + ptp4l_start $h1 true $UDS_ADDRESS_H1 + ptp4l_start $swp1 false $UDS_ADDRESS_SWP1 +} + +ptp_cleanup() +{ + ptp4l_stop $swp1 + ptp4l_stop $h1 + phc2sys_stop +} + +txtime_setup() +{ + local if_name=$1 + + tc qdisc add dev $if_name clsact + # Classify PTP on TC 7 and isochron on TC 6 + tc filter add dev $if_name egress protocol 0x88f7 \ + flower action skbedit priority 7 + tc filter add dev $if_name egress protocol 802.1Q \ + flower vlan_ethtype 0xdead action skbedit priority 6 + tc qdisc add dev $if_name handle 100: parent root mqprio num_tc 8 \ + queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \ + map 0 1 2 3 4 5 6 7 \ + hw 1 + # Set up TC 5, 6, 7 for SO_TXTIME. tc-mqprio queues count from 1. + tc qdisc replace dev $if_name parent 100:$((STREAM_PRIO_1 + 1)) etf \ + clockid CLOCK_TAI offload delta $FUDGE_FACTOR + tc qdisc replace dev $if_name parent 100:$((STREAM_PRIO_2 + 1)) etf \ + clockid CLOCK_TAI offload delta $FUDGE_FACTOR + tc qdisc replace dev $if_name parent 100:$((STREAM_PRIO_3 + 1)) etf \ + clockid CLOCK_TAI offload delta $FUDGE_FACTOR +} + +txtime_cleanup() +{ + local if_name=$1 + + tc qdisc del dev $if_name clsact + tc qdisc del dev $if_name root +} + +taprio_replace() +{ + local if_name="$1"; shift + local extra_args="$1"; shift + + # STREAM_PRIO_1 always has an open gate. + # STREAM_PRIO_2 has a gate open for GATE_DURATION_NS (half the cycle time) + # STREAM_PRIO_3 always has a closed gate. + tc qdisc replace dev $if_name root stab overhead 24 taprio num_tc 8 \ + queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \ + map 0 1 2 3 4 5 6 7 \ + sched-entry S $(printf "%x" $ALL_GATES) $GATE_DURATION_NS \ + sched-entry S $(printf "%x" $((ALL_GATES & ~(1 << STREAM_PRIO_2)))) $GATE_DURATION_NS \ + base-time 0 flags 0x2 $extra_args + taprio_wait_for_admin $if_name +} + +taprio_cleanup() +{ + local if_name=$1 + + tc qdisc del dev $if_name root +} + +probe_path_delay() +{ + local isochron_dat="$(mktemp)" + local received + + log_info "Probing path delay" + + isochron_do "$h1" "$h2" "$UDS_ADDRESS_H1" "" 0 \ + "$CYCLE_TIME_NS" "" "" "$NUM_PKTS" \ + "$STREAM_VID" "$STREAM_PRIO_1" "" "$isochron_dat" + + received=$(isochron_report_num_received "$isochron_dat") + if [ "$received" != "$NUM_PKTS" ]; then + echo "Cannot establish basic data path between $h1 and $h2" + exit $ksft_fail + fi + + printf "pdelay = {}\n" > isochron_data.py + isochron report --input-file "$isochron_dat" \ + --printf-format "pdelay[%u] = %d - %d\n" \ + --printf-args "qRT" \ + >> isochron_data.py + cat <<-'EOF' > isochron_postprocess.py + #!/usr/bin/env python3 + + from isochron_data import pdelay + import numpy as np + + w = np.array(list(pdelay.values())) + print("{}".format(np.max(w))) + EOF + path_delay=$(python3 ./isochron_postprocess.py) + + log_info "Path delay from $h1 to $h2 estimated at $path_delay ns" + + if [ "$path_delay" -gt "$GATE_DURATION_NS" ]; then + echo "Path delay larger than gate duration, aborting" + exit $ksft_fail + fi + + rm -f ./isochron_data.py 2> /dev/null + rm -f ./isochron_postprocess.py 2> /dev/null + rm -f "$isochron_dat" 2> /dev/null +} + +setup_prepare() +{ + vrf_prepare + + h1_create + h2_create + switch_create + + txtime_setup $h1 + + # Temporarily set up PTP just to probe the end-to-end path delay. + ptp_setup + probe_path_delay + ptp_cleanup +} + +cleanup() +{ + pre_cleanup + + isochron_recv_stop + txtime_cleanup $h1 + + switch_destroy + h2_destroy + h1_destroy + + vrf_cleanup +} + +run_test() +{ + local base_time=$1; shift + local stream_prio=$1; shift + local expected_delay=$1; shift + local should_fail=$1; shift + local test_name=$1; shift + local isochron_dat="$(mktemp)" + local received + local median_delay + + RET=0 + + # Set the shift time equal to the cycle time, which effectively + # cancels the default advance time. Packets won't be sent early in + # software, which ensures that they won't prematurely enter through + # the open gate in __test_out_of_band(). Also, the gate is open for + # long enough that this won't cause a problem in __test_in_band(). + isochron_do "$h1" "$h2" "$UDS_ADDRESS_H1" "" "$base_time" \ + "$CYCLE_TIME_NS" "$SHIFT_TIME_NS" "$GATE_DURATION_NS" \ + "$NUM_PKTS" "$STREAM_VID" "$stream_prio" "" "$isochron_dat" + + received=$(isochron_report_num_received "$isochron_dat") + [ "$received" = "$NUM_PKTS" ] + check_err_fail $should_fail $? "Reception of $NUM_PKTS packets" + + if [ $should_fail = 0 ] && [ "$received" = "$NUM_PKTS" ]; then + printf "pdelay = {}\n" > isochron_data.py + isochron report --input-file "$isochron_dat" \ + --printf-format "pdelay[%u] = %d - %d\n" \ + --printf-args "qRT" \ + >> isochron_data.py + cat <<-'EOF' > isochron_postprocess.py + #!/usr/bin/env python3 + + from isochron_data import pdelay + import numpy as np + + w = np.array(list(pdelay.values())) + print("{}".format(int(np.median(w)))) + EOF + median_delay=$(python3 ./isochron_postprocess.py) + + # If the condition below is true, packets were delayed by a closed gate + [ "$median_delay" -gt $((path_delay + expected_delay)) ] + check_fail $? "Median delay $median_delay is greater than expected delay $expected_delay plus path delay $path_delay" + + # If the condition below is true, packets were sent expecting them to + # hit a closed gate in the switch, but were not delayed + [ "$expected_delay" -gt 0 ] && [ "$median_delay" -lt "$expected_delay" ] + check_fail $? "Median delay $median_delay is less than expected delay $expected_delay" + fi + + log_test "$test_name" + + rm -f ./isochron_data.py 2> /dev/null + rm -f ./isochron_postprocess.py 2> /dev/null + rm -f "$isochron_dat" 2> /dev/null +} + +__test_always_open() +{ + run_test 0.000000000 $STREAM_PRIO_1 0 0 "Gate always open" +} + +__test_always_closed() +{ + run_test 0.000000000 $STREAM_PRIO_3 0 1 "Gate always closed" +} + +__test_in_band() +{ + # Send packets in-band with the OPEN gate entry + run_test 0.000000000 $STREAM_PRIO_2 0 0 "In band with gate" +} + +__test_out_of_band() +{ + # Send packets in-band with the CLOSE gate entry + run_test 0.005000000 $STREAM_PRIO_2 \ + $((GATE_DURATION_NS - SHIFT_TIME_NS)) 0 \ + "Out of band with gate" +} + +run_subtests() +{ + __test_always_open + __test_always_closed + __test_in_band + __test_out_of_band +} + +test_taprio_after_ptp() +{ + log_info "Setting up taprio after PTP" + ptp_setup + taprio_replace $swp2 + run_subtests + taprio_cleanup $swp2 + ptp_cleanup +} + +__test_under_max_sdu() +{ + # Limit max-sdu for STREAM_PRIO_1 + taprio_replace "$swp2" "max-sdu 0 0 0 0 0 0 100 0" + run_test 0.000000000 $STREAM_PRIO_1 0 0 "Under maximum SDU" +} + +__test_over_max_sdu() +{ + # Limit max-sdu for STREAM_PRIO_1 + taprio_replace "$swp2" "max-sdu 0 0 0 0 0 0 20 0" + run_test 0.000000000 $STREAM_PRIO_1 0 1 "Over maximum SDU" +} + +test_max_sdu() +{ + ptp_setup + __test_under_max_sdu + __test_over_max_sdu + taprio_cleanup $swp2 + ptp_cleanup +} + +# Perform a clock jump in the past without synchronization running, so that the +# time base remains where it was set by phc_ctl. +test_clock_jump_backward() +{ + # This is a more complex schedule specifically crafted in a way that + # has been problematic on NXP LS1028A. Not much to test with it other + # than the fact that it passes traffic. + tc qdisc replace dev $swp2 root stab overhead 24 taprio num_tc 8 \ + queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 map 0 1 2 3 4 5 6 7 \ + base-time 0 sched-entry S 20 300000 sched-entry S 10 200000 \ + sched-entry S 20 300000 sched-entry S 48 200000 \ + sched-entry S 20 300000 sched-entry S 83 200000 \ + sched-entry S 40 300000 sched-entry S 00 200000 flags 2 + + log_info "Forcing a backward clock jump" + phc_ctl $swp1 set 0 + + ping_test $h1 192.0.2.2 + taprio_cleanup $swp2 +} + +# Test that taprio tolerates clock jumps. +# Since ptp4l and phc2sys are running, it is expected for the time to +# eventually recover (through yet another clock jump). Isochron waits +# until that is the case. +test_clock_jump_backward_forward() +{ + log_info "Forcing a backward and a forward clock jump" + taprio_replace $swp2 + phc_ctl $swp1 set 0 + ptp_setup + ping_test $h1 192.0.2.2 + run_subtests + ptp_cleanup + taprio_cleanup $swp2 +} + +tc_offload_check +if [[ $? -ne 0 ]]; then + log_test_skip "Could not test offloaded functionality" + exit $EXIT_STATUS +fi + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tsn_lib.sh b/tools/testing/selftests/net/forwarding/tsn_lib.sh index b91bcd8008a9..08c044ff6689 100644 --- a/tools/testing/selftests/net/forwarding/tsn_lib.sh +++ b/tools/testing/selftests/net/forwarding/tsn_lib.sh @@ -2,6 +2,8 @@ # SPDX-License-Identifier: GPL-2.0 # Copyright 2021-2022 NXP +tc_testing_scripts_dir=$(dirname $0)/../../tc-testing/scripts + REQUIRE_ISOCHRON=${REQUIRE_ISOCHRON:=yes} REQUIRE_LINUXPTP=${REQUIRE_LINUXPTP:=yes} @@ -18,6 +20,7 @@ fi if [[ "$REQUIRE_LINUXPTP" = "yes" ]]; then require_command phc2sys require_command ptp4l + require_command phc_ctl fi phc2sys_start() @@ -182,6 +185,7 @@ isochron_do() local base_time=$1; shift local cycle_time=$1; shift local shift_time=$1; shift + local window_size=$1; shift local num_pkts=$1; shift local vid=$1; shift local priority=$1; shift @@ -212,6 +216,10 @@ isochron_do() extra_args="${extra_args} --shift-time=${shift_time}" fi + if ! [ -z "${window_size}" ]; then + extra_args="${extra_args} --window-size=${window_size}" + fi + if [ "${use_l2}" = "true" ]; then extra_args="${extra_args} --l2 --etype=0xdead ${vid}" receiver_extra_args="--l2 --etype=0xdead" @@ -247,3 +255,21 @@ isochron_do() cpufreq_restore ${ISOCHRON_CPU} } + +isochron_report_num_received() +{ + local isochron_dat=$1; shift + + # Count all received packets by looking at the non-zero RX timestamps + isochron report \ + --input-file "${isochron_dat}" \ + --printf-format "%u\n" --printf-args "R" | \ + grep -w -v '0' | wc -l +} + +taprio_wait_for_admin() +{ + local if_name="$1"; shift + + "$tc_testing_scripts_dir/taprio_wait_for_admin.sh" "$(which tc)" "$if_name" +} diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh index 3f9d50f1ef9e..457f41d5e584 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh @@ -428,6 +428,14 @@ __test_flood() test_flood() { __test_flood de:ad:be:ef:13:37 192.0.2.100 "flood" + + # Add an entry with arbitrary destination IP. Verify that packets are + # not duplicated (this can happen if hardware floods the packets, and + # then traps them due to misconfiguration, so software data path repeats + # flooding and resends packets). + bridge fdb append dev vx1 00:00:00:00:00:00 dst 198.51.100.1 self + __test_flood de:ad:be:ef:13:37 192.0.2.100 "flood, unresolved FDB entry" + bridge fdb del dev vx1 00:00:00:00:00:00 dst 198.51.100.1 self } vxlan_fdb_add_del() @@ -559,6 +567,21 @@ vxlan_encapped_ping_do() local inner_tos=$1; shift local outer_tos=$1; shift + local ipv4hdr=$(: + )"45:"$( : IP version + IHL + )"$inner_tos:"$( : IP TOS + )"00:54:"$( : IP total length + )"99:83:"$( : IP identification + )"40:00:"$( : IP flags + frag off + )"40:"$( : IP TTL + )"01:"$( : IP proto + )"CHECKSUM:"$( : IP header csum + )"c0:00:02:03:"$( : IP saddr: 192.0.2.3 + )"c0:00:02:01"$( : IP daddr: 192.0.2.1 + ) + local checksum=$(payload_template_calc_checksum "$ipv4hdr") + ipv4hdr=$(payload_template_expand_checksum "$ipv4hdr" $checksum) + $MZ $dev -c $count -d 100msec -q \ -b $next_hop_mac -B $dest_ip \ -t udp tos=$outer_tos,sp=23456,dp=$VXPORT,p=$(: @@ -569,16 +592,7 @@ vxlan_encapped_ping_do() )"$dest_mac:"$( : ETH daddr )"$(mac_get w2):"$( : ETH saddr )"08:00:"$( : ETH type - )"45:"$( : IP version + IHL - )"$inner_tos:"$( : IP TOS - )"00:54:"$( : IP total length - )"99:83:"$( : IP identification - )"40:00:"$( : IP flags + frag off - )"40:"$( : IP TTL - )"01:"$( : IP proto - )"00:00:"$( : IP header csum - )"c0:00:02:03:"$( : IP saddr: 192.0.2.3 - )"c0:00:02:01:"$( : IP daddr: 192.0.2.1 + )"$ipv4hdr:"$( : IPv4 header )"08:"$( : ICMP type )"00:"$( : ICMP code )"8b:f2:"$( : ICMP csum @@ -740,6 +754,8 @@ test_learning() vxlan_flood_test $mac $dst 0 10 0 + # The entry should age out when it only forwards traffic + $MZ $h1 -c 50 -d 1sec -p 64 -b $mac -B $dst -t icmp -q & sleep 60 bridge fdb show brport vx1 | grep $mac | grep -q self diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh index a603f7b0a08f..e642feeada0e 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh @@ -695,7 +695,7 @@ vxlan_encapped_ping_do() )"6"$( : IP version )"$inner_tos"$( : Traffic class )"0:00:00:"$( : Flow label - )"00:08:"$( : Payload length + )"00:03:"$( : Payload length )"3a:"$( : Next header )"04:"$( : Hop limit )"$saddr:"$( : IP saddr diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh index fb9a34cb50c6..afc65647f673 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh @@ -539,6 +539,21 @@ test_flood() 10 10 0 10 0 __test_flood ca:fe:be:ef:13:37 198.51.100.100 20 "flood vlan 20" \ 10 0 10 0 10 + + # Add entries with arbitrary destination IP. Verify that packets are + # not duplicated (this can happen if hardware floods the packets, and + # then traps them due to misconfiguration, so software data path repeats + # flooding and resends packets). + bridge fdb append dev vx10 00:00:00:00:00:00 dst 203.0.113.1 self + bridge fdb append dev vx20 00:00:00:00:00:00 dst 203.0.113.2 self + + __test_flood de:ad:be:ef:13:37 192.0.2.100 10 \ + "flood vlan 10, unresolved FDB entry" 10 10 0 10 0 + __test_flood ca:fe:be:ef:13:37 198.51.100.100 20 \ + "flood vlan 20, unresolved FDB entry" 10 0 10 0 10 + + bridge fdb del dev vx20 00:00:00:00:00:00 dst 203.0.113.2 self + bridge fdb del dev vx10 00:00:00:00:00:00 dst 203.0.113.1 self } vxlan_fdb_add_del() diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh new file mode 100755 index 000000000000..2cf4c6d9245b --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh @@ -0,0 +1,752 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------------------------+ +# | + $h1.10 + $h1.20 | +# | | 192.0.2.1/28 | 2001:db8:1::1/64 | +# | \________ ________/ | +# | \ / | +# | + $h1 H1 (vrf) | +# +-----------|-----------------------------+ +# | +# +-----------|----------------------------------------------------------------+ +# | +---------|--------------------------------------+ SWITCH (main vrf) | +# | | + $swp1 BR1 (802.1q) | | +# | | vid 10 20 | | +# | | | | +# | | + vx10 (vxlan) + vx20 (vxlan) | + lo10 (dummy) | +# | | local 192.0.2.100 local 2001:db8:4::1 | 192.0.2.100/28 | +# | | group 233.252.0.1 group ff0e::1:2:3 | 2001:db8:4::1/64 | +# | | id 1000 id 2000 | | +# | | vid 10 pvid untagged vid 20 pvid untagged | | +# | +------------------------------------------------+ | +# | | +# | + $swp2 $swp3 + | +# | | 192.0.2.33/28 192.0.2.65/28 | | +# | | 2001:db8:2::1/64 2001:db8:3::1/64 | | +# | | | | +# +---|--------------------------------------------------------------------|---+ +# | | +# +---|--------------------------------+ +--------------------------------|---+ +# | | H2 (vrf) | | H3 (vrf) | | +# | +-|----------------------------+ | | +-----------------------------|-+ | +# | | + $h2 BR2 (802.1d) | | | | BR3 (802.1d) $h3 + | | +# | | | | | | | | +# | | + v1$h2 (veth) | | | | v1$h3 (veth) + | | +# | +-|----------------------------+ | | +-----------------------------|-+ | +# | | | | | | +# +---|--------------------------------+ +--------------------------------|---+ +# | | +# +---|--------------------------------+ +--------------------------------|---+ +# | + v2$h2 (veth) NS2 (netns) | | NS3 (netns) v2$h3 (veth) + | +# | 192.0.2.34/28 | | 192.0.2.66/28 | +# | 2001:db8:2::2/64 | | 2001:db8:3::2/64 | +# | | | | +# | +--------------------------------+ | | +--------------------------------+ | +# | | BR1 (802.1q) | | | | BR1 (802.1q) | | +# | | + vx10 (vxlan) | | | | + vx10 (vxlan) | | +# | | local 192.0.2.34 | | | | local 192.0.2.50 | | +# | | group 233.252.0.1 dev v2$h2 | | | | group 233.252.0.1 dev v2$h3 | | +# | | id 1000 dstport $VXPORT | | | | id 1000 dstport $VXPORT | | +# | | vid 10 pvid untagged | | | | vid 10 pvid untagged | | +# | | | | | | | | +# | | + vx20 (vxlan) | | | | + vx20 (vxlan) | | +# | | local 2001:db8:2::2 | | | | local 2001:db8:3::2 | | +# | | group ff0e::1:2:3 dev v2$h2 | | | | group ff0e::1:2:3 dev v2$h3 | | +# | | id 2000 dstport $VXPORT | | | | id 2000 dstport $VXPORT | | +# | | vid 20 pvid untagged | | | | vid 20 pvid untagged | | +# | | | | | | | | +# | | + w1 (veth) | | | | + w1 (veth) | | +# | | | vid 10 20 | | | | | vid 10 20 | | +# | +--|-----------------------------+ | | +--|-----------------------------+ | +# | | | | | | +# | +--|-----------------------------+ | | +--|-----------------------------+ | +# | | + w2 (veth) VW2 (vrf) | | | | + w2 (veth) VW2 (vrf) | | +# | | |\ | | | | |\ | | +# | | | + w2.10 | | | | | + w2.10 | | +# | | | 192.0.2.3/28 | | | | | 192.0.2.4/28 | | +# | | | | | | | | | | +# | | + w2.20 | | | | + w2.20 | | +# | | 2001:db8:1::3/64 | | | | 2001:db8:1::4/64 | | +# | +--------------------------------+ | | +--------------------------------+ | +# +------------------------------------+ +------------------------------------+ +# +#shellcheck disable=SC2317 # SC doesn't see our uses of functions. + +: "${VXPORT:=4789}" +export VXPORT + +: "${GROUP4:=233.252.0.1}" +export GROUP4 + +: "${GROUP6:=ff0e::1:2:3}" +export GROUP6 + +: "${IPMR:=lo10}" + +ALL_TESTS=" + ipv4_nomcroute + ipv4_mcroute + ipv4_mcroute_changelink + ipv4_mcroute_starg + ipv4_mcroute_noroute + ipv4_mcroute_fdb + ipv4_mcroute_fdb_oif0 + ipv4_mcroute_fdb_oif0_sep + + ipv6_nomcroute + ipv6_mcroute + ipv6_mcroute_changelink + ipv6_mcroute_starg + ipv6_mcroute_noroute + ipv6_mcroute_fdb + ipv6_mcroute_fdb_oif0 + + ipv4_nomcroute_rx + ipv4_mcroute_rx + ipv4_mcroute_starg_rx + ipv4_mcroute_fdb_oif0_sep_rx + ipv4_mcroute_fdb_sep_rx + + ipv6_nomcroute_rx + ipv6_mcroute_rx + ipv6_mcroute_starg_rx + ipv6_mcroute_fdb_sep_rx +" + +NUM_NETIFS=6 +source lib.sh + +h1_create() +{ + adf_simple_if_init "$h1" + + adf_ip_link_add "$h1.10" master "v$h1" link "$h1" type vlan id 10 + adf_ip_link_set_up "$h1.10" + adf_ip_addr_add "$h1.10" 192.0.2.1/28 + + adf_ip_link_add "$h1.20" master "v$h1" link "$h1" type vlan id 20 + adf_ip_link_set_up "$h1.20" + adf_ip_addr_add "$h1.20" 2001:db8:1::1/64 +} + +install_capture() +{ + local dev=$1; shift + + tc qdisc add dev "$dev" clsact + defer tc qdisc del dev "$dev" clsact + + tc filter add dev "$dev" ingress proto ip pref 104 \ + u32 match ip protocol 0x11 0xff \ + match u16 "$VXPORT" 0xffff at 0x16 \ + match u16 0x0800 0xffff at 0x30 \ + action pass + defer tc filter del dev "$dev" ingress proto ip pref 104 + + tc filter add dev "$dev" ingress proto ipv6 pref 106 \ + u32 match ip6 protocol 0x11 0xff \ + match u16 "$VXPORT" 0xffff at 0x2a \ + match u16 0x86dd 0xffff at 0x44 \ + match u8 0x11 0xff at 0x4c \ + action pass + defer tc filter del dev "$dev" ingress proto ipv6 pref 106 +} + +h2_create() +{ + # $h2 + adf_ip_link_set_up "$h2" + + # H2 + vrf_create "v$h2" + defer vrf_destroy "v$h2" + + adf_ip_link_set_up "v$h2" + + # br2 + adf_ip_link_add br2 type bridge vlan_filtering 0 mcast_snooping 0 + adf_ip_link_set_master br2 "v$h2" + adf_ip_link_set_up br2 + + # $h2 + adf_ip_link_set_master "$h2" br2 + install_capture "$h2" + + # v1$h2 + adf_ip_link_set_up "v1$h2" + adf_ip_link_set_master "v1$h2" br2 +} + +h3_create() +{ + # $h3 + adf_ip_link_set_up "$h3" + + # H3 + vrf_create "v$h3" + defer vrf_destroy "v$h3" + + adf_ip_link_set_up "v$h3" + + # br3 + adf_ip_link_add br3 type bridge vlan_filtering 0 mcast_snooping 0 + adf_ip_link_set_master br3 "v$h3" + adf_ip_link_set_up br3 + + # $h3 + adf_ip_link_set_master "$h3" br3 + install_capture "$h3" + + # v1$h3 + adf_ip_link_set_up "v1$h3" + adf_ip_link_set_master "v1$h3" br3 +} + +switch_create() +{ + local swp1_mac + + # br1 + swp1_mac=$(mac_get "$swp1") + adf_ip_link_add br1 type bridge vlan_filtering 1 \ + vlan_default_pvid 0 mcast_snooping 0 + adf_ip_link_set_addr br1 "$swp1_mac" + adf_ip_link_set_up br1 + + # A dummy to force the IPv6 OIF=0 test to install a suitable MC route on + # $IPMR to be deterministic. Also used for the IPv6 RX!=TX ping test. + adf_ip_link_add "X$IPMR" up type dummy + + # IPMR + adf_ip_link_add "$IPMR" up type dummy + adf_ip_addr_add "$IPMR" 192.0.2.100/28 + adf_ip_addr_add "$IPMR" 2001:db8:4::1/64 + + # $swp1 + adf_ip_link_set_up "$swp1" + adf_ip_link_set_master "$swp1" br1 + adf_bridge_vlan_add vid 10 dev "$swp1" + adf_bridge_vlan_add vid 20 dev "$swp1" + + # $swp2 + adf_ip_link_set_up "$swp2" + adf_ip_addr_add "$swp2" 192.0.2.33/28 + adf_ip_addr_add "$swp2" 2001:db8:2::1/64 + + # $swp3 + adf_ip_link_set_up "$swp3" + adf_ip_addr_add "$swp3" 192.0.2.65/28 + adf_ip_addr_add "$swp3" 2001:db8:3::1/64 +} + +vx_create() +{ + local name=$1; shift + local vid=$1; shift + + adf_ip_link_add "$name" up type vxlan dstport "$VXPORT" \ + nolearning noudpcsum tos inherit ttl 16 \ + "$@" + adf_ip_link_set_master "$name" br1 + adf_bridge_vlan_add vid "$vid" dev "$name" pvid untagged +} +export -f vx_create + +vx10_create() +{ + vx_create vx10 10 id 1000 "$@" +} +export -f vx10_create + +vx20_create() +{ + vx_create vx20 20 id 2000 "$@" +} +export -f vx20_create + +ns_init_common() +{ + local ns=$1; shift + local if_in=$1; shift + local ipv4_in=$1; shift + local ipv6_in=$1; shift + local ipv4_host=$1; shift + local ipv6_host=$1; shift + + # v2$h2 / v2$h3 + adf_ip_link_set_up "$if_in" + adf_ip_addr_add "$if_in" "$ipv4_in" + adf_ip_addr_add "$if_in" "$ipv6_in" + + # br1 + adf_ip_link_add br1 type bridge vlan_filtering 1 \ + vlan_default_pvid 0 mcast_snooping 0 + adf_ip_link_set_up br1 + + # vx10, vx20 + vx10_create local "${ipv4_in%/*}" group "$GROUP4" dev "$if_in" + vx20_create local "${ipv6_in%/*}" group "$GROUP6" dev "$if_in" + + # w1 + adf_ip_link_add w1 type veth peer name w2 + adf_ip_link_set_master w1 br1 + adf_ip_link_set_up w1 + adf_bridge_vlan_add vid 10 dev w1 + adf_bridge_vlan_add vid 20 dev w1 + + # w2 + adf_simple_if_init w2 + + # w2.10 + adf_ip_link_add w2.10 master vw2 link w2 type vlan id 10 + adf_ip_link_set_up w2.10 + adf_ip_addr_add w2.10 "$ipv4_host" + + # w2.20 + adf_ip_link_add w2.20 master vw2 link w2 type vlan id 20 + adf_ip_link_set_up w2.20 + adf_ip_addr_add w2.20 "$ipv6_host" +} +export -f ns_init_common + +ns2_create() +{ + # NS2 + ip netns add ns2 + defer ip netns del ns2 + + # v2$h2 + ip link set dev "v2$h2" netns ns2 + defer ip -n ns2 link set dev "v2$h2" netns 1 + + in_ns ns2 \ + ns_init_common ns2 "v2$h2" \ + 192.0.2.34/28 2001:db8:2::2/64 \ + 192.0.2.3/28 2001:db8:1::3/64 +} + +ns3_create() +{ + # NS3 + ip netns add ns3 + defer ip netns del ns3 + + # v2$h3 + ip link set dev "v2$h3" netns ns3 + defer ip -n ns3 link set dev "v2$h3" netns 1 + + ip -n ns3 link set dev "v2$h3" up + + in_ns ns3 \ + ns_init_common ns3 "v2$h3" \ + 192.0.2.66/28 2001:db8:3::2/64 \ + 192.0.2.4/28 2001:db8:1::4/64 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + adf_vrf_prepare + adf_forwarding_enable + + adf_ip_link_add "v1$h2" type veth peer name "v2$h2" + adf_ip_link_add "v1$h3" type veth peer name "v2$h3" + + h1_create + h2_create + h3_create + switch_create + ns2_create + ns3_create +} + +adf_install_broken_sg() +{ + adf_mcd_start "$IPMR" || exit "$EXIT_STATUS" + + mc_cli add "$swp2" 192.0.2.100 "$GROUP4" "$swp1" "$swp3" + defer mc_cli remove "$swp2" 192.0.2.100 "$GROUP4" "$swp1" "$swp3" + + mc_cli add "$swp2" 2001:db8:4::1 "$GROUP6" "$swp1" "$swp3" + defer mc_cli remove "$swp2" 2001:db8:4::1 "$GROUP6" "$swp1" "$swp3" +} + +adf_install_rx() +{ + mc_cli add "$swp2" 0.0.0.0 "$GROUP4" "$IPMR" + defer mc_cli remove "$swp2" 0.0.0.0 "$GROUP4" lo10 + + mc_cli add "$swp3" 0.0.0.0 "$GROUP4" "$IPMR" + defer mc_cli remove "$swp3" 0.0.0.0 "$GROUP4" lo10 + + mc_cli add "$swp2" :: "$GROUP6" "$IPMR" + defer mc_cli remove "$swp2" :: "$GROUP6" lo10 + + mc_cli add "$swp3" :: "$GROUP6" "$IPMR" + defer mc_cli remove "$swp3" :: "$GROUP6" lo10 +} + +adf_install_sg() +{ + adf_mcd_start "$IPMR" || exit "$EXIT_STATUS" + + mc_cli add "$IPMR" 192.0.2.100 "$GROUP4" "$swp2" "$swp3" + defer mc_cli remove "$IPMR" 192.0.2.33 "$GROUP4" "$swp2" "$swp3" + + mc_cli add "$IPMR" 2001:db8:4::1 "$GROUP6" "$swp2" "$swp3" + defer mc_cli remove "$IPMR" 2001:db8:4::1 "$GROUP6" "$swp2" "$swp3" + + adf_install_rx +} + +adf_install_sg_sep() +{ + adf_mcd_start lo || exit "$EXIT_STATUS" + + mc_cli add lo 192.0.2.120 "$GROUP4" "$swp2" "$swp3" + defer mc_cli remove lo 192.0.2.120 "$GROUP4" "$swp2" "$swp3" + + mc_cli add lo 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3" + defer mc_cli remove lo 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3" +} + +adf_install_sg_sep_rx() +{ + local lo=$1; shift + + adf_mcd_start "$IPMR" "$lo" || exit "$EXIT_STATUS" + + mc_cli add "$lo" 192.0.2.120 "$GROUP4" "$swp2" "$swp3" + defer mc_cli remove "$lo" 192.0.2.120 "$GROUP4" "$swp2" "$swp3" + + mc_cli add "$lo" 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3" + defer mc_cli remove "$lo" 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3" + + adf_install_rx +} + +adf_install_starg() +{ + adf_mcd_start "$IPMR" || exit "$EXIT_STATUS" + + mc_cli add "$IPMR" 0.0.0.0 "$GROUP4" "$swp2" "$swp3" + defer mc_cli remove "$IPMR" 0.0.0.0 "$GROUP4" "$swp2" "$swp3" + + mc_cli add "$IPMR" :: "$GROUP6" "$swp2" "$swp3" + defer mc_cli remove "$IPMR" :: "$GROUP6" "$swp2" "$swp3" + + adf_install_rx +} + +do_packets_v4() +{ + local mac + + mac=$(mac_get "$h2") + "$MZ" "$h1" -Q 10 -c 10 -d 100msec -p 64 -a own -b "$mac" \ + -A 192.0.2.1 -B 192.0.2.2 -t udp sp=1234,dp=2345 -q +} + +do_packets_v6() +{ + local mac + + mac=$(mac_get "$h2") + "$MZ" -6 "$h1" -Q 20 -c 10 -d 100msec -p 64 -a own -b "$mac" \ + -A 2001:db8:1::1 -B 2001:db8:1::2 -t udp sp=1234,dp=2345 -q +} + +do_test() +{ + local ipv=$1; shift + local expect_h2=$1; shift + local expect_h3=$1; shift + local what=$1; shift + + local pref=$((100 + ipv)) + local t0_h2 + local t0_h3 + local t1_h2 + local t1_h3 + local d_h2 + local d_h3 + + RET=0 + + t0_h2=$(tc_rule_stats_get "$h2" "$pref" ingress) + t0_h3=$(tc_rule_stats_get "$h3" "$pref" ingress) + + "do_packets_v$ipv" + sleep 1 + + t1_h2=$(tc_rule_stats_get "$h2" "$pref" ingress) + t1_h3=$(tc_rule_stats_get "$h3" "$pref" ingress) + + d_h2=$((t1_h2 - t0_h2)) + d_h3=$((t1_h3 - t0_h3)) + + ((d_h2 == expect_h2)) + check_err $? "Expected $expect_h2 packets on H2, got $d_h2" + + ((d_h3 == expect_h3)) + check_err $? "Expected $expect_h3 packets on H3, got $d_h3" + + log_test "VXLAN MC flood $what" +} + +ipv4_do_test_rx() +{ + local h3_should_fail=$1; shift + local what=$1; shift + + RET=0 + + ping_do "$h1.10" 192.0.2.3 + check_err $? "H2 should respond" + + ping_do "$h1.10" 192.0.2.4 + check_err_fail "$h3_should_fail" $? "H3 responds" + + log_test "VXLAN MC flood $what" +} + +ipv6_do_test_rx() +{ + local h3_should_fail=$1; shift + local what=$1; shift + + RET=0 + + ping6_do "$h1.20" 2001:db8:1::3 + check_err $? "H2 should respond" + + ping6_do "$h1.20" 2001:db8:1::4 + check_err_fail "$h3_should_fail" $? "H3 responds" + + log_test "VXLAN MC flood $what" +} + +ipv4_nomcroute() +{ + # Install a misleading (S,G) rule to attempt to trick the system into + # pushing the packets elsewhere. + adf_install_broken_sg + vx10_create local 192.0.2.100 group "$GROUP4" dev "$swp2" + do_test 4 10 0 "IPv4 nomcroute" +} + +ipv6_nomcroute() +{ + # Like for IPv4, install a misleading (S,G). + adf_install_broken_sg + vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$swp2" + do_test 6 10 0 "IPv6 nomcroute" +} + +ipv4_nomcroute_rx() +{ + vx10_create local 192.0.2.100 group "$GROUP4" dev "$swp2" + ipv4_do_test_rx 1 "IPv4 nomcroute ping" +} + +ipv6_nomcroute_rx() +{ + vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$swp2" + ipv6_do_test_rx 1 "IPv6 nomcroute ping" +} + +ipv4_mcroute() +{ + adf_install_sg + vx10_create local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute + do_test 4 10 10 "IPv4 mcroute" +} + +ipv6_mcroute() +{ + adf_install_sg + vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + do_test 6 10 10 "IPv6 mcroute" +} + +ipv4_mcroute_rx() +{ + adf_install_sg + vx10_create local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute + ipv4_do_test_rx 0 "IPv4 mcroute ping" +} + +ipv6_mcroute_rx() +{ + adf_install_sg + vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + ipv6_do_test_rx 0 "IPv6 mcroute ping" +} + +ipv4_mcroute_changelink() +{ + adf_install_sg + vx10_create local 192.0.2.100 group "$GROUP4" dev "$IPMR" + ip link set dev vx10 type vxlan mcroute + sleep 1 + do_test 4 10 10 "IPv4 mcroute changelink" +} + +ipv6_mcroute_changelink() +{ + adf_install_sg + vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + ip link set dev vx20 type vxlan mcroute + sleep 1 + do_test 6 10 10 "IPv6 mcroute changelink" +} + +ipv4_mcroute_starg() +{ + adf_install_starg + vx10_create local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute + do_test 4 10 10 "IPv4 mcroute (*,G)" +} + +ipv6_mcroute_starg() +{ + adf_install_starg + vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + do_test 6 10 10 "IPv6 mcroute (*,G)" +} + +ipv4_mcroute_starg_rx() +{ + adf_install_starg + vx10_create local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute + ipv4_do_test_rx 0 "IPv4 mcroute (*,G) ping" +} + +ipv6_mcroute_starg_rx() +{ + adf_install_starg + vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + ipv6_do_test_rx 0 "IPv6 mcroute (*,G) ping" +} + +ipv4_mcroute_noroute() +{ + vx10_create local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute + do_test 4 0 0 "IPv4 mcroute, no route" +} + +ipv6_mcroute_noroute() +{ + vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + do_test 6 0 0 "IPv6 mcroute, no route" +} + +ipv4_mcroute_fdb() +{ + adf_install_sg + vx10_create local 192.0.2.100 dev "$IPMR" mcroute + bridge fdb add dev vx10 \ + 00:00:00:00:00:00 self static dst "$GROUP4" via "$IPMR" + do_test 4 10 10 "IPv4 mcroute FDB" +} + +ipv6_mcroute_fdb() +{ + adf_install_sg + vx20_create local 2001:db8:4::1 dev "$IPMR" mcroute + bridge -6 fdb add dev vx20 \ + 00:00:00:00:00:00 self static dst "$GROUP6" via "$IPMR" + do_test 6 10 10 "IPv6 mcroute FDB" +} + +# Use FDB to configure VXLAN in a way where oif=0 for purposes of FIB lookup. +ipv4_mcroute_fdb_oif0() +{ + adf_install_sg + vx10_create local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute + bridge fdb del dev vx10 00:00:00:00:00:00 + bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" + do_test 4 10 10 "IPv4 mcroute oif=0" +} + +ipv6_mcroute_fdb_oif0() +{ + # The IPv6 tunnel lookup does not fall back to selection by source + # address. Instead it just does a FIB match, and that would find one of + # the several ff00::/8 multicast routes -- each device has one. In order + # to reliably force the $IPMR device, add a /128 route for the + # destination group address. + ip -6 route add table local multicast "$GROUP6/128" dev "$IPMR" + defer ip -6 route del table local multicast "$GROUP6/128" dev "$IPMR" + + adf_install_sg + vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + bridge -6 fdb del dev vx20 00:00:00:00:00:00 + bridge -6 fdb add dev vx20 00:00:00:00:00:00 self static dst "$GROUP6" + do_test 6 10 10 "IPv6 mcroute oif=0" +} + +# In oif=0 test as above, have FIB lookup resolve to loopback instead of IPMR. +# This doesn't work with IPv6 -- a MC route on lo would be marked as RTF_REJECT. +ipv4_mcroute_fdb_oif0_sep() +{ + adf_install_sg_sep + + adf_ip_addr_add lo 192.0.2.120/28 + vx10_create local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute + bridge fdb del dev vx10 00:00:00:00:00:00 + bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" + do_test 4 10 10 "IPv4 mcroute TX!=RX oif=0" +} + +ipv4_mcroute_fdb_oif0_sep_rx() +{ + adf_install_sg_sep_rx lo + + adf_ip_addr_add lo 192.0.2.120/28 + vx10_create local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute + bridge fdb del dev vx10 00:00:00:00:00:00 + bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" + ipv4_do_test_rx 0 "IPv4 mcroute TX!=RX oif=0 ping" +} + +ipv4_mcroute_fdb_sep_rx() +{ + adf_install_sg_sep_rx lo + + adf_ip_addr_add lo 192.0.2.120/28 + vx10_create local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute + bridge fdb del dev vx10 00:00:00:00:00:00 + bridge fdb add \ + dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" via lo + ipv4_do_test_rx 0 "IPv4 mcroute TX!=RX ping" +} + +ipv6_mcroute_fdb_sep_rx() +{ + adf_install_sg_sep_rx "X$IPMR" + + adf_ip_addr_add "X$IPMR" 2001:db8:5::1/64 + vx20_create local 2001:db8:5::1 group "$GROUP6" dev "$IPMR" mcroute + bridge -6 fdb del dev vx20 00:00:00:00:00:00 + bridge -6 fdb add dev vx20 00:00:00:00:00:00 \ + self static dst "$GROUP6" via "X$IPMR" + ipv6_do_test_rx 0 "IPv6 mcroute TX!=RX ping" +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/net/forwarding/vxlan_reserved.sh b/tools/testing/selftests/net/forwarding/vxlan_reserved.sh index 46c31794b91b..709845123727 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_reserved.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_reserved.sh @@ -47,8 +47,7 @@ source lib.sh h1_create() { - simple_if_init $h1 192.0.2.1/28 - defer simple_if_fini $h1 192.0.2.1/28 + adf_simple_if_init $h1 192.0.2.1/28 tc qdisc add dev $h1 clsact defer tc qdisc del dev $h1 clsact @@ -60,24 +59,23 @@ h1_create() switch_create() { - ip_link_add br1 type bridge vlan_filtering 0 mcast_snooping 0 + adf_ip_link_add br1 type bridge vlan_filtering 0 mcast_snooping 0 # Make sure the bridge uses the MAC address of the local port and not # that of the VxLAN's device. - ip_link_set_addr br1 $(mac_get $swp1) - ip_link_set_up br1 + adf_ip_link_set_addr br1 $(mac_get $swp1) + adf_ip_link_set_up br1 - ip_link_set_up $rp1 - ip_addr_add $rp1 192.0.2.17/28 - ip_route_add 192.0.2.32/28 nexthop via 192.0.2.18 + adf_ip_link_set_up $rp1 + adf_ip_addr_add $rp1 192.0.2.17/28 + adf_ip_route_add 192.0.2.32/28 nexthop via 192.0.2.18 - ip_link_set_master $swp1 br1 - ip_link_set_up $swp1 + adf_ip_link_set_master $swp1 br1 + adf_ip_link_set_up $swp1 } vrp2_create() { - simple_if_init $rp2 192.0.2.18/28 - defer simple_if_fini $rp2 192.0.2.18/28 + adf_simple_if_init $rp2 192.0.2.18/28 } setup_prepare() @@ -88,11 +86,8 @@ setup_prepare() rp1=${NETIFS[p3]} rp2=${NETIFS[p4]} - vrf_prepare - defer vrf_cleanup - - forwarding_enable - defer forwarding_restore + adf_vrf_prepare + adf_forwarding_enable h1_create switch_create @@ -200,10 +195,10 @@ vxlan_ping_do() vxlan_device_add() { - ip_link_add vx1 up type vxlan id 1000 \ + adf_ip_link_add vx1 up type vxlan id 1000 \ local 192.0.2.17 dstport "$VXPORT" \ nolearning noudpcsum tos inherit ttl 100 "$@" - ip_link_set_master vx1 br1 + adf_ip_link_set_master vx1 br1 } vxlan_all_reserved_bits() diff --git a/tools/testing/selftests/net/fq_band_pktlimit.sh b/tools/testing/selftests/net/fq_band_pktlimit.sh index 977070ed42b3..223f9efe4090 100755 --- a/tools/testing/selftests/net/fq_band_pktlimit.sh +++ b/tools/testing/selftests/net/fq_band_pktlimit.sh @@ -32,19 +32,19 @@ tc qdisc replace dev dummy0 root handle 1: fq quantum 1514 initial_quantum 1514 DELAY=400000 ./cmsg_sender -6 -p u -d "${DELAY}" -n 20 fdaa::2 8000 -OUT1="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')" +OUT1="$(tc -s qdisc show dev dummy0 | grep '^ Sent')" ./cmsg_sender -6 -p u -d "${DELAY}" -n 20 fdaa::2 8000 -OUT2="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')" +OUT2="$(tc -s qdisc show dev dummy0 | grep '^ Sent')" ./cmsg_sender -6 -p u -d "${DELAY}" -n 20 -P 7 fdaa::2 8000 -OUT3="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')" +OUT3="$(tc -s qdisc show dev dummy0 | grep '^ Sent')" # Initial stats will report zero sent, as all packets are still # queued in FQ. Sleep for at least the delay period and see that # twenty are now sent. sleep 0.6 -OUT4="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')" +OUT4="$(tc -s qdisc show dev dummy0 | grep '^ Sent')" # Log the output after the test echo "${OUT1}" @@ -53,7 +53,7 @@ echo "${OUT3}" echo "${OUT4}" # Test the output for expected values -echo "${OUT1}" | grep -q '0\ pkt\ (dropped\ 10' || die "unexpected drop count at 1" -echo "${OUT2}" | grep -q '0\ pkt\ (dropped\ 30' || die "unexpected drop count at 2" -echo "${OUT3}" | grep -q '0\ pkt\ (dropped\ 40' || die "unexpected drop count at 3" -echo "${OUT4}" | grep -q '20\ pkt\ (dropped\ 40' || die "unexpected accept count at 4" +echo "${OUT1}" | grep -q '0 pkt (dropped 10' || die "unexpected drop count at 1" +echo "${OUT2}" | grep -q '0 pkt (dropped 30' || die "unexpected drop count at 2" +echo "${OUT3}" | grep -q '0 pkt (dropped 40' || die "unexpected drop count at 3" +echo "${OUT4}" | grep -q '20 pkt (dropped 40' || die "unexpected accept count at 4" diff --git a/tools/testing/selftests/net/gre_ipv6_lladdr.sh b/tools/testing/selftests/net/gre_ipv6_lladdr.sh new file mode 100755 index 000000000000..48eb999a3120 --- /dev/null +++ b/tools/testing/selftests/net/gre_ipv6_lladdr.sh @@ -0,0 +1,184 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./lib.sh + +PAUSE_ON_FAIL="no" + +# The trap function handler +# +exit_cleanup_all() +{ + cleanup_all_ns + + exit "${EXIT_STATUS}" +} + +# Add fake IPv4 and IPv6 networks on the loopback device, to be used as +# underlay by future GRE devices. +# +setup_basenet() +{ + ip -netns "${NS0}" link set dev lo up + ip -netns "${NS0}" address add dev lo 192.0.2.10/24 + ip -netns "${NS0}" address add dev lo 2001:db8::10/64 nodad +} + +# Check the IPv6 configuration of a network device. +# +# We currently check the generation of the link-local IPv6 address and the +# creation of the ff00::/8 multicast route. +# +# Parameters: +# +# * $1: The network device to test +# * $2: An extra regular expression that should be matched (to verify the +# presence of extra attributes) +# * $3: The expected return code from grep (to allow checking the absence of +# a link-local address) +# * $4: The user visible name for the scenario being tested +# +check_ipv6_device_config() +{ + local DEV="$1" + local EXTRA_MATCH="$2" + local XRET="$3" + local MSG="$4" + + RET=0 + set +e + ip -netns "${NS0}" -6 address show dev "${DEV}" scope link | grep "fe80::" | grep -q "${EXTRA_MATCH}" + check_err_fail "${XRET}" $? "IPv6 link-local address generation" + + ip -netns "${NS0}" -6 route show table local type multicast ff00::/8 proto kernel | grep -q "${DEV}" + check_err_fail 0 $? "IPv6 multicast route creation" + + log_test "${MSG}" + set -e +} + +# Create a GRE device and verify that it gets an IPv6 link-local address as +# expected. +# +# Parameters: +# +# * $1: The device type (gre, ip6gre, gretap or ip6gretap) +# * $2: The local underlay IP address (can be an IPv4, an IPv6 or "any") +# * $3: The remote underlay IP address (can be an IPv4, an IPv6 or "any") +# * $4: The IPv6 interface identifier generation mode to use for the GRE +# device (eui64, none, stable-privacy or random). +# +test_gre_device() +{ + local GRE_TYPE="$1" + local LOCAL_IP="$2" + local REMOTE_IP="$3" + local MODE="$4" + local ADDR_GEN_MODE + local MATCH_REGEXP + local MSG + + ip link add netns "${NS0}" name gretest type "${GRE_TYPE}" local "${LOCAL_IP}" remote "${REMOTE_IP}" + + case "${MODE}" in + "eui64") + ADDR_GEN_MODE=0 + MATCH_REGEXP="" + MSG="${GRE_TYPE}, mode: 0 (EUI64), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=0 + ;; + "none") + ADDR_GEN_MODE=1 + MATCH_REGEXP="" + MSG="${GRE_TYPE}, mode: 1 (none), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=1 # No link-local address should be generated + ;; + "stable-privacy") + ADDR_GEN_MODE=2 + MATCH_REGEXP="stable-privacy" + MSG="${GRE_TYPE}, mode: 2 (stable privacy), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=0 + # Initialise stable_secret (required for stable-privacy mode) + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.stable_secret="2001:db8::abcd" + ;; + "random") + ADDR_GEN_MODE=3 + MATCH_REGEXP="stable-privacy" + MSG="${GRE_TYPE}, mode: 3 (random), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=0 + ;; + esac + + # Check the IPv6 device configuration when it goes up + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}" + ip -netns "${NS0}" link set dev gretest up + check_ipv6_device_config gretest "${MATCH_REGEXP}" "${XRET}" "config: ${MSG}" + + # Now disable link-local address generation + ip -netns "${NS0}" link set dev gretest down + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode=1 + ip -netns "${NS0}" link set dev gretest up + + # Check the IPv6 device configuration when link-local address + # generation is re-enabled while the device is already up + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}" + check_ipv6_device_config gretest "${MATCH_REGEXP}" "${XRET}" "update: ${MSG}" + + ip -netns "${NS0}" link del dev gretest +} + +test_gre4() +{ + local GRE_TYPE + local MODE + + for GRE_TYPE in "gre" "gretap"; do + printf "\n####\nTesting IPv6 configuration of ${GRE_TYPE} devices\n####\n\n" + + for MODE in "eui64" "none" "stable-privacy" "random"; do + test_gre_device "${GRE_TYPE}" 192.0.2.10 192.0.2.11 "${MODE}" + test_gre_device "${GRE_TYPE}" any 192.0.2.11 "${MODE}" + test_gre_device "${GRE_TYPE}" 192.0.2.10 any "${MODE}" + done + done +} + +test_gre6() +{ + local GRE_TYPE + local MODE + + for GRE_TYPE in "ip6gre" "ip6gretap"; do + printf "\n####\nTesting IPv6 configuration of ${GRE_TYPE} devices\n####\n\n" + + for MODE in "eui64" "none" "stable-privacy" "random"; do + test_gre_device "${GRE_TYPE}" 2001:db8::10 2001:db8::11 "${MODE}" + test_gre_device "${GRE_TYPE}" any 2001:db8::11 "${MODE}" + test_gre_device "${GRE_TYPE}" 2001:db8::10 any "${MODE}" + done + done +} + +usage() +{ + echo "Usage: $0 [-p]" + exit 1 +} + +while getopts :p o +do + case $o in + p) PAUSE_ON_FAIL="yes";; + *) usage;; + esac +done + +setup_ns NS0 + +set -e +trap exit_cleanup_all EXIT + +setup_basenet + +test_gre4 +test_gre6 diff --git a/tools/testing/selftests/net/gro.sh b/tools/testing/selftests/net/gro.sh deleted file mode 100755 index 02c21ff4ca81..000000000000 --- a/tools/testing/selftests/net/gro.sh +++ /dev/null @@ -1,104 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -readonly SERVER_MAC="aa:00:00:00:00:02" -readonly CLIENT_MAC="aa:00:00:00:00:01" -readonly TESTS=("data" "ack" "flags" "tcp" "ip" "large") -readonly PROTOS=("ipv4" "ipv6") -dev="" -test="all" -proto="ipv4" - -run_test() { - local server_pid=0 - local exit_code=0 - local protocol=$1 - local test=$2 - local ARGS=( "--${protocol}" "--dmac" "${SERVER_MAC}" \ - "--smac" "${CLIENT_MAC}" "--test" "${test}" "--verbose" ) - - setup_ns - # Each test is run 3 times to deflake, because given the receive timing, - # not all packets that should coalesce will be considered in the same flow - # on every try. - for tries in {1..3}; do - # Actual test starts here - ip netns exec $server_ns ./gro "${ARGS[@]}" "--rx" "--iface" "server" \ - 1>>log.txt & - server_pid=$! - sleep 0.5 # to allow for socket init - ip netns exec $client_ns ./gro "${ARGS[@]}" "--iface" "client" \ - 1>>log.txt - wait "${server_pid}" - exit_code=$? - if [[ ${test} == "large" && -n "${KSFT_MACHINE_SLOW}" && \ - ${exit_code} -ne 0 ]]; then - echo "Ignoring errors due to slow environment" 1>&2 - exit_code=0 - fi - if [[ "${exit_code}" -eq 0 ]]; then - break; - fi - done - cleanup_ns - echo ${exit_code} -} - -run_all_tests() { - local failed_tests=() - for proto in "${PROTOS[@]}"; do - for test in "${TESTS[@]}"; do - echo "running test ${proto} ${test}" >&2 - exit_code=$(run_test $proto $test) - if [[ "${exit_code}" -ne 0 ]]; then - failed_tests+=("${proto}_${test}") - fi; - done; - done - if [[ ${#failed_tests[@]} -ne 0 ]]; then - echo "failed tests: ${failed_tests[*]}. \ - Please see log.txt for more logs" - exit 1 - else - echo "All Tests Succeeded!" - fi; -} - -usage() { - echo "Usage: $0 \ - [-i <DEV>] \ - [-t data|ack|flags|tcp|ip|large] \ - [-p <ipv4|ipv6>]" 1>&2; - exit 1; -} - -while getopts "i:t:p:" opt; do - case "${opt}" in - i) - dev="${OPTARG}" - ;; - t) - test="${OPTARG}" - ;; - p) - proto="${OPTARG}" - ;; - *) - usage - ;; - esac -done - -if [ -n "$dev" ]; then - source setup_loopback.sh -else - source setup_veth.sh -fi - -setup -trap cleanup EXIT -if [[ "${test}" == "all" ]]; then - run_all_tests -else - run_test "${proto}" "${test}" -fi; diff --git a/tools/testing/selftests/net/hsr/Makefile b/tools/testing/selftests/net/hsr/Makefile index 884cd2cc0681..31fb9326cf53 100644 --- a/tools/testing/selftests/net/hsr/Makefile +++ b/tools/testing/selftests/net/hsr/Makefile @@ -2,7 +2,13 @@ top_srcdir = ../../../../.. -TEST_PROGS := hsr_ping.sh hsr_redbox.sh +TEST_PROGS := \ + hsr_ping.sh \ + hsr_redbox.sh \ + link_faults.sh \ + prp_ping.sh \ +# end of TEST_PROGS + TEST_FILES += hsr_common.sh include ../../lib.mk diff --git a/tools/testing/selftests/net/hsr/config b/tools/testing/selftests/net/hsr/config index 555a868743f0..205cc4d3d64b 100644 --- a/tools/testing/selftests/net/hsr/config +++ b/tools/testing/selftests/net/hsr/config @@ -1,6 +1,6 @@ +CONFIG_BRIDGE=y +CONFIG_HSR=y CONFIG_IPV6=y CONFIG_NET_SCH_NETEM=m -CONFIG_HSR=y CONFIG_VETH=y -CONFIG_BRIDGE=y CONFIG_VLAN_8021Q=m diff --git a/tools/testing/selftests/net/hsr/hsr_ping.sh b/tools/testing/selftests/net/hsr/hsr_ping.sh index 5a65f4f836be..f4d685df4345 100755 --- a/tools/testing/selftests/net/hsr/hsr_ping.sh +++ b/tools/testing/selftests/net/hsr/hsr_ping.sh @@ -27,31 +27,34 @@ while getopts "$optstring" option;do esac done -do_complete_ping_test() +do_ping_tests() { - echo "INFO: Initial validation ping." - # Each node has to be able each one. - do_ping "$ns1" 100.64.0.2 - do_ping "$ns2" 100.64.0.1 - do_ping "$ns3" 100.64.0.1 - stop_if_error "Initial validation failed." - - do_ping "$ns1" 100.64.0.3 - do_ping "$ns2" 100.64.0.3 - do_ping "$ns3" 100.64.0.2 + local netid="$1" - do_ping "$ns1" dead:beef:1::2 - do_ping "$ns1" dead:beef:1::3 - do_ping "$ns2" dead:beef:1::1 - do_ping "$ns2" dead:beef:1::2 - do_ping "$ns3" dead:beef:1::1 - do_ping "$ns3" dead:beef:1::2 + echo "INFO: Running ping tests." - stop_if_error "Initial validation failed." + echo "INFO: Initial validation ping." + # Each node has to be able to reach each one. + do_ping "$ns1" "100.64.$netid.2" + do_ping "$ns1" "100.64.$netid.3" + do_ping "$ns2" "100.64.$netid.1" + do_ping "$ns2" "100.64.$netid.3" + do_ping "$ns3" "100.64.$netid.1" + do_ping "$ns3" "100.64.$netid.2" + stop_if_error "Initial validation failed on IPv4." + + do_ping "$ns1" "dead:beef:$netid::2" + do_ping "$ns1" "dead:beef:$netid::3" + do_ping "$ns2" "dead:beef:$netid::1" + do_ping "$ns2" "dead:beef:$netid::2" + do_ping "$ns3" "dead:beef:$netid::1" + do_ping "$ns3" "dead:beef:$netid::2" + stop_if_error "Initial validation failed on IPv6." # Wait until supervisor all supervision frames have been processed and the node # entries have been merged. Otherwise duplicate frames will be observed which is # valid at this stage. + echo "INFO: Wait for node table entries to be merged." WAIT=5 while [ ${WAIT} -gt 0 ] do @@ -68,62 +71,30 @@ do_complete_ping_test() sleep 1 echo "INFO: Longer ping test." - do_ping_long "$ns1" 100.64.0.2 - do_ping_long "$ns1" dead:beef:1::2 - do_ping_long "$ns1" 100.64.0.3 - do_ping_long "$ns1" dead:beef:1::3 - - stop_if_error "Longer ping test failed." - - do_ping_long "$ns2" 100.64.0.1 - do_ping_long "$ns2" dead:beef:1::1 - do_ping_long "$ns2" 100.64.0.3 - do_ping_long "$ns2" dead:beef:1::2 - stop_if_error "Longer ping test failed." - - do_ping_long "$ns3" 100.64.0.1 - do_ping_long "$ns3" dead:beef:1::1 - do_ping_long "$ns3" 100.64.0.2 - do_ping_long "$ns3" dead:beef:1::2 - stop_if_error "Longer ping test failed." - - echo "INFO: Cutting one link." - do_ping_long "$ns1" 100.64.0.3 & - - sleep 3 - ip -net "$ns3" link set ns3eth1 down - wait - - ip -net "$ns3" link set ns3eth1 up - - stop_if_error "Failed with one link down." - - echo "INFO: Delay the link and drop a few packages." - tc -net "$ns3" qdisc add dev ns3eth1 root netem delay 50ms - tc -net "$ns2" qdisc add dev ns2eth1 root netem delay 5ms loss 25% - - do_ping_long "$ns1" 100.64.0.2 - do_ping_long "$ns1" 100.64.0.3 - - stop_if_error "Failed with delay and packetloss." - - do_ping_long "$ns2" 100.64.0.1 - do_ping_long "$ns2" 100.64.0.3 - - stop_if_error "Failed with delay and packetloss." - - do_ping_long "$ns3" 100.64.0.1 - do_ping_long "$ns3" 100.64.0.2 - stop_if_error "Failed with delay and packetloss." - - echo "INFO: All good." + do_ping_long "$ns1" "100.64.$netid.2" + do_ping_long "$ns1" "dead:beef:$netid::2" + do_ping_long "$ns1" "100.64.$netid.3" + do_ping_long "$ns1" "dead:beef:$netid::3" + stop_if_error "Longer ping test failed (ns1)." + + do_ping_long "$ns2" "100.64.$netid.1" + do_ping_long "$ns2" "dead:beef:$netid::1" + do_ping_long "$ns2" "100.64.$netid.3" + do_ping_long "$ns2" "dead:beef:$netid::3" + stop_if_error "Longer ping test failed (ns2)." + + do_ping_long "$ns3" "100.64.$netid.1" + do_ping_long "$ns3" "dead:beef:$netid::1" + do_ping_long "$ns3" "100.64.$netid.2" + do_ping_long "$ns3" "dead:beef:$netid::2" + stop_if_error "Longer ping test failed (ns3)." } setup_hsr_interfaces() { local HSRv="$1" - echo "INFO: preparing interfaces for HSRv${HSRv}." + echo "INFO: Preparing interfaces for HSRv${HSRv}." # Three HSR nodes. Each node has one link to each of its neighbour, two links in total. # # ns1eth1 ----- ns2eth1 @@ -140,17 +111,20 @@ setup_hsr_interfaces() ip link add ns3eth2 netns "$ns3" type veth peer name ns2eth2 netns "$ns2" # HSRv0/1 - ip -net "$ns1" link add name hsr1 type hsr slave1 ns1eth1 slave2 ns1eth2 supervision 45 version $HSRv proto 0 - ip -net "$ns2" link add name hsr2 type hsr slave1 ns2eth1 slave2 ns2eth2 supervision 45 version $HSRv proto 0 - ip -net "$ns3" link add name hsr3 type hsr slave1 ns3eth1 slave2 ns3eth2 supervision 45 version $HSRv proto 0 + ip -net "$ns1" link add name hsr1 type hsr slave1 ns1eth1 \ + slave2 ns1eth2 supervision 45 version "$HSRv" proto 0 + ip -net "$ns2" link add name hsr2 type hsr slave1 ns2eth1 \ + slave2 ns2eth2 supervision 45 version "$HSRv" proto 0 + ip -net "$ns3" link add name hsr3 type hsr slave1 ns3eth1 \ + slave2 ns3eth2 supervision 45 version "$HSRv" proto 0 # IP for HSR ip -net "$ns1" addr add 100.64.0.1/24 dev hsr1 - ip -net "$ns1" addr add dead:beef:1::1/64 dev hsr1 nodad + ip -net "$ns1" addr add dead:beef:0::1/64 dev hsr1 nodad ip -net "$ns2" addr add 100.64.0.2/24 dev hsr2 - ip -net "$ns2" addr add dead:beef:1::2/64 dev hsr2 nodad + ip -net "$ns2" addr add dead:beef:0::2/64 dev hsr2 nodad ip -net "$ns3" addr add 100.64.0.3/24 dev hsr3 - ip -net "$ns3" addr add dead:beef:1::3/64 dev hsr3 nodad + ip -net "$ns3" addr add dead:beef:0::3/64 dev hsr3 nodad ip -net "$ns1" link set address 00:11:22:00:01:01 dev ns1eth1 ip -net "$ns1" link set address 00:11:22:00:01:02 dev ns1eth2 @@ -177,113 +151,56 @@ setup_hsr_interfaces() setup_vlan_interfaces() { ip -net "$ns1" link add link hsr1 name hsr1.2 type vlan id 2 - ip -net "$ns1" link add link hsr1 name hsr1.3 type vlan id 3 - ip -net "$ns1" link add link hsr1 name hsr1.4 type vlan id 4 - ip -net "$ns1" link add link hsr1 name hsr1.5 type vlan id 5 - ip -net "$ns2" link add link hsr2 name hsr2.2 type vlan id 2 - ip -net "$ns2" link add link hsr2 name hsr2.3 type vlan id 3 - ip -net "$ns2" link add link hsr2 name hsr2.4 type vlan id 4 - ip -net "$ns2" link add link hsr2 name hsr2.5 type vlan id 5 - ip -net "$ns3" link add link hsr3 name hsr3.2 type vlan id 2 - ip -net "$ns3" link add link hsr3 name hsr3.3 type vlan id 3 - ip -net "$ns3" link add link hsr3 name hsr3.4 type vlan id 4 - ip -net "$ns3" link add link hsr3 name hsr3.5 type vlan id 5 ip -net "$ns1" addr add 100.64.2.1/24 dev hsr1.2 - ip -net "$ns1" addr add 100.64.3.1/24 dev hsr1.3 - ip -net "$ns1" addr add 100.64.4.1/24 dev hsr1.4 - ip -net "$ns1" addr add 100.64.5.1/24 dev hsr1.5 + ip -net "$ns1" addr add dead:beef:2::1/64 dev hsr1.2 nodad ip -net "$ns2" addr add 100.64.2.2/24 dev hsr2.2 - ip -net "$ns2" addr add 100.64.3.2/24 dev hsr2.3 - ip -net "$ns2" addr add 100.64.4.2/24 dev hsr2.4 - ip -net "$ns2" addr add 100.64.5.2/24 dev hsr2.5 + ip -net "$ns2" addr add dead:beef:2::2/64 dev hsr2.2 nodad ip -net "$ns3" addr add 100.64.2.3/24 dev hsr3.2 - ip -net "$ns3" addr add 100.64.3.3/24 dev hsr3.3 - ip -net "$ns3" addr add 100.64.4.3/24 dev hsr3.4 - ip -net "$ns3" addr add 100.64.5.3/24 dev hsr3.5 + ip -net "$ns3" addr add dead:beef:2::3/64 dev hsr3.2 nodad ip -net "$ns1" link set dev hsr1.2 up - ip -net "$ns1" link set dev hsr1.3 up - ip -net "$ns1" link set dev hsr1.4 up - ip -net "$ns1" link set dev hsr1.5 up - ip -net "$ns2" link set dev hsr2.2 up - ip -net "$ns2" link set dev hsr2.3 up - ip -net "$ns2" link set dev hsr2.4 up - ip -net "$ns2" link set dev hsr2.5 up - ip -net "$ns3" link set dev hsr3.2 up - ip -net "$ns3" link set dev hsr3.3 up - ip -net "$ns3" link set dev hsr3.4 up - ip -net "$ns3" link set dev hsr3.5 up } -hsr_vlan_ping() { - do_ping "$ns1" 100.64.2.2 - do_ping "$ns1" 100.64.3.2 - do_ping "$ns1" 100.64.4.2 - do_ping "$ns1" 100.64.5.2 - - do_ping "$ns1" 100.64.2.3 - do_ping "$ns1" 100.64.3.3 - do_ping "$ns1" 100.64.4.3 - do_ping "$ns1" 100.64.5.3 - - do_ping "$ns2" 100.64.2.1 - do_ping "$ns2" 100.64.3.1 - do_ping "$ns2" 100.64.4.1 - do_ping "$ns2" 100.64.5.1 - - do_ping "$ns2" 100.64.2.3 - do_ping "$ns2" 100.64.3.3 - do_ping "$ns2" 100.64.4.3 - do_ping "$ns2" 100.64.5.3 - - do_ping "$ns3" 100.64.2.1 - do_ping "$ns3" 100.64.3.1 - do_ping "$ns3" 100.64.4.1 - do_ping "$ns3" 100.64.5.1 - - do_ping "$ns3" 100.64.2.2 - do_ping "$ns3" 100.64.3.2 - do_ping "$ns3" 100.64.4.2 - do_ping "$ns3" 100.64.5.2 +run_ping_tests() +{ + echo "INFO: Running ping tests." + do_ping_tests 0 } -run_vlan_tests() { +run_vlan_tests() +{ vlan_challenged_hsr1=$(ip net exec "$ns1" ethtool -k hsr1 | grep "vlan-challenged" | awk '{print $2}') vlan_challenged_hsr2=$(ip net exec "$ns2" ethtool -k hsr2 | grep "vlan-challenged" | awk '{print $2}') vlan_challenged_hsr3=$(ip net exec "$ns3" ethtool -k hsr3 | grep "vlan-challenged" | awk '{print $2}') if [[ "$vlan_challenged_hsr1" = "off" || "$vlan_challenged_hsr2" = "off" || "$vlan_challenged_hsr3" = "off" ]]; then - echo "INFO: Running VLAN tests" + echo "INFO: Running VLAN ping tests" setup_vlan_interfaces - hsr_vlan_ping + do_ping_tests 2 else echo "INFO: Not Running VLAN tests as the device does not support VLAN" fi } check_prerequisites -setup_ns ns1 ns2 ns3 - trap cleanup_all_ns EXIT +setup_ns ns1 ns2 ns3 setup_hsr_interfaces 0 -do_complete_ping_test - +run_ping_tests run_vlan_tests setup_ns ns1 ns2 ns3 - setup_hsr_interfaces 1 -do_complete_ping_test - +run_ping_tests run_vlan_tests exit $ret diff --git a/tools/testing/selftests/net/hsr/link_faults.sh b/tools/testing/selftests/net/hsr/link_faults.sh new file mode 100755 index 000000000000..be526281571c --- /dev/null +++ b/tools/testing/selftests/net/hsr/link_faults.sh @@ -0,0 +1,378 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# shellcheck disable=SC2329 + +source ../lib.sh + +ALL_TESTS=" + test_clean_hsrv0 + test_cut_link_hsrv0 + test_packet_loss_hsrv0 + test_high_packet_loss_hsrv0 + test_reordering_hsrv0 + + test_clean_hsrv1 + test_cut_link_hsrv1 + test_packet_loss_hsrv1 + test_high_packet_loss_hsrv1 + test_reordering_hsrv1 + + test_clean_prp + test_cut_link_prp + test_packet_loss_prp + test_high_packet_loss_prp + test_reordering_prp +" + +# The tests are running ping for 5sec with a relatively short interval in +# different scenarios with faulty links (cut links, packet loss, delay, +# reordering) that should be recoverable by HSR/PRP. The ping interval (10ms) +# is short enough that the base delay (50ms) leads to a queue in the netem +# qdiscs which is needed for reordering. + +setup_hsr_topo() +{ + # Three HSR nodes in a ring, every node has a LAN A interface connected + # to the LAN B interface of the next node. + # + # node1 node2 + # + # vethA -------- vethB + # hsr1 hsr2 + # vethB vethA + # \ / + # vethA vethB + # hsr3 + # + # node3 + + local ver="$1" + + setup_ns node1 node2 node3 + + # veth links + # shellcheck disable=SC2154 # variables assigned by setup_ns + ip link add vethA netns "$node1" type veth peer name vethB netns "$node2" + # shellcheck disable=SC2154 # variables assigned by setup_ns + ip link add vethA netns "$node2" type veth peer name vethB netns "$node3" + ip link add vethA netns "$node3" type veth peer name vethB netns "$node1" + + # MAC addresses (not needed for HSR operation, but helps with debugging) + ip -net "$node1" link set address 00:11:22:00:01:01 dev vethA + ip -net "$node1" link set address 00:11:22:00:01:02 dev vethB + + ip -net "$node2" link set address 00:11:22:00:02:01 dev vethA + ip -net "$node2" link set address 00:11:22:00:02:02 dev vethB + + ip -net "$node3" link set address 00:11:22:00:03:01 dev vethA + ip -net "$node3" link set address 00:11:22:00:03:02 dev vethB + + # HSR interfaces + ip -net "$node1" link add name hsr1 type hsr proto 0 version "$ver" \ + slave1 vethA slave2 vethB supervision 45 + ip -net "$node2" link add name hsr2 type hsr proto 0 version "$ver" \ + slave1 vethA slave2 vethB supervision 45 + ip -net "$node3" link add name hsr3 type hsr proto 0 version "$ver" \ + slave1 vethA slave2 vethB supervision 45 + + # IP addresses + ip -net "$node1" addr add 100.64.0.1/24 dev hsr1 + ip -net "$node2" addr add 100.64.0.2/24 dev hsr2 + ip -net "$node3" addr add 100.64.0.3/24 dev hsr3 + + # Set all links up + ip -net "$node1" link set vethA up + ip -net "$node1" link set vethB up + ip -net "$node1" link set hsr1 up + + ip -net "$node2" link set vethA up + ip -net "$node2" link set vethB up + ip -net "$node2" link set hsr2 up + + ip -net "$node3" link set vethA up + ip -net "$node3" link set vethB up + ip -net "$node3" link set hsr3 up +} + +setup_prp_topo() +{ + # Two PRP nodes, connected by two links (treated as LAN A and LAN B). + # + # vethA ----- vethA + # prp1 prp2 + # vethB ----- vethB + # + # node1 node2 + + setup_ns node1 node2 + + # veth links + ip link add vethA netns "$node1" type veth peer name vethA netns "$node2" + ip link add vethB netns "$node1" type veth peer name vethB netns "$node2" + + # MAC addresses will be copied from LAN A interface + ip -net "$node1" link set address 00:11:22:00:00:01 dev vethA + ip -net "$node2" link set address 00:11:22:00:00:02 dev vethA + + # PRP interfaces + ip -net "$node1" link add name prp1 type hsr \ + slave1 vethA slave2 vethB supervision 45 proto 1 + ip -net "$node2" link add name prp2 type hsr \ + slave1 vethA slave2 vethB supervision 45 proto 1 + + # IP addresses + ip -net "$node1" addr add 100.64.0.1/24 dev prp1 + ip -net "$node2" addr add 100.64.0.2/24 dev prp2 + + # All links up + ip -net "$node1" link set vethA up + ip -net "$node1" link set vethB up + ip -net "$node1" link set prp1 up + + ip -net "$node2" link set vethA up + ip -net "$node2" link set vethB up + ip -net "$node2" link set prp2 up +} + +wait_for_hsr_node_table() +{ + log_info "Wait for node table entries to be merged." + WAIT=5 + while [ "${WAIT}" -gt 0 ]; do + nts=$(cat /sys/kernel/debug/hsr/hsr*/node_table) + + # We need entries in the node tables, and they need to be merged + if (echo "$nts" | grep -qE "^([0-9a-f]{2}:){5}") && \ + ! (echo "$nts" | grep -q "00:00:00:00:00:00"); then + return + fi + + sleep 1 + ((WAIT--)) + done + check_err 1 "Failed to wait for merged node table entries" +} + +setup_topo() +{ + local proto="$1" + + if [ "$proto" = "HSRv0" ]; then + setup_hsr_topo 0 + wait_for_hsr_node_table + elif [ "$proto" = "HSRv1" ]; then + setup_hsr_topo 1 + wait_for_hsr_node_table + elif [ "$proto" = "PRP" ]; then + setup_prp_topo + else + check_err 1 "Unknown protocol (${proto})" + fi +} + +check_ping() +{ + local node="$1" + local dst="$2" + local accepted_dups="$3" + local ping_args="-q -i 0.01 -c 400" + + log_info "Running ping $node -> $dst" + # shellcheck disable=SC2086 + output=$(ip netns exec "$node" ping $ping_args "$dst" | \ + grep "packets transmitted") + log_info "$output" + + dups=0 + loss=0 + + if [[ "$output" =~ \+([0-9]+)" duplicates" ]]; then + dups="${BASH_REMATCH[1]}" + fi + if [[ "$output" =~ ([0-9\.]+\%)" packet loss" ]]; then + loss="${BASH_REMATCH[1]}" + fi + + if [ "$dups" -gt "$accepted_dups" ]; then + check_err 1 "Unexpected duplicate packets (${dups})" + fi + if [ "$loss" != "0%" ]; then + check_err 1 "Unexpected packet loss (${loss})" + fi +} + +test_clean() +{ + local proto="$1" + + RET=0 + tname="${FUNCNAME[0]} - ${proto}" + + setup_topo "$proto" + if ((RET != ksft_pass)); then + log_test "${tname} setup" + return + fi + + check_ping "$node1" "100.64.0.2" 0 + + log_test "${tname}" +} + +test_clean_hsrv0() +{ + test_clean "HSRv0" +} + +test_clean_hsrv1() +{ + test_clean "HSRv1" +} + +test_clean_prp() +{ + test_clean "PRP" +} + +test_cut_link() +{ + local proto="$1" + + RET=0 + tname="${FUNCNAME[0]} - ${proto}" + + setup_topo "$proto" + if ((RET != ksft_pass)); then + log_test "${tname} setup" + return + fi + + # Cutting link from subshell, so check_ping can run in the normal shell + # with access to global variables from the test harness. + ( + sleep 2 + log_info "Cutting link" + ip -net "$node1" link set vethB down + ) & + check_ping "$node1" "100.64.0.2" 0 + + wait + log_test "${tname}" +} + + +test_cut_link_hsrv0() +{ + test_cut_link "HSRv0" +} + +test_cut_link_hsrv1() +{ + test_cut_link "HSRv1" +} + +test_cut_link_prp() +{ + test_cut_link "PRP" +} + +test_packet_loss() +{ + local proto="$1" + local loss="$2" + + RET=0 + tname="${FUNCNAME[0]} - ${proto}, ${loss}" + + setup_topo "$proto" + if ((RET != ksft_pass)); then + log_test "${tname} setup" + return + fi + + # Packet loss with lower delay makes sure the packets on the lossy link + # arrive first. + tc -net "$node1" qdisc add dev vethA root netem delay 50ms + tc -net "$node1" qdisc add dev vethB root netem delay 20ms loss "$loss" + + check_ping "$node1" "100.64.0.2" 40 + + log_test "${tname}" +} + +test_packet_loss_hsrv0() +{ + test_packet_loss "HSRv0" "20%" +} + +test_packet_loss_hsrv1() +{ + test_packet_loss "HSRv1" "20%" +} + +test_packet_loss_prp() +{ + test_packet_loss "PRP" "20%" +} + +test_high_packet_loss_hsrv0() +{ + test_packet_loss "HSRv0" "80%" +} + +test_high_packet_loss_hsrv1() +{ + test_packet_loss "HSRv1" "80%" +} + +test_high_packet_loss_prp() +{ + test_packet_loss "PRP" "80%" +} + +test_reordering() +{ + local proto="$1" + + RET=0 + tname="${FUNCNAME[0]} - ${proto}" + + setup_topo "$proto" + if ((RET != ksft_pass)); then + log_test "${tname} setup" + return + fi + + tc -net "$node1" qdisc add dev vethA root netem delay 50ms + tc -net "$node1" qdisc add dev vethB root netem delay 50ms reorder 20% + + check_ping "$node1" "100.64.0.2" 40 + + log_test "${tname}" +} + +test_reordering_hsrv0() +{ + test_reordering "HSRv0" +} + +test_reordering_hsrv1() +{ + test_reordering "HSRv1" +} + +test_reordering_prp() +{ + test_reordering "PRP" +} + +cleanup() +{ + cleanup_all_ns +} + +trap cleanup EXIT + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/hsr/prp_ping.sh b/tools/testing/selftests/net/hsr/prp_ping.sh new file mode 100755 index 000000000000..fd2ba9f05d4c --- /dev/null +++ b/tools/testing/selftests/net/hsr/prp_ping.sh @@ -0,0 +1,147 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ipv6=true + +source ./hsr_common.sh + +optstring="h4" +usage() { + echo "Usage: $0 [OPTION]" + echo -e "\t-4: IPv4 only: disable IPv6 tests (default: test both IPv4 and IPv6)" +} + +while getopts "$optstring" option;do + case "$option" in + "h") + usage "$0" + exit 0 + ;; + "4") + ipv6=false + ;; + "?") + usage "$0" + exit 1 + ;; +esac +done + +setup_prp_interfaces() +{ + echo "INFO: Preparing interfaces for PRP" +# Two PRP nodes, connected by two links (treated as LAN A and LAN B). +# +# vethA ----- vethA +# prp1 prp2 +# vethB ----- vethB +# +# node1 node2 + + # Interfaces + # shellcheck disable=SC2154 # variables assigned by setup_ns + ip link add vethA netns "$node1" type veth peer name vethA netns "$node2" + ip link add vethB netns "$node1" type veth peer name vethB netns "$node2" + + # MAC addresses will be copied from LAN A interface + ip -net "$node1" link set address 00:11:22:00:00:01 dev vethA + ip -net "$node2" link set address 00:11:22:00:00:02 dev vethA + + # PRP + ip -net "$node1" link add name prp1 type hsr \ + slave1 vethA slave2 vethB supervision 45 proto 1 + ip -net "$node2" link add name prp2 type hsr \ + slave1 vethA slave2 vethB supervision 45 proto 1 + + # IP addresses + ip -net "$node1" addr add 100.64.0.1/24 dev prp1 + ip -net "$node1" addr add dead:beef:0::1/64 dev prp1 nodad + ip -net "$node2" addr add 100.64.0.2/24 dev prp2 + ip -net "$node2" addr add dead:beef:0::2/64 dev prp2 nodad + + # All links up + ip -net "$node1" link set vethA up + ip -net "$node1" link set vethB up + ip -net "$node1" link set prp1 up + + ip -net "$node2" link set vethA up + ip -net "$node2" link set vethB up + ip -net "$node2" link set prp2 up +} + +setup_vlan_interfaces() +{ + # Interfaces + ip -net "$node1" link add link prp1 name prp1.2 type vlan id 2 + ip -net "$node2" link add link prp2 name prp2.2 type vlan id 2 + + # IP addresses + ip -net "$node1" addr add 100.64.2.1/24 dev prp1.2 + ip -net "$node1" addr add dead:beef:2::1/64 dev prp1.2 nodad + + ip -net "$node2" addr add 100.64.2.2/24 dev prp2.2 + ip -net "$node2" addr add dead:beef:2::2/64 dev prp2.2 nodad + + # All links up + ip -net "$node1" link set prp1.2 up + ip -net "$node2" link set prp2.2 up +} + +do_ping_tests() +{ + local netid="$1" + + echo "INFO: Initial validation ping" + + do_ping "$node1" "100.64.$netid.2" + do_ping "$node2" "100.64.$netid.1" + stop_if_error "Initial validation failed on IPv4" + + do_ping "$node1" "dead:beef:$netid::2" + do_ping "$node2" "dead:beef:$netid::1" + stop_if_error "Initial validation failed on IPv6" + + echo "INFO: Longer ping test." + + do_ping_long "$node1" "100.64.$netid.2" + do_ping_long "$node2" "100.64.$netid.1" + stop_if_error "Longer ping test failed on IPv4." + + do_ping_long "$node1" "dead:beef:$netid::2" + do_ping_long "$node2" "dead:beef:$netid::1" + stop_if_error "Longer ping test failed on IPv6." +} + +run_ping_tests() +{ + echo "INFO: Running ping tests" + do_ping_tests 0 +} + +run_vlan_ping_tests() +{ + vlan_challenged_prp1=$(ip net exec "$node1" ethtool -k prp1 | \ + grep "vlan-challenged" | awk '{print $2}') + vlan_challenged_prp2=$(ip net exec "$node2" ethtool -k prp2 | \ + grep "vlan-challenged" | awk '{print $2}') + + if [[ "$vlan_challenged_prp1" = "off" || \ + "$vlan_challenged_prp2" = "off" ]]; then + echo "INFO: Running VLAN ping tests" + setup_vlan_interfaces + do_ping_tests 2 + else + echo "INFO: Not Running VLAN tests as the device does not support VLAN" + fi +} + +check_prerequisites +trap cleanup_all_ns EXIT + +setup_ns node1 node2 +setup_prp_interfaces + +run_ping_tests +run_vlan_ping_tests + +exit $ret diff --git a/tools/testing/selftests/net/hsr/settings b/tools/testing/selftests/net/hsr/settings index 0fbc037f2aa8..a953c96aa16e 100644 --- a/tools/testing/selftests/net/hsr/settings +++ b/tools/testing/selftests/net/hsr/settings @@ -1 +1 @@ -timeout=50 +timeout=180 diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh index d6f0e449c029..b13c89a99ecb 100755 --- a/tools/testing/selftests/net/icmp_redirect.sh +++ b/tools/testing/selftests/net/icmp_redirect.sh @@ -178,8 +178,6 @@ setup() else ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1 ip netns exec $ns sysctl -q -w net.ipv4.conf.all.send_redirects=1 - ip netns exec $ns sysctl -q -w net.ipv4.conf.default.rp_filter=0 - ip netns exec $ns sysctl -q -w net.ipv4.conf.all.rp_filter=0 ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1 ip netns exec $ns sysctl -q -w net.ipv6.route.mtu_expires=10 diff --git a/tools/testing/selftests/net/icmp_rfc4884.c b/tools/testing/selftests/net/icmp_rfc4884.c new file mode 100644 index 000000000000..cd826b913557 --- /dev/null +++ b/tools/testing/selftests/net/icmp_rfc4884.c @@ -0,0 +1,679 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <arpa/inet.h> +#include <error.h> +#include <linux/errqueue.h> +#include <linux/icmp.h> +#include <linux/icmpv6.h> +#include <linux/in6.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <netinet/in.h> +#include <netinet/udp.h> +#include <poll.h> +#include <sched.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/ioctl.h> +#include <sys/socket.h> + +#include "../kselftest_harness.h" + +static const unsigned short src_port = 44444; +static const unsigned short dst_port = 55555; +static const int min_orig_dgram_len = 128; +static const int min_payload_len_v4 = + min_orig_dgram_len - sizeof(struct iphdr) - sizeof(struct udphdr); +static const int min_payload_len_v6 = + min_orig_dgram_len - sizeof(struct ipv6hdr) - sizeof(struct udphdr); +static const uint8_t orig_payload_byte = 0xAA; + +struct sockaddr_inet { + union { + struct sockaddr_in6 v6; + struct sockaddr_in v4; + struct sockaddr sa; + }; + socklen_t len; +}; + +struct ip_case_info { + int domain; + int level; + int opt1; + int opt2; + int proto; + int (*build_func)(uint8_t *buf, ssize_t buflen, bool with_ext, + int payload_len, bool bad_csum, bool bad_len, + bool smaller_len); + int min_payload; +}; + +static int bringup_loopback(void) +{ + struct ifreq ifr = { + .ifr_name = "lo" + }; + int fd; + + fd = socket(AF_INET, SOCK_DGRAM, 0); + if (fd < 0) + return -1; + + if (ioctl(fd, SIOCGIFFLAGS, &ifr) < 0) + goto err; + + ifr.ifr_flags = ifr.ifr_flags | IFF_UP; + + if (ioctl(fd, SIOCSIFFLAGS, &ifr) < 0) + goto err; + + close(fd); + return 0; + +err: + close(fd); + return -1; +} + +static uint16_t csum(const void *buf, size_t len) +{ + const uint8_t *data = buf; + uint32_t sum = 0; + + while (len > 1) { + sum += (data[0] << 8) | data[1]; + data += 2; + len -= 2; + } + + if (len == 1) + sum += data[0] << 8; + + while (sum >> 16) + sum = (sum & 0xFFFF) + (sum >> 16); + + return ~sum & 0xFFFF; +} + +static int poll_err(int fd) +{ + struct pollfd pfd; + + memset(&pfd, 0, sizeof(pfd)); + pfd.fd = fd; + + if (poll(&pfd, 1, 5000) != 1 || pfd.revents != POLLERR) + return -1; + + return 0; +} + +static void set_addr(struct sockaddr_inet *addr, int domain, + unsigned short port) +{ + memset(addr, 0, sizeof(*addr)); + + switch (domain) { + case AF_INET: + addr->v4.sin_family = AF_INET; + addr->v4.sin_port = htons(port); + addr->v4.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + addr->len = sizeof(addr->v4); + break; + case AF_INET6: + addr->v6.sin6_family = AF_INET6; + addr->v6.sin6_port = htons(port); + addr->v6.sin6_addr = in6addr_loopback; + addr->len = sizeof(addr->v6); + break; + } +} + +static int bind_and_setsockopt(int fd, const struct ip_case_info *info) +{ + struct sockaddr_inet addr; + int opt = 1; + + set_addr(&addr, info->domain, src_port); + + if (setsockopt(fd, info->level, info->opt1, &opt, sizeof(opt)) < 0) + return -1; + + if (setsockopt(fd, info->level, info->opt2, &opt, sizeof(opt)) < 0) + return -1; + + return bind(fd, &addr.sa, addr.len); +} + +static int build_rfc4884_ext(uint8_t *buf, size_t buflen, bool bad_csum, + bool bad_len, bool smaller_len) +{ + struct icmp_extobj_hdr *objh; + struct icmp_ext_hdr *exthdr; + size_t obj_len, ext_len; + uint16_t sum; + + /* Use an object payload of 4 bytes */ + obj_len = sizeof(*objh) + sizeof(uint32_t); + ext_len = sizeof(*exthdr) + obj_len; + + if (ext_len > buflen) + return -EINVAL; + + exthdr = (struct icmp_ext_hdr *)buf; + objh = (struct icmp_extobj_hdr *)(buf + sizeof(*exthdr)); + + exthdr->version = 2; + /* When encoding a bad object length, either encode a length too small + * to fit the object header or too big to fit in the packet. + */ + if (bad_len) + obj_len = smaller_len ? sizeof(*objh) - 1 : obj_len * 2; + objh->length = htons(obj_len); + + sum = csum(buf, ext_len); + exthdr->checksum = htons(bad_csum ? sum - 1 : sum); + + return ext_len; +} + +static int build_orig_dgram_v4(uint8_t *buf, ssize_t buflen, int payload_len) +{ + struct udphdr *udph; + struct iphdr *iph; + size_t len = 0; + + len = sizeof(*iph) + sizeof(*udph) + payload_len; + if (len > buflen) + return -EINVAL; + + iph = (struct iphdr *)buf; + udph = (struct udphdr *)(buf + sizeof(*iph)); + + iph->version = 4; + iph->ihl = 5; + iph->protocol = IPPROTO_UDP; + iph->saddr = htonl(INADDR_LOOPBACK); + iph->daddr = htonl(INADDR_LOOPBACK); + iph->tot_len = htons(len); + iph->check = htons(csum(iph, sizeof(*iph))); + + udph->source = htons(src_port); + udph->dest = htons(dst_port); + udph->len = htons(sizeof(*udph) + payload_len); + + memset(buf + sizeof(*iph) + sizeof(*udph), orig_payload_byte, + payload_len); + + return len; +} + +static int build_orig_dgram_v6(uint8_t *buf, ssize_t buflen, int payload_len) +{ + struct udphdr *udph; + struct ipv6hdr *iph; + size_t len = 0; + + len = sizeof(*iph) + sizeof(*udph) + payload_len; + if (len > buflen) + return -EINVAL; + + iph = (struct ipv6hdr *)buf; + udph = (struct udphdr *)(buf + sizeof(*iph)); + + iph->version = 6; + iph->payload_len = htons(sizeof(*udph) + payload_len); + iph->nexthdr = IPPROTO_UDP; + iph->saddr = in6addr_loopback; + iph->daddr = in6addr_loopback; + + udph->source = htons(src_port); + udph->dest = htons(dst_port); + udph->len = htons(sizeof(*udph) + payload_len); + + memset(buf + sizeof(*iph) + sizeof(*udph), orig_payload_byte, + payload_len); + + return len; +} + +static int build_icmpv4_pkt(uint8_t *buf, ssize_t buflen, bool with_ext, + int payload_len, bool bad_csum, bool bad_len, + bool smaller_len) +{ + struct icmphdr *icmph; + int len, ret; + + len = sizeof(*icmph); + memset(buf, 0, buflen); + + icmph = (struct icmphdr *)buf; + icmph->type = ICMP_DEST_UNREACH; + icmph->code = ICMP_PORT_UNREACH; + icmph->checksum = 0; + + ret = build_orig_dgram_v4(buf + len, buflen - len, payload_len); + if (ret < 0) + return ret; + + len += ret; + + icmph->un.reserved[1] = (len - sizeof(*icmph)) / sizeof(uint32_t); + + if (with_ext) { + ret = build_rfc4884_ext(buf + len, buflen - len, + bad_csum, bad_len, smaller_len); + if (ret < 0) + return ret; + + len += ret; + } + + icmph->checksum = htons(csum(icmph, len)); + return len; +} + +static int build_icmpv6_pkt(uint8_t *buf, ssize_t buflen, bool with_ext, + int payload_len, bool bad_csum, bool bad_len, + bool smaller_len) +{ + struct icmp6hdr *icmph; + int len, ret; + + len = sizeof(*icmph); + memset(buf, 0, buflen); + + icmph = (struct icmp6hdr *)buf; + icmph->icmp6_type = ICMPV6_DEST_UNREACH; + icmph->icmp6_code = ICMPV6_PORT_UNREACH; + icmph->icmp6_cksum = 0; + + ret = build_orig_dgram_v6(buf + len, buflen - len, payload_len); + if (ret < 0) + return ret; + + len += ret; + + icmph->icmp6_datagram_len = (len - sizeof(*icmph)) / sizeof(uint64_t); + + if (with_ext) { + ret = build_rfc4884_ext(buf + len, buflen - len, + bad_csum, bad_len, smaller_len); + if (ret < 0) + return ret; + + len += ret; + } + + icmph->icmp6_cksum = htons(csum(icmph, len)); + return len; +} + +FIXTURE(rfc4884) {}; + +FIXTURE_SETUP(rfc4884) +{ + int ret; + + ret = unshare(CLONE_NEWNET); + ASSERT_EQ(ret, 0) { + TH_LOG("unshare(CLONE_NEWNET) failed: %s", strerror(errno)); + } + + ret = bringup_loopback(); + ASSERT_EQ(ret, 0) TH_LOG("Failed to bring up loopback interface"); +} + +FIXTURE_TEARDOWN(rfc4884) +{ +} + +const struct ip_case_info ipv4_info = { + .domain = AF_INET, + .level = SOL_IP, + .opt1 = IP_RECVERR, + .opt2 = IP_RECVERR_RFC4884, + .proto = IPPROTO_ICMP, + .build_func = build_icmpv4_pkt, + .min_payload = min_payload_len_v4, +}; + +const struct ip_case_info ipv6_info = { + .domain = AF_INET6, + .level = SOL_IPV6, + .opt1 = IPV6_RECVERR, + .opt2 = IPV6_RECVERR_RFC4884, + .proto = IPPROTO_ICMPV6, + .build_func = build_icmpv6_pkt, + .min_payload = min_payload_len_v6, +}; + +FIXTURE_VARIANT(rfc4884) { + /* IPv4/v6 related information */ + struct ip_case_info info; + /* Whether to append an ICMP extension or not */ + bool with_ext; + /* UDP payload length */ + int payload_len; + /* Whether to generate a bad checksum in the ICMP extension structure */ + bool bad_csum; + /* Whether to generate a bad length in the ICMP object header */ + bool bad_len; + /* Whether it is too small to fit the object header or too big to fit + * in the packet + */ + bool smaller_len; +}; + +/* Tests that a valid ICMPv4 error message with extension and the original + * datagram is smaller than 128 bytes, generates an error with zero offset, + * and does not raise the SO_EE_RFC4884_FLAG_INVALID flag. + */ +FIXTURE_VARIANT_ADD(rfc4884, ipv4_ext_small_payload) { + .info = ipv4_info, + .with_ext = true, + .payload_len = 64, + .bad_csum = false, + .bad_len = false, +}; + +/* Tests that a valid ICMPv4 error message with extension and 128 bytes original + * datagram, generates an error with the expected offset, and does not raise the + * SO_EE_RFC4884_FLAG_INVALID flag. + */ +FIXTURE_VARIANT_ADD(rfc4884, ipv4_ext) { + .info = ipv4_info, + .with_ext = true, + .payload_len = min_payload_len_v4, + .bad_csum = false, + .bad_len = false, +}; + +/* Tests that a valid ICMPv4 error message with extension and the original + * datagram is larger than 128 bytes, generates an error with the expected + * offset, and does not raise the SO_EE_RFC4884_FLAG_INVALID flag. + */ +FIXTURE_VARIANT_ADD(rfc4884, ipv4_ext_large_payload) { + .info = ipv4_info, + .with_ext = true, + .payload_len = 256, + .bad_csum = false, + .bad_len = false, +}; + +/* Tests that a valid ICMPv4 error message without extension and the original + * datagram is smaller than 128 bytes, generates an error with zero offset, + * and does not raise the SO_EE_RFC4884_FLAG_INVALID flag. + */ +FIXTURE_VARIANT_ADD(rfc4884, ipv4_no_ext_small_payload) { + .info = ipv4_info, + .with_ext = false, + .payload_len = 64, + .bad_csum = false, + .bad_len = false, +}; + +/* Tests that a valid ICMPv4 error message without extension and 128 bytes + * original datagram, generates an error with zero offset, and does not raise + * the SO_EE_RFC4884_FLAG_INVALID flag. + */ +FIXTURE_VARIANT_ADD(rfc4884, ipv4_no_ext_min_payload) { + .info = ipv4_info, + .with_ext = false, + .payload_len = min_payload_len_v4, + .bad_csum = false, + .bad_len = false, +}; + +/* Tests that a valid ICMPv4 error message without extension and the original + * datagram is larger than 128 bytes, generates an error with zero offset, + * and does not raise the SO_EE_RFC4884_FLAG_INVALID flag. + */ +FIXTURE_VARIANT_ADD(rfc4884, ipv4_no_ext_large_payload) { + .info = ipv4_info, + .with_ext = false, + .payload_len = 256, + .bad_csum = false, + .bad_len = false, +}; + +/* Tests that an ICMPv4 error message with extension and an invalid checksum, + * generates an error with the expected offset, and raises the + * SO_EE_RFC4884_FLAG_INVALID flag. + */ +FIXTURE_VARIANT_ADD(rfc4884, ipv4_invalid_ext_checksum) { + .info = ipv4_info, + .with_ext = true, + .payload_len = min_payload_len_v4, + .bad_csum = true, + .bad_len = false, +}; + +/* Tests that an ICMPv4 error message with extension and an object length + * smaller than the object header, generates an error with the expected offset, + * and raises the SO_EE_RFC4884_FLAG_INVALID flag. + */ +FIXTURE_VARIANT_ADD(rfc4884, ipv4_invalid_ext_length_small) { + .info = ipv4_info, + .with_ext = true, + .payload_len = min_payload_len_v4, + .bad_csum = false, + .bad_len = true, + .smaller_len = true, +}; + +/* Tests that an ICMPv4 error message with extension and an object length that + * is too big to fit in the packet, generates an error with the expected offset, + * and raises the SO_EE_RFC4884_FLAG_INVALID flag. + */ +FIXTURE_VARIANT_ADD(rfc4884, ipv4_invalid_ext_length_large) { + .info = ipv4_info, + .with_ext = true, + .payload_len = min_payload_len_v4, + .bad_csum = false, + .bad_len = true, + .smaller_len = false, +}; + +/* Tests that a valid ICMPv6 error message with extension and the original + * datagram is smaller than 128 bytes, generates an error with zero offset, + * and does not raise the SO_EE_RFC4884_FLAG_INVALID flag. + */ +FIXTURE_VARIANT_ADD(rfc4884, ipv6_ext_small_payload) { + .info = ipv6_info, + .with_ext = true, + .payload_len = 64, + .bad_csum = false, + .bad_len = false, +}; + +/* Tests that a valid ICMPv6 error message with extension and 128 bytes original + * datagram, generates an error with the expected offset, and does not raise the + * SO_EE_RFC4884_FLAG_INVALID flag. + */ +FIXTURE_VARIANT_ADD(rfc4884, ipv6_ext) { + .info = ipv6_info, + .with_ext = true, + .payload_len = min_payload_len_v6, + .bad_csum = false, + .bad_len = false, +}; + +/* Tests that a valid ICMPv6 error message with extension and the original + * datagram is larger than 128 bytes, generates an error with the expected + * offset, and does not raise the SO_EE_RFC4884_FLAG_INVALID flag. + */ +FIXTURE_VARIANT_ADD(rfc4884, ipv6_ext_large_payload) { + .info = ipv6_info, + .with_ext = true, + .payload_len = 256, + .bad_csum = false, + .bad_len = false, +}; +/* Tests that a valid ICMPv6 error message without extension and the original + * datagram is smaller than 128 bytes, generates an error with zero offset, + * and does not raise the SO_EE_RFC4884_FLAG_INVALID flag. + */ +FIXTURE_VARIANT_ADD(rfc4884, ipv6_no_ext_small_payload) { + .info = ipv6_info, + .with_ext = false, + .payload_len = 64, + .bad_csum = false, + .bad_len = false, +}; + +/* Tests that a valid ICMPv6 error message without extension and 128 bytes + * original datagram, generates an error with zero offset, and does not + * raise the SO_EE_RFC4884_FLAG_INVALID flag. + */ +FIXTURE_VARIANT_ADD(rfc4884, ipv6_no_ext_min_payload) { + .info = ipv6_info, + .with_ext = false, + .payload_len = min_payload_len_v6, + .bad_csum = false, + .bad_len = false, +}; + +/* Tests that a valid ICMPv6 error message without extension and the original + * datagram is larger than 128 bytes, generates an error with zero offset, + * and does not raise the SO_EE_RFC4884_FLAG_INVALID flag. + */ +FIXTURE_VARIANT_ADD(rfc4884, ipv6_no_ext_large_payload) { + .info = ipv6_info, + .with_ext = false, + .payload_len = 256, + .bad_csum = false, + .bad_len = false, +}; + +/* Tests that an ICMPv6 error message with extension and an invalid checksum, + * generates an error with the expected offset, and raises the + * SO_EE_RFC4884_FLAG_INVALID flag. + */ +FIXTURE_VARIANT_ADD(rfc4884, ipv6_invalid_ext_checksum) { + .info = ipv6_info, + .with_ext = true, + .payload_len = min_payload_len_v6, + .bad_csum = true, + .bad_len = false, +}; + +/* Tests that an ICMPv6 error message with extension and an object length + * smaller than the object header, generates an error with the expected offset, + * and raises the SO_EE_RFC4884_FLAG_INVALID flag. + */ +FIXTURE_VARIANT_ADD(rfc4884, ipv6_invalid_ext_length_small) { + .info = ipv6_info, + .with_ext = true, + .payload_len = min_payload_len_v6, + .bad_csum = false, + .bad_len = true, + .smaller_len = true, +}; + +/* Tests that an ICMPv6 error message with extension and an object length that + * is too big to fit in the packet, generates an error with the expected offset, + * and raises the SO_EE_RFC4884_FLAG_INVALID flag. + */ +FIXTURE_VARIANT_ADD(rfc4884, ipv6_invalid_ext_length_large) { + .info = ipv6_info, + .with_ext = true, + .payload_len = min_payload_len_v6, + .bad_csum = false, + .bad_len = true, + .smaller_len = false, +}; + +static void +check_rfc4884_offset(struct __test_metadata *_metadata, int sock, + const FIXTURE_VARIANT(rfc4884) *v) +{ + char rxbuf[1024]; + char ctrl[1024]; + struct iovec iov = { + .iov_base = rxbuf, + .iov_len = sizeof(rxbuf) + }; + struct msghdr msg = { + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = ctrl, + .msg_controllen = sizeof(ctrl), + }; + struct cmsghdr *cmsg; + int recv; + + ASSERT_EQ(poll_err(sock), 0); + + recv = recvmsg(sock, &msg, MSG_ERRQUEUE); + ASSERT_GE(recv, 0) TH_LOG("recvmsg(MSG_ERRQUEUE) failed"); + + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) { + bool is_invalid, expected_invalid; + struct sock_extended_err *ee; + int expected_off; + uint16_t off; + + if (cmsg->cmsg_level != v->info.level || + cmsg->cmsg_type != v->info.opt1) { + TH_LOG("Unrelated cmsgs were encountered in recvmsg()"); + continue; + } + + ee = (struct sock_extended_err *)CMSG_DATA(cmsg); + off = ee->ee_rfc4884.len; + is_invalid = ee->ee_rfc4884.flags & SO_EE_RFC4884_FLAG_INVALID; + + expected_invalid = v->bad_csum || v->bad_len; + ASSERT_EQ(is_invalid, expected_invalid) { + TH_LOG("Expected invalidity flag to be %d, but got %d", + expected_invalid, is_invalid); + } + + expected_off = + (v->with_ext && v->payload_len >= v->info.min_payload) ? + v->payload_len : 0; + ASSERT_EQ(off, expected_off) { + TH_LOG("Expected RFC4884 offset %u, got %u", + expected_off, off); + } + break; + } +} + +TEST_F(rfc4884, rfc4884) +{ + const typeof(variant) v = variant; + struct sockaddr_inet addr; + uint8_t pkt[1024]; + int dgram, raw; + int len, sent; + int err; + + dgram = socket(v->info.domain, SOCK_DGRAM, 0); + ASSERT_GE(dgram, 0) TH_LOG("Opening datagram socket failed"); + + err = bind_and_setsockopt(dgram, &v->info); + ASSERT_EQ(err, 0) TH_LOG("Bind failed"); + + raw = socket(v->info.domain, SOCK_RAW, v->info.proto); + ASSERT_GE(raw, 0) TH_LOG("Opening raw socket failed"); + + len = v->info.build_func(pkt, sizeof(pkt), v->with_ext, v->payload_len, + v->bad_csum, v->bad_len, v->smaller_len); + ASSERT_GT(len, 0) TH_LOG("Building packet failed"); + + set_addr(&addr, v->info.domain, 0); + sent = sendto(raw, pkt, len, 0, &addr.sa, addr.len); + ASSERT_EQ(len, sent) TH_LOG("Sending packet failed"); + + check_rfc4884_offset(_metadata, dgram, v); + + close(dgram); + close(raw); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.c b/tools/testing/selftests/net/io_uring_zerocopy_tx.c index 76e604e4810e..7bfeeb133705 100644 --- a/tools/testing/selftests/net/io_uring_zerocopy_tx.c +++ b/tools/testing/selftests/net/io_uring_zerocopy_tx.c @@ -106,14 +106,14 @@ static void do_tx(int domain, int type, int protocol) ret = io_uring_queue_init(512, &ring, 0); if (ret) - error(1, ret, "io_uring: queue init"); + error(1, -ret, "io_uring: queue init"); iov.iov_base = payload; iov.iov_len = cfg_payload_len; ret = io_uring_register_buffers(&ring, &iov, 1); if (ret) - error(1, ret, "io_uring: buffer registration"); + error(1, -ret, "io_uring: buffer registration"); tstop = gettimeofday_ms() + cfg_runtime_ms; do { @@ -149,24 +149,24 @@ static void do_tx(int domain, int type, int protocol) ret = io_uring_submit(&ring); if (ret != cfg_nr_reqs) - error(1, ret, "submit"); + error(1, -ret, "submit"); if (cfg_cork) do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0); for (i = 0; i < cfg_nr_reqs; i++) { ret = io_uring_wait_cqe(&ring, &cqe); if (ret) - error(1, ret, "wait cqe"); + error(1, -ret, "wait cqe"); if (cqe->user_data != NONZC_TAG && cqe->user_data != ZC_TAG) - error(1, -EINVAL, "invalid cqe->user_data"); + error(1, EINVAL, "invalid cqe->user_data"); if (cqe->flags & IORING_CQE_F_NOTIF) { if (cqe->flags & IORING_CQE_F_MORE) - error(1, -EINVAL, "invalid notif flags"); + error(1, EINVAL, "invalid notif flags"); if (compl_cqes <= 0) - error(1, -EINVAL, "notification mismatch"); + error(1, EINVAL, "notification mismatch"); compl_cqes--; i--; io_uring_cqe_seen(&ring); @@ -174,14 +174,14 @@ static void do_tx(int domain, int type, int protocol) } if (cqe->flags & IORING_CQE_F_MORE) { if (cqe->user_data != ZC_TAG) - error(1, cqe->res, "unexpected F_MORE"); + error(1, -cqe->res, "unexpected F_MORE"); compl_cqes++; } if (cqe->res >= 0) { packets++; bytes += cqe->res; } else if (cqe->res != -EAGAIN) { - error(1, cqe->res, "send failed"); + error(1, -cqe->res, "send failed"); } io_uring_cqe_seen(&ring); } @@ -190,11 +190,11 @@ static void do_tx(int domain, int type, int protocol) while (compl_cqes) { ret = io_uring_wait_cqe(&ring, &cqe); if (ret) - error(1, ret, "wait cqe"); + error(1, -ret, "wait cqe"); if (cqe->flags & IORING_CQE_F_MORE) - error(1, -EINVAL, "invalid notif flags"); + error(1, EINVAL, "invalid notif flags"); if (!(cqe->flags & IORING_CQE_F_NOTIF)) - error(1, -EINVAL, "missing notif flag"); + error(1, EINVAL, "missing notif flag"); io_uring_cqe_seen(&ring); compl_cqes--; diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.sh b/tools/testing/selftests/net/io_uring_zerocopy_tx.sh index 123439545013..8c3647de9b4c 100755 --- a/tools/testing/selftests/net/io_uring_zerocopy_tx.sh +++ b/tools/testing/selftests/net/io_uring_zerocopy_tx.sh @@ -77,9 +77,13 @@ esac # Start of state changes: install cleanup handler +old_io_uring_disabled=0 cleanup() { ip netns del "${NS2}" ip netns del "${NS1}" + if [ "$old_io_uring_disabled" -ne 0 ]; then + sysctl -w -q kernel.io_uring_disabled="$old_io_uring_disabled" 2>/dev/null || true + fi } trap cleanup EXIT @@ -122,5 +126,10 @@ do_test() { wait } +old_io_uring_disabled=$(sysctl -n kernel.io_uring_disabled 2>/dev/null || echo "0") +if [ "$old_io_uring_disabled" -ne 0 ]; then + sysctl -w -q kernel.io_uring_disabled=0 +fi + do_test "${EXTRA_ARGS}" echo ok diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh index 845c26dd01a9..b2b99889942f 100755 --- a/tools/testing/selftests/net/ioam6.sh +++ b/tools/testing/selftests/net/ioam6.sh @@ -273,8 +273,8 @@ setup() ip -netns $ioam_node_beta link set ioam-veth-betaR name veth1 &>/dev/null ip -netns $ioam_node_gamma link set ioam-veth-gamma name veth0 &>/dev/null - ip -netns $ioam_node_alpha addr add 2001:db8:1::50/64 dev veth0 &>/dev/null ip -netns $ioam_node_alpha addr add 2001:db8:1::2/64 dev veth0 &>/dev/null + ip -netns $ioam_node_alpha addr add 2001:db8:1::50/64 dev veth0 &>/dev/null ip -netns $ioam_node_alpha link set veth0 up &>/dev/null ip -netns $ioam_node_alpha link set lo up &>/dev/null ip -netns $ioam_node_alpha route add 2001:db8:2::/64 \ diff --git a/tools/testing/selftests/net/ip6_tunnel.sh b/tools/testing/selftests/net/ip6_tunnel.sh new file mode 100755 index 000000000000..fe081a521819 --- /dev/null +++ b/tools/testing/selftests/net/ip6_tunnel.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# Test that IPv4-over-IPv6 tunneling works. + +source lib.sh +set -e + +setup_prepare() { + ip link add transport1 type veth peer name transport2 + + setup_ns ns1 + ip link set transport1 netns $ns1 + ip -n $ns1 address add 2001:db8::1/64 dev transport1 nodad + ip -n $ns1 address add 2001:db8::3/64 dev transport1 nodad + ip -n $ns1 link set transport1 up + ip -n $ns1 link add link transport1 name tunnel4 type ip6tnl mode ipip6 local 2001:db8::1 remote 2001:db8::2 + ip -n $ns1 address add 172.0.0.1/32 peer 172.0.0.2/32 dev tunnel4 + ip -n $ns1 link set tunnel4 up + ip -n $ns1 link add link transport1 name tunnel6 type ip6tnl mode ip6ip6 local 2001:db8::3 remote 2001:db8::4 + ip -n $ns1 address add 2001:db8:6::1/64 dev tunnel6 + ip -n $ns1 link set tunnel6 up + + setup_ns ns2 + ip link set transport2 netns $ns2 + ip -n $ns2 address add 2001:db8::2/64 dev transport2 nodad + ip -n $ns2 address add 2001:db8::4/64 dev transport2 nodad + ip -n $ns2 link set transport2 up + ip -n $ns2 link add link transport2 name tunnel4 type ip6tnl mode ipip6 local 2001:db8::2 remote 2001:db8::1 + ip -n $ns2 address add 172.0.0.2/32 peer 172.0.0.1/32 dev tunnel4 + ip -n $ns2 link set tunnel4 up + ip -n $ns2 link add link transport2 name tunnel6 type ip6tnl mode ip6ip6 local 2001:db8::4 remote 2001:db8::3 + ip -n $ns2 address add 2001:db8:6::2/64 dev tunnel6 + ip -n $ns2 link set tunnel6 up +} + +cleanup() { + cleanup_all_ns + # in case the namespaces haven't been set up yet + ip link delete transport1 &>/dev/null || true +} + +trap cleanup EXIT +setup_prepare +ip netns exec $ns1 ping -q -W1 -c1 172.0.0.2 >/dev/null +ip netns exec $ns1 ping -q -W1 -c1 2001:db8:6::2 >/dev/null diff --git a/tools/testing/selftests/net/ip_local_port_range.c b/tools/testing/selftests/net/ip_local_port_range.c index 29451d2244b7..e6834a6cfc8f 100644 --- a/tools/testing/selftests/net/ip_local_port_range.c +++ b/tools/testing/selftests/net/ip_local_port_range.c @@ -10,7 +10,7 @@ #include <fcntl.h> #include <netinet/ip.h> -#include "../kselftest_harness.h" +#include "kselftest_harness.h" #ifndef IP_LOCAL_PORT_RANGE #define IP_LOCAL_PORT_RANGE 51 diff --git a/tools/testing/selftests/net/ip_local_port_range.sh b/tools/testing/selftests/net/ip_local_port_range.sh index 6c6ad346eaa0..4ff746db1256 100755 --- a/tools/testing/selftests/net/ip_local_port_range.sh +++ b/tools/testing/selftests/net/ip_local_port_range.sh @@ -2,4 +2,6 @@ # SPDX-License-Identifier: GPL-2.0 ./in_netns.sh \ - sh -c 'sysctl -q -w net.ipv4.ip_local_port_range="40000 49999" && ./ip_local_port_range' + sh -c 'sysctl -q -w net.mptcp.enabled=1 && \ + sysctl -q -w net.ipv4.ip_local_port_range="40000 49999" && \ + ./ip_local_port_range' diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c index 9b44a091802c..89c32c354c00 100644 --- a/tools/testing/selftests/net/ipsec.c +++ b/tools/testing/selftests/net/ipsec.c @@ -34,7 +34,7 @@ #include <time.h> #include <unistd.h> -#include "../kselftest.h" +#include "kselftest.h" #define printk(fmt, ...) \ ksft_print_msg("%d[%u] " fmt "\n", getpid(), __LINE__, ##__VA_ARGS__) @@ -43,6 +43,10 @@ #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) +#ifndef offsetof +#define offsetof(TYPE, MEMBER) __builtin_offsetof(TYPE, MEMBER) +#endif + #define IPV4_STR_SZ 16 /* xxx.xxx.xxx.xxx is longest + \0 */ #define MAX_PAYLOAD 2048 #define XFRM_ALGO_KEY_BUF_SIZE 512 @@ -58,8 +62,6 @@ #define VETH_FMT "ktst-%d" #define VETH_LEN 12 -#define XFRM_ALGO_NR_KEYS 29 - static int nsfd_parent = -1; static int nsfd_childa = -1; static int nsfd_childb = -1; @@ -92,7 +94,6 @@ struct xfrm_key_entry xfrm_key_entries[] = { {"cbc(cast5)", 128}, {"cbc(serpent)", 128}, {"hmac(sha1)", 160}, - {"hmac(rmd160)", 160}, {"cbc(des3_ede)", 192}, {"hmac(sha256)", 256}, {"cbc(aes)", 256}, @@ -809,7 +810,7 @@ static int xfrm_fill_key(char *name, char *buf, { int i; - for (i = 0; i < XFRM_ALGO_NR_KEYS; i++) { + for (i = 0; i < ARRAY_SIZE(xfrm_key_entries); i++) { if (strncmp(name, xfrm_key_entries[i].algo_name, ALGO_LEN) == 0) *key_len = xfrm_key_entries[i].key_len; } @@ -827,13 +828,16 @@ static int xfrm_fill_key(char *name, char *buf, static int xfrm_state_pack_algo(struct nlmsghdr *nh, size_t req_sz, struct xfrm_desc *desc) { - struct { + union { union { struct xfrm_algo alg; struct xfrm_algo_aead aead; struct xfrm_algo_auth auth; } u; - char buf[XFRM_ALGO_KEY_BUF_SIZE]; + struct { + unsigned char __offset_to_FAM[offsetof(struct xfrm_algo_auth, alg_key)]; + char buf[XFRM_ALGO_KEY_BUF_SIZE]; + }; } alg = {}; size_t alen, elen, clen, aelen; unsigned short type; @@ -2054,8 +2058,7 @@ static int write_desc(int proto, int test_desc_fd, int proto_list[] = { IPPROTO_AH, IPPROTO_COMP, IPPROTO_ESP }; char *ah_list[] = { "digest_null", "hmac(md5)", "hmac(sha1)", "hmac(sha256)", - "hmac(sha384)", "hmac(sha512)", "hmac(rmd160)", - "xcbc(aes)", "cmac(aes)" + "hmac(sha384)", "hmac(sha512)", "xcbc(aes)", "cmac(aes)" }; char *comp_list[] = { "deflate", diff --git a/tools/testing/selftests/net/ipv6_force_forwarding.sh b/tools/testing/selftests/net/ipv6_force_forwarding.sh new file mode 100755 index 000000000000..bf0243366caa --- /dev/null +++ b/tools/testing/selftests/net/ipv6_force_forwarding.sh @@ -0,0 +1,105 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test IPv6 force_forwarding interface property +# +# This test verifies that the force_forwarding property works correctly: +# - When global forwarding is disabled, packets are not forwarded normally +# - When force_forwarding is enabled on an interface, packets are forwarded +# regardless of the global forwarding setting + +source lib.sh + +cleanup() { + cleanup_ns $ns1 $ns2 $ns3 +} + +trap cleanup EXIT + +setup_test() { + # Create three namespaces: sender, router, receiver + setup_ns ns1 ns2 ns3 + + # Create veth pairs: ns1 <-> ns2 <-> ns3 + ip link add name veth12 type veth peer name veth21 + ip link add name veth23 type veth peer name veth32 + + # Move interfaces to namespaces + ip link set veth12 netns $ns1 + ip link set veth21 netns $ns2 + ip link set veth23 netns $ns2 + ip link set veth32 netns $ns3 + + # Configure interfaces + ip -n $ns1 addr add 2001:db8:1::1/64 dev veth12 nodad + ip -n $ns2 addr add 2001:db8:1::2/64 dev veth21 nodad + ip -n $ns2 addr add 2001:db8:2::1/64 dev veth23 nodad + ip -n $ns3 addr add 2001:db8:2::2/64 dev veth32 nodad + + # Bring up interfaces + ip -n $ns1 link set veth12 up + ip -n $ns2 link set veth21 up + ip -n $ns2 link set veth23 up + ip -n $ns3 link set veth32 up + + # Add routes + ip -n $ns1 route add 2001:db8:2::/64 via 2001:db8:1::2 + ip -n $ns3 route add 2001:db8:1::/64 via 2001:db8:2::1 + + # Disable global forwarding + ip netns exec $ns2 sysctl -qw net.ipv6.conf.all.forwarding=0 +} + +test_force_forwarding() { + local ret=0 + + echo "TEST: force_forwarding functionality" + + # Check if force_forwarding sysctl exists + if ! ip netns exec $ns2 test -f /proc/sys/net/ipv6/conf/veth21/force_forwarding; then + echo "SKIP: force_forwarding not available" + return $ksft_skip + fi + + # Test 1: Without force_forwarding, ping should fail + ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth21.force_forwarding=0 + ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth23.force_forwarding=0 + + if ip netns exec $ns1 ping -6 -c 1 -W 2 2001:db8:2::2 &>/dev/null; then + echo "FAIL: ping succeeded when forwarding disabled" + ret=1 + else + echo "PASS: forwarding disabled correctly" + fi + + # Test 2: With force_forwarding enabled, ping should succeed + ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth21.force_forwarding=1 + ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth23.force_forwarding=1 + + if ip netns exec $ns1 ping -6 -c 1 -W 2 2001:db8:2::2 &>/dev/null; then + echo "PASS: force_forwarding enabled forwarding" + else + echo "FAIL: ping failed with force_forwarding enabled" + ret=1 + fi + + return $ret +} + +echo "IPv6 force_forwarding test" +echo "==========================" + +setup_test +test_force_forwarding +ret=$? + +if [ $ret -eq 0 ]; then + echo "OK" + exit 0 +elif [ $ret -eq $ksft_skip ]; then + echo "SKIP" + exit $ksft_skip +else + echo "FAIL" + exit 1 +fi diff --git a/tools/testing/selftests/net/ipv6_fragmentation.c b/tools/testing/selftests/net/ipv6_fragmentation.c new file mode 100644 index 000000000000..672c9fe086a7 --- /dev/null +++ b/tools/testing/selftests/net/ipv6_fragmentation.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: Brett A C Sheffield <bacs@librecast.net> + * + * Kernel selftest for the IPv6 fragmentation regression which affected stable + * kernels: + * + * https://lore.kernel.org/stable/aElivdUXqd1OqgMY@karahi.gladserv.com + * + * Commit: a18dfa9925b9 ("ipv6: save dontfrag in cork") was backported to stable + * without some prerequisite commits. + * + * This caused a regression when sending IPv6 UDP packets by preventing + * fragmentation and instead returning -1 (EMSGSIZE). + * + * This selftest demonstrates the issue by sending an IPv6 UDP packet to + * localhost (::1) on the loopback interface from the autoconfigured link-local + * address. + * + * sendmsg(2) returns bytes sent correctly on a working kernel, and returns -1 + * (EMSGSIZE) when the regression is present. + * + * The regression was not present in the mainline kernel, but add this test to + * catch similar breakage in future. + */ + +#define _GNU_SOURCE + +#include <error.h> +#include <net/if.h> +#include <netinet/in.h> +#include <sched.h> +#include <stdio.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <unistd.h> +#include "kselftest.h" + +#define MTU 1500 +#define LARGER_THAN_MTU 8192 + +static void setup(void) +{ + struct ifreq ifr = { + .ifr_name = "lo" + }; + int ctl; + + /* we need to set MTU, so do this in a namespace to play nicely */ + if (unshare(CLONE_NEWNET) == -1) + error(KSFT_FAIL, errno, "unshare"); + + ctl = socket(AF_LOCAL, SOCK_STREAM, 0); + if (ctl == -1) + error(KSFT_FAIL, errno, "socket"); + + /* ensure MTU is smaller than what we plan to send */ + ifr.ifr_mtu = MTU; + if (ioctl(ctl, SIOCSIFMTU, &ifr) == -1) + error(KSFT_FAIL, errno, "ioctl: set MTU"); + + /* bring up interface */ + if (ioctl(ctl, SIOCGIFFLAGS, &ifr) == -1) + error(KSFT_FAIL, errno, "ioctl SIOCGIFFLAGS"); + ifr.ifr_flags = ifr.ifr_flags | IFF_UP; + if (ioctl(ctl, SIOCSIFFLAGS, &ifr) == -1) + error(KSFT_FAIL, errno, "ioctl: bring interface up"); + + if (close(ctl) == -1) + error(KSFT_FAIL, errno, "close"); +} + +int main(void) +{ + struct in6_addr addr = { + .s6_addr[15] = 0x01, /* ::1 */ + }; + struct sockaddr_in6 sa = { + .sin6_family = AF_INET6, + .sin6_addr = addr, + .sin6_port = htons(9) /* port 9/udp (DISCARD) */ + }; + static char buf[LARGER_THAN_MTU] = {0}; + struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) }; + struct msghdr msg = { + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_name = (struct sockaddr *)&sa, + .msg_namelen = sizeof(sa), + }; + ssize_t rc; + int s; + + printf("Testing IPv6 fragmentation\n"); + setup(); + s = socket(AF_INET6, SOCK_DGRAM, 0); +send_again: + rc = sendmsg(s, &msg, 0); + if (rc == -1) { + /* if interface wasn't ready, try again */ + if (errno == EADDRNOTAVAIL) { + usleep(1000); + goto send_again; + } + error(KSFT_FAIL, errno, "sendmsg"); + } else if (rc != LARGER_THAN_MTU) { + error(KSFT_FAIL, errno, "sendmsg returned %zi, expected %i", + rc, LARGER_THAN_MTU); + } + printf("[PASS] sendmsg() returned %zi\n", rc); + if (close(s) == -1) + error(KSFT_FAIL, errno, "close"); + return KSFT_PASS; +} diff --git a/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh b/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh index a6b2b1f9c641..c6866e42f95c 100755 --- a/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh +++ b/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh @@ -69,7 +69,6 @@ # which can affect the conditions needed to trigger a soft lockup. source lib.sh -source net_helper.sh TEST_DURATION=300 ROUTING_TABLE_REFRESH_PERIOD=0.01 diff --git a/tools/testing/selftests/net/ipvtap_test.sh b/tools/testing/selftests/net/ipvtap_test.sh new file mode 100755 index 000000000000..354ca7ce8584 --- /dev/null +++ b/tools/testing/selftests/net/ipvtap_test.sh @@ -0,0 +1,168 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Simple tests for ipvtap + + +# +# The testing environment looks this way: +# +# |------HNS-------| |------PHY-------| +# | veth<----------------->veth | +# |------|--|------| |----------------| +# | | +# | | |-----TST0-------| +# | |------------|----ipvlan | +# | |----------------| +# | +# | |-----TST1-------| +# |---------------|----ipvlan | +# |----------------| +# + +ALL_TESTS=" + test_ip_set +" + +source lib.sh + +DEBUG=0 + +VETH_HOST=vethtst.h +VETH_PHY=vethtst.p + +NS_COUNT=32 +IP_ITERATIONS=1024 +IPSET_TIMEOUT="60s" + +ns_run() { + ns=$1 + shift + if [[ "$ns" == "global" ]]; then + "$@" >/dev/null + else + ip netns exec "$ns" "$@" >/dev/null + fi +} + +test_ip_setup_env() { + setup_ns NS_PHY + setup_ns HST_NS + + # setup simulated other-host (phy) and host itself + ns_run "$HST_NS" ip link add $VETH_HOST type veth peer name $VETH_PHY \ + netns "$NS_PHY" >/dev/null + ns_run "$HST_NS" ip link set $VETH_HOST up + ns_run "$NS_PHY" ip link set $VETH_PHY up + + for ((i=0; i<NS_COUNT; i++)); do + setup_ns ipvlan_ns_$i + ns="ipvlan_ns_$i" + if [ "$DEBUG" = "1" ]; then + echo "created NS ${!ns}" + fi + if ! ns_run "$HST_NS" ip link add netns ${!ns} ipvlan0 \ + link $VETH_HOST \ + type ipvtap mode l2 bridge; then + exit_error "FAIL: Failed to configure ipvlan link." + fi + done +} + +test_ip_cleanup_env() { + ns_run "$HST_NS" ip link del $VETH_HOST + cleanup_all_ns +} + +exit_error() { + echo "$1" + exit $ksft_fail +} + +rnd() { + echo $(( RANDOM % 32 + 16 )) +} + +test_ip_set_thread() { + # Here we are trying to create some IP conflicts between namespaces. + # If just add/remove IP, nothing interesting will happen. + # But if add random IP and then remove random IP, + # eventually conflicts start to apear. + ip link set ipvlan0 up + for ((i=0; i<IP_ITERATIONS; i++)); do + v=$(rnd) + ip a a "172.25.0.$v/24" dev ipvlan0 2>/dev/null + ip a a "fc00::$v/64" dev ipvlan0 2>/dev/null + v=$(rnd) + ip a d "172.25.0.$v/24" dev ipvlan0 2>/dev/null + ip a d "fc00::$v/64" dev ipvlan0 2>/dev/null + done +} + +test_ip_set() { + RET=0 + + trap test_ip_cleanup_env EXIT + + test_ip_setup_env + + declare -A ns_pids + for ((i=0; i<NS_COUNT; i++)); do + ns="ipvlan_ns_$i" + ns_run ${!ns} timeout "$IPSET_TIMEOUT" \ + bash -c "$0 test_ip_set_thread"& + ns_pids[$i]=$! + done + + for ((i=0; i<NS_COUNT; i++)); do + wait "${ns_pids[$i]}" + done + + declare -A all_ips + for ((i=0; i<NS_COUNT; i++)); do + ns="ipvlan_ns_$i" + ip_output=$(ip netns exec ${!ns} ip a l dev ipvlan0 | grep inet) + while IFS= read -r nsip_out; do + if [[ -z $nsip_out ]]; then + continue; + fi + nsip=$(awk '{print $2}' <<< "$nsip_out") + if [[ -v all_ips[$nsip] ]]; then + RET=$ksft_fail + log_test "conflict for $nsip" + return "$RET" + else + all_ips[$nsip]=$i + fi + done <<< "$ip_output" + done + + if [ "$DEBUG" = "1" ]; then + for key in "${!all_ips[@]}"; do + echo "$key: ${all_ips[$key]}" + done + fi + + trap - EXIT + test_ip_cleanup_env + + log_test "test multithreaded ip set" +} + +if [[ "$1" == "-d" ]]; then + DEBUG=1 + shift +fi + +if [[ "$1" == "-t" ]]; then + shift + TESTS="$*" +fi + +if [[ "$1" == "test_ip_set_thread" ]]; then + test_ip_set_thread +else + require_command ip + + tests_run +fi diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index 0bd9a038a1f0..b3827b43782b 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -43,7 +43,7 @@ __ksft_status_merge() weights[$i]=$((weight++)) done - if [[ ${weights[$a]} > ${weights[$b]} ]]; then + if [[ ${weights[$a]} -ge ${weights[$b]} ]]; then echo "$a" return 0 else @@ -217,11 +217,74 @@ setup_ns() return $ksft_skip fi ip -n "${!ns_name}" link set lo up + ip netns exec "${!ns_name}" sysctl -wq net.ipv4.conf.all.rp_filter=0 + ip netns exec "${!ns_name}" sysctl -wq net.ipv4.conf.default.rp_filter=0 ns_list+=("${!ns_name}") done NS_LIST+=("${ns_list[@]}") } +in_all_ns() +{ + local ret=0 + local ns_list=("${NS_LIST[@]}") + + for ns in "${ns_list[@]}"; do + ip netns exec "${ns}" "$@" + (( ret = ret || $? )) + done + + return "${ret}" +} + +# Create netdevsim with given id and net namespace. +create_netdevsim() { + local id="$1" + local ns="$2" + + modprobe netdevsim &> /dev/null + udevadm settle + + echo "$id 1" | ip netns exec $ns tee /sys/bus/netdevsim/new_device >/dev/null + local dev=$(ip netns exec $ns ls /sys/bus/netdevsim/devices/netdevsim$id/net) + ip -netns $ns link set dev $dev name nsim$id + ip -netns $ns link set dev nsim$id up + + echo nsim$id +} + +create_netdevsim_port() { + local nsim_id="$1" + local ns="$2" + local port_id="$3" + local perm_addr="$4" + local orig_dev + local new_dev + local nsim_path + + nsim_path="/sys/bus/netdevsim/devices/netdevsim$nsim_id" + + echo "$port_id $perm_addr" | ip netns exec "$ns" tee "$nsim_path"/new_port > /dev/null || return 1 + + orig_dev=$(ip netns exec "$ns" find "$nsim_path"/net/ -maxdepth 1 -name 'e*' | tail -n 1) + orig_dev=$(basename "$orig_dev") + new_dev="nsim${nsim_id}p$port_id" + + ip -netns "$ns" link set dev "$orig_dev" name "$new_dev" + ip -netns "$ns" link set dev "$new_dev" up + + echo "$new_dev" +} + +# Remove netdevsim with given id. +cleanup_netdevsim() { + local id="$1" + + if [ -d "/sys/bus/netdevsim/devices/netdevsim$id/net" ]; then + echo "$id" > /sys/bus/netdevsim/del_device + fi +} + tc_rule_stats_get() { local dev=$1; shift @@ -230,7 +293,8 @@ tc_rule_stats_get() local selector=${1:-.packets}; shift tc -j -s filter show dev $dev $dir pref $pref \ - | jq ".[1].options.actions[].stats$selector" + | jq ".[] | select(.options.actions) | + .options.actions[].stats$selector" } tc_rule_handle_stats_get() @@ -245,6 +309,30 @@ tc_rule_handle_stats_get() .options.actions[0].stats$selector" } +# attach a qdisc with two children match/no-match and a flower filter to match +tc_set_flower_counter() { + local -r ns=$1 + local -r ipver=$2 + local -r dev=$3 + local -r flower_expr=$4 + + tc -n $ns qdisc add dev $dev root handle 1: prio bands 2 \ + priomap 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + + tc -n $ns qdisc add dev $dev parent 1:1 handle 11: pfifo + tc -n $ns qdisc add dev $dev parent 1:2 handle 12: pfifo + + tc -n $ns filter add dev $dev parent 1: protocol ipv$ipver \ + flower $flower_expr classid 1:2 +} + +tc_get_flower_counter() { + local -r ns=$1 + local -r dev=$2 + + tc -n $ns -j -s qdisc show dev $dev handle 12: | jq .[0].packets +} + ret_set_ksft_status() { local ksft_status=$1; shift @@ -261,7 +349,7 @@ log_test_result() local test_name=$1; shift local opt_str=$1; shift local result=$1; shift - local retmsg=$1; shift + local retmsg=$1 printf "TEST: %-60s [%s]\n" "$test_name $opt_str" "$result" if [[ $retmsg ]]; then @@ -439,7 +527,8 @@ mac_get() { local if_name=$1 - ip -j link show dev $if_name | jq -r '.[]["address"]' + run_on "$if_name" \ + ip -j link show dev "$if_name" | jq -r '.[]["address"]' } kill_process() @@ -450,78 +539,153 @@ kill_process() { kill $pid && wait $pid; } 2>/dev/null } -ip_link_add() +check_command() +{ + local cmd=$1; shift + + if [[ ! -x "$(command -v "$cmd")" ]]; then + log_test_skip "$cmd not installed" + return $EXIT_STATUS + fi +} + +require_command() +{ + local cmd=$1; shift + + if ! check_command "$cmd"; then + exit $EXIT_STATUS + fi +} + +adf_ip_link_add() { local name=$1; shift - ip link add name "$name" "$@" - defer ip link del dev "$name" + ip link add name "$name" "$@" && \ + defer ip link del dev "$name" } -ip_link_set_master() +adf_ip_link_set_master() { local member=$1; shift local master=$1; shift - ip link set dev "$member" master "$master" - defer ip link set dev "$member" nomaster + ip link set dev "$member" master "$master" && \ + defer ip link set dev "$member" nomaster } -ip_link_set_addr() +adf_ip_link_set_addr() { local name=$1; shift local addr=$1; shift local old_addr=$(mac_get "$name") - ip link set dev "$name" address "$addr" - defer ip link set dev "$name" address "$old_addr" + ip link set dev "$name" address "$addr" && \ + defer ip link set dev "$name" address "$old_addr" } -ip_link_is_up() +ip_link_has_flag() { local name=$1; shift + local flag=$1; shift local state=$(ip -j link show "$name" | - jq -r '(.[].flags[] | select(. == "UP")) // "DOWN"') - [[ $state == "UP" ]] + jq --arg flag "$flag" 'any(.[].flags[]; . == $flag)') + [[ $state == true ]] +} + +ip_link_is_up() +{ + ip_link_has_flag "$1" UP } -ip_link_set_up() +adf_ip_link_set_up() { local name=$1; shift if ! ip_link_is_up "$name"; then - ip link set dev "$name" up - defer ip link set dev "$name" down + ip link set dev "$name" up && \ + defer ip link set dev "$name" down fi } -ip_link_set_down() +adf_ip_link_set_down() { local name=$1; shift if ip_link_is_up "$name"; then - ip link set dev "$name" down - defer ip link set dev "$name" up + ip link set dev "$name" down && \ + defer ip link set dev "$name" up fi } -ip_addr_add() +adf_ip_addr_add() { local name=$1; shift - ip addr add dev "$name" "$@" - defer ip addr del dev "$name" "$@" + ip addr add dev "$name" "$@" && \ + defer ip addr del dev "$name" "$@" +} + +adf_ip_route_add() +{ + ip route add "$@" && \ + defer ip route del "$@" +} + +adf_bridge_vlan_add() +{ + bridge vlan add "$@" && \ + defer bridge vlan del "$@" +} + +wait_local_port_listen() +{ + local listener_ns="${1}" + local port="${2}" + local protocol="${3}" + local pattern + local i + + pattern=":$(printf "%04X" "${port}") " + + # for tcp protocol additionally check the socket state + [ ${protocol} = "tcp" ] && pattern="${pattern}0A" + for i in $(seq 10); do + if ip netns exec "${listener_ns}" awk '{print $2" "$4}' \ + /proc/net/"${protocol}"* | grep -q "${pattern}"; then + break + fi + sleep 0.1 + done } -ip_route_add() +cmd_jq() { - ip route add "$@" - defer ip route del "$@" + local cmd=$1 + local jq_exp=$2 + local jq_opts=$3 + local ret + local output + + output="$($cmd)" + # it the command fails, return error right away + ret=$? + if [[ $ret -ne 0 ]]; then + return $ret + fi + output=$(echo $output | jq -r $jq_opts "$jq_exp") + ret=$? + if [[ $ret -ne 0 ]]; then + return $ret + fi + echo $output + # return success only in case of non-empty output + [ ! -z "$output" ] } -bridge_vlan_add() +run_on() { - bridge vlan add "$@" - defer bridge vlan del "$@" + shift; "$@" } diff --git a/tools/testing/selftests/net/lib/.gitignore b/tools/testing/selftests/net/lib/.gitignore index 1ebc6187f421..6cd2b762af5d 100644 --- a/tools/testing/selftests/net/lib/.gitignore +++ b/tools/testing/selftests/net/lib/.gitignore @@ -1,2 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only csum +gro +xdp_helper diff --git a/tools/testing/selftests/net/lib/Makefile b/tools/testing/selftests/net/lib/Makefile index c22623b9a2a5..ff83603397d0 100644 --- a/tools/testing/selftests/net/lib/Makefile +++ b/tools/testing/selftests/net/lib/Makefile @@ -5,11 +5,18 @@ CFLAGS += -I../../../../../usr/include/ $(KHDR_INCLUDES) # Additional include paths needed by kselftest.h CFLAGS += -I../../ -TEST_FILES := ../../../../../Documentation/netlink/specs -TEST_FILES += ../../../../net/ynl +TEST_FILES := \ + ../../../../net/ynl \ + ../../../../../Documentation/netlink/specs \ + ksft_setup_loopback.sh \ +# end of TEST_FILES -TEST_GEN_FILES += csum -TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c)) +TEST_GEN_FILES := \ + $(patsubst %.c,%.o,$(wildcard *.bpf.c)) \ + csum \ + gro \ + xdp_helper \ +# end of TEST_GEN_FILES TEST_INCLUDES := $(wildcard py/*.py sh/*.sh) diff --git a/tools/testing/selftests/net/lib/csum.c b/tools/testing/selftests/net/lib/csum.c index 27437590eeb5..e28884ce3ab3 100644 --- a/tools/testing/selftests/net/lib/csum.c +++ b/tools/testing/selftests/net/lib/csum.c @@ -707,7 +707,7 @@ static uint32_t recv_get_packet_csum_status(struct msghdr *msg) cm->cmsg_level, cm->cmsg_type); if (cm->cmsg_len != CMSG_LEN(sizeof(struct tpacket_auxdata))) - error(1, 0, "cmsg: len=%lu expected=%lu", + error(1, 0, "cmsg: len=%zu expected=%zu", cm->cmsg_len, CMSG_LEN(sizeof(struct tpacket_auxdata))); aux = (void *)CMSG_DATA(cm); diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/lib/gro.c index b2184847e388..11b16ae5f0e8 100644 --- a/tools/testing/selftests/net/gro.c +++ b/tools/testing/selftests/net/lib/gro.c @@ -3,26 +3,52 @@ * This testsuite provides conformance testing for GRO coalescing. * * Test cases: - * 1.data + * + * data_*: * Data packets of the same size and same header setup with correct * sequence numbers coalesce. The one exception being the last data * packet coalesced: it can be smaller than the rest and coalesced * as long as it is in the same flow. - * 2.ack + * - data_same: same size packets coalesce + * - data_lrg_sml: large then small coalesces + * - data_lrg_1byte: large then 1 byte coalesces (Ethernet padding) + * - data_sml_lrg: small then large doesn't coalesce + * - data_burst: two bursts of two, separated by 100ms + * + * ack: * Pure ACK does not coalesce. - * 3.flags - * Specific test cases: no packets with PSH, SYN, URG, RST set will - * be coalesced. - * 4.tcp + * + * flags_*: + * No packets with PSH, SYN, URG, RST, CWR set will be coalesced. + * - flags_psh, flags_syn, flags_rst, flags_urg, flags_cwr + * + * tcp_*: * Packets with incorrect checksum, non-consecutive seqno and * different TCP header options shouldn't coalesce. Nit: given that * some extension headers have paddings, such as timestamp, headers - * that are padding differently would not be coalesced. - * 5.ip: - * Packets with different (ECN, TTL, TOS) header, ip options or - * ip fragments (ipv6) shouldn't coalesce. - * 6.large: + * that are padded differently would not be coalesced. + * - tcp_csum: incorrect checksum + * - tcp_seq: non-consecutive sequence numbers + * - tcp_ts: different timestamps + * - tcp_opt: different TCP options + * + * ip_*: + * Packets with different (ECN, TTL, TOS) header, IP options or + * IP fragments shouldn't coalesce. + * - ip_ecn, ip_tos: shared between IPv4/IPv6 + * - ip_csum: IPv4 only, bad IP header checksum + * - ip_ttl, ip_opt, ip_frag4: IPv4 only + * - ip_id_df*: IPv4 IP ID field coalescing tests + * - ip_frag6, ip_v6ext_*: IPv6 only + * + * large_*: * Packets larger than GRO_MAX_SIZE packets shouldn't coalesce. + * - large_max: exceeding max size + * - large_rem: remainder handling + * + * single, capacity: + * Boring cases used to test coalescing machinery itself and stats + * more than protocol behavior. * * MSS is defined as 4096 - header because if it is too small * (i.e. 1500 MTU - header), it will result in many packets, @@ -44,6 +70,7 @@ #include <linux/filter.h> #include <linux/if_packet.h> #include <linux/ipv6.h> +#include <linux/net_tstamp.h> #include <net/ethernet.h> #include <net/if.h> #include <netinet/in.h> @@ -55,9 +82,11 @@ #include <stdio.h> #include <stdarg.h> #include <string.h> +#include <time.h> #include <unistd.h> -#include "../kselftest.h" +#include "kselftest.h" +#include "ksft.h" #define DPORT 8000 #define SPORT 1500 @@ -66,11 +95,12 @@ #define START_SEQ 100 #define START_ACK 100 #define ETH_P_NONE 0 -#define TOTAL_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr)) -#define MSS (4096 - sizeof(struct tcphdr) - sizeof(struct ipv6hdr)) -#define MAX_PAYLOAD (IP_MAXPACKET - sizeof(struct tcphdr) - sizeof(struct ipv6hdr)) -#define NUM_LARGE_PKT (MAX_PAYLOAD / MSS) -#define MAX_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr)) +#define ASSUMED_MTU 4096 +#define MAX_MSS (ASSUMED_MTU - sizeof(struct iphdr) - sizeof(struct tcphdr)) +#define MAX_HDR_LEN \ + (ETH_HLEN + sizeof(struct ipv6hdr) * 2 + sizeof(struct tcphdr)) +#define MAX_LARGE_PKT_CNT ((IP_MAXPACKET - (MAX_HDR_LEN - ETH_HLEN)) / \ + (ASSUMED_MTU - (MAX_HDR_LEN - ETH_HLEN))) #define MIN_EXTHDR_SIZE 8 #define EXT_PAYLOAD_1 "\x00\x00\x00\x00\x00\x00" #define EXT_PAYLOAD_2 "\x11\x11\x11\x11\x11\x11" @@ -78,6 +108,15 @@ #define ipv6_optlen(p) (((p)->hdrlen+1) << 3) /* calculate IPv6 extension header len */ #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) +enum flush_id_case { + FLUSH_ID_DF1_INC, + FLUSH_ID_DF1_FIXED, + FLUSH_ID_DF0_INC, + FLUSH_ID_DF0_FIXED, + FLUSH_ID_DF1_INC_FIXED, + FLUSH_ID_DF1_FIXED_INC, +}; + static const char *addr6_src = "fdaa::2"; static const char *addr6_dst = "fdaa::1"; static const char *addr4_src = "192.168.1.200"; @@ -93,7 +132,33 @@ static bool tx_socket = true; static int tcp_offset = -1; static int total_hdr_len = -1; static int ethhdr_proto = -1; -static const int num_flush_id_cases = 6; +static bool ipip; +static bool ip6ip6; +static uint64_t txtime_ns; +static int num_flows = 4; +static bool order_check; + +#define CAPACITY_PAYLOAD_LEN 200 + +#define TXTIME_DELAY_MS 5 + +/* Max TCP payload that GRO will coalesce. The outer header overhead + * varies by encapsulation, reducing the effective max payload. + */ +static int max_payload(void) +{ + return IP_MAXPACKET - (total_hdr_len - ETH_HLEN); +} + +static int calc_mss(void) +{ + return ASSUMED_MTU - (total_hdr_len - ETH_HLEN); +} + +static int num_large_pkt(void) +{ + return max_payload() / calc_mss(); +} static void vlog(const char *fmt, ...) { @@ -112,30 +177,30 @@ static void setup_sock_filter(int fd) const int ethproto_off = offsetof(struct ethhdr, h_proto); int optlen = 0; int ipproto_off, opt_ipproto_off; - int next_off; if (proto == PF_INET) - next_off = offsetof(struct iphdr, protocol); + ipproto_off = tcp_offset - sizeof(struct iphdr) + + offsetof(struct iphdr, protocol); else - next_off = offsetof(struct ipv6hdr, nexthdr); - ipproto_off = ETH_HLEN + next_off; + ipproto_off = tcp_offset - sizeof(struct ipv6hdr) + + offsetof(struct ipv6hdr, nexthdr); /* Overridden later if exthdrs are used: */ opt_ipproto_off = ipproto_off; - if (strcmp(testname, "ip") == 0) { - if (proto == PF_INET) - optlen = sizeof(struct ip_timestamp); - else { - BUILD_BUG_ON(sizeof(struct ip6_hbh) > MIN_EXTHDR_SIZE); - BUILD_BUG_ON(sizeof(struct ip6_dest) > MIN_EXTHDR_SIZE); - BUILD_BUG_ON(sizeof(struct ip6_frag) > MIN_EXTHDR_SIZE); - - /* same size for HBH and Fragment extension header types */ - optlen = MIN_EXTHDR_SIZE; - opt_ipproto_off = ETH_HLEN + sizeof(struct ipv6hdr) - + offsetof(struct ip6_ext, ip6e_nxt); - } + if (strcmp(testname, "ip_opt") == 0) { + optlen = sizeof(struct ip_timestamp); + } else if (strcmp(testname, "ip_frag6") == 0 || + strcmp(testname, "ip_v6ext_same") == 0 || + strcmp(testname, "ip_v6ext_diff") == 0) { + BUILD_BUG_ON(sizeof(struct ip6_hbh) > MIN_EXTHDR_SIZE); + BUILD_BUG_ON(sizeof(struct ip6_dest) > MIN_EXTHDR_SIZE); + BUILD_BUG_ON(sizeof(struct ip6_frag) > MIN_EXTHDR_SIZE); + + /* same size for HBH and Fragment extension header types */ + optlen = MIN_EXTHDR_SIZE; + opt_ipproto_off = ETH_HLEN + sizeof(struct ipv6hdr) + + offsetof(struct ip6_ext, ip6e_nxt); } /* this filter validates the following: @@ -244,7 +309,7 @@ static void fill_datalinklayer(void *buf) eth->h_proto = ethhdr_proto; } -static void fill_networklayer(void *buf, int payload_len) +static void fill_networklayer(void *buf, int payload_len, int protocol) { struct ipv6hdr *ip6h = buf; struct iphdr *iph = buf; @@ -254,7 +319,7 @@ static void fill_networklayer(void *buf, int payload_len) ip6h->version = 6; ip6h->payload_len = htons(sizeof(struct tcphdr) + payload_len); - ip6h->nexthdr = IPPROTO_TCP; + ip6h->nexthdr = protocol; ip6h->hop_limit = 8; if (inet_pton(AF_INET6, addr6_src, &ip6h->saddr) != 1) error(1, errno, "inet_pton source ip6"); @@ -266,7 +331,7 @@ static void fill_networklayer(void *buf, int payload_len) iph->version = 4; iph->ihl = 5; iph->ttl = 8; - iph->protocol = IPPROTO_TCP; + iph->protocol = protocol; iph->tot_len = htons(sizeof(struct tcphdr) + payload_len + sizeof(struct iphdr)); iph->frag_off = htons(0x4000); /* DF = 1, MF = 0 */ @@ -299,52 +364,155 @@ static void fill_transportlayer(void *buf, int seq_offset, int ack_offset, static void write_packet(int fd, char *buf, int len, struct sockaddr_ll *daddr) { + char control[CMSG_SPACE(sizeof(uint64_t))]; + struct msghdr msg = {}; + struct iovec iov = {}; + struct cmsghdr *cm; int ret = -1; - ret = sendto(fd, buf, len, 0, (struct sockaddr *)daddr, sizeof(*daddr)); + iov.iov_base = buf; + iov.iov_len = len; + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_name = daddr; + msg.msg_namelen = sizeof(*daddr); + + if (txtime_ns) { + memset(control, 0, sizeof(control)); + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + cm = CMSG_FIRSTHDR(&msg); + cm->cmsg_level = SOL_SOCKET; + cm->cmsg_type = SCM_TXTIME; + cm->cmsg_len = CMSG_LEN(sizeof(uint64_t)); + memcpy(CMSG_DATA(cm), &txtime_ns, sizeof(txtime_ns)); + } + + ret = sendmsg(fd, &msg, 0); if (ret == -1) - error(1, errno, "sendto failure"); + error(1, errno, "sendmsg failure"); if (ret != len) - error(1, errno, "sendto wrong length"); + error(1, 0, "sendmsg wrong length: %d vs %d", ret, len); } static void create_packet(void *buf, int seq_offset, int ack_offset, int payload_len, int fin) { + int ip_hdr_len = (proto == PF_INET) ? + sizeof(struct iphdr) : sizeof(struct ipv6hdr); + int inner_ip_off = tcp_offset - ip_hdr_len; + memset(buf, 0, total_hdr_len); memset(buf + total_hdr_len, 'a', payload_len); + fill_transportlayer(buf + tcp_offset, seq_offset, ack_offset, payload_len, fin); - fill_networklayer(buf + ETH_HLEN, payload_len); + + fill_networklayer(buf + inner_ip_off, payload_len, IPPROTO_TCP); + if (inner_ip_off > ETH_HLEN) { + int encap_proto = (proto == PF_INET) ? + IPPROTO_IPIP : IPPROTO_IPV6; + + fill_networklayer(buf + ETH_HLEN, + payload_len + ip_hdr_len, encap_proto); + } + fill_datalinklayer(buf); } -/* send one extra flag, not first and not last pkt */ -static void send_flags(int fd, struct sockaddr_ll *daddr, int psh, int syn, - int rst, int urg) +static void create_capacity_packet(void *buf, int flow_id, int pkt_idx, int psh) { - static char flag_buf[MAX_HDR_LEN + PAYLOAD_LEN]; - static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; - int payload_len, pkt_size, flag, i; + int seq_offset = pkt_idx * CAPACITY_PAYLOAD_LEN; struct tcphdr *tcph; - payload_len = PAYLOAD_LEN * psh; - pkt_size = total_hdr_len + payload_len; - flag = NUM_PACKETS / 2; + create_packet(buf, seq_offset, 0, CAPACITY_PAYLOAD_LEN, 0); - create_packet(flag_buf, flag * payload_len, 0, payload_len, 0); + /* Customize for this flow id */ + memset(buf + total_hdr_len, 'a' + flow_id, CAPACITY_PAYLOAD_LEN); - tcph = (struct tcphdr *)(flag_buf + tcp_offset); + tcph = buf + tcp_offset; + tcph->source = htons(SPORT + flow_id); + tcph->psh = psh; + tcph->check = 0; + tcph->check = tcp_checksum(tcph, CAPACITY_PAYLOAD_LEN); +} + +/* Send a capacity test, 2 packets per flow, all first packets then all second: + * A1 B1 C1 D1 ... A2 B2 C2 D2 ... + */ +static void send_capacity(int fd, struct sockaddr_ll *daddr) +{ + static char buf[MAX_HDR_LEN + CAPACITY_PAYLOAD_LEN]; + int pkt_size = total_hdr_len + CAPACITY_PAYLOAD_LEN; + int i; + + /* Send first packet of each flow (no PSH) */ + for (i = 0; i < num_flows; i++) { + create_capacity_packet(buf, i, 0, 0); + write_packet(fd, buf, pkt_size, daddr); + } + + /* Send second packet of each flow (with PSH to flush) */ + for (i = 0; i < num_flows; i++) { + create_capacity_packet(buf, i, 1, 1); + write_packet(fd, buf, pkt_size, daddr); + } +} + +#ifndef TH_CWR +#define TH_CWR 0x80 +#endif +static void set_flags(struct tcphdr *tcph, int payload_len, int psh, int syn, + int rst, int urg, int cwr) +{ tcph->psh = psh; tcph->syn = syn; tcph->rst = rst; tcph->urg = urg; + if (cwr) + tcph->th_flags |= TH_CWR; + else + tcph->th_flags &= ~TH_CWR; tcph->check = 0; tcph->check = tcp_checksum(tcph, payload_len); +} + +/* send extra flags of the (NUM_PACKETS / 2) and (NUM_PACKETS / 2 - 1) + * pkts, not first and not last pkt + */ +static void send_flags(int fd, struct sockaddr_ll *daddr, int psh, int syn, + int rst, int urg, int cwr) +{ + static char flag_buf[2][MAX_HDR_LEN + PAYLOAD_LEN]; + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + int payload_len, pkt_size, i; + struct tcphdr *tcph; + int flag[2]; + + payload_len = PAYLOAD_LEN * (psh || cwr); + pkt_size = total_hdr_len + payload_len; + flag[0] = NUM_PACKETS / 2; + flag[1] = NUM_PACKETS / 2 - 1; + + /* Create and configure packets with flags + */ + for (i = 0; i < 2; i++) { + if (flag[i] > 0) { + create_packet(flag_buf[i], flag[i] * payload_len, 0, + payload_len, 0); + tcph = (struct tcphdr *)(flag_buf[i] + tcp_offset); + set_flags(tcph, payload_len, psh, syn, rst, urg, cwr); + } + } for (i = 0; i < NUM_PACKETS + 1; i++) { - if (i == flag) { - write_packet(fd, flag_buf, pkt_size, daddr); + if (i == flag[0]) { + write_packet(fd, flag_buf[0], pkt_size, daddr); + continue; + } else if (i == flag[1] && cwr) { + write_packet(fd, flag_buf[1], pkt_size, daddr); continue; } create_packet(buf, i * PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); @@ -371,18 +539,20 @@ static void send_data_pkts(int fd, struct sockaddr_ll *daddr, */ static void send_large(int fd, struct sockaddr_ll *daddr, int remainder) { - static char pkts[NUM_LARGE_PKT][TOTAL_HDR_LEN + MSS]; - static char last[TOTAL_HDR_LEN + MSS]; - static char new_seg[TOTAL_HDR_LEN + MSS]; + static char pkts[MAX_LARGE_PKT_CNT][MAX_HDR_LEN + MAX_MSS]; + static char new_seg[MAX_HDR_LEN + MAX_MSS]; + static char last[MAX_HDR_LEN + MAX_MSS]; + const int num_pkt = num_large_pkt(); + const int mss = calc_mss(); int i; - for (i = 0; i < NUM_LARGE_PKT; i++) - create_packet(pkts[i], i * MSS, 0, MSS, 0); - create_packet(last, NUM_LARGE_PKT * MSS, 0, remainder, 0); - create_packet(new_seg, (NUM_LARGE_PKT + 1) * MSS, 0, remainder, 0); + for (i = 0; i < num_pkt; i++) + create_packet(pkts[i], i * mss, 0, mss, 0); + create_packet(last, num_pkt * mss, 0, remainder, 0); + create_packet(new_seg, (num_pkt + 1) * mss, 0, remainder, 0); - for (i = 0; i < NUM_LARGE_PKT; i++) - write_packet(fd, pkts[i], total_hdr_len + MSS, daddr); + for (i = 0; i < num_pkt; i++) + write_packet(fd, pkts[i], total_hdr_len + mss, daddr); write_packet(fd, last, total_hdr_len + remainder, daddr); write_packet(fd, new_seg, total_hdr_len + remainder, daddr); } @@ -402,8 +572,7 @@ static void send_ack(int fd, struct sockaddr_ll *daddr) static void recompute_packet(char *buf, char *no_ext, int extlen) { struct tcphdr *tcphdr = (struct tcphdr *)(buf + tcp_offset); - struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN); - struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN); + int off; memmove(buf, no_ext, total_hdr_len); memmove(buf + total_hdr_len + extlen, @@ -413,11 +582,22 @@ static void recompute_packet(char *buf, char *no_ext, int extlen) tcphdr->check = 0; tcphdr->check = tcp_checksum(tcphdr, PAYLOAD_LEN + extlen); if (proto == PF_INET) { - iph->tot_len = htons(ntohs(iph->tot_len) + extlen); - iph->check = 0; - iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); + for (off = ETH_HLEN; off < tcp_offset; + off += sizeof(struct iphdr)) { + struct iphdr *iph = (struct iphdr *)(buf + off); + + iph->tot_len = htons(ntohs(iph->tot_len) + extlen); + iph->check = 0; + iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); + } } else { - ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen); + for (off = ETH_HLEN; off < tcp_offset; + off += sizeof(struct ipv6hdr)) { + struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + off); + + ip6h->payload_len = + htons(ntohs(ip6h->payload_len) + extlen); + } } } @@ -506,6 +686,24 @@ static void send_changed_checksum(int fd, struct sockaddr_ll *daddr) write_packet(fd, buf, pkt_size, daddr); } +/* Packets with incorrect IPv4 header checksum don't coalesce. */ +static void send_changed_ip_checksum(int fd, struct sockaddr_ll *daddr) +{ + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN); + int pkt_size = total_hdr_len + PAYLOAD_LEN; + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + iph->check = iph->check - 1; + write_packet(fd, buf, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, pkt_size, daddr); +} + /* Packets with non-consecutive sequence number don't coalesce.*/ static void send_changed_seq(int fd, struct sockaddr_ll *daddr) { @@ -627,7 +825,8 @@ static void fix_ip4_checksum(struct iphdr *iph) iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); } -static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase) +static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, + enum flush_id_case tcase) { static char buf1[MAX_HDR_LEN + PAYLOAD_LEN]; static char buf2[MAX_HDR_LEN + PAYLOAD_LEN]; @@ -646,7 +845,7 @@ static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase) create_packet(buf3, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0); switch (tcase) { - case 0: /* DF=1, Incrementing - should coalesce */ + case FLUSH_ID_DF1_INC: /* DF=1, Incrementing - should coalesce */ iph1->frag_off |= htons(IP_DF); iph1->id = htons(8); @@ -654,7 +853,7 @@ static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase) iph2->id = htons(9); break; - case 1: /* DF=1, Fixed - should coalesce */ + case FLUSH_ID_DF1_FIXED: /* DF=1, Fixed - should coalesce */ iph1->frag_off |= htons(IP_DF); iph1->id = htons(8); @@ -662,7 +861,7 @@ static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase) iph2->id = htons(8); break; - case 2: /* DF=0, Incrementing - should coalesce */ + case FLUSH_ID_DF0_INC: /* DF=0, Incrementing - should coalesce */ iph1->frag_off &= ~htons(IP_DF); iph1->id = htons(8); @@ -670,7 +869,7 @@ static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase) iph2->id = htons(9); break; - case 3: /* DF=0, Fixed - should not coalesce */ + case FLUSH_ID_DF0_FIXED: /* DF=0, Fixed - should coalesce */ iph1->frag_off &= ~htons(IP_DF); iph1->id = htons(8); @@ -678,9 +877,10 @@ static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase) iph2->id = htons(8); break; - case 4: /* DF=1, two packets incrementing, and one fixed - should - * coalesce only the first two packets - */ + case FLUSH_ID_DF1_INC_FIXED: /* DF=1, two packets incrementing, and + * one fixed - should coalesce only the + * first two packets + */ iph1->frag_off |= htons(IP_DF); iph1->id = htons(8); @@ -692,9 +892,10 @@ static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase) send_three = true; break; - case 5: /* DF=1, two packets fixed, and one incrementing - should - * coalesce only the first two packets - */ + case FLUSH_ID_DF1_FIXED_INC: /* DF=1, two packets fixed, and one + * incrementing - should coalesce only + * the first two packets + */ iph1->frag_off |= htons(IP_DF); iph1->id = htons(8); @@ -718,27 +919,17 @@ static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase) } } -static void test_flush_id(int fd, struct sockaddr_ll *daddr, char *fin_pkt) -{ - for (int i = 0; i < num_flush_id_cases; i++) { - sleep(1); - send_flush_id_case(fd, daddr, i); - sleep(1); - write_packet(fd, fin_pkt, total_hdr_len, daddr); - } -} - static void send_ipv6_exthdr(int fd, struct sockaddr_ll *daddr, char *ext_data1, char *ext_data2) { static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; static char exthdr_pck[sizeof(buf) + MIN_EXTHDR_SIZE]; create_packet(buf, 0, 0, PAYLOAD_LEN, 0); - add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS, ext_data1); + add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_DSTOPTS, ext_data1); write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr); create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0); - add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS, ext_data2); + add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_DSTOPTS, ext_data2); write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr); } @@ -777,7 +968,7 @@ static void send_fragment4(int fd, struct sockaddr_ll *daddr) */ memset(buf + total_hdr_len, 'a', PAYLOAD_LEN * 2); fill_transportlayer(buf + tcp_offset, PAYLOAD_LEN, 0, PAYLOAD_LEN * 2, 0); - fill_networklayer(buf + ETH_HLEN, PAYLOAD_LEN); + fill_networklayer(buf + ETH_HLEN, PAYLOAD_LEN, IPPROTO_TCP); fill_datalinklayer(buf); iph->frag_off = htons(0x6000); // DF = 1, MF = 1 @@ -905,6 +1096,28 @@ static void set_timeout(int fd) error(1, errno, "cannot set timeout, setsockopt failed"); } +static void set_rcvbuf(int fd) +{ + int bufsize = 1 * 1024 * 1024; /* 1 MB */ + + if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &bufsize, sizeof(bufsize))) + error(1, errno, "cannot set rcvbuf size, setsockopt failed"); +} + +static void recv_error(int fd, int rcv_errno) +{ + struct tpacket_stats stats; + socklen_t len; + + len = sizeof(stats); + if (getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &stats, &len)) + error(1, errno, "can't get stats"); + + fprintf(stderr, "Socket stats: packets=%u, drops=%u\n", + stats.tp_packets, stats.tp_drops); + error(1, rcv_errno, "could not receive"); +} + static void check_recv_pkts(int fd, int *correct_payload, int correct_num_pkts) { @@ -929,11 +1142,12 @@ static void check_recv_pkts(int fd, int *correct_payload, ip_ext_len = 0; pkt_size = recv(fd, buffer, IP_MAXPACKET + ETH_HLEN + 1, 0); if (pkt_size < 0) - error(1, errno, "could not receive"); + recv_error(fd, errno); if (iph->version == 4) ip_ext_len = (iph->ihl - 5) * 4; - else if (ip6h->version == 6 && ip6h->nexthdr != IPPROTO_TCP) + else if (ip6h->version == 6 && !ip6ip6 && + ip6h->nexthdr != IPPROTO_TCP) ip_ext_len = MIN_EXTHDR_SIZE; tcph = (struct tcphdr *)(buffer + tcp_offset + ip_ext_len); @@ -967,8 +1181,130 @@ static void check_recv_pkts(int fd, int *correct_payload, printf("Test succeeded\n\n"); } +static void check_capacity_pkts(int fd) +{ + static char buffer[IP_MAXPACKET + ETH_HLEN + 1]; + struct iphdr *iph = (struct iphdr *)(buffer + ETH_HLEN); + struct ipv6hdr *ip6h = (struct ipv6hdr *)(buffer + ETH_HLEN); + int num_pkt = 0, num_coal = 0, pkt_idx; + const char *fail_reason = NULL; + int flow_order[num_flows * 2]; + int coalesced[num_flows]; + struct tcphdr *tcph; + int ip_ext_len = 0; + int total_data = 0; + int pkt_size = -1; + int data_len = 0; + int flow_id; + int sport; + + memset(coalesced, 0, sizeof(coalesced)); + memset(flow_order, -1, sizeof(flow_order)); + + while (1) { + ip_ext_len = 0; + pkt_size = recv(fd, buffer, IP_MAXPACKET + ETH_HLEN + 1, 0); + if (pkt_size < 0) + recv_error(fd, errno); + + if (iph->version == 4) + ip_ext_len = (iph->ihl - 5) * 4; + else if (ip6h->version == 6 && !ip6ip6 && + ip6h->nexthdr != IPPROTO_TCP) + ip_ext_len = MIN_EXTHDR_SIZE; + + tcph = (struct tcphdr *)(buffer + tcp_offset + ip_ext_len); + + if (tcph->fin) + break; + + sport = ntohs(tcph->source); + flow_id = sport - SPORT; + + if (flow_id < 0 || flow_id >= num_flows) { + vlog("Invalid flow_id %d from sport %d\n", + flow_id, sport); + fail_reason = fail_reason ?: "invalid packet"; + continue; + } + + /* Calculate payload length */ + if (pkt_size == ETH_ZLEN && iph->version == 4) { + data_len = ntohs(iph->tot_len) + - sizeof(struct tcphdr) - sizeof(struct iphdr); + } else { + data_len = pkt_size - total_hdr_len - ip_ext_len; + } + + if (num_pkt < num_flows * 2) { + flow_order[num_pkt] = flow_id; + } else if (num_pkt == num_flows * 2) { + vlog("More packets than expected (%d)\n", + num_flows * 2); + fail_reason = fail_reason ?: "too many packets"; + } + coalesced[flow_id] = data_len; + + if (data_len == CAPACITY_PAYLOAD_LEN * 2) { + num_coal++; + } else { + vlog("Pkt %d: flow %d, sport %d, len %d (expected %d)\n", + num_pkt, flow_id, sport, data_len, + CAPACITY_PAYLOAD_LEN * 2); + fail_reason = fail_reason ?: "not coalesced"; + } + + num_pkt++; + total_data += data_len; + } + + /* Check flow ordering. We expect to see all non-coalesced first segs + * then interleaved coalesced and non-coalesced second frames. + */ + pkt_idx = 0; + for (flow_id = 0; order_check && flow_id < num_flows; flow_id++) { + bool coaled = coalesced[flow_id] > CAPACITY_PAYLOAD_LEN; + + if (coaled) + continue; + + if (flow_order[pkt_idx] != flow_id) { + vlog("Flow order mismatch (non-coalesced) at position %d: expected flow %d, got flow %d\n", + pkt_idx, flow_id, flow_order[pkt_idx]); + fail_reason = fail_reason ?: "bad packet order (1)"; + } + pkt_idx++; + } + for (flow_id = 0; order_check && flow_id < num_flows; flow_id++) { + bool coaled = coalesced[flow_id] > CAPACITY_PAYLOAD_LEN; + + if (flow_order[pkt_idx] != flow_id) { + vlog("Flow order mismatch at position %d: expected flow %d, got flow %d, coalesced: %d\n", + pkt_idx, flow_id, flow_order[pkt_idx], coaled); + fail_reason = fail_reason ?: "bad packet order (2)"; + } + pkt_idx++; + } + + if (!fail_reason) { + vlog("All %d flows coalesced correctly\n", num_flows); + printf("Test succeeded\n\n"); + } else { + printf("FAILED\n"); + } + + /* Always print stats for external validation */ + printf("STATS: received=%d wire=%d coalesced=%d\n", + num_pkt, num_pkt + num_coal, num_coal); + + if (fail_reason) + error(1, 0, "capacity test failed %s", fail_reason); +} + static void gro_sender(void) { + int bufsize = 4 * 1024 * 1024; /* 4 MB */ + const int fin_delay_us = 100 * 1000; static char fin_pkt[MAX_HDR_LEN]; struct sockaddr_ll daddr = {}; int txfd = -1; @@ -977,6 +1313,27 @@ static void gro_sender(void) if (txfd < 0) error(1, errno, "socket creation"); + if (setsockopt(txfd, SOL_SOCKET, SO_SNDBUF, &bufsize, sizeof(bufsize))) + error(1, errno, "cannot set sndbuf size, setsockopt failed"); + + /* Enable SO_TXTIME unless test case generates more than one flow + * SO_TXTIME could result in qdisc layer sorting the packets at sender. + */ + if (strcmp(testname, "single") && strcmp(testname, "capacity")) { + struct sock_txtime so_txtime = { .clockid = CLOCK_MONOTONIC, }; + struct timespec ts; + + if (setsockopt(txfd, SOL_SOCKET, SO_TXTIME, + &so_txtime, sizeof(so_txtime))) + error(1, errno, "setsockopt SO_TXTIME"); + + if (clock_gettime(CLOCK_MONOTONIC, &ts)) + error(1, errno, "clock_gettime"); + + txtime_ns = ts.tv_sec * 1000000000ULL + ts.tv_nsec; + txtime_ns += TXTIME_DELAY_MS * 1000000ULL; + } + memset(&daddr, 0, sizeof(daddr)); daddr.sll_ifindex = if_nametoindex(ifname); if (daddr.sll_ifindex == 0) @@ -986,101 +1343,164 @@ static void gro_sender(void) daddr.sll_halen = ETH_ALEN; create_packet(fin_pkt, PAYLOAD_LEN * 2, 0, 0, 1); - if (strcmp(testname, "data") == 0) { + /* data sub-tests */ + if (strcmp(testname, "data_same") == 0) { send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - + } else if (strcmp(testname, "data_lrg_sml") == 0) { send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN / 2); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - + } else if (strcmp(testname, "data_lrg_1byte") == 0) { + send_data_pkts(txfd, &daddr, PAYLOAD_LEN, 1); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "data_sml_lrg") == 0) { send_data_pkts(txfd, &daddr, PAYLOAD_LEN / 2, PAYLOAD_LEN); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "data_burst") == 0) { + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + write_packet(txfd, buf, total_hdr_len + PAYLOAD_LEN, &daddr); + create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + write_packet(txfd, buf, total_hdr_len + PAYLOAD_LEN, &daddr); + + usleep(100 * 1000); /* 100ms */ + create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0); + write_packet(txfd, buf, total_hdr_len + PAYLOAD_LEN, &daddr); + create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0); + write_packet(txfd, buf, total_hdr_len + PAYLOAD_LEN, &daddr); + + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + /* ack test */ } else if (strcmp(testname, "ack") == 0) { send_ack(txfd, &daddr); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - } else if (strcmp(testname, "flags") == 0) { - send_flags(txfd, &daddr, 1, 0, 0, 0); - write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - send_flags(txfd, &daddr, 0, 1, 0, 0); + /* flags sub-tests */ + } else if (strcmp(testname, "flags_psh") == 0) { + send_flags(txfd, &daddr, 1, 0, 0, 0, 0); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - - send_flags(txfd, &daddr, 0, 0, 1, 0); + } else if (strcmp(testname, "flags_syn") == 0) { + send_flags(txfd, &daddr, 0, 1, 0, 0, 0); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - - send_flags(txfd, &daddr, 0, 0, 0, 1); + } else if (strcmp(testname, "flags_rst") == 0) { + send_flags(txfd, &daddr, 0, 0, 1, 0, 0); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - } else if (strcmp(testname, "tcp") == 0) { - send_changed_checksum(txfd, &daddr); + } else if (strcmp(testname, "flags_urg") == 0) { + send_flags(txfd, &daddr, 0, 0, 0, 1, 0); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "flags_cwr") == 0) { + send_flags(txfd, &daddr, 0, 0, 0, 0, 1); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + /* tcp sub-tests */ + } else if (strcmp(testname, "tcp_csum") == 0) { + send_changed_checksum(txfd, &daddr); + usleep(fin_delay_us); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "tcp_seq") == 0) { send_changed_seq(txfd, &daddr); + usleep(fin_delay_us); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - + } else if (strcmp(testname, "tcp_ts") == 0) { send_changed_ts(txfd, &daddr); + usleep(fin_delay_us); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - + } else if (strcmp(testname, "tcp_opt") == 0) { send_diff_opt(txfd, &daddr); + usleep(fin_delay_us); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - } else if (strcmp(testname, "ip") == 0) { + + /* ip sub-tests - shared between IPv4 and IPv6 */ + } else if (strcmp(testname, "ip_ecn") == 0) { send_changed_ECN(txfd, &daddr); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - + } else if (strcmp(testname, "ip_tos") == 0) { send_changed_tos(txfd, &daddr); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - if (proto == PF_INET) { - /* Modified packets may be received out of order. - * Sleep function added to enforce test boundaries - * so that fin pkts are not received prior to other pkts. - */ - sleep(1); - send_changed_ttl(txfd, &daddr); - write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - - sleep(1); - send_ip_options(txfd, &daddr); - sleep(1); - write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - - sleep(1); - send_fragment4(txfd, &daddr); - sleep(1); - write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - - test_flush_id(txfd, &daddr, fin_pkt); - } else if (proto == PF_INET6) { - sleep(1); - send_fragment6(txfd, &daddr); - sleep(1); - write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - - sleep(1); - /* send IPv6 packets with ext header with same payload */ - send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_1); - sleep(1); - write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - - sleep(1); - /* send IPv6 packets with ext header with different payload */ - send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_2); - sleep(1); - write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - } - } else if (strcmp(testname, "large") == 0) { - /* 20 is the difference between min iphdr size - * and min ipv6hdr size. Like MAX_HDR_SIZE, - * MAX_PAYLOAD is defined with the larger header of the two. - */ - int offset = proto == PF_INET ? 20 : 0; - int remainder = (MAX_PAYLOAD + offset) % MSS; + + /* ip sub-tests - IPv4 only */ + } else if (strcmp(testname, "ip_csum") == 0) { + send_changed_ip_checksum(txfd, &daddr); + usleep(fin_delay_us); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "ip_ttl") == 0) { + send_changed_ttl(txfd, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "ip_opt") == 0) { + send_ip_options(txfd, &daddr); + usleep(fin_delay_us); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "ip_frag4") == 0) { + send_fragment4(txfd, &daddr); + usleep(fin_delay_us); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "ip_id_df1_inc") == 0) { + send_flush_id_case(txfd, &daddr, FLUSH_ID_DF1_INC); + usleep(fin_delay_us); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "ip_id_df1_fixed") == 0) { + send_flush_id_case(txfd, &daddr, FLUSH_ID_DF1_FIXED); + usleep(fin_delay_us); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "ip_id_df0_inc") == 0) { + send_flush_id_case(txfd, &daddr, FLUSH_ID_DF0_INC); + usleep(fin_delay_us); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "ip_id_df0_fixed") == 0) { + send_flush_id_case(txfd, &daddr, FLUSH_ID_DF0_FIXED); + usleep(fin_delay_us); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "ip_id_df1_inc_fixed") == 0) { + send_flush_id_case(txfd, &daddr, FLUSH_ID_DF1_INC_FIXED); + usleep(fin_delay_us); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "ip_id_df1_fixed_inc") == 0) { + send_flush_id_case(txfd, &daddr, FLUSH_ID_DF1_FIXED_INC); + usleep(fin_delay_us); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + /* ip sub-tests - IPv6 only */ + } else if (strcmp(testname, "ip_frag6") == 0) { + send_fragment6(txfd, &daddr); + usleep(fin_delay_us); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "ip_v6ext_same") == 0) { + send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_1); + usleep(fin_delay_us); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "ip_v6ext_diff") == 0) { + send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_2); + usleep(fin_delay_us); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + /* large sub-tests */ + } else if (strcmp(testname, "large_max") == 0) { + int remainder = max_payload() % calc_mss(); send_large(txfd, &daddr, remainder); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "large_rem") == 0) { + int remainder = max_payload() % calc_mss(); send_large(txfd, &daddr, remainder + 1); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + /* machinery sub-tests */ + } else if (strcmp(testname, "single") == 0) { + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + write_packet(txfd, buf, total_hdr_len + PAYLOAD_LEN, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "capacity") == 0) { + send_capacity(txfd, &daddr); + usleep(fin_delay_us); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else { - error(1, 0, "Unknown testcase"); + error(1, 0, "Unknown testcase: %s", testname); } if (close(txfd)) @@ -1097,146 +1517,207 @@ static void gro_receiver(void) error(1, 0, "socket creation"); setup_sock_filter(rxfd); set_timeout(rxfd); + set_rcvbuf(rxfd); bind_packetsocket(rxfd); + ksft_ready(); + memset(correct_payload, 0, sizeof(correct_payload)); - if (strcmp(testname, "data") == 0) { + /* data sub-tests */ + if (strcmp(testname, "data_same") == 0) { printf("pure data packet of same size: "); correct_payload[0] = PAYLOAD_LEN * 2; check_recv_pkts(rxfd, correct_payload, 1); - + } else if (strcmp(testname, "data_lrg_sml") == 0) { printf("large data packets followed by a smaller one: "); correct_payload[0] = PAYLOAD_LEN * 1.5; check_recv_pkts(rxfd, correct_payload, 1); - + } else if (strcmp(testname, "data_lrg_1byte") == 0) { + printf("large data packet followed by a 1 byte one: "); + correct_payload[0] = PAYLOAD_LEN + 1; + check_recv_pkts(rxfd, correct_payload, 1); + } else if (strcmp(testname, "data_sml_lrg") == 0) { printf("small data packets followed by a larger one: "); correct_payload[0] = PAYLOAD_LEN / 2; correct_payload[1] = PAYLOAD_LEN; check_recv_pkts(rxfd, correct_payload, 2); + } else if (strcmp(testname, "data_burst") == 0) { + printf("two bursts of two data packets: "); + correct_payload[0] = PAYLOAD_LEN * 2; + correct_payload[1] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 2); + + /* ack test */ } else if (strcmp(testname, "ack") == 0) { printf("duplicate ack and pure ack: "); check_recv_pkts(rxfd, correct_payload, 3); - } else if (strcmp(testname, "flags") == 0) { + + /* flags sub-tests */ + } else if (strcmp(testname, "flags_psh") == 0) { correct_payload[0] = PAYLOAD_LEN * 3; correct_payload[1] = PAYLOAD_LEN * 2; - printf("psh flag ends coalescing: "); check_recv_pkts(rxfd, correct_payload, 2); - + } else if (strcmp(testname, "flags_syn") == 0) { correct_payload[0] = PAYLOAD_LEN * 2; correct_payload[1] = 0; correct_payload[2] = PAYLOAD_LEN * 2; printf("syn flag ends coalescing: "); check_recv_pkts(rxfd, correct_payload, 3); - + } else if (strcmp(testname, "flags_rst") == 0) { + correct_payload[0] = PAYLOAD_LEN * 2; + correct_payload[1] = 0; + correct_payload[2] = PAYLOAD_LEN * 2; printf("rst flag ends coalescing: "); check_recv_pkts(rxfd, correct_payload, 3); - + } else if (strcmp(testname, "flags_urg") == 0) { + correct_payload[0] = PAYLOAD_LEN * 2; + correct_payload[1] = 0; + correct_payload[2] = PAYLOAD_LEN * 2; printf("urg flag ends coalescing: "); check_recv_pkts(rxfd, correct_payload, 3); - } else if (strcmp(testname, "tcp") == 0) { + } else if (strcmp(testname, "flags_cwr") == 0) { correct_payload[0] = PAYLOAD_LEN; - correct_payload[1] = PAYLOAD_LEN; - correct_payload[2] = PAYLOAD_LEN; - correct_payload[3] = PAYLOAD_LEN; + correct_payload[1] = PAYLOAD_LEN * 2; + correct_payload[2] = PAYLOAD_LEN * 2; + printf("cwr flag ends coalescing: "); + check_recv_pkts(rxfd, correct_payload, 3); + /* tcp sub-tests */ + } else if (strcmp(testname, "tcp_csum") == 0) { + correct_payload[0] = PAYLOAD_LEN; + correct_payload[1] = PAYLOAD_LEN; printf("changed checksum does not coalesce: "); check_recv_pkts(rxfd, correct_payload, 2); - + } else if (strcmp(testname, "tcp_seq") == 0) { + correct_payload[0] = PAYLOAD_LEN; + correct_payload[1] = PAYLOAD_LEN; printf("Wrong Seq number doesn't coalesce: "); check_recv_pkts(rxfd, correct_payload, 2); - - printf("Different timestamp doesn't coalesce: "); + } else if (strcmp(testname, "tcp_ts") == 0) { correct_payload[0] = PAYLOAD_LEN * 2; + correct_payload[1] = PAYLOAD_LEN; + correct_payload[2] = PAYLOAD_LEN; + correct_payload[3] = PAYLOAD_LEN; + printf("Different timestamp doesn't coalesce: "); check_recv_pkts(rxfd, correct_payload, 4); - - printf("Different options doesn't coalesce: "); + } else if (strcmp(testname, "tcp_opt") == 0) { correct_payload[0] = PAYLOAD_LEN * 2; + correct_payload[1] = PAYLOAD_LEN; + printf("Different options doesn't coalesce: "); check_recv_pkts(rxfd, correct_payload, 2); - } else if (strcmp(testname, "ip") == 0) { + + /* ip sub-tests - shared between IPv4 and IPv6 */ + } else if (strcmp(testname, "ip_ecn") == 0) { correct_payload[0] = PAYLOAD_LEN; correct_payload[1] = PAYLOAD_LEN; - printf("different ECN doesn't coalesce: "); check_recv_pkts(rxfd, correct_payload, 2); - + } else if (strcmp(testname, "ip_tos") == 0) { + correct_payload[0] = PAYLOAD_LEN; + correct_payload[1] = PAYLOAD_LEN; printf("different tos doesn't coalesce: "); check_recv_pkts(rxfd, correct_payload, 2); - if (proto == PF_INET) { - printf("different ttl doesn't coalesce: "); - check_recv_pkts(rxfd, correct_payload, 2); - - printf("ip options doesn't coalesce: "); - correct_payload[2] = PAYLOAD_LEN; - check_recv_pkts(rxfd, correct_payload, 3); - - printf("fragmented ip4 doesn't coalesce: "); - check_recv_pkts(rxfd, correct_payload, 2); - - /* is_atomic checks */ - printf("DF=1, Incrementing - should coalesce: "); - correct_payload[0] = PAYLOAD_LEN * 2; - check_recv_pkts(rxfd, correct_payload, 1); - - printf("DF=1, Fixed - should coalesce: "); - correct_payload[0] = PAYLOAD_LEN * 2; - check_recv_pkts(rxfd, correct_payload, 1); - - printf("DF=0, Incrementing - should coalesce: "); - correct_payload[0] = PAYLOAD_LEN * 2; - check_recv_pkts(rxfd, correct_payload, 1); - - printf("DF=0, Fixed - should not coalesce: "); - correct_payload[0] = PAYLOAD_LEN; - correct_payload[1] = PAYLOAD_LEN; - check_recv_pkts(rxfd, correct_payload, 2); - - printf("DF=1, 2 Incrementing and one fixed - should coalesce only first 2 packets: "); - correct_payload[0] = PAYLOAD_LEN * 2; - correct_payload[1] = PAYLOAD_LEN; - check_recv_pkts(rxfd, correct_payload, 2); - - printf("DF=1, 2 Fixed and one incrementing - should coalesce only first 2 packets: "); - correct_payload[0] = PAYLOAD_LEN * 2; - correct_payload[1] = PAYLOAD_LEN; - check_recv_pkts(rxfd, correct_payload, 2); - } else if (proto == PF_INET6) { - /* GRO doesn't check for ipv6 hop limit when flushing. - * Hence no corresponding test to the ipv4 case. - */ - printf("fragmented ip6 doesn't coalesce: "); - correct_payload[0] = PAYLOAD_LEN * 2; - correct_payload[1] = PAYLOAD_LEN; - correct_payload[2] = PAYLOAD_LEN; - check_recv_pkts(rxfd, correct_payload, 3); - - printf("ipv6 with ext header does coalesce: "); - correct_payload[0] = PAYLOAD_LEN * 2; - check_recv_pkts(rxfd, correct_payload, 1); - - printf("ipv6 with ext header with different payloads doesn't coalesce: "); - correct_payload[0] = PAYLOAD_LEN; - correct_payload[1] = PAYLOAD_LEN; - check_recv_pkts(rxfd, correct_payload, 2); - } - } else if (strcmp(testname, "large") == 0) { - int offset = proto == PF_INET ? 20 : 0; - int remainder = (MAX_PAYLOAD + offset) % MSS; + /* ip sub-tests - IPv4 only */ + } else if (strcmp(testname, "ip_csum") == 0) { + correct_payload[0] = PAYLOAD_LEN; + correct_payload[1] = PAYLOAD_LEN; + correct_payload[2] = PAYLOAD_LEN; + printf("bad ip checksum doesn't coalesce: "); + check_recv_pkts(rxfd, correct_payload, 3); + } else if (strcmp(testname, "ip_ttl") == 0) { + correct_payload[0] = PAYLOAD_LEN; + correct_payload[1] = PAYLOAD_LEN; + printf("different ttl doesn't coalesce: "); + check_recv_pkts(rxfd, correct_payload, 2); + } else if (strcmp(testname, "ip_opt") == 0) { + correct_payload[0] = PAYLOAD_LEN; + correct_payload[1] = PAYLOAD_LEN; + correct_payload[2] = PAYLOAD_LEN; + printf("ip options doesn't coalesce: "); + check_recv_pkts(rxfd, correct_payload, 3); + } else if (strcmp(testname, "ip_frag4") == 0) { + correct_payload[0] = PAYLOAD_LEN; + correct_payload[1] = PAYLOAD_LEN; + printf("fragmented ip4 doesn't coalesce: "); + check_recv_pkts(rxfd, correct_payload, 2); + } else if (strcmp(testname, "ip_id_df1_inc") == 0) { + printf("DF=1, Incrementing - should coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 1); + } else if (strcmp(testname, "ip_id_df1_fixed") == 0) { + printf("DF=1, Fixed - should coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 1); + } else if (strcmp(testname, "ip_id_df0_inc") == 0) { + printf("DF=0, Incrementing - should coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 1); + } else if (strcmp(testname, "ip_id_df0_fixed") == 0) { + printf("DF=0, Fixed - should coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 1); + } else if (strcmp(testname, "ip_id_df1_inc_fixed") == 0) { + printf("DF=1, 2 Incrementing and one fixed - should coalesce only first 2 packets: "); + correct_payload[0] = PAYLOAD_LEN * 2; + correct_payload[1] = PAYLOAD_LEN; + check_recv_pkts(rxfd, correct_payload, 2); + } else if (strcmp(testname, "ip_id_df1_fixed_inc") == 0) { + printf("DF=1, 2 Fixed and one incrementing - should coalesce only first 2 packets: "); + correct_payload[0] = PAYLOAD_LEN * 2; + correct_payload[1] = PAYLOAD_LEN; + check_recv_pkts(rxfd, correct_payload, 2); + + /* ip sub-tests - IPv6 only */ + } else if (strcmp(testname, "ip_frag6") == 0) { + /* GRO doesn't check for ipv6 hop limit when flushing. + * Hence no corresponding test to the ipv4 case. + */ + printf("fragmented ip6 doesn't coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + correct_payload[1] = PAYLOAD_LEN; + correct_payload[2] = PAYLOAD_LEN; + check_recv_pkts(rxfd, correct_payload, 3); + } else if (strcmp(testname, "ip_v6ext_same") == 0) { + printf("ipv6 with ext header does coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 1); + } else if (strcmp(testname, "ip_v6ext_diff") == 0) { + printf("ipv6 with ext header with different payloads doesn't coalesce: "); + correct_payload[0] = PAYLOAD_LEN; + correct_payload[1] = PAYLOAD_LEN; + check_recv_pkts(rxfd, correct_payload, 2); - correct_payload[0] = (MAX_PAYLOAD + offset); + /* large sub-tests */ + } else if (strcmp(testname, "large_max") == 0) { + int remainder = max_payload() % calc_mss(); + + correct_payload[0] = max_payload(); correct_payload[1] = remainder; printf("Shouldn't coalesce if exceed IP max pkt size: "); check_recv_pkts(rxfd, correct_payload, 2); + } else if (strcmp(testname, "large_rem") == 0) { + int remainder = max_payload() % calc_mss(); /* last segment sent individually, doesn't start new segment */ - correct_payload[0] = correct_payload[0] - remainder; + correct_payload[0] = max_payload() - remainder; correct_payload[1] = remainder + 1; correct_payload[2] = remainder + 1; + printf("last segment sent individually: "); check_recv_pkts(rxfd, correct_payload, 3); + + /* machinery sub-tests */ + } else if (strcmp(testname, "single") == 0) { + printf("single data packet: "); + correct_payload[0] = PAYLOAD_LEN; + check_recv_pkts(rxfd, correct_payload, 1); + } else if (strcmp(testname, "capacity") == 0) { + check_capacity_pkts(rxfd); + } else { - error(1, 0, "Test case error, should never trigger"); + error(1, 0, "Test case error: unknown testname %s", testname); } if (close(rxfd)) @@ -1251,16 +1732,20 @@ static void parse_args(int argc, char **argv) { "iface", required_argument, NULL, 'i' }, { "ipv4", no_argument, NULL, '4' }, { "ipv6", no_argument, NULL, '6' }, + { "ipip", no_argument, NULL, 'e' }, + { "ip6ip6", no_argument, NULL, 'E' }, + { "num-flows", required_argument, NULL, 'n' }, { "rx", no_argument, NULL, 'r' }, { "saddr", required_argument, NULL, 's' }, { "smac", required_argument, NULL, 'S' }, { "test", required_argument, NULL, 't' }, + { "order-check", no_argument, NULL, 'o' }, { "verbose", no_argument, NULL, 'v' }, { 0, 0, 0, 0 } }; int c; - while ((c = getopt_long(argc, argv, "46d:D:i:rs:S:t:v", opts, NULL)) != -1) { + while ((c = getopt_long(argc, argv, "46d:D:eEi:n:rs:S:t:ov", opts, NULL)) != -1) { switch (c) { case '4': proto = PF_INET; @@ -1270,6 +1755,16 @@ static void parse_args(int argc, char **argv) proto = PF_INET6; ethhdr_proto = htons(ETH_P_IPV6); break; + case 'e': + ipip = true; + proto = PF_INET; + ethhdr_proto = htons(ETH_P_IP); + break; + case 'E': + ip6ip6 = true; + proto = PF_INET6; + ethhdr_proto = htons(ETH_P_IPV6); + break; case 'd': addr4_dst = addr6_dst = optarg; break; @@ -1279,6 +1774,9 @@ static void parse_args(int argc, char **argv) case 'i': ifname = optarg; break; + case 'n': + num_flows = atoi(optarg); + break; case 'r': tx_socket = false; break; @@ -1291,6 +1789,9 @@ static void parse_args(int argc, char **argv) case 't': testname = optarg; break; + case 'o': + order_check = true; + break; case 'v': verbose = true; break; @@ -1305,12 +1806,18 @@ int main(int argc, char **argv) { parse_args(argc, argv); - if (proto == PF_INET) { + if (ipip) { + tcp_offset = ETH_HLEN + sizeof(struct iphdr) * 2; + total_hdr_len = tcp_offset + sizeof(struct tcphdr); + } else if (ip6ip6) { + tcp_offset = ETH_HLEN + sizeof(struct ipv6hdr) * 2; + total_hdr_len = tcp_offset + sizeof(struct tcphdr); + } else if (proto == PF_INET) { tcp_offset = ETH_HLEN + sizeof(struct iphdr); total_hdr_len = tcp_offset + sizeof(struct tcphdr); } else if (proto == PF_INET6) { tcp_offset = ETH_HLEN + sizeof(struct ipv6hdr); - total_hdr_len = MAX_HDR_LEN; + total_hdr_len = tcp_offset + sizeof(struct tcphdr); } else { error(1, 0, "Protocol family is not ipv4 or ipv6"); } @@ -1318,11 +1825,13 @@ int main(int argc, char **argv) read_MAC(src_mac, smac); read_MAC(dst_mac, dmac); - if (tx_socket) + if (tx_socket) { gro_sender(); - else + } else { + /* Only the receiver exit status determines test success. */ gro_receiver(); + fprintf(stderr, "Gro::%s test passed.\n", testname); + } - fprintf(stderr, "Gro::%s test passed.\n", testname); return 0; } diff --git a/tools/testing/selftests/net/lib/ksft.h b/tools/testing/selftests/net/lib/ksft.h new file mode 100644 index 000000000000..03912902a6d3 --- /dev/null +++ b/tools/testing/selftests/net/lib/ksft.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#if !defined(__NET_KSFT_H__) +#define __NET_KSFT_H__ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +static inline void ksft_ready(void) +{ + const char msg[7] = "ready\n"; + char *env_str; + int fd; + + env_str = getenv("KSFT_READY_FD"); + if (env_str) { + fd = atoi(env_str); + if (!fd) { + fprintf(stderr, "invalid KSFT_READY_FD = '%s'\n", + env_str); + return; + } + } else { + fd = STDOUT_FILENO; + } + + if (write(fd, msg, sizeof(msg)) < 0) + perror("write()"); + if (fd != STDOUT_FILENO) + close(fd); +} + +static inline void ksft_wait(void) +{ + char *env_str; + char byte; + int fd; + + env_str = getenv("KSFT_WAIT_FD"); + if (env_str) { + fd = atoi(env_str); + if (!fd) { + fprintf(stderr, "invalid KSFT_WAIT_FD = '%s'\n", + env_str); + return; + } + } else { + /* Not running in KSFT env, wait for input from STDIN instead */ + fd = STDIN_FILENO; + } + + if (read(fd, &byte, sizeof(byte)) < 0) + perror("read()"); + if (fd != STDIN_FILENO) + close(fd); +} + +#endif diff --git a/tools/testing/selftests/net/lib/ksft_setup_loopback.sh b/tools/testing/selftests/net/lib/ksft_setup_loopback.sh new file mode 100755 index 000000000000..3defbb1919c5 --- /dev/null +++ b/tools/testing/selftests/net/lib/ksft_setup_loopback.sh @@ -0,0 +1,111 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Setup script for running ksft tests over a real interface in loopback mode. +# This scripts replaces the historical setup_loopback.sh. It puts +# a (presumably) real hardware interface into loopback mode, creates macvlan +# interfaces on top and places them in a network namespace for isolation. +# +# NETIF env variable must be exported to indicate the real target device. +# Note that the test will override NETIF with one of the macvlans, the +# actual ksft test will only see the macvlans. +# +# Example use: +# export NETIF=eth0 +# ./net/lib/ksft_setup_loopback.sh ./drivers/net/gro.py + +if [ -z "$NETIF" ]; then + echo "Error: NETIF variable not set" + exit 1 +fi +if ! [ -d "/sys/class/net/$NETIF" ]; then + echo "Error: Can't find $NETIF, invalid netdevice" + exit 1 +fi + +# Save original settings for cleanup +readonly FLUSH_PATH="/sys/class/net/${NETIF}/gro_flush_timeout" +readonly IRQ_PATH="/sys/class/net/${NETIF}/napi_defer_hard_irqs" +FLUSH_TIMEOUT="$(< "${FLUSH_PATH}")" +readonly FLUSH_TIMEOUT +HARD_IRQS="$(< "${IRQ_PATH}")" +readonly HARD_IRQS + +SERVER_NS=$(mktemp -u server-XXXXXXXX) +readonly SERVER_NS +CLIENT_NS=$(mktemp -u client-XXXXXXXX) +readonly CLIENT_NS +readonly SERVER_MAC="aa:00:00:00:00:02" +readonly CLIENT_MAC="aa:00:00:00:00:01" + +# ksft expects addresses to communicate with remote +export LOCAL_V6=2001:db8:1::1 +export REMOTE_V6=2001:db8:1::2 + +cleanup() { + local exit_code=$? + + echo "Cleaning up..." + + # Remove macvlan interfaces and namespaces + ip -netns "${SERVER_NS}" link del dev server 2>/dev/null || true + ip netns del "${SERVER_NS}" 2>/dev/null || true + ip -netns "${CLIENT_NS}" link del dev client 2>/dev/null || true + ip netns del "${CLIENT_NS}" 2>/dev/null || true + + # Disable loopback + ethtool -K "${NETIF}" loopback off 2>/dev/null || true + sleep 1 + + echo "${FLUSH_TIMEOUT}" >"${FLUSH_PATH}" + echo "${HARD_IRQS}" >"${IRQ_PATH}" + + exit $exit_code +} + +trap cleanup EXIT INT TERM + +# Enable loopback mode +echo "Enabling loopback on ${NETIF}..." +ethtool -K "${NETIF}" loopback on || { + echo "Failed to enable loopback mode" + exit 1 +} +# The interface may need time to get carrier back, but selftests +# will wait for carrier, so no need to wait / sleep here. + +# Use timer on host to trigger the network stack +# Also disable device interrupt to not depend on NIC interrupt +# Reduce test flakiness caused by unexpected interrupts +echo 100000 >"${FLUSH_PATH}" +echo 50 >"${IRQ_PATH}" + +# Create server namespace with macvlan +ip netns add "${SERVER_NS}" +ip link add link "${NETIF}" dev server address "${SERVER_MAC}" type macvlan +ip link set dev server netns "${SERVER_NS}" +ip -netns "${SERVER_NS}" link set dev server up +ip -netns "${SERVER_NS}" addr add $LOCAL_V6/64 dev server +ip -netns "${SERVER_NS}" link set dev lo up + +# Create client namespace with macvlan +ip netns add "${CLIENT_NS}" +ip link add link "${NETIF}" dev client address "${CLIENT_MAC}" type macvlan +ip link set dev client netns "${CLIENT_NS}" +ip -netns "${CLIENT_NS}" link set dev client up +ip -netns "${CLIENT_NS}" addr add $REMOTE_V6/64 dev client +ip -netns "${CLIENT_NS}" link set dev lo up + +echo "Setup complete!" +echo " Device: ${NETIF}" +echo " Server NS: ${SERVER_NS}" +echo " Client NS: ${CLIENT_NS}" +echo "" + +# Setup environment variables for tests +export NETIF=server +export REMOTE_TYPE=netns +export REMOTE_ARGS="${CLIENT_NS}" + +# Run the command +ip netns exec "${SERVER_NS}" "$@" diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py index 54d8f5eba810..7c81d86a7e97 100644 --- a/tools/testing/selftests/net/lib/py/__init__.py +++ b/tools/testing/selftests/net/lib/py/__init__.py @@ -1,9 +1,37 @@ # SPDX-License-Identifier: GPL-2.0 +""" +Python selftest helpers for netdev. +""" + from .consts import KSRC -from .ksft import * -from .netns import NetNS -from .nsim import * -from .utils import * -from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily -from .ynl import NetshaperFamily +from .ksft import KsftFailEx, KsftSkipEx, KsftXfailEx, ksft_pr, ksft_eq, \ + ksft_ne, ksft_true, ksft_not_none, ksft_in, ksft_not_in, ksft_is, \ + ksft_ge, ksft_gt, ksft_lt, ksft_raises, ksft_busy_wait, \ + ktap_result, ksft_disruptive, ksft_setup, ksft_run, ksft_exit, \ + ksft_variants, KsftNamedVariant +from .netns import NetNS, NetNSEnter +from .nsim import NetdevSim, NetdevSimDev +from .utils import CmdExitFailure, fd_read_timeout, cmd, bkg, defer, \ + bpftool, ip, ethtool, bpftrace, rand_port, rand_ports, wait_port_listen, \ + wait_file, tool +from .bpf import bpf_map_set, bpf_map_dump, bpf_prog_map_ids +from .ynl import NlError, NlctrlFamily, YnlFamily, \ + EthtoolFamily, NetdevFamily, RtnlFamily, RtnlAddrFamily +from .ynl import NetshaperFamily, DevlinkFamily, PSPFamily, Netlink + +__all__ = ["KSRC", + "KsftFailEx", "KsftSkipEx", "KsftXfailEx", "ksft_pr", "ksft_eq", + "ksft_ne", "ksft_true", "ksft_not_none", "ksft_in", "ksft_not_in", + "ksft_is", "ksft_ge", "ksft_gt", "ksft_lt", "ksft_raises", + "ksft_busy_wait", "ktap_result", "ksft_disruptive", "ksft_setup", + "ksft_run", "ksft_exit", "ksft_variants", "KsftNamedVariant", + "NetNS", "NetNSEnter", + "CmdExitFailure", "fd_read_timeout", "cmd", "bkg", "defer", + "bpftool", "ip", "ethtool", "bpftrace", "rand_port", "rand_ports", + "wait_port_listen", "wait_file", "tool", + "bpf_map_set", "bpf_map_dump", "bpf_prog_map_ids", + "NetdevSim", "NetdevSimDev", + "NetshaperFamily", "DevlinkFamily", "PSPFamily", "NlError", + "YnlFamily", "EthtoolFamily", "NetdevFamily", "RtnlFamily", + "NlctrlFamily", "RtnlAddrFamily", "Netlink"] diff --git a/tools/testing/selftests/net/lib/py/bpf.py b/tools/testing/selftests/net/lib/py/bpf.py new file mode 100644 index 000000000000..beb6bf2896a8 --- /dev/null +++ b/tools/testing/selftests/net/lib/py/bpf.py @@ -0,0 +1,68 @@ +# SPDX-License-Identifier: GPL-2.0 + +""" +BPF helper utilities for kernel selftests. + +Provides common operations for interacting with BPF maps and programs +via bpftool, used by XDP and other BPF-based test files. +""" + +from .utils import bpftool + +def _format_hex_bytes(value): + """ + Helper function that converts an integer into a formatted hexadecimal byte string. + + Args: + value: An integer representing the number to be converted. + + Returns: + A string representing hexadecimal equivalent of value, with bytes separated by spaces. + """ + hex_str = value.to_bytes(4, byteorder='little', signed=True) + return ' '.join(f'{byte:02x}' for byte in hex_str) + + +def bpf_map_set(map_name, key, value): + """ + Updates an XDP map with a given key-value pair using bpftool. + + Args: + map_name: The name of the XDP map to update. + key: The key to update in the map, formatted as a hexadecimal string. + value: The value to associate with the key, formatted as a hexadecimal string. + """ + key_formatted = _format_hex_bytes(key) + value_formatted = _format_hex_bytes(value) + bpftool( + f"map update name {map_name} key hex {key_formatted} value hex {value_formatted}" + ) + +def bpf_map_dump(map_id): + """Dump all entries of a BPF array map. + + Args: + map_id: Numeric map ID (as returned by bpftool prog show). + + Returns: + A dict mapping formatted key (int) to formatted value (int). + """ + raw = bpftool(f"map dump id {map_id}", json=True) + return {e["formatted"]["key"]: e["formatted"]["value"] for e in raw} + + +def bpf_prog_map_ids(prog_id): + """Get the map name-to-ID mapping for a loaded BPF program. + + Args: + prog_id: Numeric program ID. + + Returns: + A dict mapping map name (str) to map ID (int). + """ + map_ids = bpftool(f"prog show id {prog_id}", json=True)["map_ids"] + maps = {} + for mid in map_ids: + name = bpftool(f"map show id {mid}", json=True)["name"] + maps[name] = mid + return maps diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py index 3efe005436cd..81287c2daff0 100644 --- a/tools/testing/selftests/net/lib/py/ksft.py +++ b/tools/testing/selftests/net/lib/py/ksft.py @@ -1,13 +1,17 @@ # SPDX-License-Identifier: GPL-2.0 -import builtins +import fnmatch import functools +import getopt import inspect +import os +import signal import sys import time import traceback +from collections import namedtuple from .consts import KSFT_MAIN_NAME -from .utils import global_defer_queue +from . import utils KSFT_RESULT = None KSFT_RESULT_ALL = True @@ -26,8 +30,67 @@ class KsftXfailEx(Exception): pass +class KsftTerminate(KeyboardInterrupt): + pass + + +class _KsftArgs: + def __init__(self): + self.list_tests = False + self.filters = [] + + try: + opts, _ = getopt.getopt(sys.argv[1:], 'hlt:T:') + except getopt.GetoptError as e: + print(e, file=sys.stderr) + sys.exit(1) + + for opt, val in opts: + if opt == '-h': + print(f"Usage: {sys.argv[0]} [-h|-l] [-t|-T name]\n" + f"\t-h print help\n" + f"\t-l list tests (filtered, if filters were specified)\n" + f"\t-t name include test\n" + f"\t-T name exclude test", + file=sys.stderr) + sys.exit(0) + elif opt == '-l': + self.list_tests = True + elif opt == '-t': + self.filters.append((True, val)) + elif opt == '-T': + self.filters.append((False, val)) + + +@functools.lru_cache() +def _ksft_supports_color(): + if os.environ.get("NO_COLOR") is not None: + return False + if not hasattr(sys.stdout, "isatty") or not sys.stdout.isatty(): + return False + if os.environ.get("TERM") == "dumb": + return False + return True + + def ksft_pr(*objs, **kwargs): - print("#", *objs, **kwargs) + """ + Print logs to stdout. + + Behaves like print() but log lines will be prefixed + with # to prevent breaking the TAP output formatting. + + Extra arguments (on top of what print() supports): + line_pfx - add extra string before each line + """ + sep = kwargs.pop("sep", " ") + pfx = kwargs.pop("line_pfx", "") + pfx = "#" + (" " + pfx if pfx else "") + kwargs["flush"] = True + + text = sep.join(str(obj) for obj in objs) + prefixed = f"\n{pfx} ".join(text.split('\n')) + print(pfx, prefixed, **kwargs) def _fail(*args): @@ -66,11 +129,21 @@ def ksft_true(a, comment=""): _fail("Check failed", a, "does not eval to True", comment) +def ksft_not_none(a, comment=""): + if a is None: + _fail("Check failed", a, "is None", comment) + + def ksft_in(a, b, comment=""): if a not in b: _fail("Check failed", a, "not in", b, comment) +def ksft_not_in(a, b, comment=""): + if a in b: + _fail("Check failed", a, "in", b, comment) + + def ksft_is(a, b, comment=""): if a is not b: _fail("Check failed", a, "is not", b, comment) @@ -81,6 +154,11 @@ def ksft_ge(a, b, comment=""): _fail("Check failed", a, "<", b, comment) +def ksft_gt(a, b, comment=""): + if a <= b: + _fail("Check failed", a, "<=", b, comment) + + def ksft_lt(a, b, comment=""): if a >= b: _fail("Check failed", a, ">=", b, comment) @@ -115,7 +193,7 @@ def ksft_busy_wait(cond, sleep=0.005, deadline=1, comment=""): time.sleep(sleep) -def ktap_result(ok, cnt=1, case="", comment=""): +def ktap_result(ok, cnt=1, case_name="", comment=""): global KSFT_RESULT_ALL KSFT_RESULT_ALL = KSFT_RESULT_ALL and ok @@ -125,31 +203,46 @@ def ktap_result(ok, cnt=1, case="", comment=""): res += "ok " res += str(cnt) + " " res += KSFT_MAIN_NAME - if case: - res += "." + str(case.__name__) + if case_name: + res += "." + case_name if comment: res += " # " + comment - print(res) + if _ksft_supports_color(): + if comment.startswith(("SKIP", "XFAIL")): + color = "\033[33m" + elif ok: + color = "\033[32m" + else: + color = "\033[31m" + res = color + res + "\033[0m" + print(res, flush=True) + + +def _ksft_defer_arm(state): + """ Allow or disallow the use of defer() """ + utils.GLOBAL_DEFER_ARMED = state def ksft_flush_defer(): global KSFT_RESULT i = 0 - qlen_start = len(global_defer_queue) - while global_defer_queue: + qlen_start = len(utils.GLOBAL_DEFER_QUEUE) + while utils.GLOBAL_DEFER_QUEUE: i += 1 - entry = global_defer_queue.pop() + entry = utils.GLOBAL_DEFER_QUEUE.pop() try: entry.exec_only() - except: + except Exception: ksft_pr(f"Exception while handling defer / cleanup (callback {i} of {qlen_start})!") - tb = traceback.format_exc() - for line in tb.strip().split('\n'): - ksft_pr("Defer Exception|", line) + ksft_pr(traceback.format_exc(), line_pfx="Defer Exception|") KSFT_RESULT = False +KsftCaseFunction = namedtuple("KsftCaseFunction", + ['name', 'original_func', 'variants']) + + def ksft_disruptive(func): """ Decorator that marks the test as disruptive (e.g. the test @@ -160,11 +253,47 @@ def ksft_disruptive(func): @functools.wraps(func) def wrapper(*args, **kwargs): if not KSFT_DISRUPTIVE: - raise KsftSkipEx(f"marked as disruptive") + raise KsftSkipEx("marked as disruptive") return func(*args, **kwargs) return wrapper +class KsftNamedVariant: + """ Named string name + argument list tuple for @ksft_variants """ + + def __init__(self, name, *params): + self.params = params + self.name = name or "_".join([str(x) for x in self.params]) + + +def ksft_variants(params): + """ + Decorator defining the sets of inputs for a test. + The parameters will be included in the name of the resulting sub-case. + Parameters can be either single object, tuple or a KsftNamedVariant. + The argument can be a list or a generator. + + Example: + + @ksft_variants([ + (1, "a"), + (2, "b"), + KsftNamedVariant("three", 3, "c"), + ]) + def my_case(cfg, a, b): + pass # ... + + ksft_run(cases=[my_case], args=(cfg, )) + + Will generate cases: + my_case.1_a + my_case.2_b + my_case.three + """ + + return lambda func: KsftCaseFunction(func.__name__, func, params) + + def ksft_setup(env): """ Setup test framework global state from the environment. @@ -178,7 +307,7 @@ def ksft_setup(env): return False try: return bool(int(value)) - except: + except Exception: raise Exception(f"failed to parse {name}") if "DISRUPTIVE" in env: @@ -188,9 +317,42 @@ def ksft_setup(env): return env -def ksft_run(cases=None, globs=None, case_pfx=None, args=()): +def _ksft_intr(signum, frame): + # ksft runner.sh sends 2 SIGTERMs in a row on a timeout + # if we don't ignore the second one it will stop us from handling cleanup + global term_cnt + term_cnt += 1 + if term_cnt == 1: + raise KsftTerminate() + else: + ksft_pr(f"Ignoring SIGTERM (cnt: {term_cnt}), already exiting...") + + +def _ksft_name_matches(name, pattern): + if '*' in pattern or '?' in pattern or '[' in pattern: + return fnmatch.fnmatchcase(name, pattern) + return name == pattern + + +def _ksft_test_enabled(name, filters): + has_positive = False + for include, pattern in filters: + has_positive |= include + if _ksft_name_matches(name, pattern): + return include + return not has_positive + + +def _ksft_generate_test_cases(cases, globs, case_pfx, args, cli_args): + """Generate a filtered list of (func, args, name) tuples. + + If -l is given, prints matching test names and exits. + """ + cases = cases or [] + test_cases = [] + # If using the globs method find all relevant functions if globs and case_pfx: for key, value in globs.items(): if not callable(value): @@ -200,22 +362,62 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()): cases.append(value) break - totals = {"pass": 0, "fail": 0, "skip": 0, "xfail": 0} + for func in cases: + if isinstance(func, KsftCaseFunction): + # Parametrized test - create case for each param + for param in func.variants: + if not isinstance(param, KsftNamedVariant): + if not isinstance(param, tuple): + param = (param, ) + param = KsftNamedVariant(None, *param) + + test_cases.append((func.original_func, + (*args, *param.params), + func.name + "." + param.name)) + else: + test_cases.append((func, args, func.__name__)) - print("KTAP version 1") - print("1.." + str(len(cases))) + if cli_args.filters: + test_cases = [tc for tc in test_cases + if _ksft_test_enabled(tc[2], cli_args.filters)] + + if cli_args.list_tests: + for _, _, name in test_cases: + print(name) + sys.exit(0) + + return test_cases + + +def ksft_run(cases=None, globs=None, case_pfx=None, args=()): + cli_args = _KsftArgs() + test_cases = _ksft_generate_test_cases(cases, globs, case_pfx, args, + cli_args) + + global term_cnt + term_cnt = 0 + prev_sigterm = signal.signal(signal.SIGTERM, _ksft_intr) + + totals = {"pass": 0, "fail": 0, "skip": 0, "xfail": 0} global KSFT_RESULT + if KSFT_RESULT is not None: + raise RuntimeError("ksft_run() can't be called multiple times.") + + print("TAP version 13", flush=True) + print("1.." + str(len(test_cases)), flush=True) + cnt = 0 stop = False - for case in cases: + for func, args, name in test_cases: KSFT_RESULT = True cnt += 1 comment = "" cnt_key = "" + _ksft_defer_arm(True) try: - case(*args) + func(*args) except KsftSkipEx as e: comment = "SKIP " + str(e) cnt_key = 'skip' @@ -224,25 +426,38 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()): cnt_key = 'xfail' except BaseException as e: stop |= isinstance(e, KeyboardInterrupt) - tb = traceback.format_exc() - for line in tb.strip().split('\n'): - ksft_pr("Exception|", line) + ksft_pr(traceback.format_exc(), line_pfx="Exception|") if stop: - ksft_pr("Stopping tests due to KeyboardInterrupt.") + ksft_pr(f"Stopping tests due to {type(e).__name__}.") KSFT_RESULT = False cnt_key = 'fail' + _ksft_defer_arm(False) - ksft_flush_defer() + try: + ksft_flush_defer() + except BaseException as e: + ksft_pr(traceback.format_exc(), line_pfx="Exception|") + if isinstance(e, KeyboardInterrupt): + ksft_pr() + ksft_pr("WARN: defer() interrupted, cleanup may be incomplete.") + ksft_pr(" Attempting to finish cleanup before exiting.") + ksft_pr(" Interrupt again to exit immediately.") + ksft_pr() + stop = True + # Flush was interrupted, try to finish the job best we can + ksft_flush_defer() if not cnt_key: cnt_key = 'pass' if KSFT_RESULT else 'fail' - ktap_result(KSFT_RESULT, cnt, case, comment=comment) + ktap_result(KSFT_RESULT, cnt, name, comment=comment) totals[cnt_key] += 1 if stop: break + signal.signal(signal.SIGTERM, prev_sigterm) + print( f"# Totals: pass:{totals['pass']} fail:{totals['fail']} xfail:{totals['xfail']} xpass:0 skip:{totals['skip']} error:0" ) diff --git a/tools/testing/selftests/net/lib/py/netns.py b/tools/testing/selftests/net/lib/py/netns.py index ecff85f9074f..8e9317044eef 100644 --- a/tools/testing/selftests/net/lib/py/netns.py +++ b/tools/testing/selftests/net/lib/py/netns.py @@ -1,9 +1,12 @@ # SPDX-License-Identifier: GPL-2.0 from .utils import ip +import ctypes import random import string +libc = ctypes.cdll.LoadLibrary('libc.so.6') + class NetNS: def __init__(self, name=None): @@ -29,3 +32,18 @@ class NetNS: def __repr__(self): return f"NetNS({self.name})" + + +class NetNSEnter: + def __init__(self, ns_name): + self.ns_path = f"/run/netns/{ns_name}" + + def __enter__(self): + self.saved = open("/proc/thread-self/ns/net") + with open(self.ns_path) as ns_file: + libc.setns(ns_file.fileno(), 0) + return self + + def __exit__(self, exc_type, exc_value, traceback): + libc.setns(self.saved.fileno(), 0) + self.saved.close() diff --git a/tools/testing/selftests/net/lib/py/nsim.py b/tools/testing/selftests/net/lib/py/nsim.py index 1a8cbe9acc48..7c640ed64c0b 100644 --- a/tools/testing/selftests/net/lib/py/nsim.py +++ b/tools/testing/selftests/net/lib/py/nsim.py @@ -27,7 +27,7 @@ class NetdevSim: self.port_index = port_index self.ns = ns self.dfs_dir = "%s/ports/%u/" % (nsimdev.dfs_dir, port_index) - ret = ip("-j link show dev %s" % ifname, ns=ns) + ret = ip("-d -j link show dev %s" % ifname, ns=ns) self.dev = json.loads(ret.stdout)[0] self.ifindex = self.dev["ifindex"] diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py index 9e3bcddcf3e8..6c44a3d2bbf7 100644 --- a/tools/testing/selftests/net/lib/py/utils.py +++ b/tools/testing/selftests/net/lib/py/utils.py @@ -1,80 +1,193 @@ # SPDX-License-Identifier: GPL-2.0 -import errno import json as _json -import random +import os import re +import select import socket import subprocess import time +class CmdInitFailure(Exception): + """ Command failed to start. Only raised by bkg(). """ + def __init__(self, msg, cmd_obj): + super().__init__(msg + "\n" + repr(cmd_obj)) + self.cmd = cmd_obj + + class CmdExitFailure(Exception): + """ Command failed (returned non-zero exit code). """ def __init__(self, msg, cmd_obj): - super().__init__(msg) + super().__init__(msg + "\n" + repr(cmd_obj)) self.cmd = cmd_obj +def fd_read_timeout(fd, timeout): + rlist, _, _ = select.select([fd], [], [], timeout) + if rlist: + return os.read(fd, 1024) + raise TimeoutError("Timeout waiting for fd read") + + class cmd: - def __init__(self, comm, shell=True, fail=True, ns=None, background=False, host=None, timeout=5): + """ + Execute a command on local or remote host. + + @shell defaults to false, and class will try to split @comm into a list + if it's a string with spaces. + + Use bkg() instead to run a command in the background. + """ + def __init__(self, comm, shell=None, fail=True, ns=None, background=False, + host=None, timeout=5, ksft_ready=None, ksft_wait=None): if ns: comm = f'ip netns exec {ns} ' + comm self.stdout = None self.stderr = None self.ret = None + self.ksft_term_fd = None + self.host = host self.comm = comm + if host: self.proc = host.cmd(comm) else: + # If user doesn't explicitly request shell try to avoid it. + if shell is None and isinstance(comm, str) and ' ' in comm: + comm = comm.split() + + # ksft_wait lets us wait for the background process to fully start, + # we pass an FD to the child process, and wait for it to write back. + # Similarly term_fd tells child it's time to exit. + pass_fds = [] + env = os.environ.copy() + if ksft_wait is not None: + wait_fd, self.ksft_term_fd = os.pipe() + pass_fds.append(wait_fd) + env["KSFT_WAIT_FD"] = str(wait_fd) + ksft_ready = True # ksft_wait implies ready + if ksft_ready is not None: + rfd, ready_fd = os.pipe() + pass_fds.append(ready_fd) + env["KSFT_READY_FD"] = str(ready_fd) + self.proc = subprocess.Popen(comm, shell=shell, stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + stderr=subprocess.PIPE, pass_fds=pass_fds, + env=env) + if ksft_wait is not None: + os.close(wait_fd) + if ksft_ready is not None: + os.close(ready_fd) + msg = fd_read_timeout(rfd, ksft_wait) + os.close(rfd) + if not msg: + terminate = self.proc.poll() is None + self._process_terminate(terminate=terminate, timeout=1) + raise CmdInitFailure("Did not receive ready message", self) if not background: self.process(terminate=False, fail=fail, timeout=timeout) - def process(self, terminate=True, fail=None, timeout=5): - if fail is None: - fail = not terminate - + def _process_terminate(self, terminate, timeout): if terminate: self.proc.terminate() - stdout, stderr = self.proc.communicate(timeout) + stdout, stderr = self.proc.communicate(timeout=timeout) self.stdout = stdout.decode("utf-8") self.stderr = stderr.decode("utf-8") self.proc.stdout.close() self.proc.stderr.close() self.ret = self.proc.returncode + return stdout, stderr + + def process(self, terminate=True, fail=None, timeout=5): + if fail is None: + fail = not terminate + + if self.ksft_term_fd: + os.write(self.ksft_term_fd, b"1") + + stdout, stderr = self._process_terminate(terminate=terminate, + timeout=timeout) if self.proc.returncode != 0 and fail: if len(stderr) > 0 and stderr[-1] == "\n": stderr = stderr[:-1] - raise CmdExitFailure("Command failed: %s\nSTDOUT: %s\nSTDERR: %s" % - (self.proc.args, stdout, stderr), self) + raise CmdExitFailure("Command failed", self) + + def __repr__(self): + def str_fmt(name, s): + name += ': ' + return (name + s.strip().replace('\n', '\n' + ' ' * len(name))) + + ret = "CMD" + if self.host: + ret += "[remote]" + if self.ret is None: + ret += f" (unterminated): {self.comm}\n" + elif self.ret == 0: + ret += f" (success): {self.comm}\n" + else: + ret += f": {self.comm}\n" + ret += f" EXIT: {self.ret}\n" + if self.stdout: + ret += str_fmt(" STDOUT", self.stdout) + "\n" + if self.stderr: + ret += str_fmt(" STDERR", self.stderr) + "\n" + return ret.strip() class bkg(cmd): - def __init__(self, comm, shell=True, fail=None, ns=None, host=None, - exit_wait=False): + """ + Run a command in the background. + + Examples usage: + + Run a command on remote host, and wait for it to finish. + This is usually paired with wait_port_listen() to make sure + the command has initialized: + + with bkg("socat ...", exit_wait=True, host=cfg.remote) as nc: + ... + + Run a command and expect it to let us know that it's ready + by writing to a special file descriptor passed via KSFT_READY_FD. + Command will be terminated when we exit the context manager: + + with bkg("my_binary", ksft_wait=5): + """ + def __init__(self, comm, shell=None, fail=None, ns=None, host=None, + exit_wait=False, ksft_ready=None, ksft_wait=None): super().__init__(comm, background=True, - shell=shell, fail=fail, ns=ns, host=host) - self.terminate = not exit_wait + shell=shell, fail=fail, ns=ns, host=host, + ksft_ready=ksft_ready, ksft_wait=ksft_wait) + self.terminate = not exit_wait and not ksft_wait + self._exit_wait = exit_wait self.check_fail = fail + if shell and self.terminate: + print("# Warning: combining shell and terminate is risky!") + print("# SIGTERM may not reach the child on zsh/ksh!") + def __enter__(self): return self def __exit__(self, ex_type, ex_value, ex_tb): - return self.process(terminate=self.terminate, fail=self.check_fail) + terminate = self.terminate + # Force termination on exception, but only if bkg() didn't already exit + # since forcing termination silences failures with fail=None + if self.proc.poll() is None: + terminate = terminate or (self._exit_wait and ex_type is not None) + return self.process(terminate=terminate, fail=self.check_fail) -global_defer_queue = [] +GLOBAL_DEFER_QUEUE = [] +GLOBAL_DEFER_ARMED = False class defer: def __init__(self, func, *args, **kwargs): - global global_defer_queue - if not callable(func): raise Exception("defer created with un-callable object, did you call the function instead of passing its name?") @@ -82,7 +195,9 @@ class defer: self.args = args self.kwargs = kwargs - self._queue = global_defer_queue + if not GLOBAL_DEFER_ARMED: + raise Exception("defer queue not armed, did you use defer() outside of a test case?") + self._queue = GLOBAL_DEFER_QUEUE self._queue.append(self) def __enter__(self): @@ -113,6 +228,10 @@ def tool(name, args, json=None, ns=None, host=None): return cmd_obj +def bpftool(args, json=None, ns=None, host=None): + return tool('bpftool', args, json=json, ns=ns, host=host) + + def ip(args, json=None, ns=None, host=None): if ns: args = f'-netns {ns} ' + args @@ -123,20 +242,67 @@ def ethtool(args, json=None, ns=None, host=None): return tool('ethtool', args, json=json, ns=ns, host=host) -def rand_port(): +def bpftrace(expr, json=None, ns=None, host=None, timeout=None): + """ + Run bpftrace and return map data (if json=True). + The output of bpftrace is inconvenient, so the helper converts + to a dict indexed by map name, e.g.: + { + "@": { ... }, + "@map2": { ... }, + } + """ + cmd_arr = ['bpftrace'] + # Throw in --quiet if json, otherwise the output has two objects + if json: + cmd_arr += ['-f', 'json', '-q'] + if timeout: + expr += ' interval:s:' + str(timeout) + ' { exit(); }' + timeout += 20 + cmd_arr += ['-e', expr] + cmd_obj = cmd(cmd_arr, ns=ns, host=host, shell=False, timeout=timeout) + if json: + # bpftrace prints objects as lines + ret = {} + for l in cmd_obj.stdout.split('\n'): + if not l.strip(): + continue + one = _json.loads(l) + if one.get('type') != 'map': + continue + for k, v in one["data"].items(): + if k.startswith('@'): + k = k.lstrip('@') + ret[k] = v + return ret + return cmd_obj + + +def rand_port(stype=socket.SOCK_STREAM): + """ + Get a random unprivileged port. """ - Get a random unprivileged port, try to make sure it's not already used. + return rand_ports(1, stype)[0] + + +def rand_ports(count, stype=socket.SOCK_STREAM): """ - for _ in range(1000): - port = random.randint(10000, 65535) - try: - with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: - s.bind(("", port)) - return port - except OSError as e: - if e.errno != errno.EADDRINUSE: - raise - raise Exception("Can't find any free unprivileged port") + Get a unique set of random unprivileged ports. + """ + sockets = [] + ports = [] + + try: + for _ in range(count): + s = socket.socket(socket.AF_INET6, stype) + sockets.append(s) + s.bind(("", 0)) + ports.append(s.getsockname()[1]) + finally: + for s in sockets: + s.close() + + return ports def wait_port_listen(port, proto="tcp", ns=None, host=None, sleep=0.005, deadline=5): @@ -155,3 +321,21 @@ def wait_port_listen(port, proto="tcp", ns=None, host=None, sleep=0.005, deadlin if time.monotonic() > end: raise Exception("Waiting for port listen timed out") time.sleep(sleep) + + +def wait_file(fname, test_fn, sleep=0.005, deadline=5, encoding='utf-8'): + """ + Wait for file contents on the local system to satisfy a condition. + test_fn() should take one argument (file contents) and return whether + condition is met. + """ + end = time.monotonic() + deadline + + with open(fname, "r", encoding=encoding) as fp: + while True: + if test_fn(fp.read()): + break + fp.seek(0) + if time.monotonic() > end: + raise TimeoutError("Wait for file contents failed", fname) + time.sleep(sleep) diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py index ad1e36baee2a..2e567062aa6c 100644 --- a/tools/testing/selftests/net/lib/py/ynl.py +++ b/tools/testing/selftests/net/lib/py/ynl.py @@ -13,20 +13,27 @@ try: SPEC_PATH = KSFT_DIR / "net/lib/specs" sys.path.append(tools_full_path.as_posix()) - from net.lib.ynl.pyynl.lib import YnlFamily, NlError + from net.lib.ynl.pyynl.lib import YnlFamily, NlError, NlPolicy, Netlink else: # Running in tree tools_full_path = KSRC / "tools" SPEC_PATH = KSRC / "Documentation/netlink/specs" sys.path.append(tools_full_path.as_posix()) - from net.ynl.pyynl.lib import YnlFamily, NlError + from net.ynl.pyynl.lib import YnlFamily, NlError, NlPolicy, Netlink except ModuleNotFoundError as e: ksft_pr("Failed importing `ynl` library from kernel sources") ksft_pr(str(e)) ktap_result(True, comment="SKIP") sys.exit(4) +__all__ = [ + "NlError", "NlPolicy", "Netlink", "YnlFamily", "SPEC_PATH", + "EthtoolFamily", "RtnlFamily", "RtnlAddrFamily", + "NetdevFamily", "NetshaperFamily", "NlctrlFamily", "DevlinkFamily", + "PSPFamily", +] + # # Wrapper classes, loading the right specs # Set schema='' to avoid jsonschema validation, it's slow @@ -39,9 +46,13 @@ class EthtoolFamily(YnlFamily): class RtnlFamily(YnlFamily): def __init__(self, recv_size=0): - super().__init__((SPEC_PATH / Path('rt_link.yaml')).as_posix(), + super().__init__((SPEC_PATH / Path('rt-link.yaml')).as_posix(), schema='', recv_size=recv_size) +class RtnlAddrFamily(YnlFamily): + def __init__(self, recv_size=0): + super().__init__((SPEC_PATH / Path('rt-addr.yaml')).as_posix(), + schema='', recv_size=recv_size) class NetdevFamily(YnlFamily): def __init__(self, recv_size=0): @@ -52,3 +63,20 @@ class NetshaperFamily(YnlFamily): def __init__(self, recv_size=0): super().__init__((SPEC_PATH / Path('net_shaper.yaml')).as_posix(), schema='', recv_size=recv_size) + + +class NlctrlFamily(YnlFamily): + def __init__(self, recv_size=0): + super().__init__((SPEC_PATH / Path('nlctrl.yaml')).as_posix(), + schema='', recv_size=recv_size) + + +class DevlinkFamily(YnlFamily): + def __init__(self, recv_size=0): + super().__init__((SPEC_PATH / Path('devlink.yaml')).as_posix(), + schema='', recv_size=recv_size) + +class PSPFamily(YnlFamily): + def __init__(self, recv_size=0): + super().__init__((SPEC_PATH / Path('psp.yaml')).as_posix(), + schema='', recv_size=recv_size) diff --git a/tools/testing/selftests/net/lib/sh/defer.sh b/tools/testing/selftests/net/lib/sh/defer.sh index 082f5d38321b..47ab78c4d465 100644 --- a/tools/testing/selftests/net/lib/sh/defer.sh +++ b/tools/testing/selftests/net/lib/sh/defer.sh @@ -1,6 +1,10 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +# Whether to pause and allow debugging when an executed deferred command has a +# non-zero exit code. +: "${DEFER_PAUSE_ON_FAIL:=no}" + # map[(scope_id,track,cleanup_id) -> cleanup_command] # track={d=default | p=priority} declare -A __DEFER__JOBS @@ -38,8 +42,20 @@ __defer__run() local track=$1; shift local defer_ix=$1; shift local defer_key=$(__defer__defer_key $track $defer_ix) + local ret + + eval ${__DEFER__JOBS[$defer_key]} + ret=$? + + if [[ "$DEFER_PAUSE_ON_FAIL" == yes && "$ret" -ne 0 ]]; then + echo "Deferred command (track $track index $defer_ix):" + echo " ${__DEFER__JOBS[$defer_key]}" + echo "... ended with an exit status of $ret" + echo "Hit enter to continue, 'q' to quit" + read a + [[ "$a" == q ]] && exit 1 + fi - ${__DEFER__JOBS[$defer_key]} unset __DEFER__JOBS[$defer_key] } @@ -49,7 +65,7 @@ __defer__schedule() local ndefers=$(__defer__ndefers $track) local ndefers_key=$(__defer__ndefer_key $track) local defer_key=$(__defer__defer_key $track $ndefers) - local defer="$@" + local defer="${@@Q}" __DEFER__JOBS[$defer_key]="$defer" __DEFER__NJOBS[$ndefers_key]=$((ndefers + 1)) diff --git a/tools/testing/selftests/net/lib/xdp_dummy.bpf.c b/tools/testing/selftests/net/lib/xdp_dummy.bpf.c index d988b2e0cee8..e73fab3edd9f 100644 --- a/tools/testing/selftests/net/lib/xdp_dummy.bpf.c +++ b/tools/testing/selftests/net/lib/xdp_dummy.bpf.c @@ -10,4 +10,10 @@ int xdp_dummy_prog(struct xdp_md *ctx) return XDP_PASS; } +SEC("xdp.frags") +int xdp_dummy_prog_frags(struct xdp_md *ctx) +{ + return XDP_PASS; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/net/lib/xdp_helper.c b/tools/testing/selftests/net/lib/xdp_helper.c new file mode 100644 index 000000000000..eb025a9f35b1 --- /dev/null +++ b/tools/testing/selftests/net/lib/xdp_helper.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <linux/if_xdp.h> +#include <linux/if_link.h> +#include <net/if.h> +#include <inttypes.h> + +#include "ksft.h" + +#define UMEM_SZ (1U << 16) +#define NUM_DESC (UMEM_SZ / 2048) + + +static void print_usage(const char *bin) +{ + fprintf(stderr, "Usage: %s ifindex queue_id [-z]\n\n" + "where:\n\t-z: force zerocopy mode", bin); +} + +/* this is a simple helper program that creates an XDP socket and does the + * minimum necessary to get bind() to succeed. + * + * this test program is not intended to actually process packets, but could be + * extended in the future if that is actually needed. + * + * it is used by queues.py to ensure the xsk netlinux attribute is set + * correctly. + */ +int main(int argc, char **argv) +{ + struct xdp_umem_reg umem_reg = { 0 }; + struct sockaddr_xdp sxdp = { 0 }; + int num_desc = NUM_DESC; + void *umem_area; + int retry = 0; + int ifindex; + int sock_fd; + int queue; + + if (argc != 3 && argc != 4) { + print_usage(argv[0]); + return 1; + } + + sock_fd = socket(AF_XDP, SOCK_RAW, 0); + if (sock_fd < 0) { + perror("socket creation failed"); + /* if the kernel doesn't support AF_XDP, let the test program + * know with -1. All other error paths return 1. + */ + if (errno == EAFNOSUPPORT) + return -1; + return 1; + } + + /* "Probing mode", just checking if AF_XDP sockets are supported */ + if (!strcmp(argv[1], "-") && !strcmp(argv[2], "-")) { + printf("AF_XDP support detected\n"); + close(sock_fd); + return 0; + } + + ifindex = atoi(argv[1]); + queue = atoi(argv[2]); + + umem_area = mmap(NULL, UMEM_SZ, PROT_READ | PROT_WRITE, MAP_PRIVATE | + MAP_ANONYMOUS, -1, 0); + if (umem_area == MAP_FAILED) { + perror("mmap failed"); + return 1; + } + + umem_reg.addr = (uintptr_t)umem_area; + umem_reg.len = UMEM_SZ; + umem_reg.chunk_size = 2048; + umem_reg.headroom = 0; + + setsockopt(sock_fd, SOL_XDP, XDP_UMEM_REG, &umem_reg, + sizeof(umem_reg)); + setsockopt(sock_fd, SOL_XDP, XDP_UMEM_FILL_RING, &num_desc, + sizeof(num_desc)); + setsockopt(sock_fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &num_desc, + sizeof(num_desc)); + setsockopt(sock_fd, SOL_XDP, XDP_RX_RING, &num_desc, sizeof(num_desc)); + + sxdp.sxdp_family = AF_XDP; + sxdp.sxdp_ifindex = ifindex; + sxdp.sxdp_queue_id = queue; + sxdp.sxdp_flags = 0; + + if (argc > 3) { + if (!strcmp(argv[3], "-z")) { + sxdp.sxdp_flags = XDP_ZEROCOPY; + } else { + print_usage(argv[0]); + return 1; + } + } + + while (1) { + if (bind(sock_fd, (struct sockaddr *)&sxdp, sizeof(sxdp)) == 0) + break; + + if (errno == EBUSY && retry < 3) { + retry++; + sleep(1); + continue; + } else { + perror("bind failed"); + munmap(umem_area, UMEM_SZ); + close(sock_fd); + return 1; + } + } + + ksft_ready(); + ksft_wait(); + + /* parent program will write a byte to stdin when its ready for this + * helper to exit + */ + + close(sock_fd); + return 0; +} diff --git a/tools/testing/selftests/net/lib/xdp_metadata.bpf.c b/tools/testing/selftests/net/lib/xdp_metadata.bpf.c new file mode 100644 index 000000000000..f71f59215239 --- /dev/null +++ b/tools/testing/selftests/net/lib/xdp_metadata.bpf.c @@ -0,0 +1,163 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stddef.h> +#include <linux/bpf.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/udp.h> +#include <linux/tcp.h> +#include <bpf/bpf_endian.h> +#include <bpf/bpf_helpers.h> + +enum { + XDP_PORT = 1, + XDP_PROTO = 4, +} xdp_map_setup_keys; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 5); + __type(key, __u32); + __type(value, __s32); +} map_xdp_setup SEC(".maps"); + +/* RSS hash results: key 0 = hash, key 1 = hash type, + * key 2 = packet count, key 3 = error count. + */ +enum { + RSS_KEY_HASH = 0, + RSS_KEY_TYPE = 1, + RSS_KEY_PKT_CNT = 2, + RSS_KEY_ERR_CNT = 3, +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, __u32); + __uint(max_entries, 4); +} map_rss SEC(".maps"); + +/* Mirror of enum xdp_rss_hash_type from include/net/xdp.h. + * Needed because the enum is not part of UAPI headers. + */ +enum xdp_rss_hash_type { + XDP_RSS_L3_IPV4 = 1U << 0, + XDP_RSS_L3_IPV6 = 1U << 1, + XDP_RSS_L3_DYNHDR = 1U << 2, + XDP_RSS_L4 = 1U << 3, + XDP_RSS_L4_TCP = 1U << 4, + XDP_RSS_L4_UDP = 1U << 5, + XDP_RSS_L4_SCTP = 1U << 6, + XDP_RSS_L4_IPSEC = 1U << 7, + XDP_RSS_L4_ICMP = 1U << 8, +}; + +extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash, + enum xdp_rss_hash_type *rss_type) __ksym; + +static __always_inline __u16 get_dest_port(void *l4, void *data_end, + __u8 protocol) +{ + if (protocol == IPPROTO_UDP) { + struct udphdr *udp = l4; + + if ((void *)(udp + 1) > data_end) + return 0; + return udp->dest; + } else if (protocol == IPPROTO_TCP) { + struct tcphdr *tcp = l4; + + if ((void *)(tcp + 1) > data_end) + return 0; + return tcp->dest; + } + + return 0; +} + +SEC("xdp") +int xdp_rss_hash(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + enum xdp_rss_hash_type rss_type = 0; + struct ethhdr *eth = data; + __u8 l4_proto = 0; + __u32 hash = 0; + __u32 key, val; + void *l4 = NULL; + __u32 *cnt; + int ret; + + if ((void *)(eth + 1) > data_end) + return XDP_PASS; + + if (eth->h_proto == bpf_htons(ETH_P_IP)) { + struct iphdr *iph = (void *)(eth + 1); + + if ((void *)(iph + 1) > data_end) + return XDP_PASS; + l4_proto = iph->protocol; + l4 = (void *)(iph + 1); + } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { + struct ipv6hdr *ip6h = (void *)(eth + 1); + + if ((void *)(ip6h + 1) > data_end) + return XDP_PASS; + l4_proto = ip6h->nexthdr; + l4 = (void *)(ip6h + 1); + } + + if (!l4) + return XDP_PASS; + + /* Filter on the configured protocol (map_xdp_setup key XDP_PROTO). + * When set, only process packets matching the requested L4 protocol. + */ + key = XDP_PROTO; + __s32 *proto_cfg = bpf_map_lookup_elem(&map_xdp_setup, &key); + + if (proto_cfg && *proto_cfg != 0 && l4_proto != (__u8)*proto_cfg) + return XDP_PASS; + + /* Filter on the configured port (map_xdp_setup key XDP_PORT). + * Only applies to protocols with ports (UDP, TCP). + */ + key = XDP_PORT; + __s32 *port_cfg = bpf_map_lookup_elem(&map_xdp_setup, &key); + + if (port_cfg && *port_cfg != 0) { + __u16 dest = get_dest_port(l4, data_end, l4_proto); + + if (!dest || bpf_ntohs(dest) != (__u16)*port_cfg) + return XDP_PASS; + } + + ret = bpf_xdp_metadata_rx_hash(ctx, &hash, &rss_type); + if (ret < 0) { + key = RSS_KEY_ERR_CNT; + cnt = bpf_map_lookup_elem(&map_rss, &key); + if (cnt) + __sync_fetch_and_add(cnt, 1); + return XDP_PASS; + } + + key = RSS_KEY_HASH; + bpf_map_update_elem(&map_rss, &key, &hash, BPF_ANY); + + key = RSS_KEY_TYPE; + val = (__u32)rss_type; + bpf_map_update_elem(&map_rss, &key, &val, BPF_ANY); + + key = RSS_KEY_PKT_CNT; + cnt = bpf_map_lookup_elem(&map_rss, &key); + if (cnt) + __sync_fetch_and_add(cnt, 1); + + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/net/lib/xdp_native.bpf.c b/tools/testing/selftests/net/lib/xdp_native.bpf.c new file mode 100644 index 000000000000..64f05229ab24 --- /dev/null +++ b/tools/testing/selftests/net/lib/xdp_native.bpf.c @@ -0,0 +1,680 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stddef.h> +#include <linux/bpf.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/udp.h> +#include <bpf/bpf_endian.h> +#include <bpf/bpf_helpers.h> + +#define MAX_ADJST_OFFSET 256 +#define MAX_PAYLOAD_LEN 5000 +#define MAX_HDR_LEN 64 + +extern int bpf_xdp_pull_data(struct xdp_md *xdp, __u32 len) __ksym __weak; + +enum { + XDP_MODE = 0, + XDP_PORT = 1, + XDP_ADJST_OFFSET = 2, + XDP_ADJST_TAG = 3, +} xdp_map_setup_keys; + +enum { + XDP_MODE_PASS = 0, + XDP_MODE_DROP = 1, + XDP_MODE_TX = 2, + XDP_MODE_TAIL_ADJST = 3, + XDP_MODE_HEAD_ADJST = 4, +} xdp_map_modes; + +enum { + STATS_RX = 0, + STATS_PASS = 1, + STATS_DROP = 2, + STATS_TX = 3, + STATS_ABORT = 4, +} xdp_stats; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 5); + __type(key, __u32); + __type(value, __s32); +} map_xdp_setup SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 5); + __type(key, __u32); + __type(value, __u64); +} map_xdp_stats SEC(".maps"); + +static __u32 min(__u32 a, __u32 b) +{ + return a < b ? a : b; +} + +static void record_stats(struct xdp_md *ctx, __u32 stat_type) +{ + __u64 *count; + + count = bpf_map_lookup_elem(&map_xdp_stats, &stat_type); + + if (count) + __sync_fetch_and_add(count, 1); +} + +static struct udphdr *filter_udphdr(struct xdp_md *ctx, __u16 port) +{ + struct udphdr *udph = NULL; + void *data, *data_end; + struct ethhdr *eth; + int err; + + err = bpf_xdp_pull_data(ctx, sizeof(*eth)); + if (err) + return NULL; + + data_end = (void *)(long)ctx->data_end; + data = eth = (void *)(long)ctx->data; + + if (data + sizeof(*eth) > data_end) + return NULL; + + if (eth->h_proto == bpf_htons(ETH_P_IP)) { + struct iphdr *iph; + + err = bpf_xdp_pull_data(ctx, sizeof(*eth) + sizeof(*iph) + + sizeof(*udph)); + if (err) + return NULL; + + data_end = (void *)(long)ctx->data_end; + data = (void *)(long)ctx->data; + + iph = data + sizeof(*eth); + + if (iph + 1 > (struct iphdr *)data_end || + iph->protocol != IPPROTO_UDP) + return NULL; + + udph = data + sizeof(*iph) + sizeof(*eth); + } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { + struct ipv6hdr *ipv6h; + + err = bpf_xdp_pull_data(ctx, sizeof(*eth) + sizeof(*ipv6h) + + sizeof(*udph)); + if (err) + return NULL; + + data_end = (void *)(long)ctx->data_end; + data = (void *)(long)ctx->data; + + ipv6h = data + sizeof(*eth); + + if (ipv6h + 1 > (struct ipv6hdr *)data_end || + ipv6h->nexthdr != IPPROTO_UDP) + return NULL; + + udph = data + sizeof(*ipv6h) + sizeof(*eth); + } else { + return NULL; + } + + if (udph + 1 > (struct udphdr *)data_end) + return NULL; + + if (udph->dest != bpf_htons(port)) + return NULL; + + record_stats(ctx, STATS_RX); + + return udph; +} + +static int xdp_mode_pass(struct xdp_md *ctx, __u16 port) +{ + struct udphdr *udph = NULL; + + udph = filter_udphdr(ctx, port); + if (!udph) + return XDP_PASS; + + record_stats(ctx, STATS_PASS); + + return XDP_PASS; +} + +static int xdp_mode_drop_handler(struct xdp_md *ctx, __u16 port) +{ + struct udphdr *udph = NULL; + + udph = filter_udphdr(ctx, port); + if (!udph) + return XDP_PASS; + + record_stats(ctx, STATS_DROP); + + return XDP_DROP; +} + +static void swap_machdr(void *data) +{ + struct ethhdr *eth = data; + __u8 tmp_mac[ETH_ALEN]; + + __builtin_memcpy(tmp_mac, eth->h_source, ETH_ALEN); + __builtin_memcpy(eth->h_source, eth->h_dest, ETH_ALEN); + __builtin_memcpy(eth->h_dest, tmp_mac, ETH_ALEN); +} + +static int xdp_mode_tx_handler(struct xdp_md *ctx, __u16 port) +{ + struct udphdr *udph = NULL; + void *data, *data_end; + struct ethhdr *eth; + int err; + + err = bpf_xdp_pull_data(ctx, sizeof(*eth)); + if (err) + return XDP_PASS; + + data_end = (void *)(long)ctx->data_end; + data = eth = (void *)(long)ctx->data; + + if (data + sizeof(*eth) > data_end) + return XDP_PASS; + + if (eth->h_proto == bpf_htons(ETH_P_IP)) { + struct iphdr *iph; + __be32 tmp_ip; + + err = bpf_xdp_pull_data(ctx, sizeof(*eth) + sizeof(*iph) + + sizeof(*udph)); + if (err) + return XDP_PASS; + + data_end = (void *)(long)ctx->data_end; + data = (void *)(long)ctx->data; + + iph = data + sizeof(*eth); + + if (iph + 1 > (struct iphdr *)data_end || + iph->protocol != IPPROTO_UDP) + return XDP_PASS; + + udph = data + sizeof(*iph) + sizeof(*eth); + + if (udph + 1 > (struct udphdr *)data_end) + return XDP_PASS; + if (udph->dest != bpf_htons(port)) + return XDP_PASS; + + record_stats(ctx, STATS_RX); + eth = data; + swap_machdr((void *)eth); + + tmp_ip = iph->saddr; + iph->saddr = iph->daddr; + iph->daddr = tmp_ip; + + record_stats(ctx, STATS_TX); + + return XDP_TX; + + } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { + struct in6_addr tmp_ipv6; + struct ipv6hdr *ipv6h; + + err = bpf_xdp_pull_data(ctx, sizeof(*eth) + sizeof(*ipv6h) + + sizeof(*udph)); + if (err) + return XDP_PASS; + + data_end = (void *)(long)ctx->data_end; + data = (void *)(long)ctx->data; + + ipv6h = data + sizeof(*eth); + + if (ipv6h + 1 > (struct ipv6hdr *)data_end || + ipv6h->nexthdr != IPPROTO_UDP) + return XDP_PASS; + + udph = data + sizeof(*ipv6h) + sizeof(*eth); + + if (udph + 1 > (struct udphdr *)data_end) + return XDP_PASS; + if (udph->dest != bpf_htons(port)) + return XDP_PASS; + + record_stats(ctx, STATS_RX); + eth = data; + swap_machdr((void *)eth); + + __builtin_memcpy(&tmp_ipv6, &ipv6h->saddr, sizeof(tmp_ipv6)); + __builtin_memcpy(&ipv6h->saddr, &ipv6h->daddr, + sizeof(tmp_ipv6)); + __builtin_memcpy(&ipv6h->daddr, &tmp_ipv6, sizeof(tmp_ipv6)); + + record_stats(ctx, STATS_TX); + + return XDP_TX; + } + + return XDP_PASS; +} + +static void *update_pkt(struct xdp_md *ctx, __s16 offset, __u32 *udp_csum) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct udphdr *udph = NULL; + struct ethhdr *eth = data; + __u32 len, len_new; + + if (data + sizeof(*eth) > data_end) + return NULL; + + if (eth->h_proto == bpf_htons(ETH_P_IP)) { + struct iphdr *iph = data + sizeof(*eth); + __u16 total_len; + + if (iph + 1 > (struct iphdr *)data_end) + return NULL; + + iph->tot_len = bpf_htons(bpf_ntohs(iph->tot_len) + offset); + + udph = (void *)eth + sizeof(*iph) + sizeof(*eth); + if (!udph || udph + 1 > (struct udphdr *)data_end) + return NULL; + + len_new = bpf_htons(bpf_ntohs(udph->len) + offset); + } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { + struct ipv6hdr *ipv6h = data + sizeof(*eth); + __u16 payload_len; + + if (ipv6h + 1 > (struct ipv6hdr *)data_end) + return NULL; + + udph = (void *)eth + sizeof(*ipv6h) + sizeof(*eth); + if (!udph || udph + 1 > (struct udphdr *)data_end) + return NULL; + + *udp_csum = ~((__u32)udph->check); + + len = ipv6h->payload_len; + len_new = bpf_htons(bpf_ntohs(len) + offset); + ipv6h->payload_len = len_new; + + *udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new, + sizeof(len_new), *udp_csum); + + len = udph->len; + len_new = bpf_htons(bpf_ntohs(udph->len) + offset); + *udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new, + sizeof(len_new), *udp_csum); + } else { + return NULL; + } + + udph->len = len_new; + + return udph; +} + +static __u16 csum_fold_helper(__u32 csum) +{ + return ~((csum & 0xffff) + (csum >> 16)) ? : 0xffff; +} + +static int xdp_adjst_tail_shrnk_data(struct xdp_md *ctx, __u16 offset, + unsigned long hdr_len) +{ + char tmp_buff[MAX_ADJST_OFFSET]; + __u32 buff_pos, udp_csum = 0; + struct udphdr *udph = NULL; + __u32 buff_len; + + udph = update_pkt(ctx, 0 - offset, &udp_csum); + if (!udph) + return -1; + + buff_len = bpf_xdp_get_buff_len(ctx); + + offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET : + offset & 0xff; + if (offset == 0) + return -1; + + /* Make sure we have enough data to avoid eating the header */ + if (buff_len - offset < hdr_len) + return -1; + + buff_pos = buff_len - offset; + if (bpf_xdp_load_bytes(ctx, buff_pos, tmp_buff, offset) < 0) + return -1; + + udp_csum = bpf_csum_diff((__be32 *)tmp_buff, offset, 0, 0, udp_csum); + udph->check = (__u16)csum_fold_helper(udp_csum); + + if (bpf_xdp_adjust_tail(ctx, 0 - offset) < 0) + return -1; + + return 0; +} + +static int xdp_adjst_tail_grow_data(struct xdp_md *ctx, __u16 offset) +{ + char tmp_buff[MAX_ADJST_OFFSET]; + __u32 buff_pos, udp_csum = 0; + __u32 buff_len, hdr_len, key; + struct udphdr *udph; + __s32 *val; + __u8 tag; + + /* Proceed to update the packet headers before attempting to adjuste + * the tail. Once the tail is adjusted we lose access to the offset + * amount of data at the end of the packet which is crucial to update + * the checksum. + * Since any failure beyond this would abort the packet, we should + * not worry about passing a packet up the stack with wrong headers + */ + udph = update_pkt(ctx, offset, &udp_csum); + if (!udph) + return -1; + + key = XDP_ADJST_TAG; + val = bpf_map_lookup_elem(&map_xdp_setup, &key); + if (!val) + return -1; + + tag = (__u8)(*val); + + for (int i = 0; i < MAX_ADJST_OFFSET; i++) + __builtin_memcpy(&tmp_buff[i], &tag, 1); + + offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET : + offset & 0xff; + if (offset == 0) + return -1; + + udp_csum = bpf_csum_diff(0, 0, (__be32 *)tmp_buff, offset, udp_csum); + udph->check = (__u16)csum_fold_helper(udp_csum); + + buff_len = bpf_xdp_get_buff_len(ctx); + + if (bpf_xdp_adjust_tail(ctx, offset) < 0) { + bpf_printk("Failed to adjust tail\n"); + return -1; + } + + if (bpf_xdp_store_bytes(ctx, buff_len, tmp_buff, offset) < 0) + return -1; + + return 0; +} + +static int xdp_adjst_tail(struct xdp_md *ctx, __u16 port) +{ + struct udphdr *udph = NULL; + __s32 *adjust_offset, *val; + unsigned long hdr_len; + void *offset_ptr; + __u32 key; + __u8 tag; + int ret; + + udph = filter_udphdr(ctx, port); + if (!udph) + return XDP_PASS; + + hdr_len = (void *)udph - (void *)(long)ctx->data + + sizeof(struct udphdr); + key = XDP_ADJST_OFFSET; + adjust_offset = bpf_map_lookup_elem(&map_xdp_setup, &key); + if (!adjust_offset) + return XDP_PASS; + + if (*adjust_offset < 0) + ret = xdp_adjst_tail_shrnk_data(ctx, + (__u16)(0 - *adjust_offset), + hdr_len); + else + ret = xdp_adjst_tail_grow_data(ctx, (__u16)(*adjust_offset)); + if (ret) + goto abort_pkt; + + record_stats(ctx, STATS_PASS); + return XDP_PASS; + +abort_pkt: + record_stats(ctx, STATS_ABORT); + return XDP_ABORTED; +} + +static int xdp_adjst_head_shrnk_data(struct xdp_md *ctx, __u64 hdr_len, + __u32 offset) +{ + char tmp_buff[MAX_ADJST_OFFSET]; + struct udphdr *udph; + void *offset_ptr; + __u32 udp_csum = 0; + + /* Update the length information in the IP and UDP headers before + * adjusting the headroom. This simplifies accessing the relevant + * fields in the IP and UDP headers for fragmented packets. Any + * failure beyond this point will result in the packet being aborted, + * so we don't need to worry about incorrect length information for + * passed packets. + */ + udph = update_pkt(ctx, (__s16)(0 - offset), &udp_csum); + if (!udph) + return -1; + + offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET : + offset & 0xff; + if (offset == 0) + return -1; + + if (bpf_xdp_load_bytes(ctx, hdr_len, tmp_buff, offset) < 0) + return -1; + + udp_csum = bpf_csum_diff((__be32 *)tmp_buff, offset, 0, 0, udp_csum); + + udph->check = (__u16)csum_fold_helper(udp_csum); + + if (bpf_xdp_load_bytes(ctx, 0, tmp_buff, MAX_ADJST_OFFSET) < 0) + return -1; + + if (bpf_xdp_adjust_head(ctx, offset) < 0) + return -1; + + if (offset > MAX_ADJST_OFFSET) + return -1; + + if (hdr_len > MAX_ADJST_OFFSET || hdr_len == 0) + return -1; + + /* Added here to handle clang complain about negative value */ + hdr_len = hdr_len & 0xff; + + if (hdr_len == 0) + return -1; + + if (bpf_xdp_store_bytes(ctx, 0, tmp_buff, hdr_len) < 0) + return -1; + + return 0; +} + +static int xdp_adjst_head_grow_data(struct xdp_md *ctx, __u64 hdr_len, + __u32 offset) +{ + char hdr_buff[MAX_HDR_LEN]; + char data_buff[MAX_ADJST_OFFSET]; + void *offset_ptr; + __s32 *val; + __u32 key; + __u8 tag; + __u32 udp_csum = 0; + struct udphdr *udph; + + udph = update_pkt(ctx, (__s16)(offset), &udp_csum); + if (!udph) + return -1; + + key = XDP_ADJST_TAG; + val = bpf_map_lookup_elem(&map_xdp_setup, &key); + if (!val) + return -1; + + tag = (__u8)(*val); + for (int i = 0; i < MAX_ADJST_OFFSET; i++) + __builtin_memcpy(&data_buff[i], &tag, 1); + + offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET : + offset & 0xff; + if (offset == 0) + return -1; + + udp_csum = bpf_csum_diff(0, 0, (__be32 *)data_buff, offset, udp_csum); + udph->check = (__u16)csum_fold_helper(udp_csum); + + if (hdr_len > MAX_ADJST_OFFSET || hdr_len == 0) + return -1; + + /* Added here to handle clang complain about negative value */ + hdr_len = hdr_len & 0xff; + + if (hdr_len == 0) + return -1; + + if (bpf_xdp_load_bytes(ctx, 0, hdr_buff, hdr_len) < 0) + return -1; + + if (offset > MAX_ADJST_OFFSET) + return -1; + + if (bpf_xdp_adjust_head(ctx, 0 - offset) < 0) + return -1; + + if (bpf_xdp_store_bytes(ctx, 0, hdr_buff, hdr_len) < 0) + return -1; + + if (bpf_xdp_store_bytes(ctx, hdr_len, data_buff, offset) < 0) + return -1; + + return 0; +} + +static int xdp_head_adjst(struct xdp_md *ctx, __u16 port) +{ + struct udphdr *udph_ptr = NULL; + __u32 key, size, hdr_len; + __s32 *val; + int res; + + /* Filter packets based on UDP port */ + udph_ptr = filter_udphdr(ctx, port); + if (!udph_ptr) + return XDP_PASS; + + hdr_len = (void *)udph_ptr - (void *)(long)ctx->data + + sizeof(struct udphdr); + + key = XDP_ADJST_OFFSET; + val = bpf_map_lookup_elem(&map_xdp_setup, &key); + if (!val) + return XDP_PASS; + + switch (*val) { + case -16: + case 16: + size = 16; + break; + case -32: + case 32: + size = 32; + break; + case -64: + case 64: + size = 64; + break; + case -128: + case 128: + size = 128; + break; + case -256: + case 256: + size = 256; + break; + default: + bpf_printk("Invalid adjustment offset: %d\n", *val); + goto abort; + } + + if (*val < 0) + res = xdp_adjst_head_grow_data(ctx, hdr_len, size); + else + res = xdp_adjst_head_shrnk_data(ctx, hdr_len, size); + + if (res) + goto abort; + + record_stats(ctx, STATS_PASS); + return XDP_PASS; + +abort: + record_stats(ctx, STATS_ABORT); + return XDP_ABORTED; +} + +static int xdp_prog_common(struct xdp_md *ctx) +{ + __u32 key, *port; + __s32 *mode; + + key = XDP_MODE; + mode = bpf_map_lookup_elem(&map_xdp_setup, &key); + if (!mode) + return XDP_PASS; + + key = XDP_PORT; + port = bpf_map_lookup_elem(&map_xdp_setup, &key); + if (!port) + return XDP_PASS; + + switch (*mode) { + case XDP_MODE_PASS: + return xdp_mode_pass(ctx, (__u16)(*port)); + case XDP_MODE_DROP: + return xdp_mode_drop_handler(ctx, (__u16)(*port)); + case XDP_MODE_TX: + return xdp_mode_tx_handler(ctx, (__u16)(*port)); + case XDP_MODE_TAIL_ADJST: + return xdp_adjst_tail(ctx, (__u16)(*port)); + case XDP_MODE_HEAD_ADJST: + return xdp_head_adjst(ctx, (__u16)(*port)); + } + + /* Default action is to simple pass */ + return XDP_PASS; +} + +SEC("xdp") +int xdp_prog(struct xdp_md *ctx) +{ + return xdp_prog_common(ctx); +} + +SEC("xdp.frags") +int xdp_prog_frags(struct xdp_md *ctx) +{ + return xdp_prog_common(ctx); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/net/link_netns.py b/tools/testing/selftests/net/link_netns.py new file mode 100755 index 000000000000..aab043c59d69 --- /dev/null +++ b/tools/testing/selftests/net/link_netns.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import time + +from lib.py import ksft_run, ksft_exit, ksft_true +from lib.py import ip +from lib.py import NetNS, NetNSEnter +from lib.py import RtnlFamily + + +LINK_NETNSID = 100 + + +def test_event() -> None: + with NetNS() as ns1, NetNS() as ns2: + with NetNSEnter(str(ns2)): + rtnl = RtnlFamily() + + rtnl.ntf_subscribe("rtnlgrp-link") + + ip(f"netns set {ns2} {LINK_NETNSID}", ns=str(ns1)) + ip(f"link add netns {ns1} link-netnsid {LINK_NETNSID} dummy1 type dummy") + ip(f"link add netns {ns1} dummy2 type dummy", ns=str(ns2)) + + ip("link del dummy1", ns=str(ns1)) + ip("link del dummy2", ns=str(ns1)) + + time.sleep(1) + rtnl.check_ntf() + ksft_true(rtnl.async_msg_queue.empty(), + "Received unexpected link notification") + + +def validate_link_netns(netns, ifname, link_netnsid) -> bool: + link_info = ip(f"-d link show dev {ifname}", ns=netns, json=True) + if not link_info: + return False + return link_info[0].get("link_netnsid") == link_netnsid + + +def test_link_net() -> None: + configs = [ + # type, common args, type args, fallback to dev_net + ("ipvlan", "link dummy1", "", False), + ("macsec", "link dummy1", "", False), + ("macvlan", "link dummy1", "", False), + ("macvtap", "link dummy1", "", False), + ("vlan", "link dummy1", "id 100", False), + ("gre", "", "local 192.0.2.1", True), + ("vti", "", "local 192.0.2.1", True), + ("ipip", "", "local 192.0.2.1", True), + ("ip6gre", "", "local 2001:db8::1", True), + ("ip6tnl", "", "local 2001:db8::1", True), + ("vti6", "", "local 2001:db8::1", True), + ("sit", "", "local 192.0.2.1", True), + ("xfrm", "", "if_id 1", True), + ] + + with NetNS() as ns1, NetNS() as ns2, NetNS() as ns3: + net1, net2, net3 = str(ns1), str(ns2), str(ns3) + + # prepare link netnsid and a dummy link needed by certain drivers + ip(f"netns set {net3} {LINK_NETNSID}", ns=str(net2)) + ip("link add dummy1 type dummy", ns=net3) + + cases = [ + # source, "netns", "link-netns", expected link-netns + (net3, None, None, None, None), + (net3, net2, None, None, LINK_NETNSID), + (net2, None, net3, LINK_NETNSID, LINK_NETNSID), + (net1, net2, net3, LINK_NETNSID, LINK_NETNSID), + ] + + for src_net, netns, link_netns, exp1, exp2 in cases: + tgt_net = netns or src_net + for typ, cargs, targs, fb_dev_net in configs: + cmd = "link add" + if netns: + cmd += f" netns {netns}" + if link_netns: + cmd += f" link-netns {link_netns}" + cmd += f" {cargs} foo type {typ} {targs}" + ip(cmd, ns=src_net) + if fb_dev_net: + ksft_true(validate_link_netns(tgt_net, "foo", exp1), + f"{typ} link_netns validation failed") + else: + ksft_true(validate_link_netns(tgt_net, "foo", exp2), + f"{typ} link_netns validation failed") + ip(f"link del foo", ns=tgt_net) + + +def test_peer_net() -> None: + types = [ + "vxcan", + "netkit", + "veth", + ] + + with NetNS() as ns1, NetNS() as ns2, NetNS() as ns3, NetNS() as ns4: + net1, net2, net3, net4 = str(ns1), str(ns2), str(ns3), str(ns4) + + ip(f"netns set {net3} {LINK_NETNSID}", ns=str(net2)) + + cases = [ + # source, "netns", "link-netns", "peer netns", expected + (net1, None, None, None, None), + (net1, net2, None, None, None), + (net2, None, net3, None, LINK_NETNSID), + (net1, net2, net3, None, None), + (net2, None, None, net3, LINK_NETNSID), + (net1, net2, None, net3, LINK_NETNSID), + (net2, None, net2, net3, LINK_NETNSID), + (net1, net2, net4, net3, LINK_NETNSID), + ] + + for src_net, netns, link_netns, peer_netns, exp in cases: + tgt_net = netns or src_net + for typ in types: + cmd = "link add" + if netns: + cmd += f" netns {netns}" + if link_netns: + cmd += f" link-netns {link_netns}" + cmd += f" foo type {typ}" + if peer_netns: + cmd += f" peer netns {peer_netns}" + ip(cmd, ns=src_net) + ksft_true(validate_link_netns(tgt_net, "foo", exp), + f"{typ} peer_netns validation failed") + ip(f"link del foo", ns=tgt_net) + + +def main() -> None: + ksft_run([test_event, test_link_net, test_peer_net]) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/net/lwt_dst_cache_ref_loop.sh b/tools/testing/selftests/net/lwt_dst_cache_ref_loop.sh new file mode 100755 index 000000000000..881eb399798f --- /dev/null +++ b/tools/testing/selftests/net/lwt_dst_cache_ref_loop.sh @@ -0,0 +1,246 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ +# +# Author: Justin Iurman <justin.iurman@uliege.be> +# +# WARNING +# ------- +# This is just a dummy script that triggers encap cases with possible dst cache +# reference loops in affected lwt users (see list below). Some cases are +# pathological configurations for simplicity, others are valid. Overall, we +# don't want this issue to happen, no matter what. In order to catch any +# reference loops, kmemleak MUST be used. The results alone are always blindly +# successful, don't rely on them. Note that the following tests may crash the +# kernel if the fix to prevent lwtunnel_{input|output|xmit}() reentry loops is +# not present. +# +# Affected lwt users so far (please update accordingly if needed): +# - ila_lwt (output only) +# - ioam6_iptunnel (output only) +# - rpl_iptunnel (both input and output) +# - seg6_iptunnel (both input and output) + +source lib.sh + +check_compatibility() +{ + setup_ns tmp_node &>/dev/null + if [ $? != 0 ]; then + echo "SKIP: Cannot create netns." + exit $ksft_skip + fi + + ip link add name veth0 netns $tmp_node type veth \ + peer name veth1 netns $tmp_node &>/dev/null + local ret=$? + + ip -netns $tmp_node link set veth0 up &>/dev/null + ret=$((ret + $?)) + + ip -netns $tmp_node link set veth1 up &>/dev/null + ret=$((ret + $?)) + + if [ $ret != 0 ]; then + echo "SKIP: Cannot configure links." + cleanup_ns $tmp_node + exit $ksft_skip + fi + + lsmod 2>/dev/null | grep -q "ila" + ila_lsmod=$? + [ $ila_lsmod != 0 ] && modprobe ila &>/dev/null + + ip -netns $tmp_node route add 2001:db8:1::/64 \ + encap ila 1:2:3:4 csum-mode no-action ident-type luid \ + hook-type output \ + dev veth0 &>/dev/null + + ip -netns $tmp_node route add 2001:db8:2::/64 \ + encap ioam6 trace prealloc type 0x800000 ns 0 size 4 \ + dev veth0 &>/dev/null + + ip -netns $tmp_node route add 2001:db8:3::/64 \ + encap rpl segs 2001:db8:3::1 dev veth0 &>/dev/null + + ip -netns $tmp_node route add 2001:db8:4::/64 \ + encap seg6 mode inline segs 2001:db8:4::1 dev veth0 &>/dev/null + + ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap ila" + skip_ila=$? + + ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap ioam6" + skip_ioam6=$? + + ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap rpl" + skip_rpl=$? + + ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap seg6" + skip_seg6=$? + + cleanup_ns $tmp_node +} + +setup() +{ + setup_ns alpha beta gamma &>/dev/null + + ip link add name veth-alpha netns $alpha type veth \ + peer name veth-betaL netns $beta &>/dev/null + + ip link add name veth-betaR netns $beta type veth \ + peer name veth-gamma netns $gamma &>/dev/null + + ip -netns $alpha link set veth-alpha name veth0 &>/dev/null + ip -netns $beta link set veth-betaL name veth0 &>/dev/null + ip -netns $beta link set veth-betaR name veth1 &>/dev/null + ip -netns $gamma link set veth-gamma name veth0 &>/dev/null + + ip -netns $alpha addr add 2001:db8:1::2/64 dev veth0 &>/dev/null + ip -netns $alpha link set veth0 up &>/dev/null + ip -netns $alpha link set lo up &>/dev/null + ip -netns $alpha route add 2001:db8:2::/64 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + ip -netns $beta addr add 2001:db8:1::1/64 dev veth0 &>/dev/null + ip -netns $beta addr add 2001:db8:2::1/64 dev veth1 &>/dev/null + ip -netns $beta link set veth0 up &>/dev/null + ip -netns $beta link set veth1 up &>/dev/null + ip -netns $beta link set lo up &>/dev/null + ip -netns $beta route del 2001:db8:2::/64 + ip -netns $beta route add 2001:db8:2::/64 dev veth1 + ip netns exec $beta \ + sysctl -wq net.ipv6.conf.all.forwarding=1 &>/dev/null + + ip -netns $gamma addr add 2001:db8:2::2/64 dev veth0 &>/dev/null + ip -netns $gamma link set veth0 up &>/dev/null + ip -netns $gamma link set lo up &>/dev/null + ip -netns $gamma route add 2001:db8:1::/64 \ + via 2001:db8:2::1 dev veth0 &>/dev/null + + sleep 1 + + ip netns exec $alpha ping6 -c 5 -W 1 2001:db8:2::2 &>/dev/null + if [ $? != 0 ]; then + echo "SKIP: Setup failed." + exit $ksft_skip + fi + + sleep 1 +} + +cleanup() +{ + cleanup_ns $alpha $beta $gamma + [ $ila_lsmod != 0 ] && modprobe -r ila &>/dev/null +} + +run_ila() +{ + if [ $skip_ila != 0 ]; then + echo "SKIP: ila (output)" + return + fi + + ip -netns $beta route del 2001:db8:2::/64 + ip -netns $beta route add 2001:db8:2:0:0:0:0:2/128 \ + encap ila 2001:db8:2:0 csum-mode no-action ident-type luid \ + hook-type output \ + dev veth1 &>/dev/null + sleep 1 + + echo "TEST: ila (output)" + ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null + sleep 1 + + ip -netns $beta route del 2001:db8:2:0:0:0:0:2/128 + ip -netns $beta route add 2001:db8:2::/64 dev veth1 + sleep 1 +} + +run_ioam6() +{ + if [ $skip_ioam6 != 0 ]; then + echo "SKIP: ioam6 (output)" + return + fi + + ip -netns $beta route change 2001:db8:2::/64 \ + encap ioam6 trace prealloc type 0x800000 ns 1 size 4 \ + dev veth1 &>/dev/null + sleep 1 + + echo "TEST: ioam6 (output)" + ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null + sleep 1 +} + +run_rpl() +{ + if [ $skip_rpl != 0 ]; then + echo "SKIP: rpl (input)" + echo "SKIP: rpl (output)" + return + fi + + ip -netns $beta route change 2001:db8:2::/64 \ + encap rpl segs 2001:db8:2::2 \ + dev veth1 &>/dev/null + sleep 1 + + echo "TEST: rpl (input)" + ip netns exec $alpha ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null + sleep 1 + + echo "TEST: rpl (output)" + ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null + sleep 1 +} + +run_seg6() +{ + if [ $skip_seg6 != 0 ]; then + echo "SKIP: seg6 (input)" + echo "SKIP: seg6 (output)" + return + fi + + ip -netns $beta route change 2001:db8:2::/64 \ + encap seg6 mode inline segs 2001:db8:2::2 \ + dev veth1 &>/dev/null + sleep 1 + + echo "TEST: seg6 (input)" + ip netns exec $alpha ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null + sleep 1 + + echo "TEST: seg6 (output)" + ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null + sleep 1 +} + +run() +{ + run_ila + run_ioam6 + run_rpl + run_seg6 +} + +if [ "$(id -u)" -ne 0 ]; then + echo "SKIP: Need root privileges." + exit $ksft_skip +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool." + exit $ksft_skip +fi + +check_compatibility + +trap cleanup EXIT + +setup +run + +exit $ksft_pass diff --git a/tools/testing/selftests/net/macvlan_mcast_shared_mac.sh b/tools/testing/selftests/net/macvlan_mcast_shared_mac.sh new file mode 100755 index 000000000000..ff5b89347247 --- /dev/null +++ b/tools/testing/selftests/net/macvlan_mcast_shared_mac.sh @@ -0,0 +1,93 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test multicast delivery to macvlan bridge ports when the source MAC +# matches the macvlan's own MAC address (e.g., VRRP virtual MAC shared +# across multiple hosts). +# +# Topology: +# +# NS_SRC NS_BRIDGE +# veth_src (SHARED_MAC) <-----> veth_dst +# | +# +-- macvlan0 (bridge mode, SHARED_MAC) +# +# A multicast packet sent from NS_SRC with source MAC equal to +# macvlan0's MAC must still be delivered to macvlan0. + +source lib.sh + +SHARED_MAC="00:00:5e:00:01:01" +MCAST_ADDR="239.0.0.1" + +setup() { + setup_ns NS_SRC NS_BRIDGE + + ip -net "${NS_BRIDGE}" link add veth_dst type veth \ + peer name veth_src netns "${NS_SRC}" + + ip -net "${NS_SRC}" link set veth_src address "${SHARED_MAC}" + ip -net "${NS_SRC}" link set veth_src up + ip -net "${NS_SRC}" addr add 192.168.1.1/24 dev veth_src + + ip -net "${NS_BRIDGE}" link set veth_dst up + + ip -net "${NS_BRIDGE}" link add macvlan0 link veth_dst \ + type macvlan mode bridge + ip -net "${NS_BRIDGE}" link set macvlan0 address "${SHARED_MAC}" + ip -net "${NS_BRIDGE}" link set macvlan0 up + ip -net "${NS_BRIDGE}" addr add 192.168.1.2/24 dev macvlan0 + + # Accept all multicast so the mc_filter passes for any group. + ip -net "${NS_BRIDGE}" link set macvlan0 allmulticast on +} + +cleanup() { + rm -f "${CAPFILE}" "${CAPOUT}" + cleanup_ns "${NS_SRC}" "${NS_BRIDGE}" +} + +test_macvlan_mcast_shared_mac() { + CAPFILE=$(mktemp) + CAPOUT=$(mktemp) + + echo "Testing multicast delivery to macvlan with shared source MAC" + + # Listen for one ICMP packet on macvlan0. + timeout 5s ip netns exec "${NS_BRIDGE}" \ + tcpdump -i macvlan0 -c 1 -w "${CAPFILE}" icmp &> "${CAPOUT}" & + local pid=$! + if ! slowwait 1 grep -qs "listening" "${CAPOUT}"; then + echo "[FAIL] tcpdump did not start listening" + return "${ksft_fail}" + fi + + # Send multicast ping from NS_SRC; source MAC equals macvlan0's MAC. + ip netns exec "${NS_SRC}" \ + ping -W 0.1 -c 3 -I veth_src "${MCAST_ADDR}" &> /dev/null + + wait "${pid}" + + local count + count=$(tcpdump -r "${CAPFILE}" 2>/dev/null | wc -l) + if [[ "${count}" -ge 1 ]]; then + echo "[ OK ]" + return "${ksft_pass}" + else + echo "[FAIL] expected at least 1 ICMP packet on macvlan0," \ + "got ${count}" + return "${ksft_fail}" + fi +} + +if [ ! -x "$(command -v tcpdump)" ]; then + echo "SKIP: Could not run test without tcpdump tool" + exit "${ksft_skip}" +fi + +trap cleanup EXIT + +setup +test_macvlan_mcast_shared_mac + +exit $? diff --git a/tools/testing/selftests/net/mptcp/.gitignore b/tools/testing/selftests/net/mptcp/.gitignore index 49daae73c41e..833279fb34e2 100644 --- a/tools/testing/selftests/net/mptcp/.gitignore +++ b/tools/testing/selftests/net/mptcp/.gitignore @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only mptcp_connect +mptcp_diag mptcp_inq mptcp_sockopt pm_nl_ctl diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile index c76525fe2b84..22ba0da2adb8 100644 --- a/tools/testing/selftests/net/mptcp/Makefile +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -3,15 +3,36 @@ top_srcdir = ../../../../.. CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) - -TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \ - simult_flows.sh mptcp_sockopt.sh userspace_pm.sh - -TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq - -TEST_FILES := mptcp_lib.sh settings - -TEST_INCLUDES := ../lib.sh $(wildcard ../lib/sh/*.sh) ../net_helper.sh +CFLAGS += -I$(top_srcdir)/tools/include + +TEST_PROGS := \ + diag.sh \ + mptcp_connect.sh \ + mptcp_connect_checksum.sh \ + mptcp_connect_mmap.sh \ + mptcp_connect_sendfile.sh \ + mptcp_connect_splice.sh \ + mptcp_join.sh \ + mptcp_sockopt.sh \ + pm_netlink.sh \ + simult_flows.sh \ + userspace_pm.sh \ +# end of TEST_PROGS + +TEST_GEN_FILES := \ + mptcp_connect \ + mptcp_diag \ + mptcp_inq \ + mptcp_sockopt \ + pm_nl_ctl \ +# end of TEST_GEN_FILES + +TEST_FILES := \ + mptcp_lib.sh \ + settings \ +# end of TEST_FILES + +TEST_INCLUDES := ../lib.sh $(wildcard ../lib/sh/*.sh) EXTRA_CLEAN := *.pcap diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index 4f80014cae49..59051ee2a986 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -1,34 +1,36 @@ -CONFIG_KALLSYMS=y -CONFIG_MPTCP=y -CONFIG_IPV6=y -CONFIG_MPTCP_IPV6=y CONFIG_INET_DIAG=m CONFIG_INET_MPTCP_DIAG=m -CONFIG_VETH=y -CONFIG_NET_SCH_NETEM=m -CONFIG_SYN_COOKIES=y -CONFIG_NETFILTER=y -CONFIG_NETFILTER_ADVANCED=y -CONFIG_NETFILTER_NETLINK=m -CONFIG_NF_TABLES=m -CONFIG_NFT_COMPAT=m -CONFIG_NETFILTER_XTABLES=m -CONFIG_NETFILTER_XT_MATCH_BPF=m -CONFIG_NETFILTER_XT_MATCH_LENGTH=m -CONFIG_NETFILTER_XT_MATCH_STATISTIC=m -CONFIG_NETFILTER_XT_TARGET_MARK=m -CONFIG_NF_TABLES_INET=y -CONFIG_NFT_TPROXY=m -CONFIG_NFT_SOCKET=m +CONFIG_IP6_NF_FILTER=m +CONFIG_IP6_NF_TARGET_REJECT=m CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_MULTIPLE_TABLES=y CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IPV6=y CONFIG_IPV6_MULTIPLE_TABLES=y -CONFIG_IP6_NF_FILTER=m +CONFIG_KALLSYMS=y +CONFIG_MPTCP=y +CONFIG_MPTCP_IPV6=y CONFIG_NET_ACT_CSUM=m CONFIG_NET_ACT_PEDIT=m CONFIG_NET_CLS_ACT=y CONFIG_NET_CLS_FW=m +CONFIG_NETFILTER=y +CONFIG_NETFILTER_ADVANCED=y +CONFIG_NETFILTER_NETLINK=m +CONFIG_NETFILTER_XTABLES=m +CONFIG_NETFILTER_XTABLES_LEGACY=y +CONFIG_NETFILTER_XT_MATCH_BPF=m +CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NETFILTER_XT_MATCH_STATISTIC=m +CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NET_SCH_INGRESS=m +CONFIG_NET_SCH_NETEM=m +CONFIG_NF_TABLES=m +CONFIG_NF_TABLES_INET=y +CONFIG_NFT_COMPAT=m +CONFIG_NFT_SOCKET=m +CONFIG_NFT_TPROXY=m +CONFIG_SYN_COOKIES=y +CONFIG_VETH=y diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 2bd0c1eb70c5..d847ff1737c3 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -28,7 +28,7 @@ flush_pids() } # This function is used in the cleanup trap -#shellcheck disable=SC2317 +#shellcheck disable=SC2317,SC2329 cleanup() { ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGKILL &>/dev/null @@ -200,6 +200,62 @@ chk_msk_cestab() "${expected}" "${msg}" "" } +chk_dump_one() +{ + local ss_token + local token + local msg + + ss_token="$(ss -inmHMN $ns | + mptcp_lib_get_info_value "token" "token")" + + token="$(ip netns exec $ns ./mptcp_diag -t $ss_token |\ + awk -F':[ \t]+' '/^token/ {print $2}')" + + msg="....chk dump_one" + + mptcp_lib_print_title "$msg" + if [ -n "$ss_token" ] && [ "$ss_token" = "$token" ]; then + mptcp_lib_pr_ok + mptcp_lib_result_pass "${msg}" + else + mptcp_lib_pr_fail "expected $ss_token found $token" + mptcp_lib_result_fail "${msg}" + ret=${KSFT_FAIL} + fi +} + +chk_dump_subflow() +{ + local inet_diag_token + local subflow_line + local ss_output + local ss_token + local msg + + ss_output=$(ss -tniN $ns) + + subflow_line=$(echo "$ss_output" | \ + grep -m1 -Eo '[0-9.]+:[0-9].+ +[0-9.]+:[0-9.]+') + + ss_token=$(echo "$ss_output" | grep -m1 -Eo 'token:[^ ]+') + + inet_diag_token=$(ip netns exec $ns ./mptcp_diag -s "$subflow_line" | \ + grep -Eo 'token:[^ ]+') + + msg="....chk dump_subflow" + + mptcp_lib_print_title "$msg" + if [ -n "$ss_token" ] && [ "$ss_token" = "$inet_diag_token" ]; then + mptcp_lib_pr_ok + mptcp_lib_result_pass "${msg}" + else + mptcp_lib_pr_fail "expected $ss_token found $inet_diag_token" + mptcp_lib_result_fail "${msg}" + ret=${KSFT_FAIL} + fi +} + msk_info_get_value() { local port="${1}" @@ -290,6 +346,8 @@ chk_msk_remote_key_nr 2 "....chk remote_key" chk_msk_fallback_nr 0 "....chk no fallback" chk_msk_inuse 2 chk_msk_cestab 2 +chk_dump_one +chk_dump_subflow flush_pids chk_msk_inuse 0 "2->0" diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index d240d02fa443..cbe573c4ab3a 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -33,6 +33,7 @@ #include <linux/tcp.h> #include <linux/time_types.h> #include <linux/sockios.h> +#include <linux/compiler.h> extern int optind; @@ -51,6 +52,7 @@ enum cfg_mode { CFG_MODE_POLL, CFG_MODE_MMAP, CFG_MODE_SENDFILE, + CFG_MODE_SPLICE, }; enum cfg_peek { @@ -123,7 +125,7 @@ static void die_usage(void) fprintf(stderr, "\t-j -- add additional sleep at connection start and tear down " "-- for MPJ tests\n"); fprintf(stderr, "\t-l -- listens mode, accepts incoming connection\n"); - fprintf(stderr, "\t-m [poll|mmap|sendfile] -- use poll(default)/mmap+write/sendfile\n"); + fprintf(stderr, "\t-m [poll|mmap|sendfile|splice] -- use poll(default)/mmap+write/sendfile/splice\n"); fprintf(stderr, "\t-M mark -- set socket packet mark\n"); fprintf(stderr, "\t-o option -- test sockopt <option>\n"); fprintf(stderr, "\t-p num -- use port num\n"); @@ -140,7 +142,7 @@ static void die_usage(void) exit(1); } -static void xerror(const char *fmt, ...) +static void __noreturn xerror(const char *fmt, ...) { va_list ap; @@ -180,13 +182,27 @@ static void xgetnameinfo(const struct sockaddr *addr, socklen_t addrlen, } static void xgetaddrinfo(const char *node, const char *service, - const struct addrinfo *hints, + struct addrinfo *hints, struct addrinfo **res) { - int err = getaddrinfo(node, service, hints, res); + int err; +again: + err = getaddrinfo(node, service, hints, res); if (err) { - const char *errstr = getxinfo_strerr(err); + const char *errstr; + + /* glibc starts to support MPTCP since v2.42. + * For older versions, use IPPROTO_TCP to resolve, + * and use TCP/MPTCP to create socket. + * Link: https://sourceware.org/git/?p=glibc.git;a=commit;h=a8e9022e0f82 + */ + if (err == EAI_SOCKTYPE) { + hints->ai_protocol = IPPROTO_TCP; + goto again; + } + + errstr = getxinfo_strerr(err); fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n", node ? node : "", service ? service : "", errstr); @@ -243,7 +259,7 @@ static void set_transparent(int fd, int pf) } } -static void set_mptfo(int fd, int pf) +static void set_mptfo(int fd) { int qlen = 25; @@ -292,7 +308,7 @@ static int sock_listen_mptcp(const char * const listenaddr, { int sock = -1; struct addrinfo hints = { - .ai_protocol = IPPROTO_TCP, + .ai_protocol = IPPROTO_MPTCP, .ai_socktype = SOCK_STREAM, .ai_flags = AI_PASSIVE | AI_NUMERICHOST }; @@ -320,7 +336,7 @@ static int sock_listen_mptcp(const char * const listenaddr, set_transparent(sock, pf); if (cfg_sockopt_types.mptfo) - set_mptfo(sock, pf); + set_mptfo(sock); if (bind(sock, a->ai_addr, a->ai_addrlen) == 0) break; /* success */ @@ -356,7 +372,7 @@ static int sock_connect_mptcp(const char * const remoteaddr, int infd, struct wstate *winfo) { struct addrinfo hints = { - .ai_protocol = IPPROTO_TCP, + .ai_protocol = IPPROTO_MPTCP, .ai_socktype = SOCK_STREAM, }; struct addrinfo *a, *addr; @@ -391,21 +407,18 @@ static int sock_connect_mptcp(const char * const remoteaddr, *peer = a; break; /* success */ } + perror("sendto()"); } else { if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) { *peer = a; break; /* success */ } - } - if (cfg_sockopt_types.mptfo) { - perror("sendto()"); - close(sock); - sock = -1; - } else { perror("connect()"); - close(sock); - sock = -1; } + + /* error */ + close(sock); + sock = -1; } freeaddrinfo(addr); @@ -696,8 +709,14 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, bw = do_rnd_write(peerfd, winfo->buf + winfo->off, winfo->len); if (bw < 0) { - if (cfg_rcv_trunc) - return 0; + /* expected reset, continue to read */ + if (cfg_rcv_trunc && + (errno == ECONNRESET || + errno == EPIPE)) { + fds.events &= ~POLLOUT; + continue; + } + perror("write"); return 111; } @@ -723,8 +742,10 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, } if (fds.revents & (POLLERR | POLLNVAL)) { - if (cfg_rcv_trunc) - return 0; + if (cfg_rcv_trunc) { + fds.events &= ~(POLLERR | POLLNVAL); + continue; + } fprintf(stderr, "Unexpected revents: " "POLLERR/POLLNVAL(%x)\n", fds.revents); return 5; @@ -912,6 +933,71 @@ static int copyfd_io_sendfile(int infd, int peerfd, int outfd, return err; } +static int do_splice(const int infd, const int outfd, const size_t len, + struct wstate *winfo) +{ + ssize_t in_bytes, out_bytes; + int pipefd[2]; + int err; + + err = pipe(pipefd); + if (err) { + perror("pipe"); + return 2; + } + +again: + in_bytes = splice(infd, NULL, pipefd[1], NULL, len - winfo->total_len, + SPLICE_F_MOVE | SPLICE_F_MORE); + if (in_bytes < 0) { + perror("splice in"); + err = 3; + } else if (in_bytes > 0) { + out_bytes = splice(pipefd[0], NULL, outfd, NULL, in_bytes, + SPLICE_F_MOVE | SPLICE_F_MORE); + if (out_bytes < 0) { + perror("splice out"); + err = 4; + } else if (in_bytes != out_bytes) { + fprintf(stderr, "Unexpected transfer: %zu vs %zu\n", + in_bytes, out_bytes); + err = 5; + } else { + goto again; + } + } + + close(pipefd[0]); + close(pipefd[1]); + + return err; +} + +static int copyfd_io_splice(int infd, int peerfd, int outfd, unsigned int size, + bool *in_closed_after_out, struct wstate *winfo) +{ + int err; + + if (listen_mode) { + err = do_splice(peerfd, outfd, size, winfo); + if (err) + return err; + + err = do_splice(infd, peerfd, size, winfo); + } else { + err = do_splice(infd, peerfd, size, winfo); + if (err) + return err; + + shut_wr(peerfd); + + err = do_splice(peerfd, outfd, size, winfo); + *in_closed_after_out = true; + } + + return err; +} + static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd, struct wstate *winfo) { bool in_closed_after_out = false; @@ -944,6 +1030,14 @@ static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd, struct &in_closed_after_out, winfo); break; + case CFG_MODE_SPLICE: + file_size = get_infd_size(infd); + if (file_size < 0) + return file_size; + ret = copyfd_io_splice(infd, peerfd, outfd, file_size, + &in_closed_after_out, winfo); + break; + default: fprintf(stderr, "Invalid mode %d\n", cfg_mode); @@ -1050,6 +1144,8 @@ static void check_getpeername_connect(int fd) socklen_t salen = sizeof(ss); char a[INET6_ADDRSTRLEN]; char b[INET6_ADDRSTRLEN]; + const char *iface; + size_t len; if (getpeername(fd, (struct sockaddr *)&ss, &salen) < 0) { perror("getpeername"); @@ -1059,7 +1155,13 @@ static void check_getpeername_connect(int fd) xgetnameinfo((struct sockaddr *)&ss, salen, a, sizeof(a), b, sizeof(b)); - if (strcmp(cfg_host, a) || strcmp(cfg_port, b)) + iface = strchr(cfg_host, '%'); + if (iface) + len = iface - cfg_host; + else + len = strlen(cfg_host) + 1; + + if (strncmp(cfg_host, a, len) || strcmp(cfg_port, b)) fprintf(stderr, "%s: %s vs %s, %s vs %s\n", __func__, cfg_host, a, cfg_port, b); } @@ -1079,6 +1181,7 @@ int main_loop_s(int listensock) struct pollfd polls; socklen_t salen; int remotesock; + int err = 0; int fd = 0; again: @@ -1111,7 +1214,7 @@ again: SOCK_TEST_TCPULP(remotesock, 0); memset(&winfo, 0, sizeof(winfo)); - copyfd_io(fd, remotesock, 1, true, &winfo); + err = copyfd_io(fd, remotesock, 1, true, &winfo); } else { perror("accept"); return 1; @@ -1120,10 +1223,10 @@ again: if (cfg_input) close(fd); - if (--cfg_repeat > 0) + if (!err && --cfg_repeat > 0) goto again; - return 0; + return err; } static void init_rng(void) @@ -1233,7 +1336,7 @@ void xdisconnect(int fd) else xerror("bad family"); - strcpy(cmd, "ss -M | grep -q "); + strcpy(cmd, "ss -Mnt | grep -q "); cmdlen = strlen(cmd); if (!inet_ntop(addr.ss_family, raw_addr, &cmd[cmdlen], sizeof(cmd) - cmdlen)) @@ -1243,7 +1346,7 @@ void xdisconnect(int fd) /* * wait until the pending data is completely flushed and all - * the MPTCP sockets reached the closed status. + * the sockets reached the closed status. * disconnect will bypass/ignore/drop any pending data. */ for (i = 0; ; i += msec_sleep) { @@ -1264,13 +1367,13 @@ void xdisconnect(int fd) int main_loop(void) { + struct addrinfo *peer = NULL; int fd = 0, ret, fd_in = 0; - struct addrinfo *peer; struct wstate winfo; if (cfg_input && cfg_sockopt_types.mptfo) { fd_in = open(cfg_input, O_RDONLY); - if (fd < 0) + if (fd_in < 0) xerror("can't open %s:%d", cfg_input, errno); } @@ -1293,13 +1396,13 @@ again: if (cfg_input && !cfg_sockopt_types.mptfo) { fd_in = open(cfg_input, O_RDONLY); - if (fd < 0) + if (fd_in < 0) xerror("can't open %s:%d", cfg_input, errno); } ret = copyfd_io(fd_in, fd, 1, 0, &winfo); if (ret) - return ret; + goto out; if (cfg_truncate > 0) { shutdown(fd, SHUT_WR); @@ -1320,7 +1423,10 @@ again: close(fd); } - return 0; +out: + if (cfg_input) + close(fd_in); + return ret; } int parse_proto(const char *proto) @@ -1345,12 +1451,15 @@ int parse_mode(const char *mode) return CFG_MODE_MMAP; if (!strcasecmp(mode, "sendfile")) return CFG_MODE_SENDFILE; + if (!strcasecmp(mode, "splice")) + return CFG_MODE_SPLICE; fprintf(stderr, "Unknown test mode: %s\n", mode); fprintf(stderr, "Supported modes are:\n"); fprintf(stderr, "\t\t\"poll\" - interleaved read/write using poll()\n"); fprintf(stderr, "\t\t\"mmap\" - send entire input file (mmap+write), then read response (-l will read input first)\n"); fprintf(stderr, "\t\t\"sendfile\" - send entire input file (sendfile), then read response (-l will read input first)\n"); + fprintf(stderr, "\t\t\"splice\" - send entire input file (splice), then read response (-l will read input first)\n"); die_usage(); @@ -1415,7 +1524,7 @@ static void parse_opts(int argc, char **argv) */ if (cfg_truncate < 0) { cfg_rcv_trunc = true; - signal(SIGPIPE, handle_signal); + signal(SIGPIPE, SIG_IGN); } break; case 'j': diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 5e3c56253274..a6447f7a31fe 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -134,7 +134,7 @@ ns4="" TEST_GROUP="" # This function is used in the cleanup trap -#shellcheck disable=SC2317 +#shellcheck disable=SC2317,SC2329 cleanup() { rm -f "$cin_disconnect" @@ -211,6 +211,11 @@ if $checksum; then done fi +if $capture; then + rndh="${ns1:4}" + mptcp_lib_pr_info "Packet capture files will have this prefix: ${rndh}-" +fi + set_ethtool_flags() { local ns="$1" local dev="$2" @@ -361,7 +366,6 @@ do_transfer() if $capture; then local capuser - local rndh="${connector_ns:4}" if [ -z $SUDO_USER ] ; then capuser="" else @@ -371,81 +375,75 @@ do_transfer() local capfile="${rndh}-${connector_ns:0:3}-${listener_ns:0:3}-${cl_proto}-${srv_proto}-${connect_addr}-${port}" local capopt="-i any -s 65535 -B 32768 ${capuser}" - ip netns exec ${listener_ns} tcpdump ${capopt} -w "${capfile}-listener.pcap" >> "${capout}" 2>&1 & + ip netns exec ${listener_ns} tcpdump ${capopt} \ + -w "${capfile}-listener.pcap" >> "${capout}" 2>&1 & local cappid_listener=$! - ip netns exec ${connector_ns} tcpdump ${capopt} -w "${capfile}-connector.pcap" >> "${capout}" 2>&1 & - local cappid_connector=$! + if [ ${listener_ns} != ${connector_ns} ]; then + ip netns exec ${connector_ns} tcpdump ${capopt} \ + -w "${capfile}-connector.pcap" >> "${capout}" 2>&1 & + local cappid_connector=$! + fi sleep 1 fi - NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ - nstat -n + mptcp_lib_nstat_init "${listener_ns}" if [ ${listener_ns} != ${connector_ns} ]; then - NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ - nstat -n - fi - - local stat_synrx_last_l - local stat_ackrx_last_l - local stat_cookietx_last - local stat_cookierx_last - local stat_csum_err_s - local stat_csum_err_c - local stat_tcpfb_last_l - stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") - stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") - stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") - stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") - stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr") - stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr") - stat_tcpfb_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK") - - timeout ${timeout_test} \ - ip netns exec ${listener_ns} \ - ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ - $extra_args $local_addr < "$sin" > "$sout" & + mptcp_lib_nstat_init "${connector_ns}" + fi + + ip netns exec ${listener_ns} \ + ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ + $extra_args $local_addr < "$sin" > "$sout" & local spid=$! mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}" local start start=$(date +%s%3N) - timeout ${timeout_test} \ - ip netns exec ${connector_ns} \ - ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ - $extra_args $connect_addr < "$cin" > "$cout" & + ip netns exec ${connector_ns} \ + ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ + $extra_args $connect_addr < "$cin" > "$cout" & local cpid=$! + mptcp_lib_wait_timeout "${timeout_test}" "${listener_ns}" \ + "${connector_ns}" "${port}" "${cpid}" "${spid}" & + local timeout_pid=$! + wait $cpid local retc=$? wait $spid local rets=$? + if kill -0 $timeout_pid; then + # Finished before the timeout: kill the background job + mptcp_lib_kill_group_wait $timeout_pid + timeout_pid=0 + fi + local stop stop=$(date +%s%3N) if $capture; then sleep 1 kill ${cappid_listener} - kill ${cappid_connector} + if [ ${listener_ns} != ${connector_ns} ]; then + kill ${cappid_connector} + fi fi - NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ - nstat | grep Tcp > /tmp/${listener_ns}.out + mptcp_lib_nstat_get "${listener_ns}" if [ ${listener_ns} != ${connector_ns} ]; then - NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ - nstat | grep Tcp > /tmp/${connector_ns}.out + mptcp_lib_nstat_get "${connector_ns}" fi local duration duration=$((stop-start)) printf "(duration %05sms) " "${duration}" - if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then + if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ] || [ ${timeout_pid} -ne 0 ]; then mptcp_lib_pr_fail "client exit code $retc, server $rets" - mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" \ - "/tmp/${listener_ns}.out" "/tmp/${connector_ns}.out" + mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" echo cat "$capout" @@ -459,38 +457,38 @@ do_transfer() rets=$? local extra="" - local stat_synrx_now_l - local stat_ackrx_now_l - local stat_cookietx_now - local stat_cookierx_now - local stat_ooo_now - local stat_tcpfb_now_l - stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") - stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") - stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") - stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") - stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue") - stat_tcpfb_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK") - - expect_synrx=$((stat_synrx_last_l)) - expect_ackrx=$((stat_ackrx_last_l)) + local stat_synrx + local stat_ackrx + local stat_cookietx + local stat_cookierx + local stat_ooo + local stat_tcpfb + stat_synrx=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") + stat_ackrx=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") + stat_cookietx=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") + stat_cookierx=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") + stat_ooo=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue") + stat_tcpfb=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK") + + expect_synrx=0 + expect_ackrx=0 cookies=$(ip netns exec ${listener_ns} sysctl net.ipv4.tcp_syncookies) cookies=${cookies##*=} if [ ${cl_proto} = "MPTCP" ] && [ ${srv_proto} = "MPTCP" ]; then - expect_synrx=$((stat_synrx_last_l+connect_per_transfer)) - expect_ackrx=$((stat_ackrx_last_l+connect_per_transfer)) + expect_synrx=${connect_per_transfer} + expect_ackrx=${connect_per_transfer} fi - if [ ${stat_synrx_now_l} -lt ${expect_synrx} ]; then - mptcp_lib_pr_fail "lower MPC SYN rx (${stat_synrx_now_l})" \ + if [ ${stat_synrx} -lt ${expect_synrx} ]; then + mptcp_lib_pr_fail "lower MPC SYN rx (${stat_synrx})" \ "than expected (${expect_synrx})" retc=1 fi - if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ] && [ ${stat_ooo_now} -eq 0 ]; then - if [ ${stat_ooo_now} -eq 0 ]; then - mptcp_lib_pr_fail "lower MPC ACK rx (${stat_ackrx_now_l})" \ + if [ ${stat_ackrx} -lt ${expect_ackrx} ]; then + if [ ${stat_ooo} -eq 0 ]; then + mptcp_lib_pr_fail "lower MPC ACK rx (${stat_ackrx})" \ "than expected (${expect_ackrx})" rets=1 else @@ -504,47 +502,45 @@ do_transfer() csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr") csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr") - local csum_err_s_nr=$((csum_err_s - stat_csum_err_s)) - if [ $csum_err_s_nr -gt 0 ]; then - mptcp_lib_pr_fail "server got ${csum_err_s_nr} data checksum error[s]" + if [ $csum_err_s -gt 0 ]; then + mptcp_lib_pr_fail "server got ${csum_err_s} data checksum error[s]" rets=1 fi - local csum_err_c_nr=$((csum_err_c - stat_csum_err_c)) - if [ $csum_err_c_nr -gt 0 ]; then - mptcp_lib_pr_fail "client got ${csum_err_c_nr} data checksum error[s]" + if [ $csum_err_c -gt 0 ]; then + mptcp_lib_pr_fail "client got ${csum_err_c} data checksum error[s]" retc=1 fi fi - if [ ${stat_ooo_now} -eq 0 ] && [ ${stat_tcpfb_last_l} -ne ${stat_tcpfb_now_l} ]; then + if [ ${stat_ooo} -eq 0 ] && [ ${stat_tcpfb} -gt 0 ]; then mptcp_lib_pr_fail "unexpected fallback to TCP" rets=1 fi if [ $cookies -eq 2 ];then - if [ $stat_cookietx_last -ge $stat_cookietx_now ] ;then + if [ $stat_cookietx -eq 0 ] ;then extra+=" WARN: CookieSent: did not advance" fi - if [ $stat_cookierx_last -ge $stat_cookierx_now ] ;then + if [ $stat_cookierx -eq 0 ] ;then extra+=" WARN: CookieRecv: did not advance" fi else - if [ $stat_cookietx_last -ne $stat_cookietx_now ] ;then + if [ $stat_cookietx -gt 0 ] ;then extra+=" WARN: CookieSent: changed" fi - if [ $stat_cookierx_last -ne $stat_cookierx_now ] ;then + if [ $stat_cookierx -gt 0 ] ;then extra+=" WARN: CookieRecv: changed" fi fi - if [ ${stat_synrx_now_l} -gt ${expect_synrx} ]; then + if [ ${stat_synrx} -gt ${expect_synrx} ]; then extra+=" WARN: SYNRX: expect ${expect_synrx}," - extra+=" got ${stat_synrx_now_l} (probably retransmissions)" + extra+=" got ${stat_synrx} (probably retransmissions)" fi - if [ ${stat_ackrx_now_l} -gt ${expect_ackrx} ]; then + if [ ${stat_ackrx} -gt ${expect_ackrx} ]; then extra+=" WARN: ACKRX: expect ${expect_ackrx}," - extra+=" got ${stat_ackrx_now_l} (probably retransmissions)" + extra+=" got ${stat_ackrx} (probably retransmissions)" fi if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh new file mode 100755 index 000000000000..ce93ec2f107f --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \ + "$(dirname "${0}")/mptcp_connect.sh" -C "${@}" diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh new file mode 100755 index 000000000000..5dd30f9394af --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \ + "$(dirname "${0}")/mptcp_connect.sh" -m mmap "${@}" diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh new file mode 100755 index 000000000000..1d16fb1cc9bb --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \ + "$(dirname "${0}")/mptcp_connect.sh" -m sendfile "${@}" diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_splice.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_splice.sh new file mode 100755 index 000000000000..241254a966c9 --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_connect_splice.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \ + "$(dirname "${0}")/mptcp_connect.sh" -m splice "${@}" diff --git a/tools/testing/selftests/net/mptcp/mptcp_diag.c b/tools/testing/selftests/net/mptcp/mptcp_diag.c new file mode 100644 index 000000000000..5e222ba977e4 --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_diag.c @@ -0,0 +1,439 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025, Kylin Software */ + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <sys/socket.h> + +#include <arpa/inet.h> + +#include <netinet/in.h> + +#include <linux/compiler.h> +#include <linux/inet_diag.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> +#include <linux/sock_diag.h> +#include <linux/tcp.h> + +#ifndef IPPROTO_MPTCP +#define IPPROTO_MPTCP 262 +#endif + +#define parse_rtattr_nested(tb, max, rta) \ + (parse_rtattr_flags((tb), (max), RTA_DATA(rta), RTA_PAYLOAD(rta), \ + NLA_F_NESTED)) + +struct params { + __u32 target_token; + char subflow_addrs[1024]; +}; + +struct mptcp_info { + __u8 mptcpi_subflows; + __u8 mptcpi_add_addr_signal; + __u8 mptcpi_add_addr_accepted; + __u8 mptcpi_subflows_max; + __u8 mptcpi_add_addr_signal_max; + __u8 mptcpi_add_addr_accepted_max; + __u32 mptcpi_flags; + __u32 mptcpi_token; + __u64 mptcpi_write_seq; + __u64 mptcpi_snd_una; + __u64 mptcpi_rcv_nxt; + __u8 mptcpi_local_addr_used; + __u8 mptcpi_local_addr_max; + __u8 mptcpi_csum_enabled; + __u32 mptcpi_retransmits; + __u64 mptcpi_bytes_retrans; + __u64 mptcpi_bytes_sent; + __u64 mptcpi_bytes_received; + __u64 mptcpi_bytes_acked; + __u8 mptcpi_subflows_total; + __u8 reserved[3]; + __u32 mptcpi_last_data_sent; + __u32 mptcpi_last_data_recv; + __u32 mptcpi_last_ack_recv; +}; + +enum { + MPTCP_SUBFLOW_ATTR_UNSPEC, + MPTCP_SUBFLOW_ATTR_TOKEN_REM, + MPTCP_SUBFLOW_ATTR_TOKEN_LOC, + MPTCP_SUBFLOW_ATTR_RELWRITE_SEQ, + MPTCP_SUBFLOW_ATTR_MAP_SEQ, + MPTCP_SUBFLOW_ATTR_MAP_SFSEQ, + MPTCP_SUBFLOW_ATTR_SSN_OFFSET, + MPTCP_SUBFLOW_ATTR_MAP_DATALEN, + MPTCP_SUBFLOW_ATTR_FLAGS, + MPTCP_SUBFLOW_ATTR_ID_REM, + MPTCP_SUBFLOW_ATTR_ID_LOC, + MPTCP_SUBFLOW_ATTR_PAD, + + __MPTCP_SUBFLOW_ATTR_MAX +}; + +#define MPTCP_SUBFLOW_ATTR_MAX (__MPTCP_SUBFLOW_ATTR_MAX - 1) + +#define MPTCP_SUBFLOW_FLAG_MCAP_REM _BITUL(0) +#define MPTCP_SUBFLOW_FLAG_MCAP_LOC _BITUL(1) +#define MPTCP_SUBFLOW_FLAG_JOIN_REM _BITUL(2) +#define MPTCP_SUBFLOW_FLAG_JOIN_LOC _BITUL(3) +#define MPTCP_SUBFLOW_FLAG_BKUP_REM _BITUL(4) +#define MPTCP_SUBFLOW_FLAG_BKUP_LOC _BITUL(5) +#define MPTCP_SUBFLOW_FLAG_FULLY_ESTABLISHED _BITUL(6) +#define MPTCP_SUBFLOW_FLAG_CONNECTED _BITUL(7) +#define MPTCP_SUBFLOW_FLAG_MAPVALID _BITUL(8) + +#define rta_getattr(type, value) (*(type *)RTA_DATA(value)) + +static void __noreturn die_perror(const char *msg) +{ + perror(msg); + exit(1); +} + +static void die_usage(int r) +{ + fprintf(stderr, "Usage:\n" + "mptcp_diag -t <token>\n" + "mptcp_diag -s \"<saddr>:<sport> <daddr>:<dport>\"\n"); + exit(r); +} + +static void send_query(int fd, struct inet_diag_req_v2 *r, __u32 proto) +{ + struct sockaddr_nl nladdr = { + .nl_family = AF_NETLINK + }; + struct { + struct nlmsghdr nlh; + struct inet_diag_req_v2 r; + } req = { + .nlh = { + .nlmsg_len = sizeof(req), + .nlmsg_type = SOCK_DIAG_BY_FAMILY, + .nlmsg_flags = NLM_F_REQUEST + }, + .r = *r + }; + struct rtattr rta_proto; + struct iovec iov[6]; + int iovlen = 0; + + iov[iovlen++] = (struct iovec) { + .iov_base = &req, + .iov_len = sizeof(req) + }; + + if (proto == IPPROTO_MPTCP) { + rta_proto.rta_type = INET_DIAG_REQ_PROTOCOL; + rta_proto.rta_len = RTA_LENGTH(sizeof(proto)); + + iov[iovlen++] = (struct iovec){ &rta_proto, sizeof(rta_proto)}; + iov[iovlen++] = (struct iovec){ &proto, sizeof(proto)}; + req.nlh.nlmsg_len += RTA_LENGTH(sizeof(proto)); + } + + struct msghdr msg = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = iov, + .msg_iovlen = iovlen + }; + + for (;;) { + if (sendmsg(fd, &msg, 0) < 0) { + if (errno == EINTR) + continue; + die_perror("sendmsg"); + } + break; + } +} + +static void parse_rtattr_flags(struct rtattr *tb[], int max, struct rtattr *rta, + int len, unsigned short flags) +{ + unsigned short type; + + memset(tb, 0, sizeof(struct rtattr *) * (max + 1)); + while (RTA_OK(rta, len)) { + type = rta->rta_type & ~flags; + if (type <= max && !tb[type]) + tb[type] = rta; + rta = RTA_NEXT(rta, len); + } +} + +static void print_info_msg(struct mptcp_info *info) +{ + printf("Token & Flags\n"); + printf("token: %x\n", info->mptcpi_token); + printf("flags: %x\n", info->mptcpi_flags); + printf("csum_enabled: %u\n", info->mptcpi_csum_enabled); + + printf("\nBasic Info\n"); + printf("subflows: %u\n", info->mptcpi_subflows); + printf("subflows_max: %u\n", info->mptcpi_subflows_max); + printf("subflows_total: %u\n", info->mptcpi_subflows_total); + printf("local_addr_used: %u\n", info->mptcpi_local_addr_used); + printf("local_addr_max: %u\n", info->mptcpi_local_addr_max); + printf("add_addr_signal: %u\n", info->mptcpi_add_addr_signal); + printf("add_addr_accepted: %u\n", info->mptcpi_add_addr_accepted); + printf("add_addr_signal_max: %u\n", info->mptcpi_add_addr_signal_max); + printf("add_addr_accepted_max: %u\n", info->mptcpi_add_addr_accepted_max); + + printf("\nTransmission Info\n"); + printf("write_seq: %llu\n", info->mptcpi_write_seq); + printf("snd_una: %llu\n", info->mptcpi_snd_una); + printf("rcv_nxt: %llu\n", info->mptcpi_rcv_nxt); + printf("last_data_sent: %u\n", info->mptcpi_last_data_sent); + printf("last_data_recv: %u\n", info->mptcpi_last_data_recv); + printf("last_ack_recv: %u\n", info->mptcpi_last_ack_recv); + printf("retransmits: %u\n", info->mptcpi_retransmits); + printf("retransmit bytes: %llu\n", info->mptcpi_bytes_retrans); + printf("bytes_sent: %llu\n", info->mptcpi_bytes_sent); + printf("bytes_received: %llu\n", info->mptcpi_bytes_received); + printf("bytes_acked: %llu\n", info->mptcpi_bytes_acked); +} + +/* + * 'print_subflow_info' is from 'mptcp_subflow_info' + * which is a function in 'misc/ss.c' of iproute2. + */ +static void print_subflow_info(struct rtattr *tb[]) +{ + u_int32_t flags = 0; + + printf("It's a mptcp subflow, the subflow info:\n"); + if (tb[MPTCP_SUBFLOW_ATTR_FLAGS]) { + char caps[32 + 1] = { 0 }, *cap = &caps[0]; + + flags = rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_FLAGS]); + + if (flags & MPTCP_SUBFLOW_FLAG_MCAP_REM) + *cap++ = 'M'; + if (flags & MPTCP_SUBFLOW_FLAG_MCAP_LOC) + *cap++ = 'm'; + if (flags & MPTCP_SUBFLOW_FLAG_JOIN_REM) + *cap++ = 'J'; + if (flags & MPTCP_SUBFLOW_FLAG_JOIN_LOC) + *cap++ = 'j'; + if (flags & MPTCP_SUBFLOW_FLAG_BKUP_REM) + *cap++ = 'B'; + if (flags & MPTCP_SUBFLOW_FLAG_BKUP_LOC) + *cap++ = 'b'; + if (flags & MPTCP_SUBFLOW_FLAG_FULLY_ESTABLISHED) + *cap++ = 'e'; + if (flags & MPTCP_SUBFLOW_FLAG_CONNECTED) + *cap++ = 'c'; + if (flags & MPTCP_SUBFLOW_FLAG_MAPVALID) + *cap++ = 'v'; + + if (flags) + printf(" flags:%s", caps); + } + if (tb[MPTCP_SUBFLOW_ATTR_TOKEN_REM] && + tb[MPTCP_SUBFLOW_ATTR_TOKEN_LOC] && + tb[MPTCP_SUBFLOW_ATTR_ID_REM] && + tb[MPTCP_SUBFLOW_ATTR_ID_LOC]) + printf(" token:%04x(id:%u)/%04x(id:%u)", + rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_TOKEN_REM]), + rta_getattr(__u8, tb[MPTCP_SUBFLOW_ATTR_ID_REM]), + rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_TOKEN_LOC]), + rta_getattr(__u8, tb[MPTCP_SUBFLOW_ATTR_ID_LOC])); + if (tb[MPTCP_SUBFLOW_ATTR_MAP_SEQ]) + printf(" seq:%llu", + rta_getattr(__u64, tb[MPTCP_SUBFLOW_ATTR_MAP_SEQ])); + if (tb[MPTCP_SUBFLOW_ATTR_MAP_SFSEQ]) + printf(" sfseq:%u", + rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_MAP_SFSEQ])); + if (tb[MPTCP_SUBFLOW_ATTR_SSN_OFFSET]) + printf(" ssnoff:%u", + rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_SSN_OFFSET])); + if (tb[MPTCP_SUBFLOW_ATTR_MAP_DATALEN]) + printf(" maplen:%u", + rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_MAP_DATALEN])); + printf("\n"); +} + +static void parse_nlmsg(struct nlmsghdr *nlh, __u32 proto) +{ + struct inet_diag_msg *r = NLMSG_DATA(nlh); + struct rtattr *tb[INET_DIAG_MAX + 1]; + + parse_rtattr_flags(tb, INET_DIAG_MAX, (struct rtattr *)(r + 1), + nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r)), + NLA_F_NESTED); + + if (proto == IPPROTO_MPTCP && tb[INET_DIAG_INFO]) { + int len = RTA_PAYLOAD(tb[INET_DIAG_INFO]); + struct mptcp_info *info; + + /* workaround fort older kernels with less fields */ + if (len < sizeof(*info)) { + info = alloca(sizeof(*info)); + memcpy(info, RTA_DATA(tb[INET_DIAG_INFO]), len); + memset((char *)info + len, 0, sizeof(*info) - len); + } else { + info = RTA_DATA(tb[INET_DIAG_INFO]); + } + print_info_msg(info); + } + if (proto == IPPROTO_TCP && tb[INET_DIAG_ULP_INFO]) { + struct rtattr *ulpinfo[INET_ULP_INFO_MAX + 1] = { 0 }; + + parse_rtattr_nested(ulpinfo, INET_ULP_INFO_MAX, + tb[INET_DIAG_ULP_INFO]); + + if (ulpinfo[INET_ULP_INFO_MPTCP]) { + struct rtattr *sfinfo[MPTCP_SUBFLOW_ATTR_MAX + 1] = { 0 }; + + parse_rtattr_nested(sfinfo, MPTCP_SUBFLOW_ATTR_MAX, + ulpinfo[INET_ULP_INFO_MPTCP]); + print_subflow_info(sfinfo); + } else { + printf("It's a normal TCP!\n"); + } + } +} + +static void recv_nlmsg(int fd, __u32 proto) +{ + char rcv_buff[8192]; + struct nlmsghdr *nlh = (struct nlmsghdr *)rcv_buff; + struct sockaddr_nl rcv_nladdr = { + .nl_family = AF_NETLINK + }; + struct iovec rcv_iov = { + .iov_base = rcv_buff, + .iov_len = sizeof(rcv_buff) + }; + struct msghdr rcv_msg = { + .msg_name = &rcv_nladdr, + .msg_namelen = sizeof(rcv_nladdr), + .msg_iov = &rcv_iov, + .msg_iovlen = 1 + }; + int len; + + len = recvmsg(fd, &rcv_msg, 0); + + while (NLMSG_OK(nlh, len)) { + if (nlh->nlmsg_type == NLMSG_DONE) { + printf("NLMSG_DONE\n"); + break; + } else if (nlh->nlmsg_type == NLMSG_ERROR) { + struct nlmsgerr *err; + + err = (struct nlmsgerr *)NLMSG_DATA(nlh); + printf("Error %d:%s\n", + -(err->error), strerror(-(err->error))); + break; + } + parse_nlmsg(nlh, proto); + nlh = NLMSG_NEXT(nlh, len); + } +} + +static void get_mptcpinfo(__u32 token) +{ + struct inet_diag_req_v2 r = { + .sdiag_family = AF_INET, + /* Real proto is set via INET_DIAG_REQ_PROTOCOL */ + .sdiag_protocol = IPPROTO_TCP, + .idiag_ext = 1 << (INET_DIAG_INFO - 1), + .id.idiag_cookie[0] = token, + }; + __u32 proto = IPPROTO_MPTCP; + int fd; + + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG); + if (fd < 0) + die_perror("Netlink socket"); + + send_query(fd, &r, proto); + recv_nlmsg(fd, proto); + + close(fd); +} + +static void get_subflow_info(char *subflow_addrs) +{ + struct inet_diag_req_v2 r = { + .sdiag_family = AF_INET, + .sdiag_protocol = IPPROTO_TCP, + .idiag_ext = 1 << (INET_DIAG_INFO - 1), + .id.idiag_cookie[0] = INET_DIAG_NOCOOKIE, + .id.idiag_cookie[1] = INET_DIAG_NOCOOKIE, + }; + char saddr[64], daddr[64]; + int sport, dport; + int ret; + int fd; + + ret = sscanf(subflow_addrs, "%[^:]:%d %[^:]:%d", saddr, &sport, daddr, &dport); + if (ret != 4) + die_perror("IP PORT Pairs has style problems!"); + + printf("%s:%d -> %s:%d\n", saddr, sport, daddr, dport); + + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG); + if (fd < 0) + die_perror("Netlink socket"); + + r.id.idiag_sport = htons(sport); + r.id.idiag_dport = htons(dport); + + inet_pton(AF_INET, saddr, &r.id.idiag_src); + inet_pton(AF_INET, daddr, &r.id.idiag_dst); + send_query(fd, &r, IPPROTO_TCP); + recv_nlmsg(fd, IPPROTO_TCP); +} + +static void parse_opts(int argc, char **argv, struct params *p) +{ + int c; + + if (argc < 2) + die_usage(1); + + while ((c = getopt(argc, argv, "ht:s:")) != -1) { + switch (c) { + case 'h': + die_usage(0); + break; + case 't': + sscanf(optarg, "%x", &p->target_token); + break; + case 's': + strncpy(p->subflow_addrs, optarg, + sizeof(p->subflow_addrs) - 1); + break; + default: + die_usage(1); + break; + } + } +} + +int main(int argc, char *argv[]) +{ + struct params p = { 0 }; + + parse_opts(argc, argv, &p); + + if (p.target_token) + get_mptcpinfo(p.target_token); + + if (p.subflow_addrs[0] != '\0') + get_subflow_info(p.subflow_addrs); + + return 0; +} + diff --git a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c index 218aac467321..5716998da192 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_inq.c +++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c @@ -28,6 +28,7 @@ #include <linux/tcp.h> #include <linux/sockios.h> +#include <linux/compiler.h> #ifndef IPPROTO_MPTCP #define IPPROTO_MPTCP 262 @@ -40,7 +41,7 @@ static int pf = AF_INET; static int proto_tx = IPPROTO_MPTCP; static int proto_rx = IPPROTO_MPTCP; -static void die_perror(const char *msg) +static void __noreturn die_perror(const char *msg) { perror(msg); exit(1); @@ -52,7 +53,7 @@ static void die_usage(int r) exit(r); } -static void xerror(const char *fmt, ...) +static void __noreturn xerror(const char *fmt, ...) { va_list ap; @@ -72,13 +73,22 @@ static const char *getxinfo_strerr(int err) } static void xgetaddrinfo(const char *node, const char *service, - const struct addrinfo *hints, + struct addrinfo *hints, struct addrinfo **res) { - int err = getaddrinfo(node, service, hints, res); + int err; +again: + err = getaddrinfo(node, service, hints, res); if (err) { - const char *errstr = getxinfo_strerr(err); + const char *errstr; + + if (err == EAI_SOCKTYPE) { + hints->ai_protocol = IPPROTO_TCP; + goto again; + } + + errstr = getxinfo_strerr(err); fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n", node ? node : "", service ? service : "", errstr); @@ -91,7 +101,7 @@ static int sock_listen_mptcp(const char * const listenaddr, { int sock = -1; struct addrinfo hints = { - .ai_protocol = IPPROTO_TCP, + .ai_protocol = IPPROTO_MPTCP, .ai_socktype = SOCK_STREAM, .ai_flags = AI_PASSIVE | AI_NUMERICHOST }; @@ -136,7 +146,7 @@ static int sock_connect_mptcp(const char * const remoteaddr, const char * const port, int proto) { struct addrinfo hints = { - .ai_protocol = IPPROTO_TCP, + .ai_protocol = IPPROTO_MPTCP, .ai_socktype = SOCK_STREAM, }; struct addrinfo *a, *addr; @@ -493,6 +503,7 @@ static int server(int unixfd) process_one_client(r, unixfd); + close(fd); return 0; } @@ -571,8 +582,12 @@ int main(int argc, char *argv[]) die_perror("pipe"); s = xfork(); - if (s == 0) - return server(unixfds[1]); + if (s == 0) { + close(unixfds[0]); + ret = server(unixfds[1]); + close(unixfds[1]); + return ret; + } close(unixfds[1]); diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 13a3b68181ee..beec41f6662a 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -8,7 +8,7 @@ # ShellCheck incorrectly believes that most of the code here is unreachable # because it's invoked by variable name, see how the "tests" array is used -#shellcheck disable=SC2317 +#shellcheck disable=SC2317,SC2329 . "$(dirname "${0}")/mptcp_lib.sh" @@ -62,6 +62,8 @@ unset sflags unset fastclose unset fullmesh unset speed +unset bind_addr +unset join_syn_rej unset join_csum_ns1 unset join_csum_ns2 unset join_fail_nr @@ -73,6 +75,17 @@ unset join_create_err unset join_bind_err unset join_connect_err +unset fb_ns1 +unset fb_ns2 +unset fb_infinite_map_tx +unset fb_dss_corruption +unset fb_simult_conn +unset fb_mpc_passive +unset fb_mpc_active +unset fb_mpc_data +unset fb_md5_sig +unset fb_dss + # generated using "nfbpf_compile '(ip && (ip[54] & 0xf0) == 0x30) || # (ip6 && (ip6[74] & 0xf0) == 0x30)'" CBPF_MPTCP_SUBOPTION_ADD_ADDR="14, @@ -91,6 +104,24 @@ CBPF_MPTCP_SUBOPTION_ADD_ADDR="14, 6 0 0 65535, 6 0 0 0" +# IPv4: TCP hdr of 48B, a first suboption of 12B (DACK8), the RM_ADDR suboption +# generated using "nfbpf_compile '(ip[32] & 0xf0) == 0xc0 && ip[53] == 0x0c && +# (ip[66] & 0xf0) == 0x40'" +CBPF_MPTCP_SUBOPTION_RM_ADDR="13, + 48 0 0 0, + 84 0 0 240, + 21 0 9 64, + 48 0 0 32, + 84 0 0 240, + 21 0 6 192, + 48 0 0 53, + 21 0 4 12, + 48 0 0 66, + 84 0 0 240, + 21 0 1 64, + 6 0 0 65535, + 6 0 0 0" + init_partial() { capout=$(mktemp) @@ -346,6 +377,7 @@ reset_with_add_addr_timeout() tables="${ip6tables}" fi + # set a maximum, to avoid too long timeout with exponential backoff ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1 if ! ip netns exec $ns2 $tables -A OUTPUT -p tcp \ @@ -589,8 +621,7 @@ wait_rm_addr() local old_cnt="${2}" local cnt - local i - for i in $(seq 10); do + for _ in $(seq 10); do cnt=$(rm_addr_count ${ns}) [ "$cnt" = "${old_cnt}" ] || break sleep 0.1 @@ -609,29 +640,46 @@ wait_rm_sf() local old_cnt="${2}" local cnt - local i - for i in $(seq 10); do + for _ in $(seq 10); do cnt=$(rm_sf_count ${ns}) [ "$cnt" = "${old_cnt}" ] || break sleep 0.1 done } +# $1: expected MPJ ACK Rx counter in $ns1 wait_mpj() { - local ns="${1}" - local cnt old_cnt + local exp_cnt="${1}" + local cnt - old_cnt=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPJoinAckRx") + for _ in $(seq 10); do + cnt=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckRx") + [ "${cnt}" = "${exp_cnt}" ] && break + sleep 0.1 + done +} - local i - for i in $(seq 10); do - cnt=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPJoinAckRx") - [ "$cnt" = "${old_cnt}" ] || break +wait_ll_ready() +{ + local ns="${1}" + + for _ in $(seq 50); do + ip -n "${ns}" -6 addr show scope link | grep "inet6 fe80" | + grep -qw "tentative" || break sleep 0.1 done } +get_ll_addr() +{ + local ns="${1}" + local iface="${2}" + + ip -n "${ns}" -6 addr show dev "${iface}" scope link | + grep "inet6 fe80" | sed 's#.*\(fe80::.*\)/.*#\1#' +} + kill_events_pids() { mptcp_lib_kill_wait $evts_ns1_pid @@ -938,6 +986,9 @@ do_transfer() local FAILING_LINKS=${FAILING_LINKS:-""} local fastclose=${fastclose:-""} local speed=${speed:-"fast"} + local bind_addr=${bind_addr:-"::"} + local listener_in="${sin}" + local connector_in="${cin}" port=$(get_port) :> "$cout" @@ -945,10 +996,8 @@ do_transfer() cond_start_capture ${listener_ns} - NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ - nstat -n - NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ - nstat -n + mptcp_lib_nstat_init "${listener_ns}" + mptcp_lib_nstat_init "${connector_ns}" local extra_args if [ $speed = "fast" ]; then @@ -986,42 +1035,40 @@ do_transfer() extra_srv_args="$extra_args $extra_srv_args" if [ "$test_linkfail" -gt 1 ];then - timeout ${timeout_test} \ - ip netns exec ${listener_ns} \ - ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ - $extra_srv_args "::" < "$sinfail" > "$sout" & - else - timeout ${timeout_test} \ - ip netns exec ${listener_ns} \ - ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ - $extra_srv_args "::" < "$sin" > "$sout" & + listener_in="${sinfail}" fi + ip netns exec ${listener_ns} \ + ./mptcp_connect -t ${timeout_poll} -l -p ${port} -s ${srv_proto} \ + ${extra_srv_args} "${bind_addr}" < "${listener_in}" > "${sout}" & local spid=$! mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}" extra_cl_args="$extra_args $extra_cl_args" if [ "$test_linkfail" -eq 0 ];then - timeout ${timeout_test} \ - ip netns exec ${connector_ns} \ - ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ - $extra_cl_args $connect_addr < "$cin" > "$cout" & + ip netns exec ${connector_ns} \ + ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ + $extra_cl_args $connect_addr < "$cin" > "$cout" & elif [ "$test_linkfail" -eq 1 ] || [ "$test_linkfail" -eq 2 ];then + connector_in="${cinsent}" ( cat "$cinfail" ; sleep 2; link_failure $listener_ns ; cat "$cinfail" ) | \ tee "$cinsent" | \ - timeout ${timeout_test} \ ip netns exec ${connector_ns} \ ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ $extra_cl_args $connect_addr > "$cout" & else + connector_in="${cinsent}" tee "$cinsent" < "$cinfail" | \ - timeout ${timeout_test} \ - ip netns exec ${connector_ns} \ - ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ - $extra_cl_args $connect_addr > "$cout" & + ip netns exec ${connector_ns} \ + ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ + $extra_cl_args $connect_addr > "$cout" & fi local cpid=$! + mptcp_lib_wait_timeout "${timeout_test}" "${listener_ns}" \ + "${connector_ns}" "${port}" "${cpid}" "${spid}" & + local timeout_pid=$! + pm_nl_set_endpoint $listener_ns $connector_ns $connect_addr check_cestab $listener_ns $connector_ns @@ -1030,31 +1077,26 @@ do_transfer() wait $spid local rets=$? + if kill -0 $timeout_pid; then + # Finished before the timeout: kill the background job + mptcp_lib_kill_group_wait $timeout_pid + timeout_pid=0 + fi + cond_stop_capture - NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ - nstat | grep Tcp > /tmp/${listener_ns}.out - NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ - nstat | grep Tcp > /tmp/${connector_ns}.out + mptcp_lib_nstat_get "${listener_ns}" + mptcp_lib_nstat_get "${connector_ns}" - if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then + if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ] || [ ${timeout_pid} -ne 0 ]; then fail_test "client exit code $retc, server $rets" - mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" \ - "/tmp/${listener_ns}.out" "/tmp/${connector_ns}.out" + mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" return 1 fi - if [ "$test_linkfail" -gt 1 ];then - check_transfer $sinfail $cout "file received by client" $trunc_size - else - check_transfer $sin $cout "file received by client" $trunc_size - fi + check_transfer $listener_in $cout "file received by client" $trunc_size retc=$? - if [ "$test_linkfail" -eq 0 ];then - check_transfer $cin $sout "file received by server" $trunc_size - else - check_transfer $cinsent $sout "file received by server" $trunc_size - fi + check_transfer $connector_in $sout "file received by server" $trunc_size rets=$? [ $retc -eq 0 ] && [ $rets -eq 0 ] @@ -1123,12 +1165,20 @@ run_tests() do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} } +_dump_stats() +{ + local ns="${1}" + local side="${2}" + + mptcp_lib_print_err "${side} ns stats (${ns2})" + mptcp_lib_pr_nstat "${ns}" + echo +} + dump_stats() { - echo Server ns stats - ip netns exec $ns1 nstat -as | grep Tcp - echo Client ns stats - ip netns exec $ns2 nstat -as | grep Tcp + _dump_stats "${ns1}" "Server" + _dump_stats "${ns2}" "Client" } chk_csum_nr() @@ -1370,7 +1420,7 @@ chk_join_tx_nr() count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTxCreatSkErr") if [ -z "$count" ]; then - rc=${KSFT_SKIP} + : # ignore skip elif [ "$count" != "$create" ]; then rc=${KSFT_FAIL} print_check "syn tx create socket error" @@ -1379,7 +1429,7 @@ chk_join_tx_nr() count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTxBindErr") if [ -z "$count" ]; then - rc=${KSFT_SKIP} + : # ignore skip elif [ "$count" != "$bind" ]; then rc=${KSFT_FAIL} print_check "syn tx bind error" @@ -1388,7 +1438,7 @@ chk_join_tx_nr() count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTxConnectErr") if [ -z "$count" ]; then - rc=${KSFT_SKIP} + : # ignore skip elif [ "$count" != "$connect" ]; then rc=${KSFT_FAIL} print_check "syn tx connect error" @@ -1398,11 +1448,121 @@ chk_join_tx_nr() print_results "join Tx" ${rc} } +chk_fallback_nr() +{ + local infinite_map_tx=${fb_infinite_map_tx:-0} + local dss_corruption=${fb_dss_corruption:-0} + local simult_conn=${fb_simult_conn:-0} + local mpc_passive=${fb_mpc_passive:-0} + local mpc_active=${fb_mpc_active:-0} + local mpc_data=${fb_mpc_data:-0} + local md5_sig=${fb_md5_sig:-0} + local dss=${fb_dss:-0} + local rc=${KSFT_PASS} + local ns=$1 + local count + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtInfiniteMapTx") + if [ -z "$count" ]; then + : # ignore skip + elif [ "$count" != "$infinite_map_tx" ]; then + rc=${KSFT_FAIL} + print_check "$ns infinite map tx fallback" + fail_test "got $count infinite map tx fallback[s] in $ns expected $infinite_map_tx" + fi + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtDSSCorruptionFallback") + if [ -z "$count" ]; then + : # ignore skip + elif [ "$count" != "$dss_corruption" ]; then + rc=${KSFT_FAIL} + print_check "$ns dss corruption fallback" + fail_test "got $count dss corruption fallback[s] in $ns expected $dss_corruption" + fi + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtSimultConnectFallback") + if [ -z "$count" ]; then + : # ignore skip + elif [ "$count" != "$simult_conn" ]; then + rc=${KSFT_FAIL} + print_check "$ns simult conn fallback" + fail_test "got $count simult conn fallback[s] in $ns expected $simult_conn" + fi + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableFallbackACK") + if [ -z "$count" ]; then + : # ignore skip + elif [ "$count" != "$mpc_passive" ]; then + rc=${KSFT_FAIL} + print_check "$ns mpc passive fallback" + fail_test "got $count mpc passive fallback[s] in $ns expected $mpc_passive" + fi + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableFallbackSYNACK") + if [ -z "$count" ]; then + : # ignore skip + elif [ "$count" != "$mpc_active" ]; then + rc=${KSFT_FAIL} + print_check "$ns mpc active fallback" + fail_test "got $count mpc active fallback[s] in $ns expected $mpc_active" + fi + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableDataFallback") + if [ -z "$count" ]; then + : # ignore skip + elif [ "$count" != "$mpc_data" ]; then + rc=${KSFT_FAIL} + print_check "$ns mpc data fallback" + fail_test "got $count mpc data fallback[s] in $ns expected $mpc_data" + fi + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMD5SigFallback") + if [ -z "$count" ]; then + : # ignore skip + elif [ "$count" != "$md5_sig" ]; then + rc=${KSFT_FAIL} + print_check "$ns MD5 Sig fallback" + fail_test "got $count MD5 Sig fallback[s] in $ns expected $md5_sig" + fi + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtDssFallback") + if [ -z "$count" ]; then + : # ignore skip + elif [ "$count" != "$dss" ]; then + rc=${KSFT_FAIL} + print_check "$ns dss fallback" + fail_test "got $count dss fallback[s] in $ns expected $dss" + fi + + return $rc +} + +chk_fallback_nr_all() +{ + local netns=("ns1" "ns2") + local fb_ns=("fb_ns1" "fb_ns2") + local rc=${KSFT_PASS} + + for i in 0 1; do + if [ -n "${!fb_ns[i]}" ]; then + eval "${!fb_ns[i]}" \ + chk_fallback_nr ${netns[i]} || rc=${?} + else + chk_fallback_nr ${netns[i]} || rc=${?} + fi + done + + if [ "${rc}" != "${KSFT_PASS}" ]; then + print_results "fallback" ${rc} + fi +} + chk_join_nr() { local syn_nr=$1 local syn_ack_nr=$2 local ack_nr=$3 + local syn_rej=${join_syn_rej:-0} local csum_ns1=${join_csum_ns1:-0} local csum_ns2=${join_csum_ns2:-0} local fail_nr=${join_fail_nr:-0} @@ -1441,20 +1601,49 @@ chk_join_nr() fi fi + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckHMacFailure") + if [ -z "$count" ]; then + : # ignore skip + elif [ "$count" != "0" ]; then + rc=${KSFT_FAIL} + print_check "synack HMAC" + fail_test "got $count JOIN[s] synack HMAC failure expected 0" + fi + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckRx") if [ -z "$count" ]; then - rc=${KSFT_SKIP} + : # ignore skip elif [ "$count" != "$ack_nr" ]; then rc=${KSFT_FAIL} print_check "ack rx" fail_test "got $count JOIN[s] ack rx expected $ack_nr" fi + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckHMacFailure") + if [ -z "$count" ]; then + : # ignore skip + elif [ "$count" != "0" ]; then + rc=${KSFT_FAIL} + print_check "ack HMAC" + fail_test "got $count JOIN[s] ack HMAC failure expected 0" + fi + + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinRejected") + if [ -z "$count" ]; then + : # ignore skip + elif [ "$count" != "$syn_rej" ]; then + rc=${KSFT_FAIL} + print_check "syn rejected" + fail_test "got $count JOIN[s] syn rejected expected $syn_rej" + fi + print_results "join Rx" ${rc} join_syn_tx="${join_syn_tx:-${syn_nr}}" \ chk_join_tx_nr + chk_fallback_nr_all + if $validate_checksum; then chk_csum_nr $csum_ns1 $csum_ns2 chk_fail_nr $fail_nr $fail_nr @@ -1474,7 +1663,6 @@ chk_stale_nr() local stale_min=$2 local stale_max=$3 local stale_delta=$4 - local dump_stats local stale_nr local recover_nr @@ -1490,16 +1678,11 @@ chk_stale_nr() fail_test "got $stale_nr stale[s] $recover_nr recover[s], " \ " expected stale in range [$stale_min..$stale_max]," \ " stale-recover delta $stale_delta" - dump_stats=1 + echo $ns stats + ip -n $ns -s link show else print_ok fi - - if [ "${dump_stats}" = 1 ]; then - echo $ns stats - ip netns exec $ns ip -s link show - ip netns exec $ns nstat -as | grep MPTcp - fi } chk_add_nr() @@ -1518,7 +1701,6 @@ chk_add_nr() local tx="" local rx="" local count - local timeout if [[ $ns_invert = "invert" ]]; then ns_tx=$ns2 @@ -1527,15 +1709,13 @@ chk_add_nr() rx=" server" fi - timeout=$(ip netns exec ${ns_tx} sysctl -n net.mptcp.add_addr_timeout) - print_check "add addr rx${rx}" count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtAddAddr") if [ -z "$count" ]; then print_skip - # if the test configured a short timeout tolerate greater then expected - # add addrs options, due to retransmissions - elif [ "$count" != "$add_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_nr" ]; }; then + # Tolerate more ADD_ADDR then expected (if any), due to retransmissions + elif [ "$count" != "$add_nr" ] && + { [ "$add_nr" -eq 0 ] || [ "$count" -lt "$add_nr" ]; }; then fail_test "got $count ADD_ADDR[s] expected $add_nr" else print_ok @@ -1623,18 +1803,15 @@ chk_add_tx_nr() { local add_tx_nr=$1 local echo_tx_nr=$2 - local timeout local count - timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout) - print_check "add addr tx" count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtAddAddrTx") if [ -z "$count" ]; then print_skip - # if the test configured a short timeout tolerate greater then expected - # add addrs options, due to retransmissions - elif [ "$count" != "$add_tx_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_tx_nr" ]; }; then + # Tolerate more ADD_ADDR then expected (if any), due to retransmissions + elif [ "$count" != "$add_tx_nr" ] && + { [ "$add_tx_nr" -eq 0 ] || [ "$count" -lt "$add_tx_nr" ]; }; then fail_test "got $count ADD_ADDR[s] TX, expected $add_tx_nr" else print_ok @@ -1945,7 +2122,8 @@ subflows_tests() pm_nl_set_limits $ns2 0 1 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 0 + join_syn_rej=1 \ + chk_join_nr 1 1 0 fi # subflow @@ -1974,7 +2152,8 @@ subflows_tests() pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 2 2 1 + join_syn_rej=1 \ + chk_join_nr 2 2 1 fi # single subflow, dev @@ -2120,7 +2299,8 @@ signal_address_tests() pm_nl_add_endpoint $ns1 10.0.3.1 flags signal pm_nl_add_endpoint $ns1 10.0.4.1 flags signal pm_nl_set_limits $ns2 3 3 - run_tests $ns1 $ns2 10.0.1.1 + speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 3 3 3 chk_add_nr 3 3 fi @@ -2132,7 +2312,8 @@ signal_address_tests() pm_nl_add_endpoint $ns1 10.0.3.1 flags signal pm_nl_add_endpoint $ns1 10.0.14.1 flags signal pm_nl_set_limits $ns2 3 3 - run_tests $ns1 $ns2 10.0.1.1 + speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 join_syn_tx=3 \ chk_join_nr 1 1 1 chk_add_nr 3 3 @@ -2155,21 +2336,88 @@ signal_address_tests() ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 3 3 3 # It is not directly linked to the commit introducing this # symbol but for the parent one which is linked anyway. - if ! mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then - chk_join_nr 3 3 2 - chk_add_nr 4 4 - else - chk_join_nr 3 3 3 + if mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then # the server will not signal the address terminating # the MPC subflow chk_add_nr 3 3 + else + chk_add_nr 4 4 fi fi } +laminar_endp_tests() +{ + # no laminar endpoints: routing rules are used + if reset_with_tcp_filter "without a laminar endpoint" ns1 10.0.2.2 REJECT && + continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + run_tests $ns1 $ns2 10.0.1.1 + join_syn_tx=1 \ + chk_join_nr 0 0 0 + chk_add_nr 1 1 + fi + + # laminar endpoints: this endpoint is used + if reset_with_tcp_filter "with a laminar endpoint" ns1 10.0.2.2 REJECT && + continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns2 10.0.3.2 flags laminar + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + fi + + # laminar endpoints: these endpoints are used + if reset_with_tcp_filter "with multiple laminar endpoints" ns1 10.0.2.2 REJECT && + continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_add_endpoint $ns2 dead:beef:3::2 flags laminar + pm_nl_add_endpoint $ns2 10.0.3.2 flags laminar + pm_nl_add_endpoint $ns2 10.0.4.2 flags laminar + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + chk_add_nr 2 2 + fi + + # laminar endpoints: only one endpoint is used + if reset_with_tcp_filter "single laminar endpoint" ns1 10.0.2.2 REJECT && + continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_add_endpoint $ns2 10.0.3.2 flags laminar + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 2 2 + fi + + # laminar endpoints: subflow and laminar flags + if reset_with_tcp_filter "sublow + laminar endpoints" ns1 10.0.2.2 REJECT && + continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then + pm_nl_set_limits $ns1 0 4 + pm_nl_set_limits $ns2 2 4 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,laminar + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,laminar + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + fi +} + link_failure_tests() { # accept and use add_addr with additional subflows and link loss @@ -2314,7 +2562,7 @@ remove_tests() if reset "remove single subflow"; then pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 0 1 - pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup addr_nr_ns2=-1 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 @@ -2327,8 +2575,8 @@ remove_tests() if reset "remove multiple subflows"; then pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 0 2 - pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow - pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,backup + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup addr_nr_ns2=-2 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 @@ -2339,7 +2587,7 @@ remove_tests() # single address, remove if reset "remove single address"; then pm_nl_set_limits $ns1 0 1 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup pm_nl_set_limits $ns2 1 1 addr_nr_ns1=-1 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 @@ -2352,9 +2600,9 @@ remove_tests() # subflow and signal, remove if reset "remove subflow and signal"; then pm_nl_set_limits $ns1 0 2 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup pm_nl_set_limits $ns2 1 2 - pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup addr_nr_ns1=-1 addr_nr_ns2=-1 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 @@ -2366,10 +2614,10 @@ remove_tests() # subflows and signal, remove if reset "remove subflows and signal"; then pm_nl_set_limits $ns1 0 3 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup pm_nl_set_limits $ns2 1 3 - pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow - pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow,backup addr_nr_ns1=-1 addr_nr_ns2=-2 speed=10 \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 3 3 3 @@ -2378,12 +2626,25 @@ remove_tests() chk_rst_nr 0 0 fi + # signal+subflow with limits, remove + if reset "remove signal+subflow with limits"; then + pm_nl_set_limits $ns1 0 0 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,subflow + pm_nl_set_limits $ns2 0 0 + addr_nr_ns1=-1 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + chk_add_nr 1 1 + chk_rm_nr 1 0 invert + chk_rst_nr 0 0 + fi + # addresses remove if reset "remove addresses"; then pm_nl_set_limits $ns1 3 3 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250 - pm_nl_add_endpoint $ns1 10.0.3.1 flags signal - pm_nl_add_endpoint $ns1 10.0.4.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup id 250 + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup + pm_nl_add_endpoint $ns1 10.0.4.1 flags signal,backup pm_nl_set_limits $ns2 3 3 addr_nr_ns1=-3 speed=10 \ run_tests $ns1 $ns2 10.0.1.1 @@ -2396,10 +2657,10 @@ remove_tests() # invalid addresses remove if reset "remove invalid addresses"; then pm_nl_set_limits $ns1 3 3 - pm_nl_add_endpoint $ns1 10.0.12.1 flags signal + pm_nl_add_endpoint $ns1 10.0.12.1 flags signal,backup # broadcast IP: no packet for this address will be received on ns1 - pm_nl_add_endpoint $ns1 224.0.0.1 flags signal - pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_add_endpoint $ns1 224.0.0.1 flags signal,backup + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup pm_nl_set_limits $ns2 2 2 addr_nr_ns1=-3 speed=10 \ run_tests $ns1 $ns2 10.0.1.1 @@ -2413,10 +2674,10 @@ remove_tests() # subflows and signal, flush if reset "flush subflows and signal"; then pm_nl_set_limits $ns1 0 3 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup pm_nl_set_limits $ns2 1 3 - pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow - pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow,backup addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 3 3 3 @@ -2429,9 +2690,9 @@ remove_tests() if reset "flush subflows"; then pm_nl_set_limits $ns1 3 3 pm_nl_set_limits $ns2 3 3 - pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow id 150 - pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow - pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,backup id 150 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow,backup addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 3 3 3 @@ -2448,9 +2709,9 @@ remove_tests() # addresses flush if reset "flush addresses"; then pm_nl_set_limits $ns1 3 3 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250 - pm_nl_add_endpoint $ns1 10.0.3.1 flags signal - pm_nl_add_endpoint $ns1 10.0.4.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup id 250 + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup + pm_nl_add_endpoint $ns1 10.0.4.1 flags signal,backup pm_nl_set_limits $ns2 3 3 addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 @@ -2463,9 +2724,9 @@ remove_tests() # invalid addresses flush if reset "flush invalid addresses"; then pm_nl_set_limits $ns1 3 3 - pm_nl_add_endpoint $ns1 10.0.12.1 flags signal - pm_nl_add_endpoint $ns1 10.0.3.1 flags signal - pm_nl_add_endpoint $ns1 10.0.14.1 flags signal + pm_nl_add_endpoint $ns1 10.0.12.1 flags signal,backup + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup + pm_nl_add_endpoint $ns1 10.0.14.1 flags signal,backup pm_nl_set_limits $ns2 3 3 addr_nr_ns1=-8 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 @@ -2734,7 +2995,11 @@ mixed_tests() pm_nl_add_endpoint $ns1 10.0.1.1 flags signal speed=slow \ run_tests $ns1 $ns2 dead:beef:2::1 - chk_join_nr 1 1 1 + if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_fullmesh_max$"; then + chk_join_nr 0 0 0 + else + chk_join_nr 1 1 1 + fi fi # fullmesh still tries to create all the possibly subflows with @@ -3015,6 +3280,133 @@ add_addr_ports_tests() fi } +bind_tests() +{ + # bind to one address should not allow extra subflows to other addresses + if reset "bind main address v4, no join v4"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + bind_addr="10.0.1.1" \ + run_tests $ns1 $ns2 10.0.1.1 + join_syn_tx=1 \ + chk_join_nr 0 0 0 + chk_add_nr 1 1 + fi + + # bind to one address should not allow extra subflows to other addresses + if reset "bind main address v6, no join v6"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal + bind_addr="dead:beef:1::1" \ + run_tests $ns1 $ns2 dead:beef:1::1 + join_syn_tx=1 \ + chk_join_nr 0 0 0 + chk_add_nr 1 1 + fi + + # multiple binds to allow extra subflows to other addresses + if reset "multiple bind to allow joins v4"; then + local extra_bind + + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + + # Launching another app listening on a different address + # Note: it could be a totally different app, e.g. nc, socat, ... + ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \ + -s MPTCP 10.0.2.1 & + extra_bind=$! + + bind_addr="10.0.1.1" \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + + kill ${extra_bind} + fi + + # multiple binds to allow extra subflows to other addresses + if reset "multiple bind to allow joins v6"; then + local extra_bind + + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal + + # Launching another app listening on a different address + # Note: it could be a totally different app, e.g. nc, socat, ... + ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \ + -s MPTCP dead:beef:2::1 & + extra_bind=$! + + bind_addr="dead:beef:1::1" \ + run_tests $ns1 $ns2 dead:beef:1::1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + + kill ${extra_bind} + fi + + # multiple binds to allow extra subflows to other addresses: v6 LL case + if reset "multiple bind to allow joins v6 link-local routing"; then + local extra_bind ns1ll1 ns1ll2 + + ns1ll1="$(get_ll_addr $ns1 ns1eth1)" + ns1ll2="$(get_ll_addr $ns1 ns1eth2)" + + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 "${ns1ll2}" flags signal + + wait_ll_ready $ns1 # to be able to bind + wait_ll_ready $ns2 # also needed to bind on the client side + ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \ + -s MPTCP "${ns1ll2}%ns1eth2" & + extra_bind=$! + + bind_addr="${ns1ll1}%ns1eth1" \ + run_tests $ns1 $ns2 "${ns1ll1}%ns2eth1" + # it is not possible to connect to the announced LL addr without + # specifying the outgoing interface. + join_connect_err=1 \ + chk_join_nr 0 0 0 + chk_add_nr 1 1 + + kill ${extra_bind} + fi + + # multiple binds to allow extra subflows to v6 LL addresses: laminar + if reset "multiple bind to allow joins v6 link-local laminar" && + continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then + local extra_bind ns1ll1 ns1ll2 ns2ll2 + + ns1ll1="$(get_ll_addr $ns1 ns1eth1)" + ns1ll2="$(get_ll_addr $ns1 ns1eth2)" + ns2ll2="$(get_ll_addr $ns2 ns2eth2)" + + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 "${ns1ll2}" flags signal + pm_nl_add_endpoint $ns2 "${ns2ll2}" flags laminar dev ns2eth2 + + wait_ll_ready $ns1 # to be able to bind + wait_ll_ready $ns2 # also needed to bind on the client side + ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \ + -s MPTCP "${ns1ll2}%ns1eth2" & + extra_bind=$! + + bind_addr="${ns1ll1}%ns1eth1" \ + run_tests $ns1 $ns2 "${ns1ll1}%ns2eth1" + chk_join_nr 1 1 1 + chk_add_nr 1 1 + + kill ${extra_bind} + fi +} + syncookies_tests() { # single subflow, syncookies @@ -3043,7 +3435,8 @@ syncookies_tests() pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 2 1 1 + join_syn_rej=1 \ + chk_join_nr 2 1 1 fi # test signal address with cookies @@ -3155,6 +3548,17 @@ deny_join_id0_tests() run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 fi + + # default limits, server deny join id 0 + signal + if reset_with_allow_join_id0 "default limits, server deny join id 0" 0 1; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + fi } fullmesh_tests() @@ -3270,7 +3674,6 @@ fullmesh_tests() fastclose_tests() { if reset_check_counter "fastclose test" "MPTcpExtMPFastcloseTx"; then - MPTCP_LIB_SUBTEST_FLAKY=1 test_linkfail=1024 fastclose=client \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 0 0 0 @@ -3279,7 +3682,6 @@ fastclose_tests() fi if reset_check_counter "fastclose server test" "MPTcpExtMPFastcloseRx"; then - MPTCP_LIB_SUBTEST_FLAKY=1 test_linkfail=1024 fastclose=server \ run_tests $ns1 $ns2 10.0.1.1 join_rst_nr=1 \ @@ -3305,6 +3707,7 @@ fail_tests() join_csum_ns1=+1 join_csum_ns2=+0 \ join_fail_nr=1 join_rst_nr=0 join_infi_nr=1 \ join_corrupted_pkts="$(pedit_action_pkts)" \ + fb_ns1="fb_dss=1" fb_ns2="fb_infinite_map_tx=1" \ chk_join_nr 0 0 0 chk_fail_nr 1 -1 invert fi @@ -3335,7 +3738,6 @@ userspace_pm_add_addr() tk=$(mptcp_lib_evts_get_info token "$evts") ip netns exec $1 ./pm_nl_ctl ann $2 token $tk id $3 - sleep 1 } # $1: ns ; $2: id @@ -3366,7 +3768,6 @@ userspace_pm_add_sf() ip netns exec $1 ./pm_nl_ctl csf lip $2 lid $3 \ rip $da rport $dp token $tk - sleep 1 } # $1: ns ; $2: addr $3: event type @@ -3463,21 +3864,28 @@ userspace_pm_chk_get_addr() fi } -# $1: ns ; $2: event type ; $3: count +# $1: ns ; $2: event type ; $3: count ; [ $4: attr ; $5: attr count ] chk_evt_nr() { local ns=${1} local evt_name="${2}" local exp="${3}" + local attr="${4}" + local attr_exp="${5}" local evts="${evts_ns1}" local evt="${!evt_name}" + local attr_name local count + if [ -n "${attr}" ]; then + attr_name=", ${attr}: ${attr_exp}" + fi + evt_name="${evt_name:16}" # without MPTCP_LIB_EVENT_ [ "${ns}" == "ns2" ] && evts="${evts_ns2}" - print_check "event ${ns} ${evt_name} (${exp})" + print_check "event ${ns} ${evt_name} (${exp}${attr_name})" if [[ "${evt_name}" = "LISTENER_"* ]] && ! mptcp_lib_kallsyms_has "mptcp_event_pm_listener$"; then @@ -3488,11 +3896,42 @@ chk_evt_nr() count=$(grep -cw "type:${evt}" "${evts}") if [ "${count}" != "${exp}" ]; then fail_test "got ${count} events, expected ${exp}" + cat "${evts}" + return + elif [ -z "${attr}" ]; then + print_ok + return + fi + + count=$(grep -w "type:${evt}" "${evts}" | grep -c ",${attr}:") + if [ "${count}" != "${attr_exp}" ]; then + fail_test "got ${count} event attributes, expected ${attr_exp}" + grep -w "type:${evt}" "${evts}" else print_ok fi } +# $1: ns ; $2: event type ; $3: expected count +wait_event() +{ + local ns="${1}" + local evt_name="${2}" + local exp="${3}" + + local evt="${!evt_name}" + local evts="${evts_ns1}" + local count + + [ "${ns}" == "ns2" ] && evts="${evts_ns2}" + + for _ in $(seq 100); do + count=$(grep -cw "type:${evt}" "${evts}") + [ "${count}" -ge "${exp}" ] && break + sleep 0.1 + done +} + userspace_tests() { # userspace pm type prevents add_addr @@ -3527,7 +3966,8 @@ userspace_tests() pm_nl_set_limits $ns2 1 1 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 0 + join_syn_rej=1 \ + chk_join_nr 1 1 0 fi # userspace pm type does not send join @@ -3550,7 +3990,8 @@ userspace_tests() pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow sflags=backup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 0 + join_syn_rej=1 \ + chk_join_nr 1 1 0 chk_prio_nr 0 0 0 0 fi @@ -3573,12 +4014,14 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns1 pm_nl_set_limits $ns2 2 2 - { speed=5 \ + { timeout_test=120 test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! - wait_mpj $ns1 + wait_event ns1 MPTCP_LIB_EVENT_ESTABLISHED 1 userspace_pm_add_addr $ns1 10.0.2.1 10 + wait_event ns2 MPTCP_LIB_EVENT_ANNOUNCED 1 userspace_pm_add_addr $ns1 10.0.3.1 20 + wait_event ns2 MPTCP_LIB_EVENT_ANNOUNCED 2 chk_join_nr 2 2 2 chk_add_nr 2 2 chk_mptcp_info subflows 2 subflows 2 @@ -3598,7 +4041,7 @@ userspace_tests() chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 kill_events_pids - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid fi # userspace pm create destroy subflow @@ -3606,11 +4049,12 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 1 - { speed=5 \ + { timeout_test=120 test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! - wait_mpj $ns2 + wait_event ns2 MPTCP_LIB_EVENT_ESTABLISHED 1 userspace_pm_add_sf $ns2 10.0.3.2 20 + wait_event ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 1 chk_join_nr 1 1 1 chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 2 2 @@ -3626,7 +4070,7 @@ userspace_tests() chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 kill_events_pids - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid fi # userspace pm create id 0 subflow @@ -3634,20 +4078,21 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 1 - { speed=5 \ + { timeout_test=120 test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! - wait_mpj $ns2 + wait_event ns2 MPTCP_LIB_EVENT_ESTABLISHED 1 chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 userspace_pm_add_sf $ns2 10.0.3.2 0 + wait_event ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 1 userspace_pm_chk_dump_addr "${ns2}" \ "id 0 flags subflow 10.0.3.2" "id 0 subflow" chk_join_nr 1 1 1 chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 2 2 kill_events_pids - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid fi # userspace pm remove initial subflow @@ -3655,11 +4100,12 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 1 - { speed=5 \ + { timeout_test=120 test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! - wait_mpj $ns2 + wait_event ns2 MPTCP_LIB_EVENT_ESTABLISHED 1 userspace_pm_add_sf $ns2 10.0.3.2 20 + wait_event ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 1 chk_join_nr 1 1 1 chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 2 2 @@ -3671,7 +4117,7 @@ userspace_tests() chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 1 1 kill_events_pids - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid fi # userspace pm send RM_ADDR for ID 0 @@ -3679,11 +4125,12 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns1 pm_nl_set_limits $ns2 1 1 - { speed=5 \ + { timeout_test=120 test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! - wait_mpj $ns1 + wait_event ns1 MPTCP_LIB_EVENT_ESTABLISHED 1 userspace_pm_add_addr $ns1 10.0.2.1 10 + wait_event ns2 MPTCP_LIB_EVENT_ANNOUNCED 1 chk_join_nr 1 1 1 chk_add_nr 1 1 chk_mptcp_info subflows 1 subflows 1 @@ -3697,7 +4144,38 @@ userspace_tests() chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 1 1 kill_events_pids - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid + fi + + # userspace pm no duplicated spurious close events after an error + if reset_with_events "userspace pm no dup close events after error" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then + set_userspace_pm $ns2 + pm_nl_set_limits $ns1 0 2 + { timeout_test=120 test_linkfail=128 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null + local tests_pid=$! + wait_event ns2 MPTCP_LIB_EVENT_ESTABLISHED 1 + userspace_pm_add_sf $ns2 10.0.3.2 20 + wait_event ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 1 + chk_mptcp_info subflows 1 subflows 1 + chk_subflows_total 2 2 + + # force quick loss + ip netns exec $ns2 sysctl -q net.ipv4.tcp_syn_retries=1 + if ip netns exec "${ns1}" ${iptables} -A INPUT -s "10.0.1.2" \ + -p tcp --tcp-option 30 -j REJECT --reject-with tcp-reset && + ip netns exec "${ns2}" ${iptables} -A INPUT -d "10.0.1.2" \ + -p tcp --tcp-option 30 -j REJECT --reject-with tcp-reset; then + wait_event ns2 MPTCP_LIB_EVENT_SUB_CLOSED 1 + wait_event ns1 MPTCP_LIB_EVENT_SUB_CLOSED 1 + chk_subflows_total 1 1 + userspace_pm_add_sf $ns2 10.0.1.2 0 + wait_event ns2 MPTCP_LIB_EVENT_SUB_CLOSED 2 + chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_CLOSED 2 error 2 + fi + kill_events_pids + mptcp_lib_kill_group_wait $tests_pid fi } @@ -3705,16 +4183,16 @@ endpoint_tests() { # subflow_rebuild_header is needed to support the implicit flag # userspace pm type prevents add_addr - if reset "implicit EP" && - mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + if reset_with_events "implicit EP" && + continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then pm_nl_set_limits $ns1 2 2 pm_nl_set_limits $ns2 2 2 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal - { speed=slow \ + { timeout_test=120 test_linkfail=128 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! - wait_mpj $ns1 + wait_event ns2 MPTCP_LIB_EVENT_ESTABLISHED 1 pm_nl_check_endpoint "creation" \ $ns2 10.0.2.2 id 1 flags implicit chk_mptcp_info subflows 1 subflows 1 @@ -3727,21 +4205,22 @@ endpoint_tests() pm_nl_add_endpoint $ns2 10.0.2.2 flags signal pm_nl_check_endpoint "modif is allowed" \ $ns2 10.0.2.2 id 1 flags signal - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid + kill_events_pids fi if reset_with_tcp_filter "delete and re-add" ns2 10.0.3.2 REJECT OUTPUT && - mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then start_events pm_nl_set_limits $ns1 0 3 pm_nl_set_limits $ns2 0 3 pm_nl_add_endpoint $ns2 10.0.1.2 id 1 dev ns2eth1 flags subflow pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow - { test_linkfail=4 speed=5 \ + { timeout_test=120 test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! - wait_mpj $ns2 + wait_event ns2 MPTCP_LIB_EVENT_ESTABLISHED 1 pm_nl_check_endpoint "creation" \ $ns2 10.0.2.2 id 2 flags subflow dev ns2eth2 chk_subflow_nr "before delete id 2" 2 @@ -3753,7 +4232,7 @@ endpoint_tests() chk_mptcp_info subflows 0 subflows 0 pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow - wait_mpj $ns2 + wait_mpj 2 chk_subflow_nr "after re-add id 2" 2 chk_mptcp_info subflows 1 subflows 1 @@ -3765,10 +4244,18 @@ endpoint_tests() ip netns exec "${ns2}" ${iptables} -D OUTPUT -s "10.0.3.2" -p tcp -j REJECT pm_nl_del_endpoint $ns2 3 10.0.3.2 pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow - wait_mpj $ns2 + wait_mpj 3 chk_subflow_nr "after no reject" 3 chk_mptcp_info subflows 2 subflows 2 + # To make sure RM_ADDR are sent over a different subflow, but + # allow the rest to quickly and cleanly close the subflow + local ipt=1 + ip netns exec "${ns2}" ${iptables} -I OUTPUT -s "10.0.1.2" \ + -p tcp -m tcp --tcp-option 30 \ + -m bpf --bytecode \ + "$CBPF_MPTCP_SUBOPTION_RM_ADDR" \ + -j DROP || ipt=0 local i for i in $(seq 3); do pm_nl_del_endpoint $ns2 1 10.0.1.2 @@ -3777,12 +4264,13 @@ endpoint_tests() chk_mptcp_info subflows 2 subflows 2 # only decr for additional sf pm_nl_add_endpoint $ns2 10.0.1.2 id 1 dev ns2eth1 flags subflow - wait_mpj $ns2 + wait_mpj $((3 + i)) chk_subflow_nr "after re-add id 0 ($i)" 3 chk_mptcp_info subflows 3 subflows 3 done + [ ${ipt} = 1 ] && ip netns exec "${ns2}" ${iptables} -D OUTPUT 1 - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid kill_events_pids chk_evt_nr ns1 MPTCP_LIB_EVENT_LISTENER_CREATED 1 @@ -3807,55 +4295,72 @@ endpoint_tests() # remove and re-add if reset_with_events "delete re-add signal" && - mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=0 pm_nl_set_limits $ns1 0 3 pm_nl_set_limits $ns2 3 3 pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal # broadcast IP: no packet for this address will be received on ns1 pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal pm_nl_add_endpoint $ns1 10.0.1.1 id 42 flags signal - { test_linkfail=4 speed=5 \ + { timeout_test=120 test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! - wait_mpj $ns2 + wait_event ns2 MPTCP_LIB_EVENT_ESTABLISHED 1 pm_nl_check_endpoint "creation" \ $ns1 10.0.2.1 id 1 flags signal chk_subflow_nr "before delete" 2 chk_mptcp_info subflows 1 subflows 1 + chk_mptcp_info add_addr_signal 2 add_addr_accepted 1 pm_nl_del_endpoint $ns1 1 10.0.2.1 pm_nl_del_endpoint $ns1 2 224.0.0.1 sleep 0.5 chk_subflow_nr "after delete" 1 chk_mptcp_info subflows 0 subflows 0 + chk_mptcp_info add_addr_signal 0 add_addr_accepted 0 pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal - wait_mpj $ns2 + wait_mpj 3 chk_subflow_nr "after re-add" 3 chk_mptcp_info subflows 2 subflows 2 + chk_mptcp_info add_addr_signal 2 add_addr_accepted 2 + # To make sure RM_ADDR are sent over a different subflow, but + # allow the rest to quickly and cleanly close the subflow + local ipt=1 + ip netns exec "${ns1}" ${iptables} -I OUTPUT -s "10.0.1.1" \ + -p tcp -m tcp --tcp-option 30 \ + -m bpf --bytecode \ + "$CBPF_MPTCP_SUBOPTION_RM_ADDR" \ + -j DROP || ipt=0 pm_nl_del_endpoint $ns1 42 10.0.1.1 sleep 0.5 chk_subflow_nr "after delete ID 0" 2 chk_mptcp_info subflows 2 subflows 2 + chk_mptcp_info add_addr_signal 2 add_addr_accepted 2 + [ ${ipt} = 1 ] && ip netns exec "${ns1}" ${iptables} -D OUTPUT 1 - pm_nl_add_endpoint $ns1 10.0.1.1 id 99 flags signal - wait_mpj $ns2 + pm_nl_add_endpoint $ns1 10.0.1.1 id 42 flags signal + wait_mpj 4 chk_subflow_nr "after re-add ID 0" 3 chk_mptcp_info subflows 3 subflows 3 + chk_mptcp_info add_addr_signal 3 add_addr_accepted 2 - pm_nl_del_endpoint $ns1 99 10.0.1.1 + pm_nl_del_endpoint $ns1 42 10.0.1.1 sleep 0.5 chk_subflow_nr "after re-delete ID 0" 2 chk_mptcp_info subflows 2 subflows 2 + chk_mptcp_info add_addr_signal 2 add_addr_accepted 2 pm_nl_add_endpoint $ns1 10.0.1.1 id 88 flags signal - wait_mpj $ns2 + wait_mpj 5 chk_subflow_nr "after re-re-add ID 0" 3 chk_mptcp_info subflows 3 subflows 3 - mptcp_lib_kill_wait $tests_pid + chk_mptcp_info add_addr_signal 3 add_addr_accepted 2 + mptcp_lib_kill_group_wait $tests_pid kill_events_pids chk_evt_nr ns1 MPTCP_LIB_EVENT_LISTENER_CREATED 1 @@ -3881,13 +4386,13 @@ endpoint_tests() # flush and re-add if reset_with_tcp_filter "flush re-add" ns2 10.0.3.2 REJECT OUTPUT && - mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 1 2 # broadcast IP: no packet for this address will be received on ns1 pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow - { test_linkfail=4 speed=20 \ + { timeout_test=120 test_linkfail=128 speed=20 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! @@ -3900,10 +4405,10 @@ endpoint_tests() wait_rm_addr $ns2 0 ip netns exec "${ns2}" ${iptables} -D OUTPUT -s "10.0.3.2" -p tcp -j REJECT pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow - wait_mpj $ns2 + wait_mpj 1 pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal - wait_mpj $ns2 - mptcp_lib_kill_wait $tests_pid + wait_mpj 2 + mptcp_lib_kill_group_wait $tests_pid join_syn_tx=3 join_connect_err=1 \ chk_join_nr 2 2 2 @@ -3943,6 +4448,7 @@ all_tests_sorted=( f@subflows_tests e@subflows_error_tests s@signal_address_tests + L@laminar_endp_tests l@link_failure_tests t@add_addr_timeout_tests r@remove_tests @@ -3952,6 +4458,7 @@ all_tests_sorted=( M@mixed_tests b@backup_tests p@add_addr_ports_tests + B@bind_tests k@syncookies_tests S@checksum_tests d@deny_join_id0_tests diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 051e289d7967..5fea7e7df628 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -2,7 +2,6 @@ # SPDX-License-Identifier: GPL-2.0 . "$(dirname "${0}")/../lib.sh" -. "$(dirname "${0}")/../net_helper.sh" readonly KSFT_PASS=0 readonly KSFT_FAIL=1 @@ -107,23 +106,32 @@ mptcp_lib_pr_info() { mptcp_lib_print_info "INFO: ${*}" } -# $1-2: listener/connector ns ; $3 port ; $4-5 listener/connector stat file +mptcp_lib_pr_nstat() { + local ns="${1}" + local hist="/tmp/${ns}.out" + + if [ -f "${hist}" ]; then + awk '$2 != 0 { print " "$0 }' "${hist}" + else + ip netns exec "${ns}" nstat -as | grep Tcp + fi +} + +# $1-2: listener/connector ns ; $3 port mptcp_lib_pr_err_stats() { local lns="${1}" local cns="${2}" local port="${3}" - local lstat="${4}" - local cstat="${5}" echo -en "${MPTCP_LIB_COLOR_RED}" { printf "\nnetns %s (listener) socket stat for %d:\n" "${lns}" "${port}" ip netns exec "${lns}" ss -Menitam -o "sport = :${port}" - cat "${lstat}" + mptcp_lib_pr_nstat "${lns}" printf "\nnetns %s (connector) socket stat for %d:\n" "${cns}" "${port}" ip netns exec "${cns}" ss -Menitam -o "dport = :${port}" - [ "${lstat}" != "${cstat}" ] && cat "${cstat}" + [ "${lns}" != "${cns}" ] && mptcp_lib_pr_nstat "${cns}" } 1>&2 echo -en "${MPTCP_LIB_COLOR_RESET}" } @@ -331,12 +339,28 @@ mptcp_lib_result_print_all_tap() { # get the value of keyword $1 in the line marked by keyword $2 mptcp_lib_get_info_value() { - grep "${2}" | sed -n 's/.*\('"${1}"':\)\([0-9a-f:.]*\).*$/\2/p;q' + grep "${2}" 2>/dev/null | + sed -n 's/.*\('"${1}"':\)\([0-9a-f:.]*\).*$/\2/p;q' + # the ';q' at the end limits to the first matched entry. } # $1: info name ; $2: evts_ns ; [$3: event type; [$4: addr]] mptcp_lib_evts_get_info() { - grep "${4:-}" "${2}" | mptcp_lib_get_info_value "${1}" "^type:${3:-1}," + grep "${4:-}" "${2}" 2>/dev/null | + mptcp_lib_get_info_value "${1}" "^type:${3:-1}," +} + +mptcp_lib_wait_timeout() { + local timeout_test="${1}" + local listener_ns="${2}" + local connector_ns="${3}" + local port="${4}" + shift 4 # rest are PIDs + + sleep "${timeout_test}" + mptcp_lib_print_err "timeout" + mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" + kill "${@}" 2>/dev/null } # $1: PID @@ -348,19 +372,62 @@ mptcp_lib_kill_wait() { wait "${1}" 2>/dev/null } +# $1: PID +mptcp_lib_pid_list_children() { + local curr="${1}" + # evoke 'ps' only once + local pids="${2:-"$(ps o pid,ppid)"}" + + echo "${curr}" + + local pid + for pid in $(echo "${pids}" | awk "\$2 == ${curr} { print \$1 }"); do + mptcp_lib_pid_list_children "${pid}" "${pids}" + done +} + +# $1: PID +mptcp_lib_kill_group_wait() { + # Some users might not have procps-ng: cannot use "kill -- -PID" + mptcp_lib_pid_list_children "${1}" | xargs -r kill &>/dev/null + wait "${1}" 2>/dev/null +} + # $1: IP address mptcp_lib_is_v6() { [ -z "${1##*:*}" ] } +mptcp_lib_nstat_init() { + local ns="${1}" + + rm -f "/tmp/${ns}."{nstat,out} + NSTAT_HISTORY="/tmp/${ns}.nstat" ip netns exec "${ns}" nstat -n +} + +mptcp_lib_nstat_get() { + local ns="${1}" + + # filter out non-*TCP stats, and the rate (last column) + NSTAT_HISTORY="/tmp/${ns}.nstat" ip netns exec "${ns}" nstat -sz | + grep -o ".*Tcp\S\+\s\+[0-9]\+" > "/tmp/${ns}.out" +} + # $1: ns, $2: MIB counter +# Get the counter from the history (mptcp_lib_nstat_{init,get}()) if available. +# If not, get the counter from nstat ignoring any history. mptcp_lib_get_counter() { local ns="${1}" local counter="${2}" + local hist="/tmp/${ns}.out" local count - count=$(ip netns exec "${ns}" nstat -asz "${counter}" | - awk 'NR==1 {next} {print $2}') + if [[ -s "${hist}" && "${counter}" == *"Tcp"* ]]; then + count=$(awk "/^${counter} / {print \$2; exit}" "${hist}") + else + count=$(ip netns exec "${ns}" nstat -asz "${counter}" | + awk 'NR==1 {next} {print $2}') + fi if [ -z "${count}" ]; then mptcp_lib_fail_if_expected_feature "${counter} counter" return 1 @@ -382,7 +449,7 @@ mptcp_lib_make_file() { mptcp_lib_print_file_err() { ls -l "${1}" 1>&2 echo "Trailing bytes are: " - tail -c 27 "${1}" + tail -c 32 "${1}" | od -x | head -n2 } # $1: input file ; $2: output file ; $3: what kind of file @@ -476,8 +543,6 @@ mptcp_lib_ns_init() { local netns for netns in "${@}"; do ip netns exec "${!netns}" sysctl -q net.mptcp.enabled=1 - ip netns exec "${!netns}" sysctl -q net.ipv4.conf.all.rp_filter=0 - ip netns exec "${!netns}" sysctl -q net.ipv4.conf.default.rp_filter=0 done } diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c index 926b0be87c99..b6e58d936ebe 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c @@ -25,6 +25,7 @@ #include <netinet/in.h> #include <linux/tcp.h> +#include <linux/compiler.h> static int pf = AF_INET; @@ -127,7 +128,7 @@ struct so_state { #define MIN(a, b) ((a) < (b) ? (a) : (b)) #endif -static void die_perror(const char *msg) +static void __noreturn die_perror(const char *msg) { perror(msg); exit(1); @@ -139,7 +140,7 @@ static void die_usage(int r) exit(r); } -static void xerror(const char *fmt, ...) +static void __noreturn xerror(const char *fmt, ...) { va_list ap; @@ -159,13 +160,22 @@ static const char *getxinfo_strerr(int err) } static void xgetaddrinfo(const char *node, const char *service, - const struct addrinfo *hints, + struct addrinfo *hints, struct addrinfo **res) { - int err = getaddrinfo(node, service, hints, res); + int err; +again: + err = getaddrinfo(node, service, hints, res); if (err) { - const char *errstr = getxinfo_strerr(err); + const char *errstr; + + if (err == EAI_SOCKTYPE) { + hints->ai_protocol = IPPROTO_TCP; + goto again; + } + + errstr = getxinfo_strerr(err); fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n", node ? node : "", service ? service : "", errstr); @@ -178,7 +188,7 @@ static int sock_listen_mptcp(const char * const listenaddr, { int sock = -1; struct addrinfo hints = { - .ai_protocol = IPPROTO_TCP, + .ai_protocol = IPPROTO_MPTCP, .ai_socktype = SOCK_STREAM, .ai_flags = AI_PASSIVE | AI_NUMERICHOST }; @@ -223,7 +233,7 @@ static int sock_connect_mptcp(const char * const remoteaddr, const char * const port, int proto) { struct addrinfo hints = { - .ai_protocol = IPPROTO_TCP, + .ai_protocol = IPPROTO_MPTCP, .ai_socktype = SOCK_STREAM, }; struct addrinfo *a, *addr; @@ -658,22 +668,26 @@ static void process_one_client(int fd, int pipefd) do_getsockopts(&s, fd, ret, ret2); if (s.mptcpi_rcv_delta != (uint64_t)ret + 1) - xerror("mptcpi_rcv_delta %" PRIu64 ", expect %" PRIu64, s.mptcpi_rcv_delta, ret + 1, s.mptcpi_rcv_delta - ret); + xerror("mptcpi_rcv_delta %" PRIu64 ", expect %" PRIu64 ", diff %" PRId64, + s.mptcpi_rcv_delta, ret + 1, s.mptcpi_rcv_delta - (ret + 1)); /* be nice when running on top of older kernel */ if (s.pkt_stats_avail) { if (s.last_sample.mptcpi_bytes_sent != ret2) - xerror("mptcpi_bytes_sent %" PRIu64 ", expect %" PRIu64, + xerror("mptcpi_bytes_sent %" PRIu64 ", expect %" PRIu64 + ", diff %" PRId64, s.last_sample.mptcpi_bytes_sent, ret2, s.last_sample.mptcpi_bytes_sent - ret2); if (s.last_sample.mptcpi_bytes_received != ret) - xerror("mptcpi_bytes_received %" PRIu64 ", expect %" PRIu64, + xerror("mptcpi_bytes_received %" PRIu64 ", expect %" PRIu64 + ", diff %" PRId64, s.last_sample.mptcpi_bytes_received, ret, s.last_sample.mptcpi_bytes_received - ret); if (s.last_sample.mptcpi_bytes_acked != ret) - xerror("mptcpi_bytes_acked %" PRIu64 ", expect %" PRIu64, - s.last_sample.mptcpi_bytes_acked, ret2, - s.last_sample.mptcpi_bytes_acked - ret2); + xerror("mptcpi_bytes_acked %" PRIu64 ", expect %" PRIu64 + ", diff %" PRId64, + s.last_sample.mptcpi_bytes_acked, ret, + s.last_sample.mptcpi_bytes_acked - ret); } close(fd); @@ -713,6 +727,7 @@ static int server(int pipefd) process_one_client(r, pipefd); + close(fd); return 0; } @@ -838,8 +853,12 @@ int main(int argc, char *argv[]) die_perror("pipe"); s = xfork(); - if (s == 0) - return server(pipefds[1]); + if (s == 0) { + close(pipefds[0]); + ret = server(pipefds[1]); + close(pipefds[1]); + return ret; + } close(pipefds[1]); diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index 418a903c3a4d..ab8bce06b262 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -95,7 +95,7 @@ init() } # This function is used in the cleanup trap -#shellcheck disable=SC2317 +#shellcheck disable=SC2317,SC2329 cleanup() { mptcp_lib_ns_exit "${ns1}" "${ns2}" "${ns_sbox}" @@ -169,41 +169,44 @@ do_transfer() cmsg+=",TCPINQ" fi - NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ - nstat -n - NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ - nstat -n + mptcp_lib_nstat_init "${listener_ns}" + mptcp_lib_nstat_init "${connector_ns}" - timeout ${timeout_test} \ - ip netns exec ${listener_ns} \ - $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c "${cmsg}" \ - ${local_addr} < "$sin" > "$sout" & + ip netns exec ${listener_ns} \ + $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c "${cmsg}" \ + ${local_addr} < "$sin" > "$sout" & local spid=$! - sleep 1 + mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}" - timeout ${timeout_test} \ - ip netns exec ${connector_ns} \ - $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c "${cmsg}" \ - $connect_addr < "$cin" > "$cout" & + ip netns exec ${connector_ns} \ + $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c "${cmsg}" \ + $connect_addr < "$cin" > "$cout" & local cpid=$! + mptcp_lib_wait_timeout "${timeout_test}" "${listener_ns}" \ + "${connector_ns}" "${port}" "${cpid}" "${spid}" & + local timeout_pid=$! + wait $cpid local retc=$? wait $spid local rets=$? - NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ - nstat | grep Tcp > /tmp/${listener_ns}.out - NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ - nstat | grep Tcp > /tmp/${connector_ns}.out + if kill -0 $timeout_pid; then + # Finished before the timeout: kill the background job + mptcp_lib_kill_group_wait $timeout_pid + timeout_pid=0 + fi + + mptcp_lib_nstat_get "${listener_ns}" + mptcp_lib_nstat_get "${connector_ns}" print_title "Transfer ${ip:2}" - if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then + if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ] || [ ${timeout_pid} -ne 0 ]; then mptcp_lib_pr_fail "client exit code $retc, server $rets" - mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" \ - "/tmp/${listener_ns}.out" "/tmp/${connector_ns}.out" + mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" mptcp_lib_result_fail "transfer ${ip}" diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 2e6648a2b2c0..123d9d7a0278 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -32,7 +32,7 @@ ns1="" err=$(mktemp) # This function is used in the cleanup trap -#shellcheck disable=SC2317 +#shellcheck disable=SC2317,SC2329 cleanup() { rm -f "${err}" @@ -70,8 +70,9 @@ format_endpoints() { mptcp_lib_pm_nl_format_endpoints "${@}" } +# This function is invoked indirectly +#shellcheck disable=SC2317,SC2329 get_endpoint() { - # shellcheck disable=SC2317 # invoked indirectly mptcp_lib_pm_nl_get_endpoint "${ns1}" "${@}" } @@ -191,6 +192,10 @@ check "show_endpoints" \ flush_endpoint check "show_endpoints" "" "flush addrs" +add_endpoint 10.0.1.1 flags unknown +check "show_endpoints" "$(format_endpoints "1,10.0.1.1")" "ignore unknown flags" +flush_endpoint + set_limits 9 1 2>/dev/null check "get_limits" "${default_limits}" "rcv addrs above hard limit" @@ -198,6 +203,7 @@ set_limits 1 9 2>/dev/null check "get_limits" "${default_limits}" "subflows above hard limit" set_limits 8 8 +flush_endpoint ## to make sure it doesn't affect the limits check "get_limits" "$(format_limits 8 8)" "set limits" flush_endpoint diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c index 994a556f46c1..99eecccbf0c8 100644 --- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c +++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c @@ -2,6 +2,7 @@ #include <errno.h> #include <error.h> +#include <stdbool.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -23,6 +24,8 @@ #define IPPROTO_MPTCP 262 #endif +#define MPTCP_PM_ADDR_FLAG_UNKNOWN _BITUL(7) + static void syntax(char *argv[]) { fprintf(stderr, "%s add|ann|rem|csf|dsf|get|set|del|flush|dump|events|listen|accept [<args>]\n", argv[0]); @@ -113,6 +116,8 @@ static int capture_events(int fd, int event_group) error(1, errno, "could not join the " MPTCP_PM_EV_GRP_NAME " mcast group"); do { + bool server_side = false; + FD_ZERO(&rfds); FD_SET(fd, &rfds); res_len = NLMSG_ALIGN(sizeof(struct nlmsghdr)) + @@ -187,11 +192,22 @@ static int capture_events(int fd, int event_group) else if (attrs->rta_type == MPTCP_ATTR_ERROR) fprintf(stderr, ",error:%u", *(__u8 *)RTA_DATA(attrs)); else if (attrs->rta_type == MPTCP_ATTR_SERVER_SIDE) - fprintf(stderr, ",server_side:%u", *(__u8 *)RTA_DATA(attrs)); + server_side = !!*(__u8 *)RTA_DATA(attrs); + else if (attrs->rta_type == MPTCP_ATTR_FLAGS) { + __u16 flags = *(__u16 *)RTA_DATA(attrs); + + /* only print when present, easier */ + if (flags & MPTCP_PM_EV_FLAG_DENY_JOIN_ID0) + fprintf(stderr, ",deny_join_id0:1"); + if (flags & MPTCP_PM_EV_FLAG_SERVER_SIDE) + server_side = true; + } attrs = RTA_NEXT(attrs, msg_len); } } + if (server_side) + fprintf(stderr, ",server_side:1"); fprintf(stderr, "\n"); } while (1); @@ -816,10 +832,14 @@ int add_addr(int fd, int pm_family, int argc, char *argv[]) flags |= MPTCP_PM_ADDR_FLAG_SUBFLOW; else if (!strcmp(tok, "signal")) flags |= MPTCP_PM_ADDR_FLAG_SIGNAL; + else if (!strcmp(tok, "laminar")) + flags |= MPTCP_PM_ADDR_FLAG_LAMINAR; else if (!strcmp(tok, "backup")) flags |= MPTCP_PM_ADDR_FLAG_BACKUP; else if (!strcmp(tok, "fullmesh")) flags |= MPTCP_PM_ADDR_FLAG_FULLMESH; + else if (!strcmp(tok, "unknown")) + flags |= MPTCP_PM_ADDR_FLAG_UNKNOWN; else error(1, errno, "unknown flag %s", argv[arg]); @@ -1004,6 +1024,13 @@ static void print_addr(struct rtattr *attrs, int len) printf(","); } + if (flags & MPTCP_PM_ADDR_FLAG_LAMINAR) { + printf("laminar"); + flags &= ~MPTCP_PM_ADDR_FLAG_LAMINAR; + if (flags) + printf(","); + } + if (flags & MPTCP_PM_ADDR_FLAG_BACKUP) { printf("backup"); flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP; @@ -1025,6 +1052,13 @@ static void print_addr(struct rtattr *attrs, int len) printf(","); } + if (flags & MPTCP_PM_ADDR_FLAG_UNKNOWN) { + printf("unknown"); + flags &= ~MPTCP_PM_ADDR_FLAG_UNKNOWN; + if (flags) + printf(","); + } + /* bump unknown flags, if any */ if (flags) printf("0x%x", flags); diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index 9c2a415976cb..d11a8b949aab 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -28,14 +28,14 @@ size=0 usage() { echo "Usage: $0 [ -b ] [ -c ] [ -d ] [ -i]" - echo -e "\t-b: bail out after first error, otherwise runs al testcases" + echo -e "\t-b: bail out after first error, otherwise runs all testcases" echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)" echo -e "\t-d: debug this script" echo -e "\t-i: use 'ip mptcp' instead of 'pm_nl_ctl'" } # This function is used in the cleanup trap -#shellcheck disable=SC2317 +#shellcheck disable=SC2317,SC2329 cleanup() { rm -f "$cout" "$sout" @@ -155,48 +155,53 @@ do_transfer() sleep 1 fi - NSTAT_HISTORY=/tmp/${ns3}.nstat ip netns exec ${ns3} \ - nstat -n - NSTAT_HISTORY=/tmp/${ns1}.nstat ip netns exec ${ns1} \ - nstat -n + mptcp_lib_nstat_init "${ns3}" + mptcp_lib_nstat_init "${ns1}" - timeout ${timeout_test} \ - ip netns exec ${ns3} \ - ./mptcp_connect -jt ${timeout_poll} -l -p $port -T $max_time \ - 0.0.0.0 < "$sin" > "$sout" & + ip netns exec ${ns3} \ + ./mptcp_connect -jt ${timeout_poll} -l -p $port -T $max_time \ + 0.0.0.0 < "$sin" > "$sout" & local spid=$! mptcp_lib_wait_local_port_listen "${ns3}" "${port}" - timeout ${timeout_test} \ - ip netns exec ${ns1} \ - ./mptcp_connect -jt ${timeout_poll} -p $port -T $max_time \ - 10.0.3.3 < "$cin" > "$cout" & + ip netns exec ${ns1} \ + ./mptcp_connect -jt ${timeout_poll} -p $port -T $max_time \ + 10.0.3.3 < "$cin" > "$cout" & local cpid=$! + mptcp_lib_wait_timeout "${timeout_test}" "${ns3}" "${ns1}" "${port}" \ + "${cpid}" "${spid}" & + local timeout_pid=$! + wait $cpid local retc=$? wait $spid local rets=$? + if kill -0 $timeout_pid; then + # Finished before the timeout: kill the background job + mptcp_lib_kill_group_wait $timeout_pid + timeout_pid=0 + fi + if $capture; then sleep 1 kill ${cappid_listener} kill ${cappid_connector} fi - NSTAT_HISTORY=/tmp/${ns3}.nstat ip netns exec ${ns3} \ - nstat | grep Tcp > /tmp/${ns3}.out - NSTAT_HISTORY=/tmp/${ns1}.nstat ip netns exec ${ns1} \ - nstat | grep Tcp > /tmp/${ns1}.out + mptcp_lib_nstat_get "${ns3}" + mptcp_lib_nstat_get "${ns1}" cmp $sin $cout > /dev/null 2>&1 local cmps=$? cmp $cin $sout > /dev/null 2>&1 local cmpc=$? - if [ $retc -eq 0 ] && [ $rets -eq 0 ] && \ - [ $cmpc -eq 0 ] && [ $cmps -eq 0 ]; then + if [ $retc -eq 0 ] && [ $rets -eq 0 ] && + [ $cmpc -eq 0 ] && [ $cmps -eq 0 ] && + [ $timeout_pid -eq 0 ]; then printf "%-16s" " max $max_time " mptcp_lib_pr_ok cat "$capout" @@ -204,8 +209,7 @@ do_transfer() fi mptcp_lib_pr_fail "client exit code $retc, server $rets" - mptcp_lib_pr_err_stats "${ns3}" "${ns1}" "${port}" \ - "/tmp/${ns3}.out" "/tmp/${ns1}.out" + mptcp_lib_pr_err_stats "${ns3}" "${ns1}" "${port}" ls -l $sin $cout ls -l $cin $sout @@ -233,10 +237,13 @@ run_test() for dev in ns2eth1 ns2eth2; do tc -n $ns2 qdisc del dev $dev root >/dev/null 2>&1 done - tc -n $ns1 qdisc add dev ns1eth1 root netem rate ${rate1}mbit $delay1 - tc -n $ns1 qdisc add dev ns1eth2 root netem rate ${rate2}mbit $delay2 - tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1 - tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2 + + # keep the queued pkts number low, or the RTT estimator will see + # increasing latency over time. + tc -n $ns1 qdisc add dev ns1eth1 root netem rate ${rate1}mbit $delay1 limit 50 + tc -n $ns1 qdisc add dev ns1eth2 root netem rate ${rate2}mbit $delay2 limit 50 + tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1 limit 50 + tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2 limit 50 # time is measured in ms, account for transfer size, aggregated link speed # and header overhead (10%) diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 3651f73451cf..e9ae1806ab07 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -94,7 +94,7 @@ test_fail() } # This function is used in the cleanup trap -#shellcheck disable=SC2317 +#shellcheck disable=SC2317,SC2329 cleanup() { print_title "Cleanup" @@ -117,7 +117,36 @@ cleanup() trap cleanup EXIT # Create and configure network namespaces for testing +print_title "Init" mptcp_lib_ns_init ns1 ns2 + +# check path_manager and pm_type sysctl mapping +if [ -f /proc/sys/net/mptcp/path_manager ]; then + ip netns exec "$ns1" sysctl -q net.mptcp.path_manager=userspace + pm_type="$(ip netns exec "$ns1" sysctl -n net.mptcp.pm_type)" + if [ "${pm_type}" != "1" ]; then + test_fail "unexpected pm_type: ${pm_type}" + mptcp_lib_result_print_all_tap + exit ${KSFT_FAIL} + fi + + ip netns exec "$ns1" sysctl -q net.mptcp.path_manager=error 2>/dev/null + pm_type="$(ip netns exec "$ns1" sysctl -n net.mptcp.pm_type)" + if [ "${pm_type}" != "1" ]; then + test_fail "unexpected pm_type after error: ${pm_type}" + mptcp_lib_result_print_all_tap + exit ${KSFT_FAIL} + fi + + ip netns exec "$ns1" sysctl -q net.mptcp.pm_type=0 + pm_name="$(ip netns exec "$ns1" sysctl -n net.mptcp.path_manager)" + if [ "${pm_name}" != "kernel" ]; then + test_fail "unexpected path-manager: ${pm_name}" + mptcp_lib_result_print_all_tap + exit ${KSFT_FAIL} + fi +fi + for i in "$ns1" "$ns2" ;do ip netns exec "$i" sysctl -q net.mptcp.pm_type=1 done @@ -152,7 +181,6 @@ mptcp_lib_events "${ns1}" "${server_evts}" server_evts_pid sleep 0.5 mptcp_lib_subtests_last_ts_reset -print_title "Init" print_test "Created network namespaces ns1, ns2" test_pass @@ -173,6 +201,9 @@ make_connection() is_v6="v4" fi + # set this on the client side only: will not affect the rest + ip netns exec "$ns2" sysctl -q net.mptcp.allow_join_initial_addr_port=0 + :>"$client_evts" :>"$server_evts" @@ -180,7 +211,8 @@ make_connection() ip netns exec "$ns1" \ ./mptcp_connect -s MPTCP -w 300 -p $app_port -l $listen_addr > /dev/null 2>&1 & local server_pid=$! - sleep 0.5 + + mptcp_lib_wait_local_port_listen "${ns1}" "${port}" # Run the client, transfer $file and stay connected to the server # to conduct tests @@ -195,23 +227,28 @@ make_connection() local client_token local client_port local client_serverside + local client_nojoin local server_token local server_serverside + local server_nojoin client_token=$(mptcp_lib_evts_get_info token "$client_evts") client_port=$(mptcp_lib_evts_get_info sport "$client_evts") client_serverside=$(mptcp_lib_evts_get_info server_side "$client_evts") + client_nojoin=$(mptcp_lib_evts_get_info deny_join_id0 "$client_evts") server_token=$(mptcp_lib_evts_get_info token "$server_evts") server_serverside=$(mptcp_lib_evts_get_info server_side "$server_evts") + server_nojoin=$(mptcp_lib_evts_get_info deny_join_id0 "$server_evts") print_test "Established IP${is_v6} MPTCP Connection ns2 => ns1" - if [ "$client_token" != "" ] && [ "$server_token" != "" ] && [ "$client_serverside" = 0 ] && - [ "$server_serverside" = 1 ] + if [ "${client_token}" != "" ] && [ "${server_token}" != "" ] && + [ "${client_serverside:-0}" = 0 ] && [ "${server_serverside:-0}" = 1 ] && + [ "${client_nojoin:-0}" = 0 ] && [ "${server_nojoin:-0}" = 1 ] then test_pass print_title "Connection info: ${client_addr}:${client_port} -> ${connect_addr}:${app_port}" else - test_fail "Expected tokens (c:${client_token} - s:${server_token}) and server (c:${client_serverside} - s:${server_serverside})" + test_fail "Expected tokens (c:${client_token} - s:${server_token}), server (c:${client_serverside} - s:${server_serverside}), nojoin (c:${client_nojoin} - s:${server_nojoin})" mptcp_lib_result_print_all_tap exit ${KSFT_FAIL} fi diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c index 7ea5fb28c93d..1d5d3c4e7e87 100644 --- a/tools/testing/selftests/net/msg_zerocopy.c +++ b/tools/testing/selftests/net/msg_zerocopy.c @@ -77,6 +77,7 @@ static int cfg_cork; static bool cfg_cork_mixed; static int cfg_cpu = -1; /* default: pin to last cpu */ +static int cfg_expect_zerocopy = -1; static int cfg_family = PF_UNSPEC; static int cfg_ifindex = 1; static int cfg_payload_len; @@ -92,9 +93,9 @@ static socklen_t cfg_alen; static struct sockaddr_storage cfg_dst_addr; static struct sockaddr_storage cfg_src_addr; +static int exitcode; static char payload[IP_MAXPACKET]; static long packets, bytes, completions, expected_completions; -static int zerocopied = -1; static uint32_t next_completion; static uint32_t sends_since_notify; @@ -444,11 +445,13 @@ static bool do_recv_completion(int fd, int domain) next_completion = hi + 1; zerocopy = !(serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED); - if (zerocopied == -1) - zerocopied = zerocopy; - else if (zerocopied != zerocopy) { - fprintf(stderr, "serr: inconsistent\n"); - zerocopied = zerocopy; + if (cfg_expect_zerocopy != -1 && + cfg_expect_zerocopy != zerocopy) { + fprintf(stderr, "serr: ee_code: %u != expected %u\n", + zerocopy, cfg_expect_zerocopy); + exitcode = 1; + /* suppress repeated messages */ + cfg_expect_zerocopy = zerocopy; } if (cfg_verbose >= 2) @@ -571,7 +574,7 @@ static void do_tx(int domain, int type, int protocol) fprintf(stderr, "tx=%lu (%lu MB) txc=%lu zc=%c\n", packets, bytes >> 20, completions, - zerocopied == 1 ? 'y' : 'n'); + cfg_zerocopy && cfg_expect_zerocopy == 1 ? 'y' : 'n'); } static int do_setup_rx(int domain, int type, int protocol) @@ -715,7 +718,7 @@ static void parse_opts(int argc, char **argv) cfg_payload_len = max_payload_len; - while ((c = getopt(argc, argv, "46c:C:D:i:l:mp:rs:S:t:vz")) != -1) { + while ((c = getopt(argc, argv, "46c:C:D:i:l:mp:rs:S:t:vzZ:")) != -1) { switch (c) { case '4': if (cfg_family != PF_UNSPEC) @@ -770,6 +773,9 @@ static void parse_opts(int argc, char **argv) case 'z': cfg_zerocopy = true; break; + case 'Z': + cfg_expect_zerocopy = !!atoi(optarg); + break; } } @@ -817,5 +823,5 @@ int main(int argc, char **argv) else error(1, 0, "unknown cfg_test %s", cfg_test); - return 0; + return exitcode; } diff --git a/tools/testing/selftests/net/msg_zerocopy.sh b/tools/testing/selftests/net/msg_zerocopy.sh index 89c22f5320e0..28178a38a4e7 100755 --- a/tools/testing/selftests/net/msg_zerocopy.sh +++ b/tools/testing/selftests/net/msg_zerocopy.sh @@ -6,6 +6,7 @@ set -e readonly DEV="veth0" +readonly DUMMY_DEV="dummy0" readonly DEV_MTU=65535 readonly BIN="./msg_zerocopy" @@ -14,21 +15,25 @@ readonly NSPREFIX="ns-${RAND}" readonly NS1="${NSPREFIX}1" readonly NS2="${NSPREFIX}2" -readonly SADDR4='192.168.1.1' -readonly DADDR4='192.168.1.2' -readonly SADDR6='fd::1' -readonly DADDR6='fd::2' +readonly LPREFIX4='192.168.1' +readonly RPREFIX4='192.168.2' +readonly LPREFIX6='fd' +readonly RPREFIX6='fc' + readonly path_sysctl_mem="net.core.optmem_max" # No arguments: automated test if [[ "$#" -eq "0" ]]; then - $0 4 tcp -t 1 - $0 6 tcp -t 1 - $0 4 udp -t 1 - $0 6 udp -t 1 - echo "OK. All tests passed" - exit 0 + ret=0 + + $0 4 tcp -t 1 || ret=1 + $0 6 tcp -t 1 || ret=1 + $0 4 udp -t 1 || ret=1 + $0 6 udp -t 1 || ret=1 + + [[ "$ret" == "0" ]] && echo "OK. All tests passed" + exit $ret fi # Argument parsing @@ -45,11 +50,18 @@ readonly EXTRA_ARGS="$@" # Argument parsing: configure addresses if [[ "${IP}" == "4" ]]; then - readonly SADDR="${SADDR4}" - readonly DADDR="${DADDR4}" + readonly SADDR="${LPREFIX4}.1" + readonly DADDR="${LPREFIX4}.2" + readonly DUMMY_ADDR="${RPREFIX4}.1" + readonly DADDR_TXONLY="${RPREFIX4}.2" + readonly MASK="24" elif [[ "${IP}" == "6" ]]; then - readonly SADDR="${SADDR6}" - readonly DADDR="${DADDR6}" + readonly SADDR="${LPREFIX6}::1" + readonly DADDR="${LPREFIX6}::2" + readonly DUMMY_ADDR="${RPREFIX6}::1" + readonly DADDR_TXONLY="${RPREFIX6}::2" + readonly MASK="64" + readonly NODAD="nodad" else echo "Invalid IP version ${IP}" exit 1 @@ -89,33 +101,61 @@ ip netns exec "${NS2}" sysctl -w -q "${path_sysctl_mem}=1000000" ip link add "${DEV}" mtu "${DEV_MTU}" netns "${NS1}" type veth \ peer name "${DEV}" mtu "${DEV_MTU}" netns "${NS2}" +ip link add "${DUMMY_DEV}" mtu "${DEV_MTU}" netns "${NS2}" type dummy + # Bring the devices up ip -netns "${NS1}" link set "${DEV}" up ip -netns "${NS2}" link set "${DEV}" up +ip -netns "${NS2}" link set "${DUMMY_DEV}" up # Set fixed MAC addresses on the devices ip -netns "${NS1}" link set dev "${DEV}" address 02:02:02:02:02:02 ip -netns "${NS2}" link set dev "${DEV}" address 06:06:06:06:06:06 # Add fixed IP addresses to the devices -ip -netns "${NS1}" addr add 192.168.1.1/24 dev "${DEV}" -ip -netns "${NS2}" addr add 192.168.1.2/24 dev "${DEV}" -ip -netns "${NS1}" addr add fd::1/64 dev "${DEV}" nodad -ip -netns "${NS2}" addr add fd::2/64 dev "${DEV}" nodad +ip -netns "${NS1}" addr add "${SADDR}/${MASK}" dev "${DEV}" ${NODAD} +ip -netns "${NS2}" addr add "${DADDR}/${MASK}" dev "${DEV}" ${NODAD} +ip -netns "${NS2}" addr add "${DUMMY_ADDR}/${MASK}" dev "${DUMMY_DEV}" ${NODAD} + +ip -netns "${NS1}" route add default via "${DADDR}" dev "${DEV}" +ip -netns "${NS2}" route add default via "${DADDR_TXONLY}" dev "${DUMMY_DEV}" + +ip netns exec "${NS2}" sysctl -wq net.ipv4.ip_forward=1 +ip netns exec "${NS2}" sysctl -wq net.ipv6.conf.all.forwarding=1 # Optionally disable sg or csum offload to test edge cases # ip netns exec "${NS1}" ethtool -K "${DEV}" sg off +ret=0 + do_test() { local readonly ARGS="$1" - echo "ipv${IP} ${TXMODE} ${ARGS}" - ip netns exec "${NS2}" "${BIN}" "-${IP}" -i "${DEV}" -t 2 -C 2 -S "${SADDR}" -D "${DADDR}" ${ARGS} -r "${RXMODE}" & + # tx-rx test + # packets queued to a local socket are copied, + # sender notification has SO_EE_CODE_ZEROCOPY_COPIED. + + echo -e "\nipv${IP} ${TXMODE} ${ARGS} tx-rx\n" + ip netns exec "${NS2}" "${BIN}" "-${IP}" -i "${DEV}" -t 2 -C 2 \ + -S "${SADDR}" -D "${DADDR}" ${ARGS} -r "${RXMODE}" & sleep 0.2 - ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 -S "${SADDR}" -D "${DADDR}" ${ARGS} "${TXMODE}" + ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 \ + -S "${SADDR}" -D "${DADDR}" ${ARGS} "${TXMODE}" -Z 0 || ret=1 wait + + # next test is unconnected tx to dummy0, cannot exercise with tcp + [[ "${TXMODE}" == "tcp" ]] && return + + # tx-only test: send out dummy0 + # packets leaving the host are not copied, + # sender notification does not have SO_EE_CODE_ZEROCOPY_COPIED. + + echo -e "\nipv${IP} ${TXMODE} ${ARGS} tx-only\n" + ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 \ + -S "${SADDR}" -D "${DADDR_TXONLY}" ${ARGS} "${TXMODE}" -Z 1 || ret=1 } do_test "${EXTRA_ARGS}" do_test "-z ${EXTRA_ARGS}" -echo ok + +[[ "$ret" == "0" ]] && echo "OK" diff --git a/tools/testing/selftests/net/nat6to4.sh b/tools/testing/selftests/net/nat6to4.sh new file mode 100755 index 000000000000..0ee859b622a4 --- /dev/null +++ b/tools/testing/selftests/net/nat6to4.sh @@ -0,0 +1,15 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +NS="ns-peer-$(mktemp -u XXXXXX)" + +ip netns add "${NS}" +ip -netns "${NS}" link set lo up +ip -netns "${NS}" route add default via 127.0.0.2 dev lo + +tc -n "${NS}" qdisc add dev lo ingress +tc -n "${NS}" filter add dev lo ingress prio 4 protocol ip \ + bpf object-file nat6to4.bpf.o section schedcls/egress4/snat4 direct-action + +ip netns exec "${NS}" \ + bash -c 'echo 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789abc | socat - UDP4-DATAGRAM:224.1.0.1:6666,ip-multicast-loop=1' diff --git a/tools/testing/selftests/net/net_helper.sh b/tools/testing/selftests/net/net_helper.sh deleted file mode 100644 index 6596fe03c77f..000000000000 --- a/tools/testing/selftests/net/net_helper.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# -# Helper functions - -wait_local_port_listen() -{ - local listener_ns="${1}" - local port="${2}" - local protocol="${3}" - local pattern - local i - - pattern=":$(printf "%04X" "${port}") " - - # for tcp protocol additionally check the socket state - [ ${protocol} = "tcp" ] && pattern="${pattern}0A" - for i in $(seq 10); do - if ip netns exec "${listener_ns}" awk '{print $2" "$4}' \ - /proc/net/"${protocol}"* | grep -q "${pattern}"; then - break - fi - sleep 0.1 - done -} diff --git a/tools/testing/selftests/net/netdev-l2addr.sh b/tools/testing/selftests/net/netdev-l2addr.sh new file mode 100755 index 000000000000..18509da293e5 --- /dev/null +++ b/tools/testing/selftests/net/netdev-l2addr.sh @@ -0,0 +1,59 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh +set -o pipefail + +NSIM_ADDR=2025 +TEST_ADDR="d0:be:d0:be:d0:00" + +RET_CODE=0 + +cleanup() { + cleanup_netdevsim "$NSIM_ADDR" + cleanup_ns "$NS" +} + +trap cleanup EXIT + +fail() { + echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2 + RET_CODE=1 +} + +get_addr() +{ + local type="$1" + local dev="$2" + local ns="$3" + + ip -j -n "$ns" link show dev "$dev" | jq -er ".[0].$type" +} + +setup_ns NS + +nsim=$(create_netdevsim $NSIM_ADDR "$NS") + +get_addr address "$nsim" "$NS" >/dev/null || fail "Couldn't get ether addr" +get_addr broadcast "$nsim" "$NS" >/dev/null || fail "Couldn't get brd addr" +get_addr permaddr "$nsim" "$NS" >/dev/null && fail "Found perm_addr without setting it" + +ip -n "$NS" link set dev "$nsim" address "$TEST_ADDR" +ip -n "$NS" link set dev "$nsim" brd "$TEST_ADDR" + +[[ "$(get_addr address "$nsim" "$NS")" == "$TEST_ADDR" ]] || fail "Couldn't set ether addr" +[[ "$(get_addr broadcast "$nsim" "$NS")" == "$TEST_ADDR" ]] || fail "Couldn't set brd addr" + +if create_netdevsim_port "$NSIM_ADDR" "$NS" 2 "FF:FF:FF:FF:FF:FF" 2>/dev/null; then + fail "Created netdevsim with broadcast permaddr" +fi + +nsim_port=$(create_netdevsim_port "$NSIM_ADDR" "$NS" 2 "$TEST_ADDR") + +get_addr address "$nsim_port" "$NS" >/dev/null || fail "Couldn't get ether addr" +get_addr broadcast "$nsim_port" "$NS" >/dev/null || fail "Couldn't get brd addr" +[[ "$(get_addr permaddr "$nsim_port" "$NS")" == "$TEST_ADDR" ]] || fail "Couldn't get permaddr" + +cleanup_netdevsim "$NSIM_ADDR" "$NS" + +exit $RET_CODE diff --git a/tools/testing/selftests/net/netfilter/.gitignore b/tools/testing/selftests/net/netfilter/.gitignore index 64c4f8d9aa6c..5d2be9a00627 100644 --- a/tools/testing/selftests/net/netfilter/.gitignore +++ b/tools/testing/selftests/net/netfilter/.gitignore @@ -5,3 +5,4 @@ conntrack_dump_flush conntrack_reverse_clash sctp_collision nf_queue +udpclash diff --git a/tools/testing/selftests/net/netfilter/Makefile b/tools/testing/selftests/net/netfilter/Makefile index ffe161fac8b5..ee2d1a5254f8 100644 --- a/tools/testing/selftests/net/netfilter/Makefile +++ b/tools/testing/selftests/net/netfilter/Makefile @@ -6,42 +6,52 @@ HOSTPKG_CONFIG := pkg-config MNL_CFLAGS := $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null) MNL_LDLIBS := $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl) -TEST_PROGS := br_netfilter.sh bridge_brouter.sh -TEST_PROGS += br_netfilter_queue.sh -TEST_PROGS += conntrack_dump_flush.sh -TEST_PROGS += conntrack_icmp_related.sh -TEST_PROGS += conntrack_ipip_mtu.sh -TEST_PROGS += conntrack_tcp_unreplied.sh -TEST_PROGS += conntrack_sctp_collision.sh -TEST_PROGS += conntrack_vrf.sh -TEST_PROGS += conntrack_reverse_clash.sh -TEST_PROGS += ipvs.sh -TEST_PROGS += nf_conntrack_packetdrill.sh -TEST_PROGS += nf_nat_edemux.sh -TEST_PROGS += nft_audit.sh -TEST_PROGS += nft_concat_range.sh -TEST_PROGS += nft_conntrack_helper.sh -TEST_PROGS += nft_fib.sh -TEST_PROGS += nft_flowtable.sh -TEST_PROGS += nft_meta.sh -TEST_PROGS += nft_nat.sh -TEST_PROGS += nft_nat_zones.sh -TEST_PROGS += nft_queue.sh -TEST_PROGS += nft_synproxy.sh -TEST_PROGS += nft_tproxy_tcp.sh -TEST_PROGS += nft_tproxy_udp.sh -TEST_PROGS += nft_zones_many.sh -TEST_PROGS += rpath.sh -TEST_PROGS += vxlan_mtu_frag.sh -TEST_PROGS += xt_string.sh +TEST_PROGS := \ + br_netfilter.sh \ + br_netfilter_queue.sh \ + bridge_brouter.sh \ + conntrack_clash.sh \ + conntrack_dump_flush.sh \ + conntrack_icmp_related.sh \ + conntrack_ipip_mtu.sh \ + conntrack_resize.sh \ + conntrack_reverse_clash.sh \ + conntrack_sctp_collision.sh \ + conntrack_tcp_unreplied.sh \ + conntrack_vrf.sh \ + ipvs.sh \ + nf_conntrack_packetdrill.sh \ + nf_nat_edemux.sh \ + nft_audit.sh \ + nft_concat_range.sh \ + nft_conntrack_helper.sh \ + nft_fib.sh \ + nft_flowtable.sh \ + nft_interface_stress.sh \ + nft_meta.sh \ + nft_nat.sh \ + nft_nat_zones.sh \ + nft_queue.sh \ + nft_synproxy.sh \ + nft_tproxy_tcp.sh \ + nft_tproxy_udp.sh \ + nft_zones_many.sh \ + rpath.sh \ + vxlan_mtu_frag.sh \ + xt_string.sh \ +# end of TEST_PROGS TEST_PROGS_EXTENDED = nft_concat_range_perf.sh -TEST_GEN_FILES = audit_logread -TEST_GEN_FILES += connect_close nf_queue -TEST_GEN_FILES += conntrack_dump_flush -TEST_GEN_FILES += conntrack_reverse_clash -TEST_GEN_FILES += sctp_collision +TEST_GEN_FILES = \ + audit_logread \ + connect_close \ + conntrack_dump_flush \ + conntrack_reverse_clash \ + nf_queue \ + sctp_collision \ + udpclash \ +# end of TEST_GEN_FILES include ../../lib.mk @@ -50,10 +60,14 @@ $(OUTPUT)/nf_queue: LDLIBS += $(MNL_LDLIBS) $(OUTPUT)/conntrack_dump_flush: CFLAGS += $(MNL_CFLAGS) $(OUTPUT)/conntrack_dump_flush: LDLIBS += $(MNL_LDLIBS) +$(OUTPUT)/udpclash: LDLIBS += -lpthread -TEST_FILES := lib.sh -TEST_FILES += packetdrill +TEST_FILES := \ + lib.sh \ + packetdrill \ +# end of TEST_FILES TEST_INCLUDES := \ + $(wildcard ../lib/sh/*.sh) \ ../lib.sh \ - $(wildcard ../lib/sh/*.sh) +# end of TEST_INCLUDES diff --git a/tools/testing/selftests/net/netfilter/br_netfilter.sh b/tools/testing/selftests/net/netfilter/br_netfilter.sh index c28379a965d8..011de8763094 100755 --- a/tools/testing/selftests/net/netfilter/br_netfilter.sh +++ b/tools/testing/selftests/net/netfilter/br_netfilter.sh @@ -13,6 +13,12 @@ source lib.sh checktool "nft --version" "run test without nft tool" +read t < /proc/sys/kernel/tainted +if [ "$t" -ne 0 ];then + echo SKIP: kernel is tainted + exit $ksft_skip +fi + cleanup() { cleanup_all_ns } @@ -54,9 +60,6 @@ bcast_ping() done } -ip netns exec "$ns0" sysctl -q net.ipv4.conf.all.rp_filter=0 -ip netns exec "$ns0" sysctl -q net.ipv4.conf.default.rp_filter=0 - if ! ip link add veth1 netns "$ns0" type veth peer name eth0 netns "$ns1"; then echo "SKIP: Can't create veth device" exit $ksft_skip @@ -165,6 +168,7 @@ if [ "$t" -eq 0 ];then echo PASS: kernel not tainted else echo ERROR: kernel is tainted + dmesg ret=1 fi diff --git a/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh index 6a764d70ab06..4788641717d9 100755 --- a/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh +++ b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh @@ -4,6 +4,12 @@ source lib.sh checktool "nft --version" "run test without nft tool" +read t < /proc/sys/kernel/tainted +if [ "$t" -ne 0 ];then + echo SKIP: kernel is tainted + exit $ksft_skip +fi + cleanup() { cleanup_all_ns } @@ -72,6 +78,7 @@ if [ "$t" -eq 0 ];then echo PASS: kernel not tainted else echo ERROR: kernel is tainted + dmesg exit 1 fi diff --git a/tools/testing/selftests/net/netfilter/bridge_brouter.sh b/tools/testing/selftests/net/netfilter/bridge_brouter.sh index 2549b6590693..ea76f2bc2f59 100755 --- a/tools/testing/selftests/net/netfilter/bridge_brouter.sh +++ b/tools/testing/selftests/net/netfilter/bridge_brouter.sh @@ -22,8 +22,6 @@ trap cleanup EXIT setup_ns nsbr ns1 ns2 -ip netns exec "$nsbr" sysctl -q net.ipv4.conf.default.rp_filter=0 -ip netns exec "$nsbr" sysctl -q net.ipv4.conf.all.rp_filter=0 if ! ip link add veth0 netns "$nsbr" type veth peer name eth0 netns "$ns1"; then echo "SKIP: Can't create veth device" exit $ksft_skip diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config index 43d8b500d391..979cff56e1f5 100644 --- a/tools/testing/selftests/net/netfilter/config +++ b/tools/testing/selftests/net/netfilter/config @@ -7,64 +7,75 @@ CONFIG_BRIDGE_EBT_REDIRECT=m CONFIG_BRIDGE_EBT_T_FILTER=m CONFIG_BRIDGE_NETFILTER=m CONFIG_BRIDGE_NF_EBTABLES=m +CONFIG_BRIDGE_NF_EBTABLES_LEGACY=m CONFIG_BRIDGE_VLAN_FILTERING=y CONFIG_CGROUP_BPF=y +CONFIG_CRYPTO_SHA1=m CONFIG_DUMMY=m +CONFIG_INET_DIAG=m CONFIG_INET_ESP=m -CONFIG_IP_NF_MATCH_RPFILTER=m -CONFIG_IP6_NF_MATCH_RPFILTER=m -CONFIG_IP_NF_IPTABLES=m +CONFIG_INET_SCTP_DIAG=m +CONFIG_IP6_NF_FILTER=m CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP6_NF_IPTABLES_LEGACY=m +CONFIG_IP6_NF_MATCH_RPFILTER=m +CONFIG_IP6_NF_RAW=m CONFIG_IP_NF_FILTER=m -CONFIG_IP6_NF_FILTER=m +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_IPTABLES_LEGACY=m +CONFIG_IP_NF_MATCH_RPFILTER=m +CONFIG_IP_NF_NAT=m CONFIG_IP_NF_RAW=m -CONFIG_IP6_NF_RAW=m CONFIG_IP_SCTP=m +CONFIG_IPV6=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IPV6_TUNNEL=m CONFIG_IP_VS=m CONFIG_IP_VS_PROTO_TCP=y CONFIG_IP_VS_RR=m -CONFIG_IPV6=y -CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_MACVLAN=m CONFIG_NAMESPACES=y CONFIG_NET_CLS_U32=m -CONFIG_NET_L3_MASTER_DEV=y -CONFIG_NET_NS=y -CONFIG_NET_SCH_NETEM=m -CONFIG_NET_SCH_HTB=m -CONFIG_NET_IPIP=m -CONFIG_NET_VRF=y CONFIG_NETFILTER=y CONFIG_NETFILTER_ADVANCED=y CONFIG_NETFILTER_NETLINK=m CONFIG_NETFILTER_NETLINK_QUEUE=m CONFIG_NETFILTER_SYNPROXY=m CONFIG_NETFILTER_XTABLES=m -CONFIG_NETFILTER_XT_NAT=m +CONFIG_NETFILTER_XTABLES_LEGACY=y CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m CONFIG_NETFILTER_XT_MATCH_STATE=m CONFIG_NETFILTER_XT_MATCH_STRING=m +CONFIG_NETFILTER_XT_NAT=m CONFIG_NETFILTER_XT_TARGET_REDIRECT=m +CONFIG_NET_IPIP=m +CONFIG_NET_L3_MASTER_DEV=y +CONFIG_NET_NS=y +CONFIG_NET_PKTGEN=m +CONFIG_NET_SCH_HTB=m +CONFIG_NET_SCH_NETEM=m +CONFIG_NET_VRF=y CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_EVENTS=y CONFIG_NF_CONNTRACK_FTP=m CONFIG_NF_CONNTRACK_MARK=y +CONFIG_NF_CONNTRACK_PROCFS=y CONFIG_NF_CONNTRACK_ZONES=y CONFIG_NF_CT_NETLINK=m CONFIG_NF_CT_PROTO_SCTP=y CONFIG_NF_FLOW_TABLE=m +CONFIG_NF_FLOW_TABLE_INET=m CONFIG_NF_LOG_IPV4=m CONFIG_NF_LOG_IPV6=m CONFIG_NF_NAT=m -CONFIG_NF_NAT_REDIRECT=y CONFIG_NF_NAT_MASQUERADE=y +CONFIG_NF_NAT_REDIRECT=y CONFIG_NF_TABLES=m CONFIG_NF_TABLES_BRIDGE=m CONFIG_NF_TABLES_INET=y CONFIG_NF_TABLES_IPV4=y CONFIG_NF_TABLES_IPV6=y CONFIG_NF_TABLES_NETDEV=y -CONFIG_NF_FLOW_TABLE_INET=m CONFIG_NFT_BRIDGE_META=m CONFIG_NFT_COMPAT=m CONFIG_NFT_CT=m @@ -83,12 +94,9 @@ CONFIG_NFT_QUOTA=m CONFIG_NFT_REDIR=m CONFIG_NFT_SYNPROXY=m CONFIG_NFT_TPROXY=m +CONFIG_TUN=m CONFIG_VETH=m CONFIG_VLAN_8021Q=m CONFIG_VXLAN=m -CONFIG_XFRM_USER=m CONFIG_XFRM_STATISTICS=y -CONFIG_NET_PKTGEN=m -CONFIG_TUN=m -CONFIG_INET_DIAG=m -CONFIG_SCTP_DIAG=m +CONFIG_XFRM_USER=m diff --git a/tools/testing/selftests/net/netfilter/conntrack_clash.sh b/tools/testing/selftests/net/netfilter/conntrack_clash.sh new file mode 100755 index 000000000000..84b8eb12143a --- /dev/null +++ b/tools/testing/selftests/net/netfilter/conntrack_clash.sh @@ -0,0 +1,173 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +clash_resolution_active=0 +dport=22111 +ret=0 + +cleanup() +{ + # netns cleanup also zaps any remaining socat echo server. + cleanup_all_ns +} + +checktool "nft --version" "run test without nft" +checktool "conntrack --version" "run test without conntrack" +checktool "socat -h" "run test without socat" + +trap cleanup EXIT + +setup_ns nsclient1 nsclient2 nsrouter + +ip netns exec "$nsrouter" nft -f -<<EOF +table ip t { + chain lb { + meta l4proto udp dnat to numgen random mod 3 map { 0 : 10.0.2.1 . 9000, 1 : 10.0.2.1 . 9001, 2 : 10.0.2.1 . 9002 } + } + + chain prerouting { + type nat hook prerouting priority dstnat + + udp dport $dport counter jump lb + } + + chain output { + type nat hook output priority dstnat + + udp dport $dport counter jump lb + } +} +EOF + +load_simple_ruleset() +{ +ip netns exec "$1" nft -f -<<EOF +table ip t { + chain forward { + type filter hook forward priority 0 + + ct state new counter + } +} +EOF +} + +spawn_servers() +{ + local ns="$1" + local ports="9000 9001 9002" + + for port in $ports; do + ip netns exec "$ns" socat UDP-RECVFROM:$port,fork PIPE 2>/dev/null & + done + + for port in $ports; do + wait_local_port_listen "$ns" $port udp + done +} + +add_addr() +{ + local ns="$1" + local dev="$2" + local i="$3" + local j="$4" + + ip -net "$ns" link set "$dev" up + ip -net "$ns" addr add "10.0.$i.$j/24" dev "$dev" +} + +ping_test() +{ + local ns="$1" + local daddr="$2" + + if ! ip netns exec "$ns" ping -q -c 1 $daddr > /dev/null;then + echo "FAIL: ping from $ns to $daddr" + exit 1 + fi +} + +run_one_clash_test() +{ + local ns="$1" + local ctns="$2" + local daddr="$3" + local dport="$4" + local entries + local cre + + if ! ip netns exec "$ns" timeout 30 ./udpclash $daddr $dport;then + echo "INFO: did not receive expected number of replies for $daddr:$dport" + ip netns exec "$ctns" conntrack -S + # don't fail: check if clash resolution triggered after all. + fi + + entries=$(ip netns exec "$ctns" conntrack -S | wc -l) + cre=$(ip netns exec "$ctns" conntrack -S | grep "clash_resolve=0" | wc -l) + + if [ "$cre" -ne "$entries" ];then + clash_resolution_active=1 + return 0 + fi + + # not a failure: clash resolution logic did not trigger. + # With right timing, xmit completed sequentially and + # no parallel insertion occurs. + return $ksft_xfail +} + +run_clash_test() +{ + local ns="$1" + local ctns="$2" + local daddr="$3" + local dport="$4" + local softerr=0 + + for i in $(seq 1 10);do + run_one_clash_test "$ns" "$ctns" "$daddr" "$dport" + local rv=$? + if [ $rv -eq 0 ];then + echo "PASS: clash resolution test for $daddr:$dport on attempt $i" + return 0 + elif [ $rv -eq $ksft_xfail ]; then + softerr=1 + fi + done + + [ $softerr -eq 1 ] && echo "XFAIL: clash resolution for $daddr:$dport did not trigger" +} + +ip link add veth0 netns "$nsclient1" type veth peer name veth0 netns "$nsrouter" +ip link add veth0 netns "$nsclient2" type veth peer name veth1 netns "$nsrouter" +add_addr "$nsclient1" veth0 1 1 +add_addr "$nsclient2" veth0 2 1 +add_addr "$nsrouter" veth0 1 99 +add_addr "$nsrouter" veth1 2 99 + +ip -net "$nsclient1" route add default via 10.0.1.99 +ip -net "$nsclient2" route add default via 10.0.2.99 +ip netns exec "$nsrouter" sysctl -q net.ipv4.ip_forward=1 + +ping_test "$nsclient1" 10.0.1.99 +ping_test "$nsclient1" 10.0.2.1 +ping_test "$nsclient2" 10.0.1.1 + +spawn_servers "$nsclient2" + +# exercise clash resolution with nat: +# nsrouter is supposed to dnat to 10.0.2.1:900{0,1,2,3}. +run_clash_test "$nsclient1" "$nsrouter" 10.0.1.99 "$dport" + +# exercise clash resolution without nat. +load_simple_ruleset "$nsclient2" +run_clash_test "$nsclient2" "$nsclient2" 127.0.0.1 9001 + +if [ $clash_resolution_active -eq 0 ];then + [ "$ret" -eq 0 ] && ret=$ksft_xfail +fi + +exit $ret diff --git a/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c index 5f827e10717d..5cecb8a1bc94 100644 --- a/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c +++ b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c @@ -10,7 +10,7 @@ #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_conntrack.h> #include <linux/netfilter/nf_conntrack_tcp.h> -#include "../../kselftest_harness.h" +#include "kselftest_harness.h" #define TEST_ZONE_ID 123 #define NF_CT_DEFAULT_ZONE_ID 0 diff --git a/tools/testing/selftests/net/netfilter/conntrack_resize.sh b/tools/testing/selftests/net/netfilter/conntrack_resize.sh new file mode 100755 index 000000000000..615fe3c6f405 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/conntrack_resize.sh @@ -0,0 +1,515 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +checktool "conntrack --version" "run test without conntrack" +checktool "nft --version" "run test without nft tool" + +init_net_max=0 +ct_buckets=0 +tmpfile="" +tmpfile_proc="" +tmpfile_uniq="" +ret=0 +have_socat=0 + +socat -h > /dev/null && have_socat=1 + +insert_count=2000 +[ "$KSFT_MACHINE_SLOW" = "yes" ] && insert_count=400 + +modprobe -q nf_conntrack +if ! sysctl -q net.netfilter.nf_conntrack_max >/dev/null;then + echo "SKIP: conntrack sysctls not available" + exit $KSFT_SKIP +fi + +init_net_max=$(sysctl -n net.netfilter.nf_conntrack_max) || exit 1 +ct_buckets=$(sysctl -n net.netfilter.nf_conntrack_buckets) || exit 1 + +cleanup() { + cleanup_all_ns + + rm -f "$tmpfile" "$tmpfile_proc" "$tmpfile_uniq" + + # restore original sysctl setting + sysctl -q net.netfilter.nf_conntrack_max=$init_net_max + sysctl -q net.netfilter.nf_conntrack_buckets=$ct_buckets +} +trap cleanup EXIT + +check_max_alias() +{ + local expected="$1" + # old name, expected to alias to the first, i.e. changing one + # changes the other as well. + local lv=$(sysctl -n net.nf_conntrack_max) + + if [ $expected -ne "$lv" ];then + echo "nf_conntrack_max sysctls should have identical values" + exit 1 + fi +} + +insert_ctnetlink() { + local ns="$1" + local count="$2" + local i=0 + local bulk=16 + + while [ $i -lt $count ] ;do + ip netns exec "$ns" bash -c "for i in \$(seq 1 $bulk); do \ + if ! conntrack -I -s \$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%255+1)) \ + -d \$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%255+1)) \ + --protonum 17 --timeout 3600 --status ASSURED,SEEN_REPLY --sport \$RANDOM --dport 53; then \ + return;\ + fi & \ + done ; wait" 2>/dev/null + + i=$((i+bulk)) + done +} + +check_ctcount() { + local ns="$1" + local count="$2" + local msg="$3" + + local now=$(ip netns exec "$ns" conntrack -C) + + if [ $now -ne "$count" ] ;then + echo "expected $count entries in $ns, not $now: $msg" + exit 1 + fi + + echo "PASS: got $count connections: $msg" +} + +ctresize() { + local duration="$1" + local now=$(date +%s) + local end=$((now + duration)) + + while [ $now -lt $end ]; do + sysctl -q net.netfilter.nf_conntrack_buckets=$RANDOM + now=$(date +%s) + done +} + +do_rsleep() { + local limit="$1" + local r=$RANDOM + + r=$((r%limit)) + sleep "$r" +} + +ct_flush_once() { + local ns="$1" + + ip netns exec "$ns" conntrack -F 2>/dev/null +} + +ctflush() { + local ns="$1" + local duration="$2" + local now=$(date +%s) + local end=$((now + duration)) + + do_rsleep "$duration" + + while [ $now -lt $end ]; do + ct_flush_once "$ns" + do_rsleep "$duration" + now=$(date +%s) + done +} + +ct_pingflood() +{ + local ns="$1" + local duration="$2" + local msg="$3" + local now=$(date +%s) + local end=$((now + duration)) + local j=0 + local k=0 + + while [ $now -lt $end ]; do + j=$((j%256)) + k=$((k%256)) + + ip netns exec "$ns" bash -c \ + "j=$j k=$k; for i in \$(seq 1 254); do ping -q -c 1 127.\$k.\$j.\$i & done; wait" >/dev/null 2>&1 + + j=$((j+1)) + + if [ $j -eq 256 ];then + k=$((k+1)) + fi + + now=$(date +%s) + done + + wait +} + +ct_udpflood() +{ + local ns="$1" + local duration="$2" + local now=$(date +%s) + local end=$((now + duration)) + + [ $have_socat -ne "1" ] && return + + while [ $now -lt $end ]; do +ip netns exec "$ns" bash<<"EOF" + for i in $(seq 1 100);do + dport=$(((RANDOM%65536)+1)) + + echo bar | socat -u STDIN UDP:"127.0.0.1:$dport" & + done > /dev/null 2>&1 + wait +EOF + now=$(date +%s) + done +} + +ct_udpclash() +{ + local ns="$1" + local duration="$2" + local now=$(date +%s) + local end=$((now + duration)) + + [ -x udpclash ] || return + + while [ $now -lt $end ]; do + ip netns exec "$ns" timeout 30 ./udpclash 127.0.0.1 $((RANDOM%65536)) > /dev/null 2>&1 + + now=$(date +%s) + done +} + +# dump to /dev/null. We don't want dumps to cause infinite loops +# or use-after-free even when conntrack table is altered while dumps +# are in progress. +ct_nulldump() +{ + local ns="$1" + + ip netns exec "$ns" conntrack -L > /dev/null 2>&1 & + + # Don't require /proc support in conntrack + if [ -r /proc/self/net/nf_conntrack ] ; then + ip netns exec "$ns" bash -c "wc -l < /proc/self/net/nf_conntrack" > /dev/null & + fi + + wait +} + +ct_nulldump_loop() +{ + local ns="$1" + local duration="$2" + local now=$(date +%s) + local end=$((now + duration)) + + while [ $now -lt $end ]; do + ct_nulldump "$ns" + sleep $((RANDOM%2)) + now=$(date +%s) + done +} + +change_timeouts() +{ + local ns="$1" + local r1=$((RANDOM%2)) + local r2=$((RANDOM%2)) + + [ "$r1" -eq 1 ] && ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=$((RANDOM%5)) + [ "$r2" -eq 1 ] && ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_udp_timeout=$((RANDOM%5)) +} + +ct_change_timeouts_loop() +{ + local ns="$1" + local duration="$2" + local now=$(date +%s) + local end=$((now + duration)) + + while [ $now -lt $end ]; do + change_timeouts "$ns" + sleep $((RANDOM%2)) + now=$(date +%s) + done + + # restore defaults + ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=30 + ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_udp_timeout=30 +} + +check_taint() +{ + local tainted_then="$1" + local msg="$2" + + local tainted_now=0 + + if [ "$tainted_then" -ne 0 ];then + return + fi + + read tainted_now < /proc/sys/kernel/tainted + + if [ "$tainted_now" -eq 0 ];then + echo "PASS: $msg" + else + echo "TAINT: $msg" + dmesg + exit 1 + fi +} + +insert_flood() +{ + local n="$1" + local timeout="$2" + local r=0 + + r=$((RANDOM%$insert_count)) + + ct_pingflood "$n" "$timeout" "floodresize" & + ct_udpflood "$n" "$timeout" & + ct_udpclash "$n" "$timeout" & + + insert_ctnetlink "$n" "$r" & + ctflush "$n" "$timeout" & + ct_nulldump_loop "$n" "$timeout" & + ct_change_timeouts_loop "$n" "$timeout" & + + wait +} + +test_floodresize_all() +{ + local timeout=20 + local n="" + local tainted_then="" + + read tainted_then < /proc/sys/kernel/tainted + + for n in "$nsclient1" "$nsclient2";do + insert_flood "$n" "$timeout" & + done + + # resize table constantly while flood/insert/dump/flushs + # are happening in parallel. + ctresize "$timeout" + + # wait for subshells to complete, everything is limited + # by $timeout. + wait + + check_taint "$tainted_then" "resize+flood" +} + +check_dump() +{ + local ns="$1" + local protoname="$2" + local c=0 + local proto=0 + local proc=0 + local unique="" + local lret=0 + + # NOTE: assumes timeouts are large enough to not have + # expirations in all following tests. + l=$(ip netns exec "$ns" conntrack -L 2>/dev/null | sort | tee "$tmpfile" | wc -l) + c=$(ip netns exec "$ns" conntrack -C) + + if [ "$c" -eq 0 ]; then + echo "FAIL: conntrack count for $ns is 0" + lret=1 + fi + + if [ "$c" -ne "$l" ]; then + echo "FAIL: conntrack count inconsistency for $ns -L: $c != $l" + lret=1 + fi + + # check the dump we retrieved is free of duplicated entries. + unique=$(uniq "$tmpfile" | tee "$tmpfile_uniq" | wc -l) + if [ "$l" -ne "$unique" ]; then + echo "FAIL: listing contained redundant entries for $ns: $l != $unique" + diff -u "$tmpfile" "$tmpfile_uniq" + lret=1 + fi + + # we either inserted icmp or only udp, hence, --proto should return same entry count as without filter. + proto=$(ip netns exec "$ns" conntrack -L --proto $protoname 2>/dev/null | sort | uniq | tee "$tmpfile_uniq" | wc -l) + if [ "$l" -ne "$proto" ]; then + echo "FAIL: dump inconsistency for $ns -L --proto $protoname: $l != $proto" + diff -u "$tmpfile" "$tmpfile_uniq" + lret=1 + fi + + if [ -r /proc/self/net/nf_conntrack ] ; then + proc=$(ip netns exec "$ns" bash -c "sort < /proc/self/net/nf_conntrack | tee \"$tmpfile_proc\" | wc -l") + + if [ "$l" -ne "$proc" ]; then + echo "FAIL: proc inconsistency for $ns: $l != $proc" + lret=1 + fi + + proc=$(uniq "$tmpfile_proc" | tee "$tmpfile_uniq" | wc -l) + if [ "$l" -ne "$proc" ]; then + echo "FAIL: proc inconsistency after uniq filter for $ns: $l != $proc" + diff -u "$tmpfile_proc" "$tmpfile_uniq" + lret=1 + fi + fi + + if [ $lret -eq 0 ];then + echo "PASS: dump in netns $ns had same entry count (-C $c, -L $l, -p $proto, /proc $proc)" + else + echo "FAIL: dump in netns $ns had different entry count (-C $c, -L $l, -p $proto, /proc $proc)" + ret=1 + fi +} + +test_dump_all() +{ + local timeout=3 + local tainted_then="" + + read tainted_then < /proc/sys/kernel/tainted + + ct_flush_once "$nsclient1" + ct_flush_once "$nsclient2" + + ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=3600 + + ct_pingflood "$nsclient1" $timeout "dumpall" & + insert_ctnetlink "$nsclient2" $insert_count + + wait + + check_dump "$nsclient1" "icmp" + check_dump "$nsclient2" "udp" + + check_taint "$tainted_then" "test parallel conntrack dumps" +} + +check_sysctl_immutable() +{ + local ns="$1" + local name="$2" + local failhard="$3" + local o=0 + local n=0 + + o=$(ip netns exec "$ns" sysctl -n "$name" 2>/dev/null) + n=$((o+1)) + + # return value isn't reliable, need to read it back + ip netns exec "$ns" sysctl -q "$name"=$n 2>/dev/null >/dev/null + + n=$(ip netns exec "$ns" sysctl -n "$name" 2>/dev/null) + + [ -z "$n" ] && return 1 + + if [ $o -ne $n ]; then + if [ $failhard -gt 0 ] ;then + echo "FAIL: net.$name should not be changeable from namespace (now $n)" + ret=1 + fi + return 0 + fi + + return 1 +} + +test_conntrack_max_limit() +{ + sysctl -q net.netfilter.nf_conntrack_max=100 + insert_ctnetlink "$nsclient1" 101 + + # check netns is clamped by init_net, i.e., either netns follows + # init_net value, or a higher pernet limit (compared to init_net) is ignored. + check_ctcount "$nsclient1" 100 "netns conntrack_max is init_net bound" + + sysctl -q net.netfilter.nf_conntrack_max=$init_net_max +} + +test_conntrack_disable() +{ + local timeout=2 + + # disable conntrack pickups + ip netns exec "$nsclient1" nft flush table ip test_ct + + ct_flush_once "$nsclient1" + ct_flush_once "$nsclient2" + + ct_pingflood "$nsclient1" "$timeout" "conntrack disable" + ip netns exec "$nsclient2" ping -q -c 1 127.0.0.1 >/dev/null 2>&1 + + # Disabled, should not have picked up any connection. + check_ctcount "$nsclient1" 0 "conntrack disabled" + + # This one is still active, expect 1 connection. + check_ctcount "$nsclient2" 1 "conntrack enabled" +} + +init_net_max=$(sysctl -n net.netfilter.nf_conntrack_max) + +check_max_alias $init_net_max + +sysctl -q net.netfilter.nf_conntrack_max="262000" +check_max_alias 262000 + +setup_ns nsclient1 nsclient2 + +# check this only works from init_net +for n in netfilter.nf_conntrack_buckets netfilter.nf_conntrack_expect_max net.nf_conntrack_max;do + check_sysctl_immutable "$nsclient1" "net.$n" 1 +done + +# won't work on older kernels. If it works, check that the netns obeys the limit +if check_sysctl_immutable "$nsclient1" net.netfilter.nf_conntrack_max 0;then + # subtest: if pernet is changeable, check that reducing it in pernet + # limits the pernet entries. Inverse, pernet clamped by a lower init_net + # setting, is already checked by "test_conntrack_max_limit" test. + + ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_max=1 + insert_ctnetlink "$nsclient1" 2 + check_ctcount "$nsclient1" 1 "netns conntrack_max is pernet bound" + ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_max=$init_net_max +fi + +for n in "$nsclient1" "$nsclient2";do +# enable conntrack in both namespaces +ip netns exec "$n" nft -f - <<EOF +table ip test_ct { + chain input { + type filter hook input priority 0 + ct state new counter + } +} +EOF +done + +tmpfile=$(mktemp) +tmpfile_proc=$(mktemp) +tmpfile_uniq=$(mktemp) +test_conntrack_max_limit +test_dump_all +test_floodresize_all +test_conntrack_disable + +exit $ret diff --git a/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.c b/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.c index 507930cee8cb..462d628cc3bd 100644 --- a/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.c +++ b/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.c @@ -33,9 +33,14 @@ static void die(const char *e) exit(111); } -static void die_port(uint16_t got, uint16_t want) +static void die_port(const struct sockaddr_in *sin, uint16_t want) { - fprintf(stderr, "Port number changed, wanted %d got %d\n", want, ntohs(got)); + uint16_t got = ntohs(sin->sin_port); + char str[INET_ADDRSTRLEN]; + + inet_ntop(AF_INET, &sin->sin_addr, str, sizeof(str)); + + fprintf(stderr, "Port number changed, wanted %d got %d from %s\n", want, got, str); exit(1); } @@ -100,7 +105,7 @@ int main(int argc, char *argv[]) die("child recvfrom"); if (peer.sin_port != htons(PORT)) - die_port(peer.sin_port, PORT); + die_port(&peer, PORT); } else { if (sendto(s2, buf, LEN, 0, (struct sockaddr *)&sa1, sizeof(sa1)) != LEN) continue; @@ -109,7 +114,7 @@ int main(int argc, char *argv[]) die("parent recvfrom"); if (peer.sin_port != htons((PORT + 1))) - die_port(peer.sin_port, PORT + 1); + die_port(&peer, PORT + 1); } } diff --git a/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.sh b/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.sh index a24c896347a8..dc7e9d6da062 100755 --- a/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.sh +++ b/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.sh @@ -45,6 +45,8 @@ if ip netns exec "$ns0" ./conntrack_reverse_clash; then echo "PASS: No SNAT performed for null bindings" else echo "ERROR: SNAT performed without any matching snat rule" + ip netns exec "$ns0" conntrack -L + ip netns exec "$ns0" conntrack -S exit 1 fi diff --git a/tools/testing/selftests/net/netfilter/conntrack_vrf.sh b/tools/testing/selftests/net/netfilter/conntrack_vrf.sh index e95ecb37c2b1..207b79932d91 100755 --- a/tools/testing/selftests/net/netfilter/conntrack_vrf.sh +++ b/tools/testing/selftests/net/netfilter/conntrack_vrf.sh @@ -32,7 +32,6 @@ source lib.sh IP0=172.30.30.1 IP1=172.30.30.2 -DUMMYNET=10.9.9 PFXL=30 ret=0 @@ -52,11 +51,6 @@ trap cleanup EXIT setup_ns ns0 ns1 -ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.default.rp_filter=0 -ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.rp_filter=0 -ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.rp_filter=0 -ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.forwarding=1 - if ! ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1; then echo "SKIP: Could not add veth device" exit $ksft_skip @@ -67,18 +61,13 @@ if ! ip -net "$ns0" li add tvrf type vrf table 9876; then exit $ksft_skip fi -ip -net "$ns0" link add dummy0 type dummy - ip -net "$ns0" li set veth0 master tvrf -ip -net "$ns0" li set dummy0 master tvrf ip -net "$ns0" li set tvrf up ip -net "$ns0" li set veth0 up -ip -net "$ns0" li set dummy0 up ip -net "$ns1" li set veth0 up ip -net "$ns0" addr add $IP0/$PFXL dev veth0 ip -net "$ns1" addr add $IP1/$PFXL dev veth0 -ip -net "$ns0" addr add $DUMMYNET.1/$PFXL dev dummy0 listener_ready() { @@ -219,35 +208,9 @@ EOF fi } -test_fib() -{ -ip netns exec "$ns0" nft -f - <<EOF -flush ruleset -table ip t { - counter fibcount { } - - chain prerouting { - type filter hook prerouting priority 0; - meta iifname veth0 ip daddr $DUMMYNET.2 fib daddr oif dummy0 counter name fibcount notrack - } -} -EOF - ip -net "$ns1" route add 10.9.9.0/24 via "$IP0" dev veth0 - ip netns exec "$ns1" ping -q -w 1 -c 1 "$DUMMYNET".2 > /dev/null - - if ip netns exec "$ns0" nft list counter t fibcount | grep -q "packets 1"; then - echo "PASS: fib lookup returned exepected output interface" - else - echo "FAIL: fib lookup did not return exepected output interface" - ret=1 - return - fi -} - test_ct_zone_in test_masquerade_vrf "default" test_masquerade_vrf "pfifo" test_masquerade_veth -test_fib exit $ret diff --git a/tools/testing/selftests/net/netfilter/ipvs.sh b/tools/testing/selftests/net/netfilter/ipvs.sh index d3edb16cd4b3..9c9d5b38ab71 100755 --- a/tools/testing/selftests/net/netfilter/ipvs.sh +++ b/tools/testing/selftests/net/netfilter/ipvs.sh @@ -129,9 +129,6 @@ test_dr() { # avoid incorrect arp response ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_ignore=1 ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_announce=2 - # avoid reverse route lookup - ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0 - ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.veth21.rp_filter=0 ip netns exec "${ns2}" ip addr add "${vip_v4}/32" dev lo:1 test_service @@ -154,7 +151,7 @@ test_nat() { test_tun() { ip netns exec "${ns0}" ip route add "${vip_v4}" via "${gip_v4}" dev br0 - ip netns exec "${ns1}" modprobe -q ipip + modprobe -q ipip ip netns exec "${ns1}" ip link set tunl0 up ip netns exec "${ns1}" sysctl -qw net.ipv4.ip_forward=0 ip netns exec "${ns1}" sysctl -qw net.ipv4.conf.all.send_redirects=0 @@ -163,13 +160,10 @@ test_tun() { ip netns exec "${ns1}" ipvsadm -a -i -t "${vip_v4}:${port}" -r ${rip_v4}:${port} ip netns exec "${ns1}" ip addr add ${vip_v4}/32 dev lo:1 - ip netns exec "${ns2}" modprobe -q ipip ip netns exec "${ns2}" ip link set tunl0 up ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_ignore=1 ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_announce=2 - ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0 ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.tunl0.rp_filter=0 - ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.veth21.rp_filter=0 ip netns exec "${ns2}" ip addr add "${vip_v4}/32" dev lo:1 test_service diff --git a/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh b/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh index 1014551dd769..6731fe1eaf2e 100755 --- a/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh +++ b/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh @@ -17,9 +17,31 @@ cleanup() checktool "socat -h" "run test without socat" checktool "iptables --version" "run test without iptables" +checktool "conntrack --version" "run test without conntrack" trap cleanup EXIT +connect_done() +{ + local ns="$1" + local port="$2" + + ip netns exec "$ns" ss -nt -o state established "dport = :$port" | grep -q "$port" +} + +check_ctstate() +{ + local ns="$1" + local dp="$2" + + if ! ip netns exec "$ns" conntrack --get -s 192.168.1.2 -d 192.168.1.1 -p tcp \ + --sport 10000 --dport "$dp" --state ESTABLISHED > /dev/null 2>&1;then + echo "FAIL: Did not find expected state for dport $2" + ip netns exec "$ns" bash -c 'conntrack -L; conntrack -S; ss -nt' + ret=1 + fi +} + setup_ns ns1 ns2 # Connect the namespaces using a veth pair @@ -44,15 +66,18 @@ socatpid=$! ip netns exec "$ns2" sysctl -q net.ipv4.ip_local_port_range="10000 10000" # add a virtual IP using DNAT -ip netns exec "$ns2" iptables -t nat -A OUTPUT -d 10.96.0.1/32 -p tcp --dport 443 -j DNAT --to-destination 192.168.1.1:5201 +ip netns exec "$ns2" iptables -t nat -A OUTPUT -d 10.96.0.1/32 -p tcp --dport 443 -j DNAT --to-destination 192.168.1.1:5201 || exit 1 # ... and route it to the other namespace ip netns exec "$ns2" ip route add 10.96.0.1 via 192.168.1.1 -# add a persistent connection from the other namespace -ip netns exec "$ns2" socat -t 10 - TCP:192.168.1.1:5201 > /dev/null & +# listener should be up by now, wait if it isn't yet. +wait_local_port_listen "$ns1" 5201 tcp -sleep 1 +# add a persistent connection from the other namespace +sleep 10 | ip netns exec "$ns2" socat -t 10 - TCP:192.168.1.1:5201 > /dev/null & +cpid0=$! +busywait "$BUSYWAIT_TIMEOUT" connect_done "$ns2" "5201" # ip daddr:dport will be rewritten to 192.168.1.1 5201 # NAT must reallocate source port 10000 because @@ -71,26 +96,25 @@ fi ip netns exec "$ns1" iptables -t nat -A PREROUTING -p tcp --dport 5202 -j REDIRECT --to-ports 5201 ip netns exec "$ns1" iptables -t nat -A PREROUTING -p tcp --dport 5203 -j REDIRECT --to-ports 5201 -sleep 5 | ip netns exec "$ns2" socat -t 5 -u STDIN TCP:192.168.1.1:5202,connect-timeout=5 >/dev/null & +sleep 5 | ip netns exec "$ns2" socat -T 5 -u STDIN TCP:192.168.1.1:5202,connect-timeout=5 >/dev/null & +cpid1=$! -# if connect succeeds, client closes instantly due to EOF on stdin. -# if connect hangs, it will time out after 5s. -echo | ip netns exec "$ns2" socat -t 3 -u STDIN TCP:192.168.1.1:5203,connect-timeout=5 >/dev/null & +sleep 5 | ip netns exec "$ns2" socat -T 5 -u STDIN TCP:192.168.1.1:5203,connect-timeout=5 >/dev/null & cpid2=$! -time_then=$(date +%s) -wait $cpid2 -rv=$? -time_now=$(date +%s) +busywait "$BUSYWAIT_TIMEOUT" connect_done "$ns2" 5202 +busywait "$BUSYWAIT_TIMEOUT" connect_done "$ns2" 5203 -# Check how much time has elapsed, expectation is for -# 'cpid2' to connect and then exit (and no connect delay). -delta=$((time_now - time_then)) +check_ctstate "$ns1" 5202 +check_ctstate "$ns1" 5203 -if [ $delta -lt 2 ] && [ $rv -eq 0 ]; then +kill $socatpid $cpid0 $cpid1 $cpid2 +socatpid=0 + +if [ $ret -eq 0 ]; then echo "PASS: could connect to service via redirected ports" else - echo "FAIL: socat cannot connect to service via redirect ($delta seconds elapsed, returned $rv)" + echo "FAIL: socat cannot connect to service via redirect" ret=1 fi diff --git a/tools/testing/selftests/net/netfilter/nf_queue.c b/tools/testing/selftests/net/netfilter/nf_queue.c index 9e56b9d47037..8bbec37f5356 100644 --- a/tools/testing/selftests/net/netfilter/nf_queue.c +++ b/tools/testing/selftests/net/netfilter/nf_queue.c @@ -18,6 +18,9 @@ struct options { bool count_packets; bool gso_enabled; + bool failopen; + bool out_of_order; + bool bogus_verdict; int verbose; unsigned int queue_num; unsigned int timeout; @@ -30,7 +33,7 @@ static struct options opts; static void help(const char *p) { - printf("Usage: %s [-c|-v [-vv] ] [-t timeout] [-q queue_num] [-Qdst_queue ] [ -d ms_delay ] [-G]\n", p); + printf("Usage: %s [-c|-v [-vv] ] [-o] [-O] [-b] [-t timeout] [-q queue_num] [-Qdst_queue ] [ -d ms_delay ] [-G]\n", p); } static int parse_attr_cb(const struct nlattr *attr, void *data) @@ -236,6 +239,8 @@ struct mnl_socket *open_queue(void) flags = opts.gso_enabled ? NFQA_CFG_F_GSO : 0; flags |= NFQA_CFG_F_UID_GID; + if (opts.failopen) + flags |= NFQA_CFG_F_FAIL_OPEN; mnl_attr_put_u32(nlh, NFQA_CFG_FLAGS, htonl(flags)); mnl_attr_put_u32(nlh, NFQA_CFG_MASK, htonl(flags)); @@ -272,7 +277,9 @@ static int mainloop(void) unsigned int buflen = 64 * 1024 + MNL_SOCKET_BUFFER_SIZE; struct mnl_socket *nl; struct nlmsghdr *nlh; + uint32_t ooo_ids[16]; unsigned int portid; + int ooo_count = 0; char *buf; int ret; @@ -305,6 +312,9 @@ static int mainloop(void) ret = mnl_cb_run(buf, ret, 0, portid, queue_cb, NULL); if (ret < 0) { + /* bogus verdict mode will generate ENOENT error messages */ + if (opts.bogus_verdict && errno == ENOENT) + continue; perror("mnl_cb_run"); exit(EXIT_FAILURE); } @@ -313,10 +323,35 @@ static int mainloop(void) if (opts.delay_ms) sleep_ms(opts.delay_ms); - nlh = nfq_build_verdict(buf, id, opts.queue_num, opts.verdict); - if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) { - perror("mnl_socket_sendto"); - exit(EXIT_FAILURE); + if (opts.bogus_verdict) { + for (int i = 0; i < 50; i++) { + nlh = nfq_build_verdict(buf, id + 0x7FFFFFFF + i, + opts.queue_num, opts.verdict); + mnl_socket_sendto(nl, nlh, nlh->nlmsg_len); + } + } + + if (opts.out_of_order) { + ooo_ids[ooo_count] = id; + if (ooo_count >= 15) { + for (ooo_count; ooo_count >= 0; ooo_count--) { + nlh = nfq_build_verdict(buf, ooo_ids[ooo_count], + opts.queue_num, opts.verdict); + if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) { + perror("mnl_socket_sendto"); + exit(EXIT_FAILURE); + } + } + ooo_count = 0; + } else { + ooo_count++; + } + } else { + nlh = nfq_build_verdict(buf, id, opts.queue_num, opts.verdict); + if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) { + perror("mnl_socket_sendto"); + exit(EXIT_FAILURE); + } } } @@ -329,7 +364,7 @@ static void parse_opts(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "chvt:q:Q:d:G")) != -1) { + while ((c = getopt(argc, argv, "chvoObt:q:Q:d:G")) != -1) { switch (c) { case 'c': opts.count_packets = true; @@ -366,9 +401,18 @@ static void parse_opts(int argc, char **argv) case 'G': opts.gso_enabled = false; break; + case 'o': + opts.failopen = true; + break; case 'v': opts.verbose++; break; + case 'O': + opts.out_of_order = true; + break; + case 'b': + opts.bogus_verdict = true; + break; } } diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh index 47088b005390..ffdc6ccc6511 100755 --- a/tools/testing/selftests/net/netfilter/nft_concat_range.sh +++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh @@ -15,10 +15,12 @@ source lib.sh # Available test groups: # - reported_issues: check for issues that were reported in the past # - correctness: check that packets match given entries, and only those +# - correctness_large: same but with additional non-matching entries # - concurrency: attempt races between insertion, deletion and lookup # - timeout: check that packets match entries until they expire # - performance: estimate matching rate, compare with rbtree and hash baselines -TESTS="reported_issues correctness concurrency timeout" +TESTS="reported_issues correctness correctness_large concurrency timeout" + [ -n "$NFT_CONCAT_RANGE_TESTS" ] && TESTS="${NFT_CONCAT_RANGE_TESTS}" # Set types, defined by TYPE_ variables below @@ -27,7 +29,8 @@ TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto net6_port_net6_port net_port_mac_proto_net" # Reported bugs, also described by TYPE_ variables below -BUGS="flush_remove_add reload net_port_proto_match" +BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch doublecreate + insert_overlap load_flush_load4 load_flush_load8" # List of possible paths to pktgen script from kernel tree for performance tests PKTGEN_SCRIPT_PATHS=" @@ -376,7 +379,7 @@ display net,port,proto type_spec ipv4_addr . inet_service . inet_proto chain_spec ip daddr . udp dport . meta l4proto dst addr4 port proto -src +src start 1 count 9 src_delta 9 @@ -387,6 +390,73 @@ race_repeat 0 perf_duration 0 " + +TYPE_avx2_mismatch=" +display avx2 false match +type_spec inet_proto . ipv6_addr +chain_spec meta l4proto . ip6 daddr +dst proto addr6 +src +start 1 +count 1 +src_delta 1 +tools ping +proto icmp6 + +race_repeat 0 + +perf_duration 0 +" + + +TYPE_doublecreate=" +display cannot create same element twice +type_spec ipv4_addr . ipv4_addr +chain_spec ip saddr . ip daddr +dst addr4 +proto icmp + +race_repeat 0 + +perf_duration 0 +" + +TYPE_insert_overlap=" +display reject overlapping range on add +type_spec ipv4_addr . ipv4_addr +chain_spec ip saddr . ip daddr +dst addr4 +proto icmp + +race_repeat 0 + +perf_duration 0 +" + +TYPE_load_flush_load4=" +display reload with flush, 4bit groups +type_spec ipv4_addr . ipv4_addr +chain_spec ip saddr . ip daddr +dst addr4 +proto icmp + +race_repeat 0 + +perf_duration 0 +" + +TYPE_load_flush_load8=" +display reload with flush, 8bit groups +type_spec ipv4_addr . ipv4_addr +chain_spec ip saddr . ip daddr +dst addr4 +proto icmp + +race_repeat 0 + +perf_duration 0 +" + # Set template for all tests, types and rules are filled in depending on test set_template=' flush ruleset @@ -398,6 +468,7 @@ table inet filter { set test { type ${type_spec} + counter flags interval,timeout } @@ -1137,9 +1208,18 @@ del() { fi } -# Return packet count from 'test' counter in 'inet filter' table +# Return packet count for elem $1 from 'test' counter in 'inet filter' table count_packets() { found=0 + for token in $(nft reset element inet filter test "${1}" ); do + [ ${found} -eq 1 ] && echo "${token}" && return + [ "${token}" = "packets" ] && found=1 + done +} + +# Return packet count from 'test' counter in 'inet filter' table +count_packets_nomatch() { + found=0 for token in $(nft list counter inet filter test); do [ ${found} -eq 1 ] && echo "${token}" && return [ "${token}" = "packets" ] && found=1 @@ -1185,6 +1265,10 @@ perf() { # Set MAC addresses, send single packet, check that it matches, reset counter send_match() { + local elem="$1" + + shift + ip link set veth_a address "$(format_mac "${1}")" ip -n B link set veth_b address "$(format_mac "${2}")" @@ -1195,7 +1279,7 @@ send_match() { eval src_"$f"=\$\(format_\$f "${2}"\) done eval send_\$proto - if [ "$(count_packets)" != "1" ]; then + if [ "$(count_packets "$elem")" != "1" ]; then err "${proto} packet to:" err " $(for f in ${dst}; do eval format_\$f "${1}"; printf ' '; done)" @@ -1221,7 +1305,7 @@ send_nomatch() { eval src_"$f"=\$\(format_\$f "${2}"\) done eval send_\$proto - if [ "$(count_packets)" != "0" ]; then + if [ "$(count_packets_nomatch)" != "0" ]; then err "${proto} packet to:" err " $(for f in ${dst}; do eval format_\$f "${1}"; printf ' '; done)" @@ -1234,15 +1318,54 @@ send_nomatch() { fi } +maybe_send_nomatch() { + local elem="$1" + local what="$4" + + [ $((RANDOM%20)) -gt 0 ] && return + + dst_addr4="$2" + dst_port="$3" + send_udp + + if [ "$(count_packets_nomatch)" != "0" ]; then + err "Packet to $dst_addr4:$dst_port did match $what" + err "$(nft -a list ruleset)" + return 1 + fi +} + +maybe_send_match() { + local elem="$1" + local what="$4" + + [ $((RANDOM%20)) -gt 0 ] && return + + dst_addr4="$2" + dst_port="$3" + send_udp + + if [ "$(count_packets "{ $elem }")" != "1" ]; then + err "Packet to $dst_addr4:$dst_port did not match $what" + err "$(nft -a list ruleset)" + return 1 + fi + nft reset counter inet filter test >/dev/null + nft reset element inet filter test "{ $elem }" >/dev/null +} + # Correctness test template: # - add ranged element, check that packets match it # - check that packets outside range don't match it # - remove some elements, check that packets don't match anymore -test_correctness() { - setup veth send_"${proto}" set || return ${ksft_skip} - +test_correctness_main() { range_size=1 + + send_nomatch $((end + 1)) $((end + 1 + src_delta)) || return 1 + for i in $(seq "${start}" $((start + count))); do + local elem="" + end=$((start + range_size)) # Avoid negative or zero-sized port ranges @@ -1253,15 +1376,16 @@ test_correctness() { srcstart=$((start + src_delta)) srcend=$((end + src_delta)) - add "$(format)" || return 1 + elem="$(format)" + add "$elem" || return 1 for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do - send_match "${j}" $((j + src_delta)) || return 1 + send_match "$elem" "${j}" $((j + src_delta)) || return 1 done send_nomatch $((end + 1)) $((end + 1 + src_delta)) || return 1 # Delete elements now and then if [ $((i % 3)) -eq 0 ]; then - del "$(format)" || return 1 + del "$elem" || return 1 for j in $(seq "$start" \ $((range_size / 2 + 1)) ${end}); do send_nomatch "${j}" $((j + src_delta)) \ @@ -1274,6 +1398,163 @@ test_correctness() { done } +test_correctness() { + setup veth send_"${proto}" set || return ${ksft_skip} + + test_correctness_main +} + +# Repeat the correctness tests, but add extra non-matching entries. +# This exercises the more compact '4 bit group' representation that +# gets picked when the default 8-bit representation exceed +# NFT_PIPAPO_LT_SIZE_HIGH bytes of memory. +# See usage of NFT_PIPAPO_LT_SIZE_HIGH in pipapo_lt_bits_adjust(). +# +# The format() helper is way too slow when generating lots of +# entries so its not used here. +test_correctness_large() { + setup veth send_"${proto}" set || return ${ksft_skip} + # number of dummy (filler) entries to add. + local dcount=16385 + + ( + echo -n "add element inet filter test { " + + case "$type_spec" in + "ether_addr . ipv4_addr") + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + format_mac $((1000000 + i)) + printf ". 172.%i.%i.%i " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) + done + ;; + "inet_proto . ipv6_addr") + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + printf "%i . " $((RANDOM%256)) + format_addr6 $((1000000 + i)) + done + ;; + "inet_service . inet_proto") + # smaller key sizes, need more entries to hit the + # 4-bit threshold. + dcount=65536 + for i in $(seq 1 $dcount); do + local proto=$((RANDOM%256)) + + # Test uses UDP to match, as it also fails when matching + # an entry that doesn't exist, so skip 'udp' entries + # to not trigger a wrong failure. + [ $proto -eq 17 ] && proto=18 + [ $i -gt 1 ] && echo ", " + printf "%i . %i " $(((i%65534) + 1)) $((proto)) + done + ;; + "inet_service . ipv4_addr") + dcount=32768 + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + printf "%i . 172.%i.%i.%i " $(((RANDOM%65534) + 1)) $((RANDOM%256)) $((RANDOM%256)) $((i%256)) + done + ;; + "ipv4_addr . ether_addr") + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + printf "172.%i.%i.%i . " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) + format_mac $((1000000 + i)) + done + ;; + "ipv4_addr . inet_service") + dcount=32768 + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + printf "172.%i.%i.%i . %i" $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) + done + ;; + "ipv4_addr . inet_service . ether_addr . inet_proto . ipv4_addr") + dcount=65536 + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + printf "172.%i.%i.%i . %i . " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) + format_mac $((1000000 + i)) + printf ". %i . 192.168.%i.%i" $((RANDOM%256)) $((RANDOM%256)) $((i%256)) + done + ;; + "ipv4_addr . inet_service . inet_proto") + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + printf "172.%i.%i.%i . %i . %i " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) $((RANDOM%256)) + done + ;; + "ipv4_addr . inet_service . inet_proto . ipv4_addr") + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + printf "172.%i.%i.%i . %i . %i . 192.168.%i.%i " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) $((RANDOM%256)) $((RANDOM%256)) $((RANDOM%256)) + done + ;; + "ipv4_addr . inet_service . ipv4_addr") + dcount=32768 + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + printf "172.%i.%i.%i . %i . 192.168.%i.%i " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) $((RANDOM%256)) $((RANDOM%256)) + done + ;; + "ipv6_addr . ether_addr") + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + format_addr6 $((i + 1000000)) + echo -n " . " + format_mac $((1000000 + i)) + done + ;; + "ipv6_addr . inet_service") + dcount=32768 + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + format_addr6 $((i + 1000000)) + echo -n " . $(((RANDOM%65534) + 1))" + done + ;; + "ipv6_addr . inet_service . ether_addr") + dcount=32768 + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + format_addr6 $((i + 1000000)) + echo -n " . $(((RANDOM%65534) + 1)) . " + format_mac $((i + 1000000)) + done + ;; + "ipv6_addr . inet_service . ether_addr . inet_proto") + dcount=65536 + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + format_addr6 $((i + 1000000)) + echo -n " . $(((RANDOM%65534) + 1)) . " + format_mac $((i + 1000000)) + echo -n " . $((RANDOM%256))" + done + ;; + "ipv6_addr . inet_service . ipv6_addr . inet_service") + dcount=32768 + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + format_addr6 $((i + 1000000)) + echo -n " . $(((RANDOM%65534) + 1)) . " + format_addr6 $((i + 2123456)) + echo -n " . $((RANDOM%256))" + done + ;; + *) + "Unhandled $type_spec" + return 1 + esac + echo -n "}" + + ) | nft -f - || return 1 + + test_correctness_main +} + # Concurrency test template: # - add all the elements # - start a thread for each physical thread that: @@ -1396,14 +1677,17 @@ test_timeout() { range_size=1 for i in $(seq "$start" $((start + count))); do + local elem="" + end=$((start + range_size)) srcstart=$((start + src_delta)) srcend=$((end + src_delta)) - add "$(format)" || return 1 + elem="$(format)" + add "$elem" || return 1 for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do - send_match "${j}" $((j + src_delta)) || return 1 + send_match "$elem" "${j}" $((j + src_delta)) || return 1 done range_size=$((range_size + 1)) @@ -1561,7 +1845,7 @@ test_bug_reload() { srcend=$((end + src_delta)) for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do - send_match "${j}" $((j + src_delta)) || return 1 + send_match "$(format)" "${j}" $((j + src_delta)) || return 1 done range_size=$((range_size + 1)) @@ -1580,22 +1864,34 @@ test_bug_net_port_proto_match() { range_size=1 for i in $(seq 1 10); do for j in $(seq 1 20) ; do - elem=$(printf "10.%d.%d.0/24 . %d1-%d0 . 6-17 " ${i} ${j} ${i} "$((i+1))") + local dport=$j + + elem=$(printf "10.%d.%d.0/24 . %d-%d0 . 6-17 " ${i} ${j} ${dport} "$((dport+1))") + + # too slow, do not test all addresses + maybe_send_nomatch "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d1" $((dport+1))) "before add" || return 1 nft "add element inet filter test { $elem }" || return 1 + + maybe_send_match "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d" $dport) "after add" || return 1 + nft "get element inet filter test { $elem }" | grep -q "$elem" if [ $? -ne 0 ];then local got=$(nft "get element inet filter test { $elem }") err "post-add: should have returned $elem but got $got" return 1 fi + + maybe_send_nomatch "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d1" $((dport+1))) "out-of-range" || return 1 done done # recheck after set was filled for i in $(seq 1 10); do for j in $(seq 1 20) ; do - elem=$(printf "10.%d.%d.0/24 . %d1-%d0 . 6-17 " ${i} ${j} ${i} "$((i+1))") + local dport=$j + + elem=$(printf "10.%d.%d.0/24 . %d-%d0 . 6-17 " ${i} ${j} ${dport} "$((dport+1))") nft "get element inet filter test { $elem }" | grep -q "$elem" if [ $? -ne 0 ];then @@ -1603,6 +1899,9 @@ test_bug_net_port_proto_match() { err "post-fill: should have returned $elem but got $got" return 1 fi + + maybe_send_match "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d" $dport) "recheck" || return 1 + maybe_send_nomatch "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d1" $((dport+1))) "recheck out-of-range" || return 1 done done @@ -1610,9 +1909,10 @@ test_bug_net_port_proto_match() { for i in $(seq 1 10); do for j in $(seq 1 20) ; do local rnd=$((RANDOM%10)) + local dport=$j local got="" - elem=$(printf "10.%d.%d.0/24 . %d1-%d0 . 6-17 " ${i} ${j} ${i} "$((i+1))") + elem=$(printf "10.%d.%d.0/24 . %d-%d0 . 6-17 " ${i} ${j} ${dport} "$((dport+1))") if [ $rnd -gt 0 ];then continue fi @@ -1623,12 +1923,148 @@ test_bug_net_port_proto_match() { err "post-delete: query for $elem returned $got instead of error." return 1 fi + + maybe_send_nomatch "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d" $dport) "match after deletion" || return 1 done done nft flush ruleset } +test_bug_avx2_mismatch() +{ + setup veth send_"${proto}" set || return ${ksft_skip} + + local a1="fe80:dead:01ff:0a02:0b03:6007:8009:a001" + local a2="fe80:dead:01fe:0a02:0b03:6007:8009:a001" + + nft "add element inet filter test { icmpv6 . $a1 }" + + dst_addr6="$a2" + send_icmp6 + + if [ "$(count_packets "{ icmpv6 . $a1 }")" -gt "0" ]; then + err "False match for $a2" + return 1 + fi +} + +test_bug_doublecreate() +{ + local elements="1.2.3.4 . 1.2.4.1, 1.2.4.1 . 1.2.3.4" + local ret=1 + local i + + setup veth send_"${proto}" set || return ${ksft_skip} + + add "{ $elements }" || return 1 + # expected to work: 'add' on existing should be no-op. + add "{ $elements }" || return 1 + + # 'create' should return an error. + if nft create element inet filter test "{ $elements }" 2>/dev/null; then + err "Could create an existing element" + return 1 + fi +nft -f - <<EOF 2>/dev/null +flush set inet filter test +create element inet filter test { $elements } +create element inet filter test { $elements } +EOF + ret=$? + if [ $ret -eq 0 ]; then + err "Could create element twice in one transaction" + err "$(nft -a list ruleset)" + return 1 + fi + +nft -f - <<EOF 2>/dev/null +flush set inet filter test +create element inet filter test { $elements } +EOF + ret=$? + if [ $ret -ne 0 ]; then + err "Could not flush and re-create element in one transaction" + return 1 + fi + + return 0 +} + +add_fail() +{ + if nft add element inet filter test "$1" 2>/dev/null ; then + err "Returned success for add ${1} given set:" + err "$(nft -a list set inet filter test )" + return 1 + fi + + return 0 +} + +test_bug_insert_overlap() +{ + local elements="1.2.3.4 . 1.2.4.1" + + setup veth send_"${proto}" set || return ${ksft_skip} + + add "{ $elements }" || return 1 + + elements="1.2.3.0-1.2.3.4 . 1.2.4.1" + add_fail "{ $elements }" || return 1 + + elements="1.2.3.0-1.2.3.4 . 1.2.4.2" + add "{ $elements }" || return 1 + + elements="1.2.3.4 . 1.2.4.1-1.2.4.2" + add_fail "{ $elements }" || return 1 + + return 0 +} + +test_bug_load_flush_load4() +{ + local i + + setup veth send_"${proto}" set || return ${ksft_skip} + + for i in $(seq 0 255); do + local addelem="add element inet filter test" + local j + + for j in $(seq 0 20); do + echo "$addelem { 10.$j.0.$i . 10.$j.1.$i }" + echo "$addelem { 10.$j.0.$i . 10.$j.2.$i }" + done + done > "$tmp" + + nft -f "$tmp" || return 1 + + ( echo "flush set inet filter test";cat "$tmp") | nft -f - + [ $? -eq 0 ] || return 1 + + return 0 +} + +test_bug_load_flush_load8() +{ + local i + + setup veth send_"${proto}" set || return ${ksft_skip} + + for i in $(seq 1 100); do + echo "add element inet filter test { 10.0.0.$i . 10.0.1.$i }" + echo "add element inet filter test { 10.0.0.$i . 10.0.2.$i }" + done > "$tmp" + + nft -f "$tmp" || return 1 + + ( echo "flush set inet filter test";cat "$tmp") | nft -f - + [ $? -eq 0 ] || return 1 + + return 0 +} + test_reported_issues() { eval test_bug_"${subtest}" } diff --git a/tools/testing/selftests/net/netfilter/nft_fib.sh b/tools/testing/selftests/net/netfilter/nft_fib.sh index ce1451c275fd..04544905c216 100755 --- a/tools/testing/selftests/net/netfilter/nft_fib.sh +++ b/tools/testing/selftests/net/netfilter/nft_fib.sh @@ -3,6 +3,10 @@ # This tests the fib expression. # # Kselftest framework requirement - SKIP code is 4. +# +# 10.0.1.99 10.0.1.1 10.0.2.1 10.0.2.99 +# dead:1::99 dead:1::1 dead:2::1 dead:2::99 +# ns1 <-------> [ veth0 ] nsrouter [veth1] <-------> ns2 source lib.sh @@ -45,6 +49,19 @@ table inet filter { EOF } +load_input_ruleset() { + local netns=$1 + +ip netns exec "$netns" nft -f /dev/stdin <<EOF +table inet filter { + chain input { + type filter hook input priority 0; policy accept; + fib saddr . iif oif missing counter log prefix "$netns nft_rpfilter: " drop + } +} +EOF +} + load_pbr_ruleset() { local netns=$1 @@ -59,6 +76,89 @@ table inet filter { EOF } +load_type_ruleset() { + local netns=$1 + + for family in ip ip6;do +ip netns exec "$netns" nft -f /dev/stdin <<EOF +table $family filter { + chain type_match_in { + fib daddr type local counter comment "daddr configured on other iface" + fib daddr . iif type local counter comment "daddr configured on iif" + fib daddr type unicast counter comment "daddr not local" + fib daddr . iif type unicast counter comment "daddr not configured on iif" + } + + chain type_match_out { + fib daddr type unicast counter + fib daddr . oif type unicast counter + fib daddr type local counter + fib daddr . oif type local counter + } + + chain prerouting { + type filter hook prerouting priority 0; + icmp type echo-request counter jump type_match_in + icmpv6 type echo-request counter jump type_match_in + } + + chain input { + type filter hook input priority 0; + icmp type echo-request counter jump type_match_in + icmpv6 type echo-request counter jump type_match_in + } + + chain forward { + type filter hook forward priority 0; + icmp type echo-request counter jump type_match_in + icmpv6 type echo-request counter jump type_match_in + } + + chain output { + type filter hook output priority 0; + icmp type echo-request counter jump type_match_out + icmpv6 type echo-request counter jump type_match_out + } + + chain postrouting { + type filter hook postrouting priority 0; + icmp type echo-request counter jump type_match_out + icmpv6 type echo-request counter jump type_match_out + } +} +EOF +done +} + +reload_type_ruleset() { + ip netns exec "$1" nft flush table ip filter + ip netns exec "$1" nft flush table ip6 filter + load_type_ruleset "$1" +} + +check_fib_type_counter_family() { + local family="$1" + local want="$2" + local ns="$3" + local chain="$4" + local what="$5" + local errmsg="$6" + + if ! ip netns exec "$ns" nft list chain "$family" filter "$chain" | grep "$what" | grep -q "packets $want";then + echo "Netns $ns $family fib type counter doesn't match expected packet count of $want for $what $errmsg" 1>&2 + ip netns exec "$ns" nft list chain "$family" filter "$chain" + ret=1 + return 1 + fi + + return 0 +} + +check_fib_type_counter() { + check_fib_type_counter_family "ip" "$@" || return 1 + check_fib_type_counter_family "ip6" "$@" || return 1 +} + load_ruleset_count() { local netns=$1 @@ -77,6 +177,7 @@ check_drops() { if dmesg | grep -q ' nft_rpfilter: ';then dmesg | grep ' nft_rpfilter: ' echo "FAIL: rpfilter did drop packets" + ret=1 return 1 fi @@ -151,19 +252,509 @@ test_ping() { return 0 } +test_ping_unreachable() { + local daddr4=$1 + local daddr6=$2 + + if ip netns exec "$ns1" ping -c 1 -W 0.1 -q "$daddr4" > /dev/null; then + echo "FAIL: ${ns1} could reach $daddr4" 1>&2 + return 1 + fi + + if ip netns exec "$ns1" ping -c 1 -W 0.1 -q "$daddr6" > /dev/null; then + echo "FAIL: ${ns1} could reach $daddr6" 1>&2 + return 1 + fi + + return 0 +} + +test_fib_type() { + local notice="$1" + local errmsg="addr-on-if" + local lret=0 + + if ! load_type_ruleset "$nsrouter";then + echo "SKIP: Could not load fib type ruleset" + [ $ret -eq 0 ] && ret=$ksft_skip + return + fi + + # makes router receive packet for addresses configured on incoming + # interface. + test_ping 10.0.1.1 dead:1::1 || return 1 + + # expectation: triggers all 'local' in prerouting/input. + check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr type local" "$errmsg" || lret=1 + check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr . iif type local" "$errmsg" || lret=1 + + reload_type_ruleset "$nsrouter" + # makes router receive packet for address configured on a different (but local) + # interface. + test_ping 10.0.2.1 dead:2::1 || return 1 + + # expectation: triggers 'unicast' in prerouting/input for daddr . iif and local for 'daddr'. + errmsg="addr-on-host" + check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr type local" "$errmsg" || lret=1 + check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr . iif type unicast" "$errmsg" || lret=1 + + reload_type_ruleset "$nsrouter" + test_ping 10.0.2.99 dead:2::99 || return 1 + errmsg="addr-on-otherhost" + check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr type unicast" "$errmsg" || lret=1 + check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr . iif type unicast" "$errmsg" || lret=1 + + if [ $lret -eq 0 ];then + echo "PASS: fib expression address types match ($notice)" + else + echo "FAIL: fib expression address types match ($notice)" + ret=1 + fi +} + +test_fib_vrf_dev_add_dummy() +{ + if ! ip -net "$nsrouter" link add dummy0 type dummy ;then + echo "SKIP: VRF tests: dummy device type not supported" + return 1 + fi + + if ! ip -net "$nsrouter" link add tvrf type vrf table 9876;then + echo "SKIP: VRF tests: vrf device type not supported" + return 1 + fi + + ip -net "$nsrouter" link set dummy0 master tvrf + ip -net "$nsrouter" link set dummy0 up + ip -net "$nsrouter" link set tvrf up +} + +load_ruleset_vrf() +{ +# Due to the many different possible combinations using named counters +# or one-rule-per-expected-result is complex. +# +# Instead, add dynamic sets for the fib modes +# (fib address type, fib output interface lookup .. ), +# and then add the obtained fib results to them. +# +# The test is successful if the sets contain the expected results +# and no unexpected extra entries existed. +ip netns exec "$nsrouter" nft -f - <<EOF +flush ruleset +table inet t { + set fibif4 { + typeof meta iif . ip daddr . fib daddr oif + flags dynamic + counter + } + + set fibif4iif { + typeof meta iif . ip daddr . fib daddr . iif oif + flags dynamic + counter + } + + set fibif6 { + typeof meta iif . ip6 daddr . fib daddr oif + flags dynamic + counter + } + + set fibif6iif { + typeof meta iif . ip6 daddr . fib daddr . iif oif + flags dynamic + counter + } + + set fibtype4 { + typeof meta iif . ip daddr . fib daddr type + flags dynamic + counter + } + + set fibtype4iif { + typeof meta iif . ip daddr . fib daddr . iif type + flags dynamic + counter + } + + set fibtype6 { + typeof meta iif . ip6 daddr . fib daddr type + flags dynamic + counter + } + + set fibtype6iif { + typeof meta iif . ip6 daddr . fib daddr . iif type + flags dynamic + counter + } + + chain fib_test { + meta nfproto ipv4 jump { + add @fibif4 { meta iif . ip daddr . fib daddr oif } + add @fibif4iif { meta iif . ip daddr . fib daddr . iif oif } + add @fibtype4 { meta iif . ip daddr . fib daddr type } + add @fibtype4iif { meta iif . ip daddr . fib daddr . iif type } + + add @fibif4 { meta iif . ip saddr . fib saddr oif } + add @fibif4iif { meta iif . ip saddr . fib saddr . iif oif } + } + + meta nfproto ipv6 jump { + add @fibif6 { meta iif . ip6 daddr . fib daddr oif } + add @fibif6iif { meta iif . ip6 daddr . fib daddr . iif oif } + add @fibtype6 { meta iif . ip6 daddr . fib daddr type } + add @fibtype6iif { meta iif . ip6 daddr . fib daddr . iif type } + + add @fibif6 { meta iif . ip6 saddr . fib saddr oif } + add @fibif6iif { meta iif . ip6 saddr . fib saddr . iif oif } + } + } + + chain prerouting { + type filter hook prerouting priority 0; + icmp type echo-request counter jump fib_test + + # neighbour discovery to be ignored. + icmpv6 type echo-request counter jump fib_test + } +} +EOF + +if [ $? -ne 0 ] ;then + echo "SKIP: Could not load ruleset for fib vrf test" + [ $ret -eq 0 ] && ret=$ksft_skip + return 1 +fi +} + +check_type() +{ + local setname="$1" + local iifname="$2" + local addr="$3" + local type="$4" + local count="$5" + local lret=0 + + [ -z "$count" ] && count=1 + + if ! ip netns exec "$nsrouter" nft get element inet t "$setname" { "$iifname" . "$addr" . "$type" } |grep -q "counter packets $count";then + echo "FAIL: did not find $iifname . $addr . $type in $setname with $count packets" + ip netns exec "$nsrouter" nft list set inet t "$setname" + ret=1 + # do not fail right away, delete entry if it exists so later test that + # checks for unwanted keys don't get confused by this *expected* key. + lret=1 + fi + + # delete the entry, this allows to check if anything unexpected appeared + # at the end of the test run: all dynamic sets should be empty by then. + if ! ip netns exec "$nsrouter" nft delete element inet t "$setname" { "$iifname" . "$addr" . "$type" } ; then + echo "FAIL: can't delete $iifname . $addr . $type in $setname" + ip netns exec "$nsrouter" nft list set inet t "$setname" + ret=1 + return 1 + fi + + return $lret +} + +check_local() +{ + check_type $@ "local" 1 +} + +check_unicast() +{ + check_type $@ "unicast" 1 +} + +check_rpf() +{ + check_type $@ +} + +check_fib_vrf_sets_empty() +{ + local setname="" + local lret=0 + + # A non-empty set means that we have seen unexpected packets OR + # that a fib lookup provided unexpected results. + for setname in "fibif4" "fibif4iif" "fibif6" "fibif6iif" \ + "fibtype4" "fibtype4iif" "fibtype6" "fibtype6iif";do + if ip netns exec "$nsrouter" nft list set inet t "$setname" | grep -q elements;then + echo "FAIL: $setname not empty" + ip netns exec "$nsrouter" nft list set inet t "$setname" + ret=1 + lret=1 + fi + done + + return $lret +} + +check_fib_vrf_type() +{ + local msg="$1" + + local addr + # the incoming interface is always veth0. As its not linked to a VRF, + # the 'tvrf' device should NOT show up anywhere. + local ifname="veth0" + local lret=0 + + # local_veth0, local_veth1 + for addr in "10.0.1.1" "10.0.2.1"; do + check_local fibtype4 "$ifname" "$addr" || lret=1 + check_type fibif4 "$ifname" "$addr" "0" || lret=1 + done + for addr in "dead:1::1" "dead:2::1";do + check_local fibtype6 "$ifname" "$addr" || lret=1 + check_type fibif6 "$ifname" "$addr" "0" || lret=1 + done + + # when restricted to the incoming interface, 10.0.1.1 should + # be 'local', but 10.0.2.1 unicast. + check_local fibtype4iif "$ifname" "10.0.1.1" || lret=1 + check_unicast fibtype4iif "$ifname" "10.0.2.1" || lret=1 + + # same for the ipv6 addresses. + check_local fibtype6iif "$ifname" "dead:1::1" || lret=1 + check_unicast fibtype6iif "$ifname" "dead:2::1" || lret=1 + + # None of these addresses should find a valid route when restricting + # to the incoming interface (we ask for daddr - 10.0.1.1/2.1 are + # reachable via 'lo'. + for addr in "10.0.1.1" "10.0.2.1" "10.9.9.1" "10.9.9.2";do + check_type fibif4iif "$ifname" "$addr" "0" || lret=1 + done + + # expect default route (veth1), dummy0 is part of VRF but iif isn't. + for addr in "10.9.9.1" "10.9.9.2";do + check_unicast fibtype4 "$ifname" "$addr" || lret=1 + check_unicast fibtype4iif "$ifname" "$addr" || lret=1 + check_type fibif4 "$ifname" "$addr" "veth1" || lret=1 + done + for addr in "dead:9::1" "dead:9::2";do + check_unicast fibtype6 "$ifname" "$addr" || lret=1 + check_unicast fibtype6iif "$ifname" "$addr" || lret=1 + check_type fibif6 "$ifname" "$addr" "veth1" || lret=1 + done + + # same for the IPv6 equivalent addresses. + for addr in "dead:1::1" "dead:2::1" "dead:9::1" "dead:9::2";do + check_type fibif6iif "$ifname" "$addr" "0" || lret=1 + done + + check_unicast fibtype4 "$ifname" "10.0.2.99" || lret=1 + check_unicast fibtype4iif "$ifname" "10.0.2.99" || lret=1 + check_unicast fibtype6 "$ifname" "dead:2::99" || lret=1 + check_unicast fibtype6iif "$ifname" "dead:2::99" || lret=1 + + check_type fibif4 "$ifname" "10.0.2.99" "veth1" || lret=1 + check_type fibif4iif "$ifname" "10.0.2.99" 0 || lret=1 + check_type fibif6 "$ifname" "dead:2::99" "veth1" || lret=1 + check_type fibif6iif "$ifname" "dead:2::99" 0 || lret=1 + + check_rpf fibif4 "$ifname" "10.0.1.99" "veth0" 5 || lret=1 + check_rpf fibif4iif "$ifname" "10.0.1.99" "veth0" 5 || lret=1 + check_rpf fibif6 "$ifname" "dead:1::99" "veth0" 5 || lret=1 + check_rpf fibif6iif "$ifname" "dead:1::99" "veth0" 5 || lret=1 + + check_fib_vrf_sets_empty || lret=1 + + if [ $lret -eq 0 ];then + echo "PASS: $msg" + else + echo "FAIL: $msg" + ret=1 + fi +} + +check_fib_veth_vrf_type() +{ + local msg="$1" + + local addr + local ifname + local setname + local lret=0 + + # as veth0 is now part of tvrf interface, packets will be seen + # twice, once with iif veth0, then with iif tvrf. + + for ifname in "veth0" "tvrf"; do + for addr in "10.0.1.1" "10.9.9.1"; do + check_local fibtype4 "$ifname" "$addr" || lret=1 + # addr local, but nft_fib doesn't return routes with RTN_LOCAL. + check_type fibif4 "$ifname" "$addr" 0 || lret=1 + check_type fibif4iif "$ifname" "$addr" 0 || lret=1 + done + + for addr in "dead:1::1" "dead:9::1"; do + check_local fibtype6 "$ifname" "$addr" || lret=1 + # same, address is local but no route is returned for lo. + check_type fibif6 "$ifname" "$addr" 0 || lret=1 + check_type fibif6iif "$ifname" "$addr" 0 || lret=1 + done + + for t in fibtype4 fibtype4iif; do + check_unicast "$t" "$ifname" 10.9.9.2 || lret=1 + done + for t in fibtype6 fibtype6iif; do + check_unicast "$t" "$ifname" dead:9::2 || lret=1 + done + + check_unicast fibtype4iif "$ifname" "10.9.9.1" || lret=1 + check_unicast fibtype6iif "$ifname" "dead:9::1" || lret=1 + + check_unicast fibtype4 "$ifname" "10.0.2.99" || lret=1 + check_unicast fibtype4iif "$ifname" "10.0.2.99" || lret=1 + + check_unicast fibtype6 "$ifname" "dead:2::99" || lret=1 + check_unicast fibtype6iif "$ifname" "dead:2::99" || lret=1 + + check_type fibif4 "$ifname" "10.0.2.99" "veth1" || lret=1 + check_type fibif6 "$ifname" "dead:2::99" "veth1" || lret=1 + check_type fibif4 "$ifname" "10.9.9.2" "dummy0" || lret=1 + check_type fibif6 "$ifname" "dead:9::2" "dummy0" || lret=1 + + # restricted to iif -- MUST NOT provide result, its != $ifname. + check_type fibif4iif "$ifname" "10.0.2.99" 0 || lret=1 + check_type fibif6iif "$ifname" "dead:2::99" 0 || lret=1 + + check_rpf fibif4 "$ifname" "10.0.1.99" "veth0" 4 || lret=1 + check_rpf fibif6 "$ifname" "dead:1::99" "veth0" 4 || lret=1 + check_rpf fibif4iif "$ifname" "10.0.1.99" "$ifname" 4 || lret=1 + check_rpf fibif6iif "$ifname" "dead:1::99" "$ifname" 4 || lret=1 + done + + check_local fibtype4iif "veth0" "10.0.1.1" || lret=1 + check_local fibtype6iif "veth0" "dead:1::1" || lret=1 + + check_unicast fibtype4iif "tvrf" "10.0.1.1" || lret=1 + check_unicast fibtype6iif "tvrf" "dead:1::1" || lret=1 + + # 10.9.9.2 should not provide a result for iif veth, but + # should when iif is tvrf. + # This is because its reachable via dummy0 which is part of + # tvrf. iif veth0 MUST conceal the dummy0 result (i.e. return oif 0). + check_type fibif4iif "veth0" "10.9.9.2" 0 || lret=1 + check_type fibif6iif "veth0" "dead:9::2" 0 || lret=1 + + check_type fibif4iif "tvrf" "10.9.9.2" "tvrf" || lret=1 + check_type fibif6iif "tvrf" "dead:9::2" "tvrf" || lret=1 + + check_fib_vrf_sets_empty || lret=1 + + if [ $lret -eq 0 ];then + echo "PASS: $msg" + else + echo "FAIL: $msg" + ret=1 + fi +} + +# Extends nsrouter config by adding dummy0+vrf. +# +# 10.0.1.99 10.0.1.1 10.0.2.1 10.0.2.99 +# dead:1::99 dead:1::1 dead:2::1 dead:2::99 +# ns1 <-------> [ veth0 ] nsrouter [veth1] <-------> ns2 +# [dummy0] +# 10.9.9.1 +# dead:9::1 +# [tvrf] +test_fib_vrf() +{ + local cntname="" + + if ! test_fib_vrf_dev_add_dummy; then + [ $ret -eq 0 ] && ret=$ksft_skip + return + fi + + ip -net "$nsrouter" addr add "10.9.9.1"/24 dev dummy0 + ip -net "$nsrouter" addr add "dead:9::1"/64 dev dummy0 nodad + + ip -net "$nsrouter" route add default via 10.0.2.99 + ip -net "$nsrouter" route add default via dead:2::99 + + load_ruleset_vrf || return + + # no echo reply for these addresses: The dummy interface is part of tvrf, + # but veth0 (incoming interface) isn't linked to it. + test_ping_unreachable "10.9.9.1" "dead:9::1" & + test_ping_unreachable "10.9.9.2" "dead:9::2" & + + # expect replies from these. + test_ping "10.0.1.1" "dead:1::1" + test_ping "10.0.2.1" "dead:2::1" + test_ping "10.0.2.99" "dead:2::99" + + wait + + check_fib_vrf_type "fib expression address types match (iif not in vrf)" + + # second round: this time, make veth0 (rx interface) part of the vrf. + # 10.9.9.1 / dead:9::1 become reachable from ns1, while ns2 + # becomes unreachable. + ip -net "$nsrouter" link set veth0 master tvrf + ip -net "$nsrouter" addr add dead:1::1/64 dev veth0 nodad + + # this reload should not be needed, but in case + # there is some error (missing or unexpected entry) this will prevent them + # from leaking into round 2. + load_ruleset_vrf || return + + test_ping "10.0.1.1" "dead:1::1" + test_ping "10.9.9.1" "dead:9::1" + + # ns2 should no longer be reachable (veth1 not in vrf) + test_ping_unreachable "10.0.2.99" "dead:2::99" & + + # vrf via dummy0, but host doesn't exist + test_ping_unreachable "10.9.9.2" "dead:9::2" & + + wait + + check_fib_veth_vrf_type "fib expression address types match (iif in vrf)" +} + ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null -ip netns exec "$nsrouter" sysctl net.ipv4.conf.all.rp_filter=0 > /dev/null -ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.rp_filter=0 > /dev/null test_ping 10.0.2.1 dead:2::1 || exit 1 -check_drops || exit 1 +check_drops test_ping 10.0.2.99 dead:2::99 || exit 1 +check_drops + +[ $ret -eq 0 ] && echo "PASS: fib expression did not cause unwanted packet drops" + +load_input_ruleset "$ns1" + +test_ping 127.0.0.1 ::1 +check_drops + +test_ping 10.0.1.99 dead:1::99 +check_drops + +[ $ret -eq 0 ] && echo "PASS: fib expression did not discard loopback packets" + +load_input_ruleset "$ns1" + +test_ping 127.0.0.1 ::1 || exit 1 check_drops || exit 1 -echo "PASS: fib expression did not cause unwanted packet drops" +test_ping 10.0.1.99 dead:1::99 || exit 1 +check_drops || exit 1 + +echo "PASS: fib expression did not discard loopback packets" ip netns exec "$nsrouter" nft flush table inet filter @@ -213,7 +804,7 @@ ip -net "$nsrouter" addr del dead:2::1/64 dev veth0 # ... pbr ruleset for the router, check iif+oif. if ! load_pbr_ruleset "$nsrouter";then echo "SKIP: Could not load fib forward ruleset" - exit $ksft_skip + [ "$ret" -eq 0 ] && ret=$ksft_skip fi ip -net "$nsrouter" rule add from all table 128 @@ -224,11 +815,36 @@ ip -net "$nsrouter" route add table 129 to 10.0.2.0/24 dev veth1 # drop main ipv4 table ip -net "$nsrouter" -4 rule delete table main -if ! test_ping 10.0.2.99 dead:2::99;then - ip -net "$nsrouter" nft list ruleset - echo "FAIL: fib mismatch in pbr setup" - exit 1 +if test_ping 10.0.2.99 dead:2::99;then + echo "PASS: fib expression forward check with policy based routing" +else + echo "FAIL: fib expression forward check with policy based routing" + ret=1 fi -echo "PASS: fib expression forward check with policy based routing" -exit 0 +test_fib_type "policy routing" +ip netns exec "$nsrouter" nft delete table ip filter +ip netns exec "$nsrouter" nft delete table ip6 filter + +# Un-do policy routing changes +ip -net "$nsrouter" rule del from all table 128 +ip -net "$nsrouter" rule del from all iif veth0 table 129 + +ip -net "$nsrouter" route del table 128 to 10.0.1.0/24 dev veth0 +ip -net "$nsrouter" route del table 129 to 10.0.2.0/24 dev veth1 + +ip -net "$ns1" -4 route del default +ip -net "$ns1" -6 route del default + +ip -net "$ns1" -4 route add default via 10.0.1.1 +ip -net "$ns1" -6 route add default via dead:1::1 + +ip -net "$nsrouter" -4 rule add from all table main priority 32766 + +test_fib_type "default table" +ip netns exec "$nsrouter" nft delete table ip filter +ip netns exec "$nsrouter" nft delete table ip6 filter + +test_fib_vrf + +exit $ret diff --git a/tools/testing/selftests/net/netfilter/nft_flowtable.sh b/tools/testing/selftests/net/netfilter/nft_flowtable.sh index a4ee5496f2a1..7a34ef468975 100755 --- a/tools/testing/selftests/net/netfilter/nft_flowtable.sh +++ b/tools/testing/selftests/net/netfilter/nft_flowtable.sh @@ -20,6 +20,7 @@ ret=0 SOCAT_TIMEOUT=60 nsin="" +nsin_small="" ns1out="" ns2out="" @@ -36,7 +37,7 @@ cleanup() { cleanup_all_ns - rm -f "$nsin" "$ns1out" "$ns2out" + rm -f "$nsin" "$nsin_small" "$ns1out" "$ns2out" [ "$log_netns" -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns="$log_netns" } @@ -72,6 +73,7 @@ lmtu=1500 rmtu=2000 filesize=$((2 * 1024 * 1024)) +filesize_small=$((filesize / 16)) usage(){ echo "nft_flowtable.sh [OPTIONS]" @@ -89,7 +91,10 @@ do o) omtu=$OPTARG;; l) lmtu=$OPTARG;; r) rmtu=$OPTARG;; - s) filesize=$OPTARG;; + s) + filesize=$OPTARG + filesize_small=$((OPTARG / 16)) + ;; *) usage;; esac done @@ -122,6 +127,8 @@ ip -net "$nsr1" addr add fee1:2::1/64 dev veth1 nodad ip -net "$nsr2" addr add 192.168.10.2/24 dev veth0 ip -net "$nsr2" addr add fee1:2::2/64 dev veth0 nodad +ip netns exec "$nsr1" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null +ip netns exec "$nsr2" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null for i in 0 1; do ip netns exec "$nsr1" sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null ip netns exec "$nsr2" sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null @@ -148,7 +155,9 @@ ip -net "$ns1" route add default via dead:1::1 ip -net "$ns2" route add default via dead:2::1 ip -net "$nsr1" route add default via 192.168.10.2 +ip -6 -net "$nsr1" route add default via fee1:2::2 ip -net "$nsr2" route add default via 192.168.10.1 +ip -6 -net "$nsr2" route add default via fee1:2::1 ip netns exec "$nsr1" nft -f - <<EOF table inet filter { @@ -215,6 +224,7 @@ if ! ip netns exec "$ns2" ping -c 1 -q 10.0.1.99 > /dev/null; then fi nsin=$(mktemp) +nsin_small=$(mktemp) ns1out=$(mktemp) ns2out=$(mktemp) @@ -265,6 +275,7 @@ check_counters() check_dscp() { local what=$1 + local pmtud="$2" local ok=1 local counter @@ -277,37 +288,39 @@ check_dscp() local pc4z=${counter%*bytes*} local pc4z=${pc4z#*packets} + local failmsg="FAIL: pmtu $pmtu: $what counters do not match, expected" + case "$what" in "dscp_none") if [ "$pc4" -gt 0 ] || [ "$pc4z" -eq 0 ]; then - echo "FAIL: dscp counters do not match, expected dscp3 == 0, dscp0 > 0, but got $pc4,$pc4z" 1>&2 + echo "$failmsg dscp3 == 0, dscp0 > 0, but got $pc4,$pc4z" 1>&2 ret=1 ok=0 fi ;; "dscp_fwd") if [ "$pc4" -eq 0 ] || [ "$pc4z" -eq 0 ]; then - echo "FAIL: dscp counters do not match, expected dscp3 and dscp0 > 0 but got $pc4,$pc4z" 1>&2 + echo "$failmsg dscp3 and dscp0 > 0 but got $pc4,$pc4z" 1>&2 ret=1 ok=0 fi ;; "dscp_ingress") if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then - echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2 + echo "$failmsg dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2 ret=1 ok=0 fi ;; "dscp_egress") if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then - echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2 + echo "$failmsg dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2 ret=1 ok=0 fi ;; *) - echo "FAIL: Unknown DSCP check" 1>&2 + echo "$failmsg: Unknown DSCP check" 1>&2 ret=1 ok=0 esac @@ -319,9 +332,9 @@ check_dscp() check_transfer() { - in=$1 - out=$2 - what=$3 + local in=$1 + local out=$2 + local what=$3 if ! cmp "$in" "$out" > /dev/null 2>&1; then echo "FAIL: file mismatch for $what" 1>&2 @@ -342,25 +355,42 @@ test_tcp_forwarding_ip() { local nsa=$1 local nsb=$2 - local dstip=$3 - local dstport=$4 + local pmtu=$3 + local proto=$4 + local dstip=$5 + local dstport=$6 local lret=0 + local socatc + local socatl + local infile="$nsin" + + if [ $pmtu -eq 0 ]; then + infile="$nsin_small" + fi - timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -4 TCP-LISTEN:12345,reuseaddr STDIO < "$nsin" > "$ns2out" & + timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -${proto} \ + TCP"${proto}"-LISTEN:12345,reuseaddr STDIO < "$infile" > "$ns2out" & lpid=$! busywait 1000 listener_ready - timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -4 TCP:"$dstip":"$dstport" STDIO < "$nsin" > "$ns1out" + timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -${proto} \ + TCP"${proto}":"$dstip":"$dstport" STDIO < "$infile" > "$ns1out" + socatc=$? wait $lpid + socatl=$? - if ! check_transfer "$nsin" "$ns2out" "ns1 -> ns2"; then + if [ $socatl -ne 0 ] || [ $socatc -ne 0 ];then + rc=1 + fi + + if ! check_transfer "$infile" "$ns2out" "ns1 -> ns2"; then lret=1 ret=1 fi - if ! check_transfer "$nsin" "$ns1out" "ns1 <- ns2"; then + if ! check_transfer "$infile" "$ns1out" "ns1 <- ns2"; then lret=1 ret=1 fi @@ -370,14 +400,22 @@ test_tcp_forwarding_ip() test_tcp_forwarding() { - test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345 + local pmtu="$3" + local proto="$4" + local dstip="$5" + local dstport="$6" + + test_tcp_forwarding_ip "$1" "$2" "$pmtu" "$proto" "$dstip" "$dstport" return $? } test_tcp_forwarding_set_dscp() { - check_dscp "dscp_none" + local pmtu="$3" + local proto="$4" + local dstip="$5" + local dstport="$6" ip netns exec "$nsr1" nft -f - <<EOF table netdev dscpmangle { @@ -388,8 +426,8 @@ table netdev dscpmangle { } EOF if [ $? -eq 0 ]; then - test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345 - check_dscp "dscp_ingress" + test_tcp_forwarding_ip "$1" "$2" "$pmtu" "$proto" "$dstip" "$dstport" + check_dscp "dscp_ingress" "$pmtu" ip netns exec "$nsr1" nft delete table netdev dscpmangle else @@ -405,10 +443,10 @@ table netdev dscpmangle { } EOF if [ $? -eq 0 ]; then - test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345 - check_dscp "dscp_egress" + test_tcp_forwarding_ip "$1" "$2" "$pmtu" "$proto" "$dstip" "$dstport" + check_dscp "dscp_egress" "$pmtu" - ip netns exec "$nsr1" nft flush table netdev dscpmangle + ip netns exec "$nsr1" nft delete table netdev dscpmangle else echo "SKIP: Could not load netdev:egress for veth1" fi @@ -416,48 +454,53 @@ fi # partial. If flowtable really works, then both dscp-is-0 and dscp-is-cs3 # counters should have seen packets (before and after ft offload kicks in). ip netns exec "$nsr1" nft -a insert rule inet filter forward ip dscp set cs3 - test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345 - check_dscp "dscp_fwd" + test_tcp_forwarding_ip "$1" "$2" "$pmtu" "$proto" "$dstip" "$dstport" + check_dscp "dscp_fwd" "$pmtu" } test_tcp_forwarding_nat() { + local nsa="$1" + local nsb="$2" + local pmtu="$3" + local what="$4" local lret - local pmtu - test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345 - lret=$? + [ "$pmtu" -eq 0 ] && what="$what (pmtu disabled)" - pmtu=$3 - what=$4 + test_tcp_forwarding_ip "$nsa" "$nsb" "$pmtu" 4 10.0.2.99 12345 + lret=$? if [ "$lret" -eq 0 ] ; then if [ "$pmtu" -eq 1 ] ;then - check_counters "flow offload for ns1/ns2 with masquerade and pmtu discovery $what" + check_counters "flow offload for ns1/ns2 with masquerade $what" else echo "PASS: flow offload for ns1/ns2 with masquerade $what" fi - test_tcp_forwarding_ip "$1" "$2" 10.6.6.6 1666 + test_tcp_forwarding_ip "$1" "$2" "$pmtu" 4 10.6.6.6 1666 lret=$? if [ "$pmtu" -eq 1 ] ;then - check_counters "flow offload for ns1/ns2 with dnat and pmtu discovery $what" + check_counters "flow offload for ns1/ns2 with dnat $what" elif [ "$lret" -eq 0 ] ; then echo "PASS: flow offload for ns1/ns2 with dnat $what" fi + else + echo "FAIL: flow offload for ns1/ns2 with dnat $what" fi return $lret } make_file "$nsin" "$filesize" +make_file "$nsin_small" "$filesize_small" # First test: # No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed. # Due to MTU mismatch in both directions, all packets (except small packets like pure # acks) have to be handled by normal forwarding path. Therefore, packet counters # are not checked. -if test_tcp_forwarding "$ns1" "$ns2"; then +if test_tcp_forwarding "$ns1" "$ns2" 0 4 10.0.2.99 12345; then echo "PASS: flow offloaded for ns1/ns2" else echo "FAIL: flow offload for ns1/ns2:" 1>&2 @@ -465,6 +508,14 @@ else ret=1 fi +if test_tcp_forwarding "$ns1" "$ns2" 0 6 "[dead:2::99]" 12345; then + echo "PASS: IPv6 flow offloaded for ns1/ns2" +else + echo "FAIL: IPv6 flow offload for ns1/ns2:" 1>&2 + ip netns exec "$nsr1" nft list ruleset + ret=1 +fi + # delete default route, i.e. ns2 won't be able to reach ns1 and # will depend on ns1 being masqueraded in nsr1. # expect ns1 has nsr1 address. @@ -489,8 +540,9 @@ table ip nat { } EOF -if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 0 ""; then - echo "FAIL: flow offload for ns1/ns2 with dscp update" 1>&2 +check_dscp "dscp_none" "0" +if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 0 4 10.0.2.99 12345; then + echo "FAIL: flow offload for ns1/ns2 with dscp update and no pmtu discovery" 1>&2 exit 0 fi @@ -513,12 +565,138 @@ ip netns exec "$ns2" sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null # For earlier tests (large mtus), packets cannot be handled via flowtable # (except pure acks and other small packets). ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null +ip netns exec "$ns2" nft reset counters table inet filter >/dev/null + +if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 1 4 10.0.2.99 12345; then + echo "FAIL: flow offload for ns1/ns2 with dscp update and pmtu discovery" 1>&2 + exit 0 +fi + +ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 ""; then echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2 ip netns exec "$nsr1" nft list ruleset fi +# IPIP tunnel test: +# Add IPIP tunnel interfaces and check flowtable acceleration. +test_ipip() { +if ! ip -net "$nsr1" link add name tun0 type ipip \ + local 192.168.10.1 remote 192.168.10.2 >/dev/null;then + echo "SKIP: could not add ipip tunnel" + [ "$ret" -eq 0 ] && ret=$ksft_skip + return +fi +ip -net "$nsr1" link set tun0 up +ip -net "$nsr1" addr add 192.168.100.1/24 dev tun0 +ip netns exec "$nsr1" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null + +ip -net "$nsr1" link add name tun6 type ip6tnl local fee1:2::1 remote fee1:2::2 +ip -net "$nsr1" link set tun6 up +ip -net "$nsr1" addr add fee1:3::1/64 dev tun6 nodad + +ip -net "$nsr2" link add name tun0 type ipip local 192.168.10.2 remote 192.168.10.1 +ip -net "$nsr2" link set tun0 up +ip -net "$nsr2" addr add 192.168.100.2/24 dev tun0 +ip netns exec "$nsr2" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null + +ip -net "$nsr2" link add name tun6 type ip6tnl local fee1:2::2 remote fee1:2::1 || ret=1 +ip -net "$nsr2" link set tun6 up +ip -net "$nsr2" addr add fee1:3::2/64 dev tun6 nodad + +ip -net "$nsr1" route change default via 192.168.100.2 +ip -net "$nsr2" route change default via 192.168.100.1 + +# do not use "route change" and delete old default so +# socat fails to connect in case new default can't be added. +ip -6 -net "$nsr1" route delete default +ip -6 -net "$nsr1" route add default via fee1:3::2 +ip -6 -net "$nsr2" route delete default +ip -6 -net "$nsr2" route add default via fee1:3::1 +ip -net "$ns2" route add default via 10.0.2.1 +ip -6 -net "$ns2" route add default via dead:2::1 + +ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun0 accept' +ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun6 accept' +ip netns exec "$nsr1" nft -a insert rule inet filter forward \ + 'meta oif "veth0" tcp sport 12345 ct mark set 1 flow add @f1 counter name routed_repl accept' + +if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel"; then + echo "FAIL: flow offload for ns1/ns2 with IPIP tunnel" 1>&2 + ip netns exec "$nsr1" nft list ruleset + ret=1 +fi + +if test_tcp_forwarding "$ns1" "$ns2" 1 6 "[dead:2::99]" 12345; then + echo "PASS: flow offload for ns1/ns2 IP6IP6 tunnel" +else + echo "FAIL: flow offload for ns1/ns2 with IP6IP6 tunnel" 1>&2 + ip netns exec "$nsr1" nft list ruleset + ret=1 +fi + +# Create vlan tagged devices for IPIP traffic. +ip -net "$nsr1" link add link veth1 name veth1.10 type vlan id 10 +ip -net "$nsr1" link set veth1.10 up +ip -net "$nsr1" addr add 192.168.20.1/24 dev veth1.10 +ip -net "$nsr1" addr add fee1:4::1/64 dev veth1.10 nodad +ip netns exec "$nsr1" sysctl net.ipv4.conf.veth1/10.forwarding=1 > /dev/null +ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif veth1.10 accept' + +ip -net "$nsr1" link add name tun0.10 type ipip local 192.168.20.1 remote 192.168.20.2 +ip -net "$nsr1" link set tun0.10 up +ip -net "$nsr1" addr add 192.168.200.1/24 dev tun0.10 +ip -net "$nsr1" route change default via 192.168.200.2 +ip netns exec "$nsr1" sysctl net.ipv4.conf.tun0/10.forwarding=1 > /dev/null +ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun0.10 accept' + +ip -net "$nsr1" link add name tun6.10 type ip6tnl local fee1:4::1 remote fee1:4::2 +ip -net "$nsr1" link set tun6.10 up +ip -net "$nsr1" addr add fee1:5::1/64 dev tun6.10 nodad +ip -6 -net "$nsr1" route delete default +ip -6 -net "$nsr1" route add default via fee1:5::2 +ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun6.10 accept' + +ip -net "$nsr2" link add link veth0 name veth0.10 type vlan id 10 +ip -net "$nsr2" link set veth0.10 up +ip -net "$nsr2" addr add 192.168.20.2/24 dev veth0.10 +ip -net "$nsr2" addr add fee1:4::2/64 dev veth0.10 nodad +ip netns exec "$nsr2" sysctl net.ipv4.conf.veth0/10.forwarding=1 > /dev/null + +ip -net "$nsr2" link add name tun0.10 type ipip local 192.168.20.2 remote 192.168.20.1 +ip -net "$nsr2" link set tun0.10 up +ip -net "$nsr2" addr add 192.168.200.2/24 dev tun0.10 +ip -net "$nsr2" route change default via 192.168.200.1 +ip netns exec "$nsr2" sysctl net.ipv4.conf.tun0/10.forwarding=1 > /dev/null + +ip -net "$nsr2" link add name tun6.10 type ip6tnl local fee1:4::2 remote fee1:4::1 || ret=1 +ip -net "$nsr2" link set tun6.10 up +ip -net "$nsr2" addr add fee1:5::2/64 dev tun6.10 nodad +ip -6 -net "$nsr2" route delete default +ip -6 -net "$nsr2" route add default via fee1:5::1 + +if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel over vlan"; then + echo "FAIL: flow offload for ns1/ns2 with IPIP tunnel over vlan" 1>&2 + ip netns exec "$nsr1" nft list ruleset + ret=1 +fi + +if test_tcp_forwarding "$ns1" "$ns2" 1 6 "[dead:2::99]" 12345; then + echo "PASS: flow offload for ns1/ns2 IP6IP6 tunnel over vlan" +else + echo "FAIL: flow offload for ns1/ns2 with IP6IP6 tunnel over vlan" 1>&2 + ip netns exec "$nsr1" nft list ruleset + ret=1 +fi + +# Restore the previous configuration +ip -net "$nsr1" route change default via 192.168.10.2 +ip -net "$nsr2" route change default via 192.168.10.1 +ip -net "$ns2" route del default via 10.0.2.1 +ip -6 -net "$ns2" route del default via dead:2::1 +} + # Another test: # Add bridge interface br0 to Router1, with NAT enabled. test_bridge() { @@ -604,6 +782,8 @@ ip -net "$nsr1" addr add dead:1::1/64 dev veth0 nodad ip -net "$nsr1" link set up dev veth0 } +test_ipip + test_bridge KEY_SHA="0x"$(ps -af | sha1sum | cut -d " " -f 1) @@ -644,7 +824,7 @@ ip -net "$ns2" route del 192.168.10.1 via 10.0.2.1 ip -net "$ns2" route add default via 10.0.2.1 ip -net "$ns2" route add default via dead:2::1 -if test_tcp_forwarding "$ns1" "$ns2"; then +if test_tcp_forwarding "$ns1" "$ns2" 1 4 10.0.2.99 12345; then check_counters "ipsec tunnel mode for ns1/ns2" else echo "FAIL: ipsec tunnel mode for ns1/ns2" @@ -652,6 +832,14 @@ else ip netns exec "$nsr1" cat /proc/net/xfrm_stat 1>&2 fi +if test_tcp_forwarding "$ns1" "$ns2" 1 6 "[dead:2::99]" 12345; then + check_counters "IPv6 ipsec tunnel mode for ns1/ns2" +else + echo "FAIL: IPv6 ipsec tunnel mode for ns1/ns2" + ip netns exec "$nsr1" nft list ruleset 1>&2 + ip netns exec "$nsr1" cat /proc/net/xfrm_stat 1>&2 +fi + if [ "$1" = "" ]; then low=1280 mtu=$((65536 - low)) @@ -668,7 +856,7 @@ if [ "$1" = "" ]; then fi echo "re-run with random mtus and file size: -o $o -l $l -r $r -s $filesize" - $0 -o "$o" -l "$l" -r "$r" -s "$filesize" + $0 -o "$o" -l "$l" -r "$r" -s "$filesize" || ret=1 fi exit $ret diff --git a/tools/testing/selftests/net/netfilter/nft_interface_stress.sh b/tools/testing/selftests/net/netfilter/nft_interface_stress.sh new file mode 100755 index 000000000000..c0fffaa6dbd9 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/nft_interface_stress.sh @@ -0,0 +1,157 @@ +#!/bin/bash -e +# +# SPDX-License-Identifier: GPL-2.0 +# +# Torture nftables' netdevice notifier callbacks and related code by frequent +# renaming of interfaces which netdev-family chains and flowtables hook into. + +source lib.sh + +checktool "nft --version" "run test without nft tool" +checktool "iperf3 --version" "run test without iperf3 tool" + +read kernel_tainted < /proc/sys/kernel/tainted + +# how many seconds to torture the kernel? +# default to 80% of max run time but don't exceed 48s +TEST_RUNTIME=$((${kselftest_timeout:-60} * 8 / 10)) +[[ $TEST_RUNTIME -gt 48 ]] && TEST_RUNTIME=48 + +trap "cleanup_all_ns" EXIT + +setup_ns nsc nsr nss + +ip -net $nsc link add cr0 type veth peer name rc0 netns $nsr +ip -net $nsc addr add 10.0.0.1/24 dev cr0 +ip -net $nsc link set cr0 up +ip -net $nsc route add default via 10.0.0.2 + +ip -net $nss link add sr0 type veth peer name rs0 netns $nsr +ip -net $nss addr add 10.1.0.1/24 dev sr0 +ip -net $nss link set sr0 up +ip -net $nss route add default via 10.1.0.2 + +ip -net $nsr addr add 10.0.0.2/24 dev rc0 +ip -net $nsr link set rc0 up +ip -net $nsr addr add 10.1.0.2/24 dev rs0 +ip -net $nsr link set rs0 up +ip netns exec $nsr sysctl -q net.ipv4.ip_forward=1 +ip netns exec $nsr sysctl -q net.ipv4.conf.all.forwarding=1 + +{ + echo "table netdev t {" + for ((i = 0; i < 10; i++)); do + cat <<-EOF + chain chain_rc$i { + type filter hook ingress device rc$i priority 0 + counter + } + chain chain_rs$i { + type filter hook ingress device rs$i priority 0 + counter + } + EOF + done + echo "}" + echo "table ip t {" + for ((i = 0; i < 10; i++)); do + cat <<-EOF + flowtable ft_${i} { + hook ingress priority 0 + devices = { rc$i, rs$i } + } + EOF + done + echo "chain c {" + echo "type filter hook forward priority 0" + for ((i = 0; i < 10; i++)); do + echo -n "iifname rc$i oifname rs$i " + echo "ip protocol tcp counter flow add @ft_${i}" + done + echo "counter" + echo "}" + echo "}" +} | ip netns exec $nsr nft -f - || { + echo "SKIP: Could not load nft ruleset" + exit $ksft_skip +} + +for ((o=0, n=1; ; o=n, n++, n %= 10)); do + ip -net $nsr link set rc$o name rc$n + ip -net $nsr link set rs$o name rs$n +done & +rename_loop_pid=$! + +while true; do ip netns exec $nsr nft list ruleset >/dev/null 2>&1; done & +nft_list_pid=$! + +ip netns exec $nsr nft monitor >/dev/null & +nft_monitor_pid=$! + +ip netns exec $nss iperf3 --server --daemon -1 +summary_expr='s,^\[SUM\] .* \([0-9\.]\+\) Kbits/sec .* receiver,\1,p' +rate=$(ip netns exec $nsc iperf3 \ + --format k -c 10.1.0.1 --time $TEST_RUNTIME \ + --length 56 --parallel 10 -i 0 | sed -n "$summary_expr") + +kill $nft_list_pid +kill $nft_monitor_pid +kill $rename_loop_pid +wait + +wildcard_prep() { + ip netns exec $nsr nft -f - <<EOF +table ip t { + flowtable ft_wild { + hook ingress priority 0 + devices = { wild* } + } +} +EOF +} + +if ! wildcard_prep; then + echo "SKIP wildcard tests: not supported by host's nft?" +else + for ((i = 0; i < 100; i++)); do + ip -net $nsr link add wild$i type dummy & + done + wait + for ((i = 80; i < 100; i++)); do + ip -net $nsr link del wild$i & + done + for ((i = 0; i < 80; i++)); do + ip -net $nsr link del wild$i & + done + wait + for ((i = 0; i < 100; i += 10)); do + ( + for ((j = 0; j < 10; j++)); do + ip -net $nsr link add wild$((i + j)) type dummy + done + for ((j = 0; j < 10; j++)); do + ip -net $nsr link del wild$((i + j)) + done + ) & + done + wait +fi + + +[[ $kernel_tainted -eq 0 && $(</proc/sys/kernel/tainted) -ne 0 ]] && { + echo "FAIL: Kernel is tainted!" + exit $ksft_fail +} + +[[ $rate -gt 0 ]] || { + echo "FAIL: Zero throughput in iperf3" + exit $ksft_fail +} + +[[ -f /sys/kernel/debug/kmemleak && \ + -n $(</sys/kernel/debug/kmemleak) ]] && { + echo "FAIL: non-empty kmemleak report" + exit $ksft_fail +} + +exit $ksft_pass diff --git a/tools/testing/selftests/net/netfilter/nft_nat.sh b/tools/testing/selftests/net/netfilter/nft_nat.sh index 9e39de26455f..b3ec2d0a3f56 100755 --- a/tools/testing/selftests/net/netfilter/nft_nat.sh +++ b/tools/testing/selftests/net/netfilter/nft_nat.sh @@ -569,7 +569,7 @@ test_redirect6() ip netns exec "$ns0" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then - echo "ERROR: cannnot ping $ns1 from $ns2 via ipv6" + echo "ERROR: cannot ping $ns1 from $ns2 via ipv6" lret=1 fi @@ -859,13 +859,31 @@ EOF # from router:service bypass connection tracking. test_port_shadow_notrack "$family" - # test nat based mitigation: fowarded packets coming from service port + # test nat based mitigation: forwarded packets coming from service port # are masqueraded with random highport. test_port_shadow_pat "$family" ip netns exec "$ns0" nft delete table $family nat } +file_cmp() +{ + local infile="$1" + local outfile="$2" + + if ! cmp "$infile" "$outfile";then + echo -n "Infile " + ls -l "$infile" + echo -n "Outfile " + ls -l "$outfile" + echo "ERROR: in and output file mismatch when checking $msg" 1>&1 + ret=1 + return 1 + fi + + return 0 +} + test_stateless_nat_ip() { local lret=0 @@ -966,11 +984,7 @@ EOF wait - if ! cmp "$INFILE" "$OUTFILE";then - ls -l "$INFILE" "$OUTFILE" - echo "ERROR: in and output file mismatch when checking udp with stateless nat" 1>&2 - lret=1 - fi + file_cmp "$INFILE" "$OUTFILE" "udp with stateless nat" || lret=1 :> "$OUTFILE" @@ -991,6 +1005,62 @@ EOF return $lret } +test_dnat_clash() +{ + local lret=0 + + if ! socat -h > /dev/null 2>&1;then + echo "SKIP: Could not run dnat clash test without socat tool" + [ $ret -eq 0 ] && ret=$ksft_skip + return $ksft_skip + fi + +ip netns exec "$ns0" nft -f /dev/stdin <<EOF +flush ruleset +table ip dnat-test { + chain prerouting { + type nat hook prerouting priority dstnat; policy accept; + ip daddr 10.0.2.1 udp dport 1234 counter dnat to 10.0.1.1:1234 + } +} +EOF + if [ $? -ne 0 ]; then + echo "SKIP: Could not add dnat rules" + [ $ret -eq 0 ] && ret=$ksft_skip + return $ksft_skip + fi + + local udpdaddr="10.0.2.1" + for i in 1 2;do + echo "PING $udpdaddr" > "$INFILE" + echo "PONG 10.0.1.1 step $i" | ip netns exec "$ns0" timeout 3 socat STDIO UDP4-LISTEN:1234,bind=10.0.1.1 > "$OUTFILE" 2>/dev/null & + local lpid=$! + + busywait $BUSYWAIT_TIMEOUT listener_ready "$ns0" 1234 "-u" + + result=$(ip netns exec "$ns1" timeout 3 socat STDIO UDP4-SENDTO:"$udpdaddr:1234,sourceport=4321" < "$INFILE") + udpdaddr="10.0.1.1" + + if [ "$result" != "PONG 10.0.1.1 step $i" ] ; then + echo "ERROR: failed to test udp $ns1 to $ns2 with dnat rule step $i, result: \"$result\"" 1>&2 + lret=1 + ret=1 + fi + + wait + + file_cmp "$INFILE" "$OUTFILE" "udp dnat step $i" || lret=1 + + :> "$OUTFILE" + done + + test $lret -eq 0 && echo "PASS: IP dnat clash $ns1:$ns2" + + ip netns exec "$ns0" nft flush ruleset + + return $lret +} + # ip netns exec "$ns0" ping -c 1 -q 10.0.$i.99 for i in "$ns0" "$ns1" "$ns2" ;do ip netns exec "$i" nft -f /dev/stdin <<EOF @@ -1147,6 +1217,7 @@ $test_inet_nat && test_redirect6 inet test_port_shadowing test_stateless_nat_ip +test_dnat_clash if [ $ret -ne 0 ];then echo -n "FAIL: " diff --git a/tools/testing/selftests/net/netfilter/nft_nat_zones.sh b/tools/testing/selftests/net/netfilter/nft_nat_zones.sh index 3b81d88bdde3..9f200f80253a 100755 --- a/tools/testing/selftests/net/netfilter/nft_nat_zones.sh +++ b/tools/testing/selftests/net/netfilter/nft_nat_zones.sh @@ -88,7 +88,6 @@ for i in $(seq 1 "$maxclients");do echo netns exec "$cl" sysctl -q net.ipv4.tcp_syn_retries=2 echo netns exec "$gw" ip link set "veth$i" up echo netns exec "$gw" sysctl -q net.ipv4.conf.veth"$i".arp_ignore=2 - echo netns exec "$gw" sysctl -q net.ipv4.conf.veth"$i".rp_filter=0 # clients have same IP addresses. echo netns exec "$cl" ip addr add 10.1.0.3/24 dev eth0 @@ -178,7 +177,6 @@ fi ip netns exec "$gw" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null ip netns exec "$gw" sysctl -q net.ipv6.conf.all.forwarding=1 > /dev/null -ip netns exec "$gw" sysctl -q net.ipv4.conf.all.rp_filter=0 >/dev/null # useful for debugging: allows to use 'ping' from clients to gateway. ip netns exec "$gw" sysctl -q net.ipv4.fwmark_reflect=1 > /dev/null diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh index 785e3875a6da..d80390848e85 100755 --- a/tools/testing/selftests/net/netfilter/nft_queue.sh +++ b/tools/testing/selftests/net/netfilter/nft_queue.sh @@ -10,6 +10,9 @@ source lib.sh ret=0 timeout=5 +SCTP_TEST_TIMEOUT=60 +STRESS_TEST_TIMEOUT=30 + cleanup() { ip netns pids "$ns1" | xargs kill 2>/dev/null @@ -40,7 +43,7 @@ TMPFILE3=$(mktemp) TMPINPUT=$(mktemp) COUNT=200 -[ "$KSFT_MACHINE_SLOW" = "yes" ] && COUNT=25 +[ "$KSFT_MACHINE_SLOW" = "yes" ] && COUNT=$((COUNT/8)) dd conv=sparse status=none if=/dev/zero bs=1M count=$COUNT of="$TMPINPUT" if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1; then @@ -275,9 +278,11 @@ test_tcp_forward() busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2" busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 2 + local tthen=$(date +%s) + ip netns exec "$ns1" socat -u STDIN TCP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null - wait "$rpid" && echo "PASS: tcp and nfqueue in forward chain" + wait_and_check_retval "$rpid" "tcp and nfqueue in forward chain" "$tthen" kill "$nfqpid" } @@ -288,13 +293,14 @@ test_tcp_localhost() ip netns exec "$nsrouter" ./nf_queue -q 3 & local nfqpid=$! + local tthen=$(date +%s) busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter" busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 3 ip netns exec "$nsrouter" socat -u STDIN TCP:127.0.0.1:12345 <"$TMPINPUT" >/dev/null - wait "$rpid" && echo "PASS: tcp via loopback" + wait_and_check_retval "$rpid" "tcp via loopback" "$tthen" kill "$nfqpid" } @@ -417,6 +423,23 @@ check_output_files() fi } +wait_and_check_retval() +{ + local rpid="$1" + local msg="$2" + local tthen="$3" + local tnow=$(date +%s) + + if wait "$rpid";then + echo -n "PASS: " + else + echo -n "FAIL: " + ret=1 + fi + + printf "%s (duration: %ds)\n" "$msg" $((tnow-tthen)) +} + test_sctp_forward() { ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF @@ -428,13 +451,14 @@ table inet sctpq { } } EOF - timeout 60 ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" & + timeout "$SCTP_TEST_TIMEOUT" ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" & local rpid=$! busywait "$BUSYWAIT_TIMEOUT" sctp_listener_ready "$ns2" ip netns exec "$nsrouter" ./nf_queue -q 10 -G & local nfqpid=$! + local tthen=$(date +%s) ip netns exec "$ns1" socat -u STDIN SCTP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null @@ -443,7 +467,7 @@ EOF exit 1 fi - wait "$rpid" && echo "PASS: sctp and nfqueue in forward chain" + wait_and_check_retval "$rpid" "sctp and nfqueue in forward chain" "$tthen" kill "$nfqpid" check_output_files "$TMPINPUT" "$TMPFILE1" "sctp forward" @@ -462,13 +486,14 @@ EOF # reduce test file size, software segmentation causes sk wmem increase. dd conv=sparse status=none if=/dev/zero bs=1M count=$((COUNT/2)) of="$TMPINPUT" - timeout 60 ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" & + timeout "$SCTP_TEST_TIMEOUT" ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" & local rpid=$! busywait "$BUSYWAIT_TIMEOUT" sctp_listener_ready "$ns2" ip netns exec "$ns1" ./nf_queue -q 11 & local nfqpid=$! + local tthen=$(date +%s) ip netns exec "$ns1" socat -u STDIN SCTP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null @@ -478,7 +503,7 @@ EOF fi # must wait before checking completeness of output file. - wait "$rpid" && echo "PASS: sctp and nfqueue in output chain with GSO" + wait_and_check_retval "$rpid" "sctp and nfqueue in output chain with GSO" "$tthen" kill "$nfqpid" check_output_files "$TMPINPUT" "$TMPFILE1" "sctp output" @@ -486,7 +511,7 @@ EOF udp_listener_ready() { - ss -S -N "$1" -uln -o "sport = :12345" | grep -q 12345 + ss -S -N "$1" -uln -o "sport = :$2" | grep -q "$2" } output_files_written() @@ -494,7 +519,7 @@ output_files_written() test -s "$1" && test -s "$2" } -test_udp_ct_race() +test_udp_nat_race() { ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF flush ruleset @@ -521,8 +546,8 @@ EOF ip netns exec "$nsrouter" ./nf_queue -q 12 -d 1000 & local nfqpid=$! - busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns2" - busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns3" + busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns2" 12345 + busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns3" 12345 busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 12 # Send two packets, one should end up in ns1, other in ns2. @@ -533,7 +558,7 @@ EOF busywait 10000 output_files_written "$TMPFILE1" "$TMPFILE2" - kill "$nfqpid" + kill "$nfqpid" "$rpid1" "$rpid2" if ! ip netns exec "$nsrouter" bash -c 'conntrack -L -p udp --dport 12345 2>/dev/null | wc -l | grep -q "^1"'; then echo "FAIL: Expected One udp conntrack entry" @@ -561,6 +586,208 @@ EOF echo "PASS: both udp receivers got one packet each" } +# Make sure UDPGRO aggregated packets don't lose +# their skb->nfct entry when nfqueue passes the +# skb to userspace with software gso segmentation on. +test_udp_gro_ct() +{ + local errprefix="FAIL: test_udp_gro_ct:" + local timeout=5 + + ip netns exec "$nsrouter" conntrack -F 2>/dev/null + + ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF +flush ruleset +table inet udpq { + # Number of packets/bytes queued to userspace + counter toqueue { } + # Number of packets/bytes reinjected from userspace with 'ct new' intact + counter fromqueue { } + # These two counters should be identical and not 0. + + chain prerouting { + type filter hook prerouting priority -300; policy accept; + + # userspace sends small packets, if < 1000, UDPGRO did + # not kick in, but test needs a 'new' conntrack with udpgro skb. + meta iifname veth0 meta l4proto udp meta length > 1000 accept + + # don't pick up non-gso packets and don't queue them to + # userspace. + notrack + } + + chain postrouting { + type filter hook postrouting priority 0; policy accept; + + # Only queue unconfirmed fraglist gro skbs to userspace. + udp dport 12346 ct status ! confirmed counter name "toqueue" mark set 1 queue num 1 + } + + chain validate { + type filter hook postrouting priority 1; policy accept; + # ... and only count those that were reinjected with the + # skb->nfct intact. + mark 1 counter name "fromqueue" + } +} +EOF + timeout "$timeout" ip netns exec "$ns2" socat UDP-LISTEN:12346,fork,pf=ipv4 OPEN:"$TMPFILE1",trunc & + local rpid=$! + + ip netns exec "$nsrouter" nice -n -19 ./nf_queue -G -c -q 1 -o -t 2 > "$TMPFILE2" & + local nfqpid=$! + + ip netns exec "$nsrouter" ethtool -K "veth0" rx-udp-gro-forwarding on rx-gro-list on generic-receive-offload on + + busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns2" 12346 + busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 1 + + local bs=512 + local count=$(((32 * 1024 * 1024) / bs)) + + local nprocs=$(nproc) + [ $nprocs -gt 1 ] && nprocs=$((nprocs - 1)) + + dd if=/dev/zero bs="$bs" count="$count" 2>/dev/null | for i in $(seq 1 $nprocs); do + timeout "$timeout" nice -n 19 ip netns exec "$ns1" \ + socat -u -b 512 STDIN UDP-DATAGRAM:10.0.2.99:12346,reuseport,bind=0.0.0.0:55221 & + done + + busywait 10000 test -s "$TMPFILE1" + + kill "$rpid" + + wait + + local p + local b + local pqueued + local bqueued + + c=$(ip netns exec "$nsrouter" nft list counter inet udpq "toqueue" | grep packets) + read p pqueued b bqueued <<EOF +$c +EOF + local preinject + local breinject + c=$(ip netns exec "$nsrouter" nft list counter inet udpq "fromqueue" | grep packets) + read p preinject b breinject <<EOF +$c +EOF + ip netns exec "$nsrouter" ethtool -K "veth0" rx-udp-gro-forwarding off + ip netns exec "$nsrouter" ethtool -K "veth1" rx-udp-gro-forwarding off + + if [ "$pqueued" -eq 0 ];then + # happens when gro did not build at least on aggregate + echo "SKIP: No packets were queued" + return + fi + + local saw_ct_entry=0 + if ip netns exec "$nsrouter" bash -c 'conntrack -L -p udp --dport 12346 2>/dev/null | wc -l | grep -q "^1"'; then + saw_ct_entry=1 + else + echo "$errprefix Expected udp conntrack entry" + ip netns exec "$nsrouter" conntrack -L + ret=1 + fi + + if [ "$pqueued" -ge "$preinject" ] ;then + echo "$errprefix Expected software segmentation to occur, had $pqueued and $preinject" + ret=1 + return + fi + + # sw segmentation adds extra udp and ip headers. + local breinject_expect=$((preinject * (512 + 20 + 8))) + + if [ "$breinject" -eq "$breinject_expect" ]; then + if [ "$saw_ct_entry" -eq 1 ];then + echo "PASS: fraglist gro skb passed with conntrack entry" + else + echo "$errprefix fraglist gro skb passed without conntrack entry" + ret=1 + fi + else + echo "$errprefix Counter mismatch, conntrack entry dropped by nfqueue? Queued: $pqueued, $bqueued. Post-queue: $preinject, $breinject. Expected $breinject_expect" + ret=1 + fi + + if ! ip netns exec "$nsrouter" nft delete table inet udpq; then + echo "$errprefix: Could not delete udpq table" + ret=1 + fi +} + +check_tainted() +{ + local msg="$1" + + if [ "$tainted_then" -ne 0 ];then + return + fi + + read tainted_now < /proc/sys/kernel/tainted + if [ "$tainted_now" -eq 0 ];then + echo "PASS: $msg" + else + echo "TAINT: $msg" + dmesg + ret=1 + fi +} + +test_queue_stress() +{ + read tainted_then < /proc/sys/kernel/tainted + local i + + ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF +flush ruleset +table inet t { + chain forward { + type filter hook forward priority 0; policy accept; + + queue flags bypass to numgen random mod 8 + } +} +EOF + timeout "$STRESS_TEST_TIMEOUT" ip netns exec "$ns2" \ + socat -u UDP-LISTEN:12345,fork,pf=ipv4 STDOUT > /dev/null & + + timeout "$STRESS_TEST_TIMEOUT" ip netns exec "$ns3" \ + socat -u UDP-LISTEN:12345,fork,pf=ipv4 STDOUT > /dev/null & + + for i in $(seq 0 7); do + ip netns exec "$nsrouter" timeout "$STRESS_TEST_TIMEOUT" \ + ./nf_queue -q $i -t 2 -O -b > /dev/null & + done + + ip netns exec "$ns1" timeout "$STRESS_TEST_TIMEOUT" \ + ping -q -f 10.0.2.99 > /dev/null 2>&1 & + ip netns exec "$ns1" timeout "$STRESS_TEST_TIMEOUT" \ + ping -q -f 10.0.3.99 > /dev/null 2>&1 & + ip netns exec "$ns1" timeout "$STRESS_TEST_TIMEOUT" \ + ping -q -f "dead:2::99" > /dev/null 2>&1 & + ip netns exec "$ns1" timeout "$STRESS_TEST_TIMEOUT" \ + ping -q -f "dead:3::99" > /dev/null 2>&1 & + + busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns2" 12345 + busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns3" 12345 + + for i in $(seq 1 4);do + ip netns exec "$ns1" timeout "$STRESS_TEST_TIMEOUT" \ + socat -u STDIN UDP-DATAGRAM:10.0.2.99:12345 < /dev/zero > /dev/null & + ip netns exec "$ns1" timeout "$STRESS_TEST_TIMEOUT" \ + socat -u STDIN UDP-DATAGRAM:10.0.3.99:12345 < /dev/zero > /dev/null & + done + + wait + + check_tainted "concurrent queueing" +} + test_queue_removal() { read tainted_then < /proc/sys/kernel/tainted @@ -584,17 +811,7 @@ EOF ip netns exec "$ns1" nft flush ruleset - if [ "$tainted_then" -ne 0 ];then - return - fi - - read tainted_now < /proc/sys/kernel/tainted - if [ "$tainted_now" -eq 0 ];then - echo "PASS: queue program exiting while packets queued" - else - echo "TAINT: queue program exiting while packets queued" - ret=1 - fi + check_tainted "queue program exiting while packets queued" } ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null @@ -638,7 +855,9 @@ test_tcp_localhost_connectclose test_tcp_localhost_requeue test_sctp_forward test_sctp_output -test_udp_ct_race +test_udp_nat_race +test_udp_gro_ct +test_queue_stress # should be last, adds vrf device in ns1 and changes routes test_icmp_vrf diff --git a/tools/testing/selftests/net/netfilter/nft_tproxy_udp.sh b/tools/testing/selftests/net/netfilter/nft_tproxy_udp.sh index d16de13fe5a7..1dc7b0450145 100755 --- a/tools/testing/selftests/net/netfilter/nft_tproxy_udp.sh +++ b/tools/testing/selftests/net/netfilter/nft_tproxy_udp.sh @@ -190,13 +190,13 @@ table inet filter { } EOF - timeout "$timeout" ip netns exec "$nsrouter" socat -u "$socat_ipproto" udp-listen:12345,fork,ip-transparent,reuseport udp:"$ns1_ip_port",ip-transparent,reuseport,bind="$ns2_ip_port" 2>/dev/null & + timeout "$timeout" ip netns exec "$nsrouter" socat -u "$socat_ipproto" udp-listen:12345,fork,ip-transparent,reuseport,shut-none udp:"$ns1_ip_port",ip-transparent,reuseport,bind="$ns2_ip_port",shut-none 2>/dev/null & local tproxy_pid=$! - timeout "$timeout" ip netns exec "$ns2" socat "$socat_ipproto" udp-listen:8080,fork SYSTEM:"echo PONG_NS2" 2>/dev/null & + timeout "$timeout" ip netns exec "$ns2" socat "$socat_ipproto" udp-listen:8080,fork,shut-none SYSTEM:"echo PONG_NS2" 2>/dev/null & local server2_pid=$! - timeout "$timeout" ip netns exec "$ns3" socat "$socat_ipproto" udp-listen:8080,fork SYSTEM:"echo PONG_NS3" 2>/dev/null & + timeout "$timeout" ip netns exec "$ns3" socat "$socat_ipproto" udp-listen:8080,fork,shut-none SYSTEM:"echo PONG_NS3" 2>/dev/null & local server3_pid=$! busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter" 12345 "-u" @@ -205,7 +205,7 @@ EOF local result # request from ns1 to ns2 (forwarded traffic) - result=$(echo I_M_PROXIED | ip netns exec "$ns1" socat -t 2 -T 2 STDIO udp:"$ns2_ip_port",sourceport=18888) + result=$(echo I_M_PROXIED | ip netns exec "$ns1" socat -t 2 -T 2 STDIO udp:"$ns2_ip_port",sourceport=18888,shut-none) if [ "$result" == "$expect_ns1_ns2" ] ;then echo "PASS: tproxy test $testname: ns1 got reply \"$result\" connecting to ns2" else @@ -214,7 +214,7 @@ EOF fi # request from ns1 to ns3 (forwarded traffic) - result=$(echo I_M_PROXIED | ip netns exec "$ns1" socat -t 2 -T 2 STDIO udp:"$ns3_ip_port") + result=$(echo I_M_PROXIED | ip netns exec "$ns1" socat -t 2 -T 2 STDIO udp:"$ns3_ip_port",shut-none) if [ "$result" = "$expect_ns1_ns3" ] ;then echo "PASS: tproxy test $testname: ns1 got reply \"$result\" connecting to ns3" else @@ -223,7 +223,7 @@ EOF fi # request from nsrouter to ns2 (localy originated traffic) - result=$(echo I_M_PROXIED | ip netns exec "$nsrouter" socat -t 2 -T 2 STDIO udp:"$ns2_ip_port") + result=$(echo I_M_PROXIED | ip netns exec "$nsrouter" socat -t 2 -T 2 STDIO udp:"$ns2_ip_port",shut-none) if [ "$result" == "$expect_nsrouter_ns2" ] ;then echo "PASS: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns2" else @@ -232,7 +232,7 @@ EOF fi # request from nsrouter to ns3 (localy originated traffic) - result=$(echo I_M_PROXIED | ip netns exec "$nsrouter" socat -t 2 -T 2 STDIO udp:"$ns3_ip_port") + result=$(echo I_M_PROXIED | ip netns exec "$nsrouter" socat -t 2 -T 2 STDIO udp:"$ns3_ip_port",shut-none) if [ "$result" = "$expect_nsrouter_ns3" ] ;then echo "PASS: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns3" else diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt index 3442cd29bc93..cdb3910af95b 100644 --- a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt +++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt @@ -26,7 +26,7 @@ +0.01 > R 643160523:643160523(0) win 0 -+0.01 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep UNREPLIED | grep -q SYN_SENT` ++0.1 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep UNREPLIED | grep -q SYN_SENT` // Must go through. +0.01 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1 ecr 0,nop,wscale 8> diff --git a/tools/testing/selftests/net/netfilter/rpath.sh b/tools/testing/selftests/net/netfilter/rpath.sh index 86ec4e68594d..24ad41d526d9 100755 --- a/tools/testing/selftests/net/netfilter/rpath.sh +++ b/tools/testing/selftests/net/netfilter/rpath.sh @@ -1,8 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -# return code to signal skipped test -ksft_skip=4 +source lib.sh # search for legacy iptables (it uses the xtables extensions if iptables-legacy --version >/dev/null 2>&1; then @@ -32,17 +31,10 @@ if [ -z "$iptables$ip6tables$nft" ]; then exit $ksft_skip fi -sfx=$(mktemp -u "XXXXXXXX") -ns1="ns1-$sfx" -ns2="ns2-$sfx" -trap "ip netns del $ns1; ip netns del $ns2" EXIT - -# create two netns, disable rp_filter in ns2 and -# keep IPv6 address when moving into VRF -ip netns add "$ns1" -ip netns add "$ns2" -ip netns exec "$ns2" sysctl -q net.ipv4.conf.all.rp_filter=0 -ip netns exec "$ns2" sysctl -q net.ipv4.conf.default.rp_filter=0 +trap cleanup_all_ns EXIT + +# create two netns, keep IPv6 address when moving into VRF +setup_ns ns1 ns2 ip netns exec "$ns2" sysctl -q net.ipv6.conf.all.keep_addr_on_down=1 # a standard connection between the netns, should not trigger rp filter diff --git a/tools/testing/selftests/net/netfilter/sctp_collision.c b/tools/testing/selftests/net/netfilter/sctp_collision.c index 21bb1cfd8a85..b282d1785c9b 100644 --- a/tools/testing/selftests/net/netfilter/sctp_collision.c +++ b/tools/testing/selftests/net/netfilter/sctp_collision.c @@ -9,9 +9,10 @@ int main(int argc, char *argv[]) { struct sockaddr_in saddr = {}, daddr = {}; - int sd, ret, len = sizeof(daddr); + socklen_t len = sizeof(daddr); struct timeval tv = {25, 0}; char buf[] = "hello"; + int sd, ret; if (argc != 6 || (strcmp(argv[1], "server") && strcmp(argv[1], "client"))) { printf("%s <server|client> <LOCAL_IP> <LOCAL_PORT> <REMOTE_IP> <REMOTE_PORT>\n", diff --git a/tools/testing/selftests/net/netfilter/udpclash.c b/tools/testing/selftests/net/netfilter/udpclash.c new file mode 100644 index 000000000000..79de163d61ab --- /dev/null +++ b/tools/testing/selftests/net/netfilter/udpclash.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Usage: ./udpclash <IP> <PORT> + * + * Emit THREAD_COUNT UDP packets sharing the same saddr:daddr pair. + * + * This mimics DNS resolver libraries that emit A and AAAA requests + * in parallel. + * + * This exercises conntrack clash resolution logic added and later + * refined in + * + * 71d8c47fc653 ("netfilter: conntrack: introduce clash resolution on insertion race") + * ed07d9a021df ("netfilter: nf_conntrack: resolve clash for matching conntracks") + * 6a757c07e51f ("netfilter: conntrack: allow insertion of clashing entries") + */ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <unistd.h> +#include <arpa/inet.h> +#include <sys/socket.h> +#include <pthread.h> + +#define THREAD_COUNT 128 + +struct thread_args { + const struct sockaddr_in *si_remote; + int sockfd; +}; + +static volatile int wait = 1; + +static void *thread_main(void *varg) +{ + const struct sockaddr_in *si_remote; + const struct thread_args *args = varg; + static const char msg[] = "foo"; + + si_remote = args->si_remote; + + while (wait == 1) + ; + + if (sendto(args->sockfd, msg, strlen(msg), MSG_NOSIGNAL, + (struct sockaddr *)si_remote, sizeof(*si_remote)) < 0) + exit(111); + + return varg; +} + +static int run_test(int fd, const struct sockaddr_in *si_remote) +{ + struct thread_args thread_args = { + .si_remote = si_remote, + .sockfd = fd, + }; + pthread_t *tid = calloc(THREAD_COUNT, sizeof(pthread_t)); + unsigned int repl_count = 0, timeout = 0; + int i; + + if (!tid) { + perror("calloc"); + return 1; + } + + for (i = 0; i < THREAD_COUNT; i++) { + int err = pthread_create(&tid[i], NULL, &thread_main, &thread_args); + + if (err != 0) { + perror("pthread_create"); + exit(1); + } + } + + wait = 0; + + for (i = 0; i < THREAD_COUNT; i++) + pthread_join(tid[i], NULL); + + while (repl_count < THREAD_COUNT) { + struct sockaddr_in si_repl; + socklen_t si_repl_len = sizeof(si_repl); + char repl[512]; + ssize_t ret; + + ret = recvfrom(fd, repl, sizeof(repl), MSG_NOSIGNAL, + (struct sockaddr *) &si_repl, &si_repl_len); + if (ret < 0) { + if (timeout++ > 5000) { + fputs("timed out while waiting for reply from thread\n", stderr); + break; + } + + /* give reply time to pass though the stack */ + usleep(1000); + continue; + } + + if (si_repl_len != sizeof(*si_remote)) { + fprintf(stderr, "warning: reply has unexpected repl_len %d vs %d\n", + (int)si_repl_len, (int)sizeof(si_repl)); + } else if (si_remote->sin_addr.s_addr != si_repl.sin_addr.s_addr || + si_remote->sin_port != si_repl.sin_port) { + char a[64], b[64]; + + inet_ntop(AF_INET, &si_remote->sin_addr, a, sizeof(a)); + inet_ntop(AF_INET, &si_repl.sin_addr, b, sizeof(b)); + + fprintf(stderr, "reply from wrong source: want %s:%d got %s:%d\n", + a, ntohs(si_remote->sin_port), b, ntohs(si_repl.sin_port)); + } + + repl_count++; + } + + printf("got %d of %d replies\n", repl_count, THREAD_COUNT); + + free(tid); + + return repl_count == THREAD_COUNT ? 0 : 1; +} + +int main(int argc, char *argv[]) +{ + struct sockaddr_in si_local = { + .sin_family = AF_INET, + }; + struct sockaddr_in si_remote = { + .sin_family = AF_INET, + }; + int fd, ret; + + if (argc < 3) { + fputs("Usage: send_udp <daddr> <dport>\n", stderr); + return 1; + } + + si_remote.sin_port = htons(atoi(argv[2])); + si_remote.sin_addr.s_addr = inet_addr(argv[1]); + + fd = socket(AF_INET, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, IPPROTO_UDP); + if (fd < 0) { + perror("socket"); + return 1; + } + + if (bind(fd, (struct sockaddr *)&si_local, sizeof(si_local)) < 0) { + perror("bind"); + return 1; + } + + ret = run_test(fd, &si_remote); + + close(fd); + + return ret; +} diff --git a/tools/testing/selftests/net/netlink-dumps.c b/tools/testing/selftests/net/netlink-dumps.c index 07423f256f96..51129c564d0a 100644 --- a/tools/testing/selftests/net/netlink-dumps.c +++ b/tools/testing/selftests/net/netlink-dumps.c @@ -18,7 +18,7 @@ #include <linux/mqueue.h> #include <linux/rtnetlink.h> -#include "../kselftest_harness.h" +#include "kselftest_harness.h" #include <ynl.h> @@ -31,9 +31,18 @@ struct ext_ack { const char *str; }; -/* 0: no done, 1: done found, 2: extack found, -1: error */ -static int nl_get_extack(char *buf, size_t n, struct ext_ack *ea) +enum get_ea_ret { + ERROR = -1, + NO_CTRL = 0, + FOUND_DONE, + FOUND_ERR, + FOUND_EXTACK, +}; + +static enum get_ea_ret +nl_get_extack(char *buf, size_t n, struct ext_ack *ea) { + enum get_ea_ret ret = NO_CTRL; const struct nlmsghdr *nlh; const struct nlattr *attr; ssize_t rem; @@ -41,15 +50,19 @@ static int nl_get_extack(char *buf, size_t n, struct ext_ack *ea) for (rem = n; rem > 0; NLMSG_NEXT(nlh, rem)) { nlh = (struct nlmsghdr *)&buf[n - rem]; if (!NLMSG_OK(nlh, rem)) - return -1; + return ERROR; - if (nlh->nlmsg_type != NLMSG_DONE) + if (nlh->nlmsg_type == NLMSG_ERROR) + ret = FOUND_ERR; + else if (nlh->nlmsg_type == NLMSG_DONE) + ret = FOUND_DONE; + else continue; ea->err = -*(int *)NLMSG_DATA(nlh); if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS)) - return 1; + return ret; ynl_attr_for_each(attr, nlh, sizeof(int)) { switch (ynl_attr_type(attr)) { @@ -68,10 +81,10 @@ static int nl_get_extack(char *buf, size_t n, struct ext_ack *ea) } } - return 2; + return FOUND_EXTACK; } - return 0; + return ret; } static const struct { @@ -99,9 +112,9 @@ static const struct { TEST(dump_extack) { int netlink_sock; + int i, cnt, ret; char buf[8192]; int one = 1; - int i, cnt; ssize_t n; netlink_sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); @@ -118,7 +131,7 @@ TEST(dump_extack) ASSERT_EQ(n, 0); /* Dump so many times we fill up the buffer */ - cnt = 64; + cnt = 80; for (i = 0; i < cnt; i++) { n = send(netlink_sock, &dump_neigh_bad, sizeof(dump_neigh_bad), 0); @@ -130,6 +143,7 @@ TEST(dump_extack) EXPECT_EQ(n, -1); EXPECT_EQ(errno, ENOBUFS); + ret = NO_CTRL; for (i = 0; i < cnt; i++) { struct ext_ack ea = {}; @@ -140,10 +154,20 @@ TEST(dump_extack) } ASSERT_GE(n, (ssize_t)sizeof(struct nlmsghdr)); - EXPECT_EQ(nl_get_extack(buf, n, &ea), 2); + ret = nl_get_extack(buf, n, &ea); + /* Once we fill the buffer we'll see one ENOBUFS followed + * by a number of EBUSYs. Then the last recv() will finally + * trigger and complete the dump. + */ + if (ret == FOUND_ERR && (ea.err == ENOBUFS || ea.err == EBUSY)) + continue; + EXPECT_EQ(ret, FOUND_EXTACK); + EXPECT_EQ(ea.err, EINVAL); EXPECT_EQ(ea.attr_offs, sizeof(struct nlmsghdr) + sizeof(struct ndmsg)); } + /* Make sure last message was a full DONE+extack */ + EXPECT_EQ(ret, FOUND_EXTACK); } static const struct { diff --git a/tools/testing/selftests/net/netns-name.sh b/tools/testing/selftests/net/netns-name.sh index 6974474c26f3..38871bdef67f 100755 --- a/tools/testing/selftests/net/netns-name.sh +++ b/tools/testing/selftests/net/netns-name.sh @@ -7,10 +7,12 @@ set -o pipefail DEV=dummy-dev0 DEV2=dummy-dev1 ALT_NAME=some-alt-name +NSIM_ADDR=2025 RET_CODE=0 cleanup() { + cleanup_netdevsim $NSIM_ADDR cleanup_ns $NS $test_ns } @@ -25,12 +27,15 @@ setup_ns NS test_ns # # Test basic move without a rename +# Use netdevsim because it has extra asserts for notifiers. # -ip -netns $NS link add name $DEV type dummy || fail -ip -netns $NS link set dev $DEV netns $test_ns || + +nsim=$(create_netdevsim $NSIM_ADDR $NS) +ip -netns $NS link set dev $nsim netns $test_ns || fail "Can't perform a netns move" -ip -netns $test_ns link show dev $DEV >> /dev/null || fail "Device not found after move" -ip -netns $test_ns link del $DEV || fail +ip -netns $test_ns link show dev $nsim >> /dev/null || + fail "Device not found after move" +cleanup_netdevsim $NSIM_ADDR # # Test move with a conflict @@ -78,6 +83,16 @@ ip -netns $NS link show dev $ALT_NAME 2> /dev/null && fail "Can still find alt-name after move" ip -netns $test_ns link del $DEV || fail +# +# Test no conflict of the same name/ifindex in different netns +# +ip -netns $NS link add name $DEV index 100 type dummy || fail +ip -netns $NS link add netns $test_ns name $DEV index 100 type dummy || + fail "Can create in netns without moving" +ip -netns $test_ns link show dev $DEV >> /dev/null || fail "Device not found" +ip -netns $NS link del $DEV || fail +ip -netns $test_ns link del $DEV || fail + echo -ne "$(basename $0) \t\t\t\t" if [ $RET_CODE -eq 0 ]; then echo "[ OK ]" diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c index cd8a58097448..1f5227f3d64d 100644 --- a/tools/testing/selftests/net/nettest.c +++ b/tools/testing/selftests/net/nettest.c @@ -385,7 +385,7 @@ static int get_bind_to_device(int sd, char *name, size_t len) name[0] = '\0'; rc = getsockopt(sd, SOL_SOCKET, SO_BINDTODEVICE, name, &optlen); if (rc < 0) - log_err_errno("setsockopt(SO_BINDTODEVICE)"); + log_err_errno("getsockopt(SO_BINDTODEVICE)"); return rc; } @@ -535,7 +535,7 @@ static int set_freebind(int sd, int version) break; case AF_INET6: if (setsockopt(sd, SOL_IPV6, IPV6_FREEBIND, &one, sizeof(one))) { - log_err_errno("setsockopt(IPV6_FREEBIND"); + log_err_errno("setsockopt(IPV6_FREEBIND)"); rc = -1; } break; @@ -812,7 +812,7 @@ static int convert_addr(struct sock_args *args, const char *_str, sep++; if (str_to_uint(sep, 1, pfx_len_max, &args->prefix_len) != 0) { - fprintf(stderr, "Invalid port\n"); + fprintf(stderr, "Invalid prefix length\n"); return 1; } } else { @@ -1272,7 +1272,7 @@ static int msg_loop(int client, int sd, void *addr, socklen_t alen, } } - nfds = interactive ? MAX(fileno(stdin), sd) + 1 : sd + 1; + nfds = interactive ? MAX(fileno(stdin), sd) + 1 : sd + 1; while (1) { FD_ZERO(&rfds); FD_SET(sd, &rfds); @@ -1492,7 +1492,7 @@ static int lsock_init(struct sock_args *args) sd = socket(args->version, args->type, args->protocol); if (sd < 0) { log_err_errno("Error opening socket"); - return -1; + return -1; } if (set_reuseaddr(sd) != 0) @@ -1912,7 +1912,7 @@ static int ipc_parent(int cpid, int fd, struct sock_args *args) * waiting to be told when to continue */ if (read(fd, &buf, sizeof(buf)) <= 0) { - log_err_errno("Failed to read IPC status from status"); + log_err_errno("Failed to read IPC status from pipe"); return 1; } if (!buf) { diff --git a/tools/testing/selftests/net/nk_qlease.py b/tools/testing/selftests/net/nk_qlease.py new file mode 100755 index 000000000000..a84a73ff4eda --- /dev/null +++ b/tools/testing/selftests/net/nk_qlease.py @@ -0,0 +1,2109 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import errno +import time +from lib.py import ( + ksft_run, + ksft_exit, + ksft_eq, + ksft_ne, + ksft_in, + ksft_not_in, + ksft_raises, +) +from lib.py import ( + NetNS, + NetNSEnter, + EthtoolFamily, + NetdevFamily, + RtnlFamily, + NetdevSimDev, +) +from lib.py import ( + NlError, + Netlink, + cmd, + defer, + ip, +) + + +def wait_until(cond, timeout=2.0, interval=0.05): + deadline = time.monotonic() + timeout + while not cond(): + if time.monotonic() >= deadline: + return + time.sleep(interval) + + +def create_netkit(rxqueues, mode="l2"): + all_links = ip("-d link show", json=True) + old_idxs = { + link["ifindex"] + for link in all_links + if link.get("linkinfo", {}).get("info_kind") == "netkit" + } + + rtnl = RtnlFamily() + rtnl.newlink( + { + "linkinfo": { + "kind": "netkit", + "data": { + "mode": mode, + "policy": "forward", + "peer-policy": "forward", + }, + }, + "num-rx-queues": rxqueues, + }, + flags=[Netlink.NLM_F_CREATE, Netlink.NLM_F_EXCL], + ) + + all_links = ip("-d link show", json=True) + nk_links = [ + link + for link in all_links + if link.get("linkinfo", {}).get("info_kind") == "netkit" + and link["ifindex"] not in old_idxs + ] + nk_links.sort(key=lambda x: x["ifindex"]) + return ( + nk_links[1]["ifname"], + nk_links[1]["ifindex"], + nk_links[0]["ifname"], + nk_links[0]["ifindex"], + ) + + +def create_netkit_single(rxqueues): + rtnl = RtnlFamily() + rtnl.newlink( + { + "linkinfo": { + "kind": "netkit", + "data": { + "mode": "l2", + "pairing": "single", + }, + }, + "num-rx-queues": rxqueues, + }, + flags=[Netlink.NLM_F_CREATE, Netlink.NLM_F_EXCL], + ) + + all_links = ip("-d link show", json=True) + nk_links = [ + link + for link in all_links + if link.get("linkinfo", {}).get("info_kind") == "netkit" + and "UP" not in link.get("flags", []) + ] + return nk_links[0]["ifname"], nk_links[0]["ifindex"] + + +def test_remove_phys(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + src_queue = 1 + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + result = netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + nk_queue_id = result["id"] + + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_in("lease", queue_info) + ksft_eq(queue_info["lease"]["ifindex"], nk_guest_idx) + ksft_eq(queue_info["lease"]["queue"]["id"], nk_queue_id) + + nsimdev.remove() + wait_until(lambda: cmd(f"ip link show dev {nk_host}", fail=False).ret != 0) + ret = cmd(f"ip link show dev {nk_host}", fail=False) + ksft_ne(ret.ret, 0) + + +def test_double_lease(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=3) + defer(cmd, f"ip link del dev {nk_host}") + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + src_queue = 1 + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + result = netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(result["id"], 1) + + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.EBUSY) + + +def test_virtual_lessor(netns) -> None: + nk_host_a, _, nk_guest_a, nk_guest_a_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host_a}") + ip(f"link set dev {nk_host_a} up") + ip(f"link set dev {nk_guest_a} up") + + nk_host_b, _, nk_guest_b, nk_guest_b_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host_b}") + + ip(f"link set dev {nk_guest_b} netns {netns.name}") + ip(f"link set dev {nk_host_b} up") + ip(f"link set dev {nk_guest_b} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": nk_guest_b_idx, + "type": "rx", + "lease": { + "ifindex": nk_guest_a_idx, + "queue": {"id": 0, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) + + +def test_phys_lessee(_netns) -> None: + nsimdev_a = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev_a.remove) + nsim_a = nsimdev_a.nsims[0] + ip(f"link set dev {nsim_a.ifname} up") + + nsimdev_b = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev_b.remove) + nsim_b = nsimdev_b.nsims[0] + ip(f"link set dev {nsim_b.ifname} up") + + netdevnl = NetdevFamily() + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": nsim_a.ifindex, + "type": "rx", + "lease": { + "ifindex": nsim_b.ifindex, + "queue": {"id": 0, "type": "rx"}, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) + + +def test_different_lessors(netns) -> None: + nsimdev_a = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev_a.remove) + nsim_a = nsimdev_a.nsims[0] + ip(f"link set dev {nsim_a.ifname} up") + + nsimdev_b = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev_b.remove) + nsim_b = nsimdev_b.nsims[0] + ip(f"link set dev {nsim_b.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=3) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim_a.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim_b.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.EOPNOTSUPP) + + +def test_queue_out_of_range(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 2, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.ERANGE) + + +def test_resize_leased(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + ethnl = EthtoolFamily() + with ksft_raises(NlError) as e: + ethnl.channels_set({"header": {"dev-index": nsim.ifindex}, "combined-count": 1}) + ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) + + +def test_self_lease(_netns) -> None: + nk_host, _, _, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + netdevnl = NetdevFamily() + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nk_guest_idx, + "queue": {"id": 0, "type": "rx"}, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) + + +def test_veth_queue_create(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + ip("link add veth0 type veth peer name veth1") + defer(cmd, "ip link del dev veth0", fail=False) + + all_links = ip("-d link show", json=True) + veth_peer = [ + link + for link in all_links + if link.get("ifname") == "veth1" + ] + veth_peer_idx = veth_peer[0]["ifindex"] + + ip(f"link set dev veth1 netns {netns.name}") + ip("link set dev veth0 up") + ip("link set dev veth1 up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": veth_peer_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) + + +def test_create_tx_type(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "tx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) + + +def test_create_primary(_netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, nk_host_idx, _, _ = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_host} up") + + netdevnl = NetdevFamily() + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": nk_host_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.EOPNOTSUPP) + + +def test_create_limit(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=1) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) + + +def test_link_flap_phys(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}") + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + src_queue = 1 + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + result = netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + nk_queue_id = result["id"] + + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_in("lease", queue_info) + ksft_eq(queue_info["lease"]["queue"]["id"], nk_queue_id) + + # Link flap the physical device + ip(f"link set dev {nsim.ifname} down") + ip(f"link set dev {nsim.ifname} up") + + # Verify lease survives the flap + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_in("lease", queue_info) + ksft_eq(queue_info["lease"]["queue"]["id"], nk_queue_id) + + +def test_queue_get_virtual(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}") + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + src_queue = 1 + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + result = netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + nk_queue_id = result["id"] + + # queue-get on virtual device's leased queue should not show lease + # info (lease info is only shown from the physical device's side) + queue_info = netdevnl.queue_get( + {"ifindex": nk_guest_idx, "id": nk_queue_id, "type": "rx"} + ) + ksft_eq(queue_info["id"], nk_queue_id) + ksft_eq(queue_info["ifindex"], nk_guest_idx) + ksft_not_in("lease", queue_info) + + # Default queue (not leased) also has no lease info + queue_info = netdevnl.queue_get( + {"ifindex": nk_guest_idx, "id": 0, "type": "rx"} + ) + ksft_not_in("lease", queue_info) + + +def test_remove_virt_first(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + src_queue = 1 + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + result = netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(result["id"], 1) + + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_in("lease", queue_info) + ksft_eq(queue_info["lease"]["queue"]["id"], result["id"]) + + # Delete netkit (virtual device removed first, physical stays) + cmd(f"ip link del dev {nk_host}") + + # Verify lease is cleaned up on physical device + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_not_in("lease", queue_info) + + +def test_multiple_leases(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=3) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=4) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + r1 = netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + r2 = netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 2, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + ksft_eq(r1["id"], 1) + ksft_eq(r2["id"], 2) + + # Verify both leases visible on physical device + netdevnl = NetdevFamily() + q1 = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 1, "type": "rx"} + ) + q2 = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 2, "type": "rx"} + ) + ksft_in("lease", q1) + ksft_in("lease", q2) + ksft_eq(q1["lease"]["ifindex"], nk_guest_idx) + ksft_eq(q2["lease"]["ifindex"], nk_guest_idx) + ksft_eq(q1["lease"]["queue"]["id"], r1["id"]) + ksft_eq(q2["lease"]["queue"]["id"], r2["id"]) + + +def test_lease_queue_tx_type(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "tx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) + + +def test_invalid_netns(netns) -> None: + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": 1, + "queue": {"id": 0, "type": "rx"}, + "netns-id": 999, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.ENONET) + + +def test_invalid_phys_ifindex(netns) -> None: + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": 99999, + "queue": {"id": 0, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.ENODEV) + + +def test_multi_netkit_remove_phys(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=3) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + # Create two netkit pairs, each leasing a different physical queue + nk_host_a, _, nk_guest_a, nk_guest_a_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host_a}", fail=False) + + nk_host_b, _, nk_guest_b, nk_guest_b_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host_b}", fail=False) + + ip(f"link set dev {nk_guest_a} netns {netns.name}") + ip(f"link set dev {nk_host_a} up") + ip(f"link set dev {nk_guest_a} up", ns=netns) + + ip(f"link set dev {nk_guest_b} netns {netns.name}") + ip(f"link set dev {nk_host_b} up") + ip(f"link set dev {nk_guest_b} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + netdevnl.queue_create( + { + "ifindex": nk_guest_a_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + netdevnl.queue_create( + { + "ifindex": nk_guest_b_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 2, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + # Removing the physical device should take down both netkit pairs + nsimdev.remove() + wait_until(lambda: cmd(f"ip link show dev {nk_host_a}", fail=False).ret != 0 + and cmd(f"ip link show dev {nk_host_b}", fail=False).ret != 0) + ret = cmd(f"ip link show dev {nk_host_a}", fail=False) + ksft_ne(ret.ret, 0) + ret = cmd(f"ip link show dev {nk_host_b}", fail=False) + ksft_ne(ret.ret, 0) + + +def test_single_remove_phys(_netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_name, nk_idx = create_netkit_single(rxqueues=2) + defer(cmd, f"ip link del dev {nk_name}", fail=False) + + ip(f"link set dev {nk_name} up") + + netdevnl = NetdevFamily() + netdevnl.queue_create( + { + "ifindex": nk_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + }, + } + ) + + # Removing the physical device should take down the single netkit device + nsimdev.remove() + wait_until(lambda: cmd(f"ip link show dev {nk_name}", fail=False).ret != 0) + ret = cmd(f"ip link show dev {nk_name}", fail=False) + ksft_ne(ret.ret, 0) + + +def test_link_flap_virt(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}") + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + src_queue = 1 + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + result = netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + nk_queue_id = result["id"] + + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_in("lease", queue_info) + ksft_eq(queue_info["lease"]["queue"]["id"], nk_queue_id) + + # Link flap the virtual (netkit) device + ip(f"link set dev {nk_guest} down", ns=netns) + ip(f"link set dev {nk_guest} up", ns=netns) + + # Verify lease survives the virtual device flap + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_in("lease", queue_info) + ksft_eq(queue_info["lease"]["queue"]["id"], nk_queue_id) + + +def test_phys_queue_no_lease(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}") + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + # Physical queue 0 (not leased) should have no lease info + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 0, "type": "rx"} + ) + ksft_not_in("lease", queue_info) + + # Physical queue 1 (leased) should have lease info + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 1, "type": "rx"} + ) + ksft_in("lease", queue_info) + + +def test_same_ns_lease(_netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_name, nk_idx = create_netkit_single(rxqueues=2) + defer(cmd, f"ip link del dev {nk_name}", fail=False) + + ip(f"link set dev {nk_name} up") + + netdevnl = NetdevFamily() + result = netdevnl.queue_create( + { + "ifindex": nk_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + }, + } + ) + ksft_eq(result["id"], 1) + + # Same namespace: lease info should NOT have netns-id + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 1, "type": "rx"} + ) + ksft_in("lease", queue_info) + ksft_eq(queue_info["lease"]["ifindex"], nk_idx) + ksft_eq(queue_info["lease"]["queue"]["id"], result["id"]) + ksft_not_in("netns-id", queue_info["lease"]) + + +def test_resize_after_unlease(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + # Resize should fail while lease is active + ethnl = EthtoolFamily() + with ksft_raises(NlError) as e: + ethnl.channels_set({"header": {"dev-index": nsim.ifindex}, "combined-count": 1}) + ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) + + # Delete netkit, clearing the lease + cmd(f"ip link del dev {nk_host}") + + # Resize should now succeed + ethnl.channels_set({"header": {"dev-index": nsim.ifindex}, "combined-count": 1}) + + +def test_lease_queue_zero(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + result = netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 0, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(result["id"], 1) + + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 0, "type": "rx"} + ) + ksft_in("lease", queue_info) + ksft_eq(queue_info["lease"]["queue"]["id"], result["id"]) + + +def test_release_and_reuse(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + src_queue = 1 + + # First lease + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_in("lease", queue_info) + + # Delete netkit, freeing the lease + cmd(f"ip link del dev {nk_host}") + + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_not_in("lease", queue_info) + + # Re-create netkit and lease the same physical queue again + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + result = netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(result["id"], 1) + + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_in("lease", queue_info) + ksft_eq(queue_info["lease"]["queue"]["id"], result["id"]) + + +def test_two_netkits_same_queue(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host_a, _, nk_guest_a, nk_guest_a_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host_a}", fail=False) + + nk_host_b, _, nk_guest_b, nk_guest_b_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host_b}", fail=False) + + ip(f"link set dev {nk_guest_a} netns {netns.name}") + ip(f"link set dev {nk_host_a} up") + ip(f"link set dev {nk_guest_a} up", ns=netns) + + ip(f"link set dev {nk_guest_b} netns {netns.name}") + ip(f"link set dev {nk_host_b} up") + ip(f"link set dev {nk_guest_b} up", ns=netns) + + src_queue = 1 + with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns: + netdevnl_ns.queue_create( + { + "ifindex": nk_guest_a_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + with ksft_raises(NlError) as e: + netdevnl_ns.queue_create( + { + "ifindex": nk_guest_b_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.EBUSY) + + +def test_l3_mode_lease(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2, mode="l3") + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + src_queue = 1 + with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns: + result = netdevnl_ns.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(result["id"], 1) + + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_in("lease", queue_info) + ksft_eq(queue_info["lease"]["ifindex"], nk_guest_idx) + ksft_eq(queue_info["lease"]["queue"]["id"], result["id"]) + + +def test_single_double_lease(_netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_name, nk_idx = create_netkit_single(rxqueues=3) + defer(cmd, f"ip link del dev {nk_name}", fail=False) + + ip(f"link set dev {nk_name} up") + + netdevnl = NetdevFamily() + result = netdevnl.queue_create( + { + "ifindex": nk_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + }, + } + ) + ksft_eq(result["id"], 1) + + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": nk_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.EBUSY) + + +def test_single_different_lessors(_netns) -> None: + nsimdev_a = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev_a.remove) + nsim_a = nsimdev_a.nsims[0] + ip(f"link set dev {nsim_a.ifname} up") + + nsimdev_b = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev_b.remove) + nsim_b = nsimdev_b.nsims[0] + ip(f"link set dev {nsim_b.ifname} up") + + nk_name, nk_idx = create_netkit_single(rxqueues=3) + defer(cmd, f"ip link del dev {nk_name}", fail=False) + + ip(f"link set dev {nk_name} up") + + netdevnl = NetdevFamily() + netdevnl.queue_create( + { + "ifindex": nk_idx, + "type": "rx", + "lease": { + "ifindex": nsim_a.ifindex, + "queue": {"id": 1, "type": "rx"}, + }, + } + ) + + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": nk_idx, + "type": "rx", + "lease": { + "ifindex": nsim_b.ifindex, + "queue": {"id": 1, "type": "rx"}, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.EOPNOTSUPP) + + +def test_cross_ns_netns_id(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + src_queue = 1 + with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns: + netdevnl_ns.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_in("lease", queue_info) + ksft_in("netns-id", queue_info["lease"]) + + +def test_delete_guest_netns(_netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + test_ns = NetNS() + ip("netns set init 0", ns=test_ns) + ip("link set lo up", ns=test_ns) + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {test_ns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=test_ns) + + src_queue = 1 + with NetNSEnter(str(test_ns)), NetdevFamily() as netdevnl_ns: + netdevnl_ns.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_in("lease", queue_info) + + del test_ns + wait_until(lambda: "lease" not in netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"})) + + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_not_in("lease", queue_info) + + ret = cmd(f"ip link show dev {nk_host}", fail=False) + ksft_ne(ret.ret, 0) + + +def test_move_guest_netns(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + src_queue = 1 + with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns: + result = netdevnl_ns.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + nk_queue_id = result["id"] + + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_in("lease", queue_info) + ksft_eq(queue_info["lease"]["queue"]["id"], nk_queue_id) + + new_ns = NetNS() + defer(new_ns.__del__) + ip(f"link set dev {nk_guest} netns {new_ns.name}", ns=netns) + + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_in("lease", queue_info) + ksft_eq(queue_info["lease"]["queue"]["id"], nk_queue_id) + + +def test_resize_phys_no_reduction(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns: + netdevnl_ns.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + ethnl = EthtoolFamily() + ethnl.channels_set( + {"header": {"dev-index": nsim.ifindex}, "combined-count": 2} + ) + + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 1, "type": "rx"} + ) + ksft_in("lease", queue_info) + + +def test_delete_one_netkit_of_two(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=3) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host_a, _, nk_guest_a, nk_guest_a_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host_a}", fail=False) + + nk_host_b, _, nk_guest_b, nk_guest_b_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host_b}", fail=False) + + ip(f"link set dev {nk_guest_a} netns {netns.name}") + ip(f"link set dev {nk_host_a} up") + ip(f"link set dev {nk_guest_a} up", ns=netns) + + ip(f"link set dev {nk_guest_b} netns {netns.name}") + ip(f"link set dev {nk_host_b} up") + ip(f"link set dev {nk_guest_b} up", ns=netns) + + with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns: + netdevnl_ns.queue_create( + { + "ifindex": nk_guest_a_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + netdevnl_ns.queue_create( + { + "ifindex": nk_guest_b_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 2, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + netdevnl = NetdevFamily() + q1 = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 1, "type": "rx"} + ) + q2 = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 2, "type": "rx"} + ) + ksft_in("lease", q1) + ksft_in("lease", q2) + + cmd(f"ip link del dev {nk_host_a}") + + q1 = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 1, "type": "rx"} + ) + q2 = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 2, "type": "rx"} + ) + ksft_not_in("lease", q1) + ksft_in("lease", q2) + + +def test_bind_rx_leased_phys_queue(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns: + netdevnl_ns.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + netdevnl = NetdevFamily() + with ksft_raises(NlError) as e: + netdevnl.bind_rx( + { + "ifindex": nsim.ifindex, + "fd": 0, + "queues": [ + {"id": 0, "type": "rx"}, + {"id": 1, "type": "rx"}, + ], + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.EOPNOTSUPP) + + +def test_resize_phys_shrink_past_leased(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=4) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns: + netdevnl_ns.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + ethnl = EthtoolFamily() + + # Shrink past the leased queue — only queue 3 removed, queue 1 untouched + ethnl.channels_set( + {"header": {"dev-index": nsim.ifindex}, "combined-count": 3} + ) + + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 1, "type": "rx"} + ) + ksft_in("lease", queue_info) + + # Shrink further — queue 2 removed, queue 1 still untouched + ethnl.channels_set( + {"header": {"dev-index": nsim.ifindex}, "combined-count": 2} + ) + + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 1, "type": "rx"} + ) + ksft_in("lease", queue_info) + + # Shrink into the leased queue — queue 1 is busy, must fail + with ksft_raises(NlError) as e: + ethnl.channels_set( + {"header": {"dev-index": nsim.ifindex}, "combined-count": 1} + ) + ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) + + +def test_resize_virt_not_supported(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, nk_host_idx, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns: + netdevnl_ns.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + # Channel resize on the netkit host must fail — not supported + ethnl = EthtoolFamily() + with ksft_raises(NlError) as e: + ethnl.channels_set( + {"header": {"dev-index": nk_host_idx}, "combined-count": 1} + ) + ksft_eq(e.exception.nl_msg.error, -errno.EOPNOTSUPP) + + # Lease must be intact + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 1, "type": "rx"} + ) + ksft_in("lease", queue_info) + + +def test_lease_devices_down(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + + # Create lease while both physical and virtual devices are down + src_queue = 1 + with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns: + result = netdevnl_ns.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(result["id"], 1) + + # Bring devices up before queue_get: netdevsim only instantiates NAPIs in + # ndo_open, and netdev-genl queue_get returns -ENOENT without a NAPI. + ip(f"link set dev {nsim.ifname} up") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_in("lease", queue_info) + ksft_eq(queue_info["lease"]["queue"]["id"], result["id"]) + + +def test_lease_capacity_exhaustion(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=4) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + # rxqueues=3 means num_rx_queues=3, real_num_rx_queues starts at 1. + # Can create 2 leased queues (real goes 1->2->3) but not a 3rd (3->4 > 3). + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=3) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns: + r1 = netdevnl_ns.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(r1["id"], 1) + + r2 = netdevnl_ns.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 2, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(r2["id"], 2) + + # Third lease fails — netkit queue capacity exhausted + with ksft_raises(NlError) as e: + netdevnl_ns.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 3, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) + + # Verify the two successful leases are intact + netdevnl = NetdevFamily() + q1 = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 1, "type": "rx"} + ) + q2 = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 2, "type": "rx"} + ) + ksft_in("lease", q1) + ksft_in("lease", q2) + + +def test_resize_phys_up(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=3) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + # Shrink nsim first so we have room to grow + ethnl = EthtoolFamily() + ethnl.channels_set( + {"header": {"dev-index": nsim.ifindex}, "combined-count": 2} + ) + + with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns: + netdevnl_ns.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + # Grow channels — should succeed since leased queue is not removed + ethnl.channels_set( + {"header": {"dev-index": nsim.ifindex}, "combined-count": 3} + ) + + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 1, "type": "rx"} + ) + ksft_in("lease", queue_info) + + # New queue 2 should exist without a lease + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 2, "type": "rx"} + ) + ksft_not_in("lease", queue_info) + + +def test_multi_ns_lease(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=3) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + ns_b = NetNS() + defer(ns_b.__del__) + ip("netns set init 0", ns=ns_b) + ip("link set lo up", ns=ns_b) + + # First netkit pair, guest in netns + nk_host_a, _, nk_guest_a, nk_guest_a_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host_a}", fail=False) + ip(f"link set dev {nk_guest_a} netns {netns.name}") + ip(f"link set dev {nk_host_a} up") + ip(f"link set dev {nk_guest_a} up", ns=netns) + + # Second netkit pair, guest in ns_b + nk_host_b, _, nk_guest_b, nk_guest_b_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host_b}", fail=False) + ip(f"link set dev {nk_guest_b} netns {ns_b.name}") + ip(f"link set dev {nk_host_b} up") + ip(f"link set dev {nk_guest_b} up", ns=ns_b) + + # Lease from netns + with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns: + result = netdevnl_ns.queue_create( + { + "ifindex": nk_guest_a_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(result["id"], 1) + + # Lease from ns_b (different namespace, same physical device) + with NetNSEnter(str(ns_b)), NetdevFamily() as netdevnl_ns: + result = netdevnl_ns.queue_create( + { + "ifindex": nk_guest_b_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 2, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(result["id"], 1) + + # Verify both leases from the physical side + netdevnl = NetdevFamily() + q1 = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 1, "type": "rx"} + ) + q2 = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 2, "type": "rx"} + ) + ksft_in("lease", q1) + ksft_in("lease", q2) + ksft_eq(q1["lease"]["ifindex"], nk_guest_a_idx) + ksft_eq(q2["lease"]["ifindex"], nk_guest_b_idx) + + +def test_multi_ns_delete_one(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=3) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + ns_b = NetNS() + ip("netns set init 0", ns=ns_b) + ip("link set lo up", ns=ns_b) + + # First netkit pair, guest in netns (ns_a) + nk_host_a, _, nk_guest_a, nk_guest_a_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host_a}", fail=False) + ip(f"link set dev {nk_guest_a} netns {netns.name}") + ip(f"link set dev {nk_host_a} up") + ip(f"link set dev {nk_guest_a} up", ns=netns) + + # Second netkit pair, guest in ns_b + nk_host_b, _, nk_guest_b, nk_guest_b_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host_b}", fail=False) + + ip(f"link set dev {nk_guest_b} netns {ns_b.name}") + ip(f"link set dev {nk_host_b} up") + ip(f"link set dev {nk_guest_b} up", ns=ns_b) + + with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns: + netdevnl_ns.queue_create( + { + "ifindex": nk_guest_a_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + with NetNSEnter(str(ns_b)), NetdevFamily() as netdevnl_ns: + netdevnl_ns.queue_create( + { + "ifindex": nk_guest_b_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 2, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + netdevnl = NetdevFamily() + q1 = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 1, "type": "rx"} + ) + q2 = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 2, "type": "rx"} + ) + ksft_in("lease", q1) + ksft_in("lease", q2) + + # Delete ns_b — destroys nk_guest_b, triggers unlease of queue 2 + del ns_b + wait_until(lambda: "lease" not in netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 2, "type": "rx"})) + + # ns_a's lease on queue 1 must survive + q1 = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 1, "type": "rx"} + ) + ksft_in("lease", q1) + ksft_eq(q1["lease"]["ifindex"], nk_guest_a_idx) + + # ns_b's lease on queue 2 must be gone + q2 = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 2, "type": "rx"} + ) + ksft_not_in("lease", q2) + + # nk_host_b should be gone too (phys removal cascades to netkit pair) + ret = cmd(f"ip link show dev {nk_host_b}", fail=False) + ksft_ne(ret.ret, 0) + + +def test_move_phys_netns(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + src_queue = 1 + with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns: + nk_queue_id = netdevnl_ns.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + )["id"] + + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_in("lease", queue_info) + + # Move the physical device to a new namespace. Move it back to init_net + # on cleanup before the other defers fire (new_ns deletion, nsimdev.remove) + # so nsim lives in a stable namespace when they run. + new_ns = NetNS() + defer(new_ns.__del__) + ip(f"link set dev {nsim.ifname} netns {new_ns.name}") + defer(ip, f"link set dev {nsim.ifname} netns init", ns=new_ns) + + # Physical device is now in new_ns — find its ifindex there + all_links = ip("-d link show", json=True, ns=new_ns) + nsim_in_new = [lnk for lnk in all_links if lnk.get("ifname") == nsim.ifname] + new_ifindex = nsim_in_new[0]["ifindex"] + + # Moving a device across netns brings it admin-down; bring it back up so + # netdevsim re-creates the NAPI (netdev-genl queue_get needs it). + ip(f"link set dev {nsim.ifname} up", ns=new_ns) + + # Verify lease survived the namespace move + with NetNSEnter(str(new_ns)), NetdevFamily() as netdevnl_ns: + queue_info = netdevnl_ns.queue_get( + {"ifindex": new_ifindex, "id": src_queue, "type": "rx"} + ) + ksft_in("lease", queue_info) + ksft_eq(queue_info["lease"]["queue"]["id"], nk_queue_id) + + +def main() -> None: + netns = NetNS() + cmd("ip netns attach init 1") + ip("netns set init 0", ns=netns) + ip("link set lo up", ns=netns) + + ksft_run( + [ + test_remove_phys, + test_double_lease, + test_virtual_lessor, + test_phys_lessee, + test_different_lessors, + test_queue_out_of_range, + test_resize_leased, + test_self_lease, + test_create_tx_type, + test_create_primary, + test_create_limit, + test_link_flap_phys, + test_queue_get_virtual, + test_remove_virt_first, + test_multiple_leases, + test_lease_queue_tx_type, + test_invalid_netns, + test_invalid_phys_ifindex, + test_multi_netkit_remove_phys, + test_single_remove_phys, + test_link_flap_virt, + test_phys_queue_no_lease, + test_same_ns_lease, + test_resize_after_unlease, + test_lease_queue_zero, + test_release_and_reuse, + test_veth_queue_create, + test_two_netkits_same_queue, + test_l3_mode_lease, + test_single_double_lease, + test_single_different_lessors, + test_cross_ns_netns_id, + test_delete_guest_netns, + test_move_guest_netns, + test_resize_phys_no_reduction, + test_delete_one_netkit_of_two, + test_bind_rx_leased_phys_queue, + test_resize_phys_shrink_past_leased, + test_resize_virt_not_supported, + test_lease_devices_down, + test_lease_capacity_exhaustion, + test_resize_phys_up, + test_multi_ns_lease, + test_multi_ns_delete_one, + test_move_phys_netns, + ], + args=(netns,), + ) + + cmd("ip netns del init", fail=False) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py index 93e8cb671c3d..eff55c64a012 100755 --- a/tools/testing/selftests/net/nl_netdev.py +++ b/tools/testing/selftests/net/nl_netdev.py @@ -1,10 +1,15 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 -import time -from lib.py import ksft_run, ksft_exit, ksft_pr -from lib.py import ksft_eq, ksft_ge, ksft_busy_wait -from lib.py import NetdevFamily, NetdevSimDev, ip +""" +Tests for the netdev netlink family. +""" + +import errno +from os import system +from lib.py import ksft_run, ksft_exit +from lib.py import ksft_eq, ksft_ge, ksft_ne, ksft_raises, ksft_busy_wait +from lib.py import NetdevFamily, NetdevSimDev, NlError, ip def empty_check(nf) -> None: @@ -18,6 +23,15 @@ def lo_check(nf) -> None: ksft_eq(len(lo_info['xdp-rx-metadata-features']), 0) +def dev_dump_reject_attr(nf) -> None: + """Test that dev-get dump rejects attributes (no dump request policy).""" + with ksft_raises(NlError) as cm: + nf.dev_get({'ifindex': 1}, dump=True) + ksft_eq(cm.exception.nl_msg.error, -errno.EINVAL) + ksft_eq(cm.exception.nl_msg.extack['msg'], 'Unknown attribute type') + ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex') + + def napi_list_check(nf) -> None: with NetdevSimDev(queue_count=100) as nsimdev: nsim = nsimdev.nsims[0] @@ -34,6 +48,143 @@ def napi_list_check(nf) -> None: ksft_eq(len(napis), 100, comment=f"queue count after reset queue {q} mode {i}") +def napi_set_threaded(nf) -> None: + """ + Test that verifies various cases of napi threaded + set and unset at napi and device level. + """ + with NetdevSimDev(queue_count=2) as nsimdev: + nsim = nsimdev.nsims[0] + + ip(f"link set dev {nsim.ifname} up") + + napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True) + ksft_eq(len(napis), 2) + + napi0_id = napis[0]['id'] + napi1_id = napis[1]['id'] + + # set napi threaded and verify + nf.napi_set({'id': napi0_id, 'threaded': "enabled"}) + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], "enabled") + ksft_ne(napi0.get('pid'), None) + + # check it is not set for napi1 + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], "disabled") + ksft_eq(napi1.get('pid'), None) + + ip(f"link set dev {nsim.ifname} down") + ip(f"link set dev {nsim.ifname} up") + + # verify if napi threaded is still set + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], "enabled") + ksft_ne(napi0.get('pid'), None) + + # check it is still not set for napi1 + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], "disabled") + ksft_eq(napi1.get('pid'), None) + + # unset napi threaded and verify + nf.napi_set({'id': napi0_id, 'threaded': "disabled"}) + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], "disabled") + ksft_eq(napi0.get('pid'), None) + + # set threaded at device level + system(f"echo 1 > /sys/class/net/{nsim.ifname}/threaded") + + # check napi threaded is set for both napis + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], "enabled") + ksft_ne(napi0.get('pid'), None) + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], "enabled") + ksft_ne(napi1.get('pid'), None) + + # unset threaded at device level + system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded") + + # check napi threaded is unset for both napis + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], "disabled") + ksft_eq(napi0.get('pid'), None) + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], "disabled") + ksft_eq(napi1.get('pid'), None) + + # set napi threaded for napi0 + nf.napi_set({'id': napi0_id, 'threaded': 1}) + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], "enabled") + ksft_ne(napi0.get('pid'), None) + + # unset threaded at device level + system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded") + + # check napi threaded is unset for both napis + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], "disabled") + ksft_eq(napi0.get('pid'), None) + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], "disabled") + ksft_eq(napi1.get('pid'), None) + +def dev_set_threaded(nf) -> None: + """ + Test that verifies various cases of napi threaded + set and unset at device level using sysfs. + """ + with NetdevSimDev(queue_count=2) as nsimdev: + nsim = nsimdev.nsims[0] + + ip(f"link set dev {nsim.ifname} up") + + napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True) + ksft_eq(len(napis), 2) + + napi0_id = napis[0]['id'] + napi1_id = napis[1]['id'] + + # set threaded + system(f"echo 1 > /sys/class/net/{nsim.ifname}/threaded") + + # check napi threaded is set for both napis + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], "enabled") + ksft_ne(napi0.get('pid'), None) + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], "enabled") + ksft_ne(napi1.get('pid'), None) + + # unset threaded + system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded") + + # check napi threaded is unset for both napis + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], "disabled") + ksft_eq(napi0.get('pid'), None) + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], "disabled") + ksft_eq(napi1.get('pid'), None) + +def nsim_rxq_reset_down(nf) -> None: + """ + Test that the queue API supports resetting a queue + while the interface is down. We should convert this + test to testing real HW once more devices support + queue API. + """ + with NetdevSimDev(queue_count=4) as nsimdev: + nsim = nsimdev.nsims[0] + + ip(f"link set dev {nsim.ifname} down") + for i in [0, 2, 3]: + nsim.dfs_write("queue_reset", f"1 {i}") + def page_pool_check(nf) -> None: with NetdevSimDev() as nsimdev: @@ -105,8 +256,16 @@ def page_pool_check(nf) -> None: def main() -> None: + """ Ksft boiler plate main """ nf = NetdevFamily() - ksft_run([empty_check, lo_check, page_pool_check, napi_list_check], + ksft_run([empty_check, + lo_check, + dev_dump_reject_attr, + napi_list_check, + napi_set_threaded, + dev_set_threaded, + nsim_rxq_reset_down, + page_pool_check], args=(nf, )) ksft_exit() diff --git a/tools/testing/selftests/net/nl_nlctrl.py b/tools/testing/selftests/net/nl_nlctrl.py new file mode 100755 index 000000000000..fe1f66dc9435 --- /dev/null +++ b/tools/testing/selftests/net/nl_nlctrl.py @@ -0,0 +1,131 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Tests for the nlctrl genetlink family (family info and policy dumps). +""" + +from lib.py import ksft_run, ksft_exit +from lib.py import ksft_eq, ksft_ge, ksft_true, ksft_in, ksft_not_in +from lib.py import NetdevFamily, EthtoolFamily, NlctrlFamily + + +def getfamily_do(ctrl) -> None: + """Query a single family by name and validate its ops.""" + fam = ctrl.getfamily({'family-name': 'netdev'}) + ksft_eq(fam['family-name'], 'netdev') + ksft_true(fam['family-id'] > 0) + + # The format of ops is quite odd, [{$idx: {"id"...}}, {$idx: {"id"...}}] + # Discard the indices and re-key by command id. + ops_by_id = {v['id']: v for op in fam['ops'] for v in op.values()} + ksft_eq(len(ops_by_id), len(fam['ops'])) + + # All ops should have a policy (either do or dump has one) + for op in ops_by_id.values(): + ksft_in('cmd-cap-haspol', op['flags'], + comment=f"op {op['id']} missing haspol") + + # dev-get (id 1) should support both do and dump + ksft_in('cmd-cap-do', ops_by_id[1]['flags']) + ksft_in('cmd-cap-dump', ops_by_id[1]['flags']) + + # qstats-get (id 12) is dump-only + ksft_not_in('cmd-cap-do', ops_by_id[12]['flags']) + ksft_in('cmd-cap-dump', ops_by_id[12]['flags']) + + # napi-set (id 14) is do-only and requires admin + ksft_in('cmd-cap-do', ops_by_id[14]['flags']) + ksft_not_in('cmd-cap-dump', ops_by_id[14]['flags']) + ksft_in('admin-perm', ops_by_id[14]['flags']) + + # Notification-only commands (dev-add/del/change-ntf etc.) must + # not appear in the ops list since they have no do/dump handlers. + for ntf_id in [2, 3, 4, 6, 7, 8]: + ksft_not_in(ntf_id, ops_by_id, + comment=f"ntf-only cmd {ntf_id} should not be in ops") + + +def getfamily_dump(ctrl) -> None: + """Dump all families and verify expected entries.""" + families = ctrl.getfamily({}, dump=True) + ksft_ge(len(families), 2) + + names = [f['family-name'] for f in families] + ksft_in('nlctrl', names, comment="nlctrl not found in family dump") + ksft_in('netdev', names, comment="netdev not found in family dump") + + +def getpolicy_dump(_ctrl) -> None: + """Dump policies for ops using get_policy() and validate results. + + Test with netdev (split ops) where do and dump can have different + policies, and with ethtool (full ops) where they always share one. + """ + # -- netdev (split ops) -- + ndev = NetdevFamily() + + # dev-get: do has a real policy with ifindex, dump has no policy + # (only the reject-all policy with maxattr=0) + pol = ndev.get_policy('dev-get', 'do') + ksft_in('ifindex', pol, comment="dev-get do policy should have ifindex") + ksft_eq(pol['ifindex'].type, 'u32') + + pol_dump = ndev.get_policy('dev-get', 'dump') + ksft_eq(len(pol_dump), 0, comment="dev-get should not accept any attrs") + + # napi-get: both do and dump have real policies + pol_do = ndev.get_policy('napi-get', 'do') + ksft_ge(len(pol_do), 1) + + pol_dump = ndev.get_policy('napi-get', 'dump') + ksft_ge(len(pol_dump), 1) + + # -- ethtool (full ops) -- + et = EthtoolFamily() + + # strset-get (has both do and dump, full ops share policy) + pol_do = et.get_policy('strset-get', 'do') + ksft_ge(len(pol_do), 1, comment="strset-get should have a do policy") + + pol_dump = et.get_policy('strset-get', 'dump') + ksft_ge(len(pol_dump), 1, comment="strset-get should have a dump policy") + + # Same policy means same attribute names + ksft_eq(set(pol_do.keys()), set(pol_dump.keys())) + + # linkinfo-set is do-only (SET command), no dump + pol_do = et.get_policy('linkinfo-set', 'do') + ksft_ge(len(pol_do), 1, comment="linkinfo-set should have a do policy") + + pol_dump = et.get_policy('linkinfo-set', 'dump') + ksft_eq(pol_dump, None, + comment="linkinfo-set should not have a dump policy") + + +def getpolicy_by_op(_ctrl) -> None: + """Query policy for specific ops, check attr names are resolved.""" + ndev = NetdevFamily() + + # dev-get do policy should have named attributes from the spec + pol = ndev.get_policy('dev-get', 'do') + ksft_ge(len(pol), 1) + # All attr names should be resolved (no 'attr-N' fallbacks) + for name in pol: + ksft_true(not name.startswith('attr-'), + comment=f"unresolved attr name: {name}") + + +def main() -> None: + """ Ksft boiler plate main """ + ctrl = NlctrlFamily() + ksft_run([getfamily_do, + getfamily_dump, + getpolicy_dump, + getpolicy_by_op], + args=(ctrl, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh index 960e1ab4dd04..b327d3061ed5 100755 --- a/tools/testing/selftests/net/openvswitch/openvswitch.sh +++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh @@ -25,6 +25,7 @@ tests=" nat_related_v4 ip4-nat-related: ICMP related matches work with SNAT netlink_checks ovsnl: validate netlink attrs and settings upcall_interfaces ovs: test the upcall interfaces + tunnel_metadata ovs: test extraction of tunnel metadata drop_reason drop: test drop reasons are emitted psample psample: Sampling packets with psample" @@ -113,13 +114,13 @@ ovs_add_dp () { } ovs_add_if () { - info "Adding IF to DP: br:$2 if:$3" - if [ "$4" != "-u" ]; then - ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py add-if "$2" "$3" \ - || return 1 + info "Adding IF to DP: br:$3 if:$4 ($2)" + if [ "$5" != "-u" ]; then + ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py add-if \ + -t "$2" "$3" "$4" || return 1 else python3 $ovs_base/ovs-dpctl.py add-if \ - -u "$2" "$3" >$ovs_dir/$3.out 2>$ovs_dir/$3.err & + -u -t "$2" "$3" "$4" >$ovs_dir/$4.out 2>$ovs_dir/$4.err & pid=$! on_exit "ovs_sbx $1 kill -TERM $pid 2>/dev/null" fi @@ -166,9 +167,9 @@ ovs_add_netns_and_veths () { fi if [ "$7" != "-u" ]; then - ovs_add_if "$1" "$2" "$4" || return 1 + ovs_add_if "$1" "netdev" "$2" "$4" || return 1 else - ovs_add_if "$1" "$2" "$4" -u || return 1 + ovs_add_if "$1" "netdev" "$2" "$4" -u || return 1 fi if [ $TRACING -eq 1 ]; then @@ -330,6 +331,11 @@ test_psample() { # - drop packets and verify the right drop reason is reported test_drop_reason() { which perf >/dev/null 2>&1 || return $ksft_skip + which pahole >/dev/null 2>&1 || return $ksft_skip + + ovs_drop_subsys=$(pahole -C skb_drop_reason_subsys | + awk '/OPENVSWITCH/ { print $3; }' | + tr -d ,) sbx_add "test_drop_reason" || return $? @@ -373,7 +379,7 @@ test_drop_reason() { "in_port(2),eth(),eth_type(0x0800),ipv4(src=172.31.110.20,proto=1),icmp()" 'drop' ovs_drop_record_and_run "test_drop_reason" ip netns exec client ping -c 2 172.31.110.20 - ovs_drop_reason_count 0x30001 # OVS_DROP_FLOW_ACTION + ovs_drop_reason_count 0x${ovs_drop_subsys}0001 # OVS_DROP_FLOW_ACTION if [[ "$?" -ne "2" ]]; then info "Did not detect expected drops: $?" return 1 @@ -390,7 +396,7 @@ test_drop_reason() { ovs_drop_record_and_run \ "test_drop_reason" ip netns exec client nc -i 1 -zuv 172.31.110.20 6000 - ovs_drop_reason_count 0x30004 # OVS_DROP_EXPLICIT_ACTION_ERROR + ovs_drop_reason_count 0x${ovs_drop_subsys}0004 # OVS_DROP_EXPLICIT_ACTION_ERROR if [[ "$?" -ne "1" ]]; then info "Did not detect expected explicit error drops: $?" return 1 @@ -398,7 +404,7 @@ test_drop_reason() { ovs_drop_record_and_run \ "test_drop_reason" ip netns exec client nc -i 1 -zuv 172.31.110.20 7000 - ovs_drop_reason_count 0x30003 # OVS_DROP_EXPLICIT_ACTION + ovs_drop_reason_count 0x${ovs_drop_subsys}0003 # OVS_DROP_EXPLICIT_ACTION if [[ "$?" -ne "1" ]]; then info "Did not detect expected explicit drops: $?" return 1 @@ -751,6 +757,79 @@ test_upcall_interfaces() { return 0 } +ovs_add_kernel_tunnel() { + local sbxname=$1; shift + local ns=$1; shift + local tnl_type=$1; shift + local name=$1; shift + local addr=$1; shift + + info "setting up kernel ${tnl_type} tunnel ${name}" + ovs_sbx "${sbxname}" ip -netns ${ns} link add dev ${name} type ${tnl_type} $* || return 1 + on_exit "ovs_sbx ${sbxname} ip -netns ${ns} link del ${name} >/dev/null 2>&1" + ovs_sbx "${sbxname}" ip -netns ${ns} addr add dev ${name} ${addr} || return 1 + ovs_sbx "${sbxname}" ip -netns ${ns} link set dev ${name} mtu 1450 up || return 1 +} + +test_tunnel_metadata() { + which arping >/dev/null 2>&1 || return $ksft_skip + + sbxname="test_tunnel_metadata" + sbx_add "${sbxname}" || return 1 + + info "setting up new DP" + ovs_add_dp "${sbxname}" tdp0 -V 2:1 || return 1 + + ovs_add_netns_and_veths "${sbxname}" tdp0 tns left0 l0 \ + 172.31.110.1/24 || return 1 + + info "removing veth interface from openvswitch and setting IP" + ovs_del_if "${sbxname}" tdp0 left0 || return 1 + ovs_sbx "${sbxname}" ip addr add 172.31.110.2/24 dev left0 || return 1 + ovs_sbx "${sbxname}" ip link set left0 up || return 1 + + info "setting up tunnel port in openvswitch" + ovs_add_if "${sbxname}" "vxlan" tdp0 ovs-vxlan0 -u || return 1 + on_exit "ovs_sbx ${sbxname} ip link del ovs-vxlan0" + ovs_wait ip link show ovs-vxlan0 &>/dev/null || return 1 + ovs_sbx "${sbxname}" ip link set ovs-vxlan0 up || return 1 + + configs=$(echo ' + 1 172.31.221.1/24 1155332 32 set udpcsum flags\(df\|csum\) + 2 172.31.222.1/24 1234567 45 set noudpcsum flags\(df\) + 3 172.31.223.1/24 1020304 23 unset udpcsum flags\(csum\) + 4 172.31.224.1/24 1357986 15 unset noudpcsum' | sed '/^$/d') + + while read -r i addr id ttl df csum flags; do + ovs_add_kernel_tunnel "${sbxname}" tns vxlan vxlan${i} ${addr} \ + remote 172.31.110.2 id ${id} dstport 4789 \ + ttl ${ttl} df ${df} ${csum} || return 1 + done <<< "${configs}" + + ovs_wait grep -q 'listening on upcall packet handler' \ + ${ovs_dir}/ovs-vxlan0.out || return 1 + + info "sending arping" + for i in 1 2 3 4; do + ovs_sbx "${sbxname}" ip netns exec tns \ + arping -I vxlan${i} 172.31.22${i}.2 -c 1 \ + >${ovs_dir}/arping.stdout 2>${ovs_dir}/arping.stderr + done + + info "checking that received decapsulated packets carry correct metadata" + while read -r i addr id ttl df csum flags; do + arp_hdr="arp\\(sip=172.31.22${i}.1,tip=172.31.22${i}.2,op=1,sha=" + addrs="src=172.31.110.1,dst=172.31.110.2" + ports="tp_src=[0-9]*,tp_dst=4789" + tnl_md="tunnel\\(tun_id=${id},${addrs},ttl=${ttl},${ports},${flags}\\)" + + ovs_sbx "${sbxname}" grep -qE "MISS upcall.*${tnl_md}.*${arp_hdr}" \ + ${ovs_dir}/ovs-vxlan0.out || return 1 + done <<< "${configs}" + + return 0 +} + run_test() { ( tname="$1" diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py index 8a0396bfaf99..848f61fdcee0 100644 --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py @@ -1877,7 +1877,7 @@ class OvsPacket(GenericNetlinkSocket): elif msg["cmd"] == OvsPacket.OVS_PACKET_CMD_EXECUTE: up.execute(msg) else: - print("Unkonwn cmd: %d" % msg["cmd"]) + print("Unknown cmd: %d" % msg["cmd"]) except NetlinkError as ne: raise ne @@ -2583,7 +2583,7 @@ def main(argv): prverscheck = pyroute2.__version__.split(".") if int(prverscheck[0]) == 0 and int(prverscheck[1]) < 6: print("Need to upgrade the python pyroute2 package to >= 0.6.") - sys.exit(0) + sys.exit(1) parser = argparse.ArgumentParser() parser.add_argument( diff --git a/tools/testing/selftests/net/ovpn/.gitignore b/tools/testing/selftests/net/ovpn/.gitignore new file mode 100644 index 000000000000..ee44c081ca7c --- /dev/null +++ b/tools/testing/selftests/net/ovpn/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0+ +ovpn-cli diff --git a/tools/testing/selftests/net/ovpn/Makefile b/tools/testing/selftests/net/ovpn/Makefile new file mode 100644 index 000000000000..169f0464ac3a --- /dev/null +++ b/tools/testing/selftests/net/ovpn/Makefile @@ -0,0 +1,51 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2020-2025 OpenVPN, Inc. +# +CFLAGS = -pedantic -Wextra -Wall -Wl,--no-as-needed -g -O0 -ggdb $(KHDR_INCLUDES) +CFLAGS += $(shell pkg-config --cflags mbedcrypto-3 mbedtls-3 2>/dev/null) + +VAR_CFLAGS = $(shell pkg-config --cflags libnl-3.0 libnl-genl-3.0 2>/dev/null) +ifeq ($(VAR_CFLAGS),) +VAR_CFLAGS = -I/usr/include/libnl3 +endif +CFLAGS += $(VAR_CFLAGS) + +MTLS_LDLIBS= $(shell pkg-config --libs mbedcrypto-3 mbedtls-3 2>/dev/null) +ifeq ($(MTLS_LDLIBS),) +MTLS_LDLIBS = -lmbedtls -lmbedcrypto +endif +LDLIBS += $(MTLS_LDLIBS) + +NL_LDLIBS = $(shell pkg-config --libs libnl-3.0 libnl-genl-3.0 2>/dev/null) +ifeq ($(NL_LDLIBS),) +NL_LDLIBS = -lnl-genl-3 -lnl-3 +endif +LDLIBS += $(NL_LDLIBS) + + +TEST_FILES = \ + common.sh \ + data64.key \ + json \ + tcp_peers.txt \ + udp_peers.txt \ + ../../../../net/ynl/pyynl/cli.py \ +# end of TEST_FILES + +TEST_PROGS := \ + test-chachapoly.sh \ + test-close-socket-tcp.sh \ + test-close-socket.sh \ + test-float.sh \ + test-large-mtu.sh \ + test-mark.sh \ + test-symmetric-id-float.sh \ + test-symmetric-id-tcp.sh \ + test-symmetric-id.sh \ + test-tcp.sh \ + test.sh \ +# end of TEST_PROGS + +TEST_GEN_FILES := ovpn-cli + +include ../../lib.mk diff --git a/tools/testing/selftests/net/ovpn/common.sh b/tools/testing/selftests/net/ovpn/common.sh new file mode 100644 index 000000000000..4c08f756e63a --- /dev/null +++ b/tools/testing/selftests/net/ovpn/common.sh @@ -0,0 +1,183 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2020-2025 OpenVPN, Inc. +# +# Author: Antonio Quartulli <antonio@openvpn.net> + +UDP_PEERS_FILE=${UDP_PEERS_FILE:-udp_peers.txt} +TCP_PEERS_FILE=${TCP_PEERS_FILE:-tcp_peers.txt} +OVPN_CLI=${OVPN_CLI:-./ovpn-cli} +YNL_CLI=${YNL_CLI:-../../../../net/ynl/pyynl/cli.py} +ALG=${ALG:-aes} +PROTO=${PROTO:-UDP} +FLOAT=${FLOAT:-0} +SYMMETRIC_ID=${SYMMETRIC_ID:-0} + +export ID_OFFSET=$(( 9 * (SYMMETRIC_ID == 0) )) + +JQ_FILTER='map(select(.msg.peer | has("remote-ipv6") | not)) | + map(del(.msg.ifindex)) | sort_by(.msg.peer.id)[]' +LAN_IP="11.11.11.11" + +declare -A tmp_jsons=() +declare -A listener_pids=() + +create_ns() { + ip netns add peer${1} +} + +setup_ns() { + MODE="P2P" + + if [ ${1} -eq 0 ]; then + MODE="MP" + for p in $(seq 1 ${NUM_PEERS}); do + ip link add veth${p} netns peer0 type veth peer name veth${p} netns peer${p} + + ip -n peer0 addr add 10.10.${p}.1/24 dev veth${p} + ip -n peer0 addr add fd00:0:0:${p}::1/64 dev veth${p} + ip -n peer0 link set veth${p} up + + ip -n peer${p} addr add 10.10.${p}.2/24 dev veth${p} + ip -n peer${p} addr add fd00:0:0:${p}::2/64 dev veth${p} + ip -n peer${p} link set veth${p} up + done + fi + + ip netns exec peer${1} ${OVPN_CLI} new_iface tun${1} $MODE + ip -n peer${1} addr add ${2} dev tun${1} + # add a secondary IP to peer 1, to test a LAN behind a client + if [ ${1} -eq 1 -a -n "${LAN_IP}" ]; then + ip -n peer${1} addr add ${LAN_IP} dev tun${1} + ip -n peer0 route add ${LAN_IP} via $(echo ${2} |sed -e s'!/.*!!') dev tun0 + fi + if [ -n "${3}" ]; then + ip -n peer${1} link set mtu ${3} dev tun${1} + fi + ip -n peer${1} link set tun${1} up +} + +build_capture_filter() { + # match the first four bytes of the openvpn data payload + if [ "${PROTO}" == "UDP" ]; then + # For UDP, libpcap transport indexing only works for IPv4, so + # use an explicit IPv4 or IPv6 expression based on the peer + # address. The IPv6 branch assumes there are no extension + # headers in the outer packet. + if [[ "${2}" == *:* ]]; then + printf "ip6 and ip6[6] = 17 and ip6[48:4] = %s" "${1}" + else + printf "ip and udp[8:4] = %s" "${1}" + fi + else + # openvpn over TCP prepends a 2-byte packet length ahead of the + # DATA_V2 opcode, so skip it before matching the payload header + printf "ip and tcp[(((tcp[12] & 0xf0) >> 2) + 2):4] = %s" "${1}" + fi +} + +setup_listener() { + file=$(mktemp) + PYTHONUNBUFFERED=1 ip netns exec peer${p} ${YNL_CLI} --family ovpn \ + --subscribe peers --output-json --duration 40 > ${file} & + listener_pids[$1]=$! + tmp_jsons[$1]="${file}" +} + +add_peer() { + labels=("ASYMM" "SYMM") + M_ID=${labels[SYMMETRIC_ID]} + + if [ "${PROTO}" == "UDP" ]; then + if [ ${1} -eq 0 ]; then + ip netns exec peer0 ${OVPN_CLI} new_multi_peer tun0 1 \ + ${M_ID} ${UDP_PEERS_FILE} + + for p in $(seq 1 ${NUM_PEERS}); do + ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 1 0 ${ALG} 0 \ + data64.key + done + else + if [ "${SYMMETRIC_ID}" -eq 1 ]; then + PEER_ID=${1} + TX_ID="none" + else + PEER_ID=$(awk "NR == ${1} {print \$2}" \ + ${UDP_PEERS_FILE}) + TX_ID=${1} + fi + RADDR=$(awk "NR == ${1} {print \$3}" ${UDP_PEERS_FILE}) + RPORT=$(awk "NR == ${1} {print \$4}" ${UDP_PEERS_FILE}) + LPORT=$(awk "NR == ${1} {print \$6}" ${UDP_PEERS_FILE}) + ip netns exec peer${1} ${OVPN_CLI} new_peer tun${1} \ + ${PEER_ID} ${TX_ID} ${LPORT} ${RADDR} ${RPORT} + ip netns exec peer${1} ${OVPN_CLI} new_key tun${1} \ + ${PEER_ID} 1 0 ${ALG} 1 data64.key + fi + else + if [ ${1} -eq 0 ]; then + (ip netns exec peer0 ${OVPN_CLI} listen tun0 1 ${M_ID} \ + ${TCP_PEERS_FILE} && { + for p in $(seq 1 ${NUM_PEERS}); do + ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 1 0 \ + ${ALG} 0 data64.key + done + }) & + sleep 5 + else + if [ "${SYMMETRIC_ID}" -eq 1 ]; then + PEER_ID=${1} + TX_ID="none" + else + PEER_ID=$(awk "NR == ${1} {print \$2}" \ + ${TCP_PEERS_FILE}) + TX_ID=${1} + fi + ip netns exec peer${1} ${OVPN_CLI} connect tun${1} \ + ${PEER_ID} ${TX_ID} 10.10.${1}.1 1 data64.key + fi + fi +} + +compare_ntfs() { + if [ ${#tmp_jsons[@]} -gt 0 ]; then + suffix="" + [ "${SYMMETRIC_ID}" -eq 1 ] && suffix="${suffix}-symm" + [ "$FLOAT" == 1 ] && suffix="${suffix}-float" + expected="json/peer${1}${suffix}.json" + received="${tmp_jsons[$1]}" + + kill -TERM ${listener_pids[$1]} || true + wait ${listener_pids[$1]} || true + printf "Checking notifications for peer ${1}... " + if diff <(jq -s "${JQ_FILTER}" ${expected}) \ + <(jq -s "${JQ_FILTER}" ${received}); then + echo "OK" + fi + + rm -f ${received} || true + fi +} + +cleanup() { + # some ovpn-cli processes sleep in background so they need manual poking + killall $(basename ${OVPN_CLI}) 2>/dev/null || true + + # netns peer0 is deleted without erasing ifaces first + for p in $(seq 1 10); do + ip -n peer${p} link set tun${p} down 2>/dev/null || true + ip netns exec peer${p} ${OVPN_CLI} del_iface tun${p} 2>/dev/null || true + done + for p in $(seq 1 10); do + ip -n peer0 link del veth${p} 2>/dev/null || true + done + for p in $(seq 0 10); do + ip netns del peer${p} 2>/dev/null || true + done +} + +if [ "${PROTO}" == "UDP" ]; then + NUM_PEERS=${NUM_PEERS:-$(wc -l ${UDP_PEERS_FILE} | awk '{print $1}')} +else + NUM_PEERS=${NUM_PEERS:-$(wc -l ${TCP_PEERS_FILE} | awk '{print $1}')} +fi diff --git a/tools/testing/selftests/net/ovpn/config b/tools/testing/selftests/net/ovpn/config new file mode 100644 index 000000000000..42699740936d --- /dev/null +++ b/tools/testing/selftests/net/ovpn/config @@ -0,0 +1,10 @@ +CONFIG_CRYPTO=y +CONFIG_CRYPTO_AES=y +CONFIG_CRYPTO_CHACHA20POLY1305=y +CONFIG_CRYPTO_GCM=y +CONFIG_DST_CACHE=y +CONFIG_INET=y +CONFIG_NET=y +CONFIG_NET_UDP_TUNNEL=y +CONFIG_OVPN=m +CONFIG_STREAM_PARSER=y diff --git a/tools/testing/selftests/net/ovpn/data64.key b/tools/testing/selftests/net/ovpn/data64.key new file mode 100644 index 000000000000..d04febcdf5a2 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/data64.key @@ -0,0 +1 @@ +jRqMACN7d7/aFQNT8S7jkrBD8uwrgHbG5OQZP2eu4R1Y7tfpS2bf5RHv06Vi163CGoaIiTX99R3Bia9ycAH8Wz1+9PWv51dnBLur9jbShlgZ2QHLtUc4a/gfT7zZwULXuuxdLnvR21DDeMBaTbkgbai9uvAa7ne1liIgGFzbv+Bas4HDVrygxIxuAnP5Qgc3648IJkZ0QEXPF+O9f0n5+QIvGCxkAUVx+5K6KIs+SoeWXnAopELmoGSjUpFtJbagXK82HfdqpuUxT2Tnuef0/14SzVE/vNleBNu2ZbyrSAaah8tEBofkPJUBFY+YQcfZNM5Dgrw3i+Bpmpq/gpdg5w== diff --git a/tools/testing/selftests/net/ovpn/json/peer0-float.json b/tools/testing/selftests/net/ovpn/json/peer0-float.json new file mode 100644 index 000000000000..682fa58ad4ea --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer0-float.json @@ -0,0 +1,9 @@ +{"name": "peer-float-ntf", "msg": {"ifindex": 0, "peer": {"id": 1, "remote-ipv4": "10.10.1.3", "remote-port": 1}}} +{"name": "peer-float-ntf", "msg": {"ifindex": 0, "peer": {"id": 2, "remote-ipv4": "10.10.2.3", "remote-port": 1}}} +{"name": "peer-float-ntf", "msg": {"ifindex": 0, "peer": {"id": 3, "remote-ipv4": "10.10.3.3", "remote-port": 1}}} +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "userspace", "id": 1}}} +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "userspace", "id": 2}}} +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 3}}} +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 4}}} +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 5}}} +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 6}}} diff --git a/tools/testing/selftests/net/ovpn/json/peer0-symm-float.json b/tools/testing/selftests/net/ovpn/json/peer0-symm-float.json new file mode 120000 index 000000000000..e31a5bd59863 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer0-symm-float.json @@ -0,0 +1 @@ +peer0-float.json
\ No newline at end of file diff --git a/tools/testing/selftests/net/ovpn/json/peer0-symm.json b/tools/testing/selftests/net/ovpn/json/peer0-symm.json new file mode 120000 index 000000000000..57a163048eed --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer0-symm.json @@ -0,0 +1 @@ +peer0.json
\ No newline at end of file diff --git a/tools/testing/selftests/net/ovpn/json/peer0.json b/tools/testing/selftests/net/ovpn/json/peer0.json new file mode 100644 index 000000000000..7c46a33d5ecd --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer0.json @@ -0,0 +1,6 @@ +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "userspace", "id": 1}}} +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "userspace", "id": 2}}} +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 3}}} +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 4}}} +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 5}}} +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 6}}} diff --git a/tools/testing/selftests/net/ovpn/json/peer1-float.json b/tools/testing/selftests/net/ovpn/json/peer1-float.json new file mode 120000 index 000000000000..d28c328d1452 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer1-float.json @@ -0,0 +1 @@ +peer1.json
\ No newline at end of file diff --git a/tools/testing/selftests/net/ovpn/json/peer1-symm-float.json b/tools/testing/selftests/net/ovpn/json/peer1-symm-float.json new file mode 120000 index 000000000000..b3615dcc523d --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer1-symm-float.json @@ -0,0 +1 @@ +peer1-symm.json
\ No newline at end of file diff --git a/tools/testing/selftests/net/ovpn/json/peer1-symm.json b/tools/testing/selftests/net/ovpn/json/peer1-symm.json new file mode 100644 index 000000000000..5da4ea9d51fb --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer1-symm.json @@ -0,0 +1 @@ +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "userspace", "id": 1}}} diff --git a/tools/testing/selftests/net/ovpn/json/peer1.json b/tools/testing/selftests/net/ovpn/json/peer1.json new file mode 100644 index 000000000000..1009d26dc14a --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer1.json @@ -0,0 +1 @@ +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "userspace", "id": 10}}} diff --git a/tools/testing/selftests/net/ovpn/json/peer2-float.json b/tools/testing/selftests/net/ovpn/json/peer2-float.json new file mode 120000 index 000000000000..b9f09980aaa0 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer2-float.json @@ -0,0 +1 @@ +peer2.json
\ No newline at end of file diff --git a/tools/testing/selftests/net/ovpn/json/peer2-symm-float.json b/tools/testing/selftests/net/ovpn/json/peer2-symm-float.json new file mode 120000 index 000000000000..28a895cb5170 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer2-symm-float.json @@ -0,0 +1 @@ +peer2-symm.json
\ No newline at end of file diff --git a/tools/testing/selftests/net/ovpn/json/peer2-symm.json b/tools/testing/selftests/net/ovpn/json/peer2-symm.json new file mode 100644 index 000000000000..8f6db4f8c2ac --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer2-symm.json @@ -0,0 +1 @@ +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "userspace", "id": 2}}} diff --git a/tools/testing/selftests/net/ovpn/json/peer2.json b/tools/testing/selftests/net/ovpn/json/peer2.json new file mode 100644 index 000000000000..44e9fad2b622 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer2.json @@ -0,0 +1 @@ +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "userspace", "id": 11}}} diff --git a/tools/testing/selftests/net/ovpn/json/peer3-float.json b/tools/testing/selftests/net/ovpn/json/peer3-float.json new file mode 120000 index 000000000000..2700b55bcf2e --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer3-float.json @@ -0,0 +1 @@ +peer3.json
\ No newline at end of file diff --git a/tools/testing/selftests/net/ovpn/json/peer3-symm-float.json b/tools/testing/selftests/net/ovpn/json/peer3-symm-float.json new file mode 120000 index 000000000000..ee8b9719c2fd --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer3-symm-float.json @@ -0,0 +1 @@ +peer3-symm.json
\ No newline at end of file diff --git a/tools/testing/selftests/net/ovpn/json/peer3-symm.json b/tools/testing/selftests/net/ovpn/json/peer3-symm.json new file mode 100644 index 000000000000..bdabd6fa2e64 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer3-symm.json @@ -0,0 +1 @@ +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 3}}} diff --git a/tools/testing/selftests/net/ovpn/json/peer3.json b/tools/testing/selftests/net/ovpn/json/peer3.json new file mode 100644 index 000000000000..d4be8ba130ae --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer3.json @@ -0,0 +1 @@ +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 12}}} diff --git a/tools/testing/selftests/net/ovpn/json/peer4-float.json b/tools/testing/selftests/net/ovpn/json/peer4-float.json new file mode 120000 index 000000000000..460f6c14cd60 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer4-float.json @@ -0,0 +1 @@ +peer4.json
\ No newline at end of file diff --git a/tools/testing/selftests/net/ovpn/json/peer4-symm-float.json b/tools/testing/selftests/net/ovpn/json/peer4-symm-float.json new file mode 120000 index 000000000000..7d34ff7305da --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer4-symm-float.json @@ -0,0 +1 @@ +peer4-symm.json
\ No newline at end of file diff --git a/tools/testing/selftests/net/ovpn/json/peer4-symm.json b/tools/testing/selftests/net/ovpn/json/peer4-symm.json new file mode 100644 index 000000000000..c3734bb9251b --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer4-symm.json @@ -0,0 +1 @@ +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 4}}} diff --git a/tools/testing/selftests/net/ovpn/json/peer4.json b/tools/testing/selftests/net/ovpn/json/peer4.json new file mode 100644 index 000000000000..67d27e2d48ac --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer4.json @@ -0,0 +1 @@ +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 13}}} diff --git a/tools/testing/selftests/net/ovpn/json/peer5-float.json b/tools/testing/selftests/net/ovpn/json/peer5-float.json new file mode 120000 index 000000000000..0f725c50ce19 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer5-float.json @@ -0,0 +1 @@ +peer5.json
\ No newline at end of file diff --git a/tools/testing/selftests/net/ovpn/json/peer5-symm-float.json b/tools/testing/selftests/net/ovpn/json/peer5-symm-float.json new file mode 120000 index 000000000000..afc0f5f9f13b --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer5-symm-float.json @@ -0,0 +1 @@ +peer5-symm.json
\ No newline at end of file diff --git a/tools/testing/selftests/net/ovpn/json/peer5-symm.json b/tools/testing/selftests/net/ovpn/json/peer5-symm.json new file mode 100644 index 000000000000..46c4a348299d --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer5-symm.json @@ -0,0 +1 @@ +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 5}}} diff --git a/tools/testing/selftests/net/ovpn/json/peer5.json b/tools/testing/selftests/net/ovpn/json/peer5.json new file mode 100644 index 000000000000..ecd9bd0b2f37 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer5.json @@ -0,0 +1 @@ +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 14}}} diff --git a/tools/testing/selftests/net/ovpn/json/peer6-float.json b/tools/testing/selftests/net/ovpn/json/peer6-float.json new file mode 120000 index 000000000000..4d9ded3e0a84 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer6-float.json @@ -0,0 +1 @@ +peer6.json
\ No newline at end of file diff --git a/tools/testing/selftests/net/ovpn/json/peer6-symm-float.json b/tools/testing/selftests/net/ovpn/json/peer6-symm-float.json new file mode 120000 index 000000000000..e39203204d8c --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer6-symm-float.json @@ -0,0 +1 @@ +peer6-symm.json
\ No newline at end of file diff --git a/tools/testing/selftests/net/ovpn/json/peer6-symm.json b/tools/testing/selftests/net/ovpn/json/peer6-symm.json new file mode 100644 index 000000000000..aa30f2cff625 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer6-symm.json @@ -0,0 +1 @@ +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 6}}} diff --git a/tools/testing/selftests/net/ovpn/json/peer6.json b/tools/testing/selftests/net/ovpn/json/peer6.json new file mode 100644 index 000000000000..7fded29c5804 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer6.json @@ -0,0 +1 @@ +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 15}}} diff --git a/tools/testing/selftests/net/ovpn/ovpn-cli.c b/tools/testing/selftests/net/ovpn/ovpn-cli.c new file mode 100644 index 000000000000..d40953375c86 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/ovpn-cli.c @@ -0,0 +1,2467 @@ +// SPDX-License-Identifier: GPL-2.0 +/* OpenVPN data channel accelerator + * + * Copyright (C) 2020-2025 OpenVPN, Inc. + * + * Author: Antonio Quartulli <antonio@openvpn.net> + */ + +#include <stdint.h> +#include <stdio.h> +#include <inttypes.h> +#include <stdbool.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <arpa/inet.h> +#include <net/if.h> +#include <netinet/in.h> +#include <time.h> + +#include <linux/ovpn.h> +#include <linux/types.h> +#include <linux/netlink.h> + +#include <netlink/socket.h> +#include <netlink/netlink.h> +#include <netlink/genl/genl.h> +#include <netlink/genl/family.h> +#include <netlink/genl/ctrl.h> + +#include <mbedtls/base64.h> +#include <mbedtls/error.h> + +#include <sys/socket.h> + +#include "kselftest.h" + +/* defines to make checkpatch happy */ +#define strscpy strncpy + +/* libnl < 3.5.0 does not set the NLA_F_NESTED on its own, therefore we + * have to explicitly do it to prevent the kernel from failing upon + * parsing of the message + */ +#define nla_nest_start(_msg, _type) \ + nla_nest_start(_msg, (_type) | NLA_F_NESTED) + +/* libnl < 3.11.0 does not implement nla_get_uint() */ +uint64_t ovpn_nla_get_uint(struct nlattr *attr) +{ + if (nla_len(attr) == sizeof(uint32_t)) + return nla_get_u32(attr); + else + return nla_get_u64(attr); +} + +typedef int (*ovpn_nl_cb)(struct nl_msg *msg, void *arg); + +enum ovpn_key_direction { + KEY_DIR_IN = 0, + KEY_DIR_OUT, +}; + +#define KEY_LEN (256 / 8) +#define NONCE_LEN 8 + +#define PEER_ID_UNDEF 0x00FFFFFF +#define MAX_PEERS 10 + +struct nl_ctx { + struct nl_sock *nl_sock; + struct nl_msg *nl_msg; + struct nl_cb *nl_cb; + + int ovpn_dco_id; +}; + +enum ovpn_cmd { + CMD_INVALID, + CMD_NEW_IFACE, + CMD_DEL_IFACE, + CMD_LISTEN, + CMD_CONNECT, + CMD_NEW_PEER, + CMD_NEW_MULTI_PEER, + CMD_SET_PEER, + CMD_DEL_PEER, + CMD_GET_PEER, + CMD_NEW_KEY, + CMD_DEL_KEY, + CMD_GET_KEY, + CMD_SWAP_KEYS, + CMD_LISTEN_MCAST, +}; + +struct ovpn_ctx { + enum ovpn_cmd cmd; + + __u8 key_enc[KEY_LEN]; + __u8 key_dec[KEY_LEN]; + __u8 nonce[NONCE_LEN]; + + enum ovpn_cipher_alg cipher; + + sa_family_t sa_family; + + unsigned long peer_id, tx_id; + unsigned long lport; + + union { + struct sockaddr_in in4; + struct sockaddr_in6 in6; + } remote; + + union { + struct sockaddr_in in4; + struct sockaddr_in6 in6; + } peer_ip; + + bool peer_ip_set; + + unsigned int ifindex; + char ifname[IFNAMSIZ]; + enum ovpn_mode mode; + bool mode_set; + + int socket; + int cli_sockets[MAX_PEERS]; + + __u32 keepalive_interval; + __u32 keepalive_timeout; + + enum ovpn_key_direction key_dir; + enum ovpn_key_slot key_slot; + int key_id; + + uint32_t mark; + bool asymm_id; + + const char *peers_file; +}; + +static int ovpn_nl_recvmsgs(struct nl_ctx *ctx) +{ + int ret; + + ret = nl_recvmsgs(ctx->nl_sock, ctx->nl_cb); + + switch (ret) { + case -NLE_INTR: + fprintf(stderr, + "netlink received interrupt due to signal - ignoring\n"); + break; + case -NLE_NOMEM: + fprintf(stderr, "netlink out of memory error\n"); + break; + case -NLE_AGAIN: + fprintf(stderr, + "netlink reports blocking read - aborting wait\n"); + break; + default: + if (ret) + fprintf(stderr, "netlink reports error (%d): %s\n", + ret, nl_geterror(-ret)); + break; + } + + return ret; +} + +static struct nl_ctx *nl_ctx_alloc_flags(struct ovpn_ctx *ovpn, int cmd, + int flags) +{ + struct nl_ctx *ctx; + int err, ret; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) + return NULL; + + ctx->nl_sock = nl_socket_alloc(); + if (!ctx->nl_sock) { + fprintf(stderr, "cannot allocate netlink socket\n"); + goto err_free; + } + + nl_socket_set_buffer_size(ctx->nl_sock, 8192, 8192); + + ret = genl_connect(ctx->nl_sock); + if (ret) { + fprintf(stderr, "cannot connect to generic netlink: %s\n", + nl_geterror(ret)); + goto err_sock; + } + + /* enable Extended ACK for detailed error reporting */ + err = 1; + setsockopt(nl_socket_get_fd(ctx->nl_sock), SOL_NETLINK, NETLINK_EXT_ACK, + &err, sizeof(err)); + + ctx->ovpn_dco_id = genl_ctrl_resolve(ctx->nl_sock, OVPN_FAMILY_NAME); + if (ctx->ovpn_dco_id < 0) { + fprintf(stderr, "cannot find ovpn_dco netlink component: %d\n", + ctx->ovpn_dco_id); + goto err_free; + } + + ctx->nl_msg = nlmsg_alloc(); + if (!ctx->nl_msg) { + fprintf(stderr, "cannot allocate netlink message\n"); + goto err_sock; + } + + ctx->nl_cb = nl_cb_alloc(NL_CB_DEFAULT); + if (!ctx->nl_cb) { + fprintf(stderr, "failed to allocate netlink callback\n"); + goto err_msg; + } + + nl_socket_set_cb(ctx->nl_sock, ctx->nl_cb); + + genlmsg_put(ctx->nl_msg, 0, 0, ctx->ovpn_dco_id, 0, flags, cmd, 0); + + if (ovpn->ifindex > 0) + NLA_PUT_U32(ctx->nl_msg, OVPN_A_IFINDEX, ovpn->ifindex); + + return ctx; +nla_put_failure: +err_msg: + nlmsg_free(ctx->nl_msg); +err_sock: + nl_socket_free(ctx->nl_sock); +err_free: + free(ctx); + return NULL; +} + +static struct nl_ctx *nl_ctx_alloc(struct ovpn_ctx *ovpn, int cmd) +{ + return nl_ctx_alloc_flags(ovpn, cmd, 0); +} + +static void nl_ctx_free(struct nl_ctx *ctx) +{ + if (!ctx) + return; + + nl_socket_free(ctx->nl_sock); + nlmsg_free(ctx->nl_msg); + nl_cb_put(ctx->nl_cb); + free(ctx); +} + +static int ovpn_nl_cb_error(struct sockaddr_nl (*nla)__always_unused, + struct nlmsgerr *err, void *arg) +{ + struct nlmsghdr *nlh = (struct nlmsghdr *)err - 1; + struct nlattr *tb_msg[NLMSGERR_ATTR_MAX + 1]; + int len = nlh->nlmsg_len; + struct nlattr *attrs; + int *ret = arg; + int ack_len = sizeof(*nlh) + sizeof(int) + sizeof(*nlh); + + *ret = err->error; + + if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS)) + return NL_STOP; + + if (!(nlh->nlmsg_flags & NLM_F_CAPPED)) + ack_len += err->msg.nlmsg_len - sizeof(*nlh); + + if (len <= ack_len) + return NL_STOP; + + attrs = (void *)((uint8_t *)nlh + ack_len); + len -= ack_len; + + nla_parse(tb_msg, NLMSGERR_ATTR_MAX, attrs, len, NULL); + if (tb_msg[NLMSGERR_ATTR_MSG]) { + len = strnlen((char *)nla_data(tb_msg[NLMSGERR_ATTR_MSG]), + nla_len(tb_msg[NLMSGERR_ATTR_MSG])); + fprintf(stderr, "kernel error: %*s\n", len, + (char *)nla_data(tb_msg[NLMSGERR_ATTR_MSG])); + } + + if (tb_msg[NLMSGERR_ATTR_MISS_NEST]) { + fprintf(stderr, "missing required nesting type %u\n", + nla_get_u32(tb_msg[NLMSGERR_ATTR_MISS_NEST])); + } + + if (tb_msg[NLMSGERR_ATTR_MISS_TYPE]) { + fprintf(stderr, "missing required attribute type %u\n", + nla_get_u32(tb_msg[NLMSGERR_ATTR_MISS_TYPE])); + } + + return NL_STOP; +} + +static int ovpn_nl_cb_finish(struct nl_msg (*msg)__always_unused, + void *arg) +{ + int *status = arg; + + *status = 0; + return NL_SKIP; +} + +static int ovpn_nl_cb_ack(struct nl_msg (*msg)__always_unused, + void *arg) +{ + int *status = arg; + + *status = 0; + return NL_STOP; +} + +static int ovpn_nl_msg_send(struct nl_ctx *ctx, ovpn_nl_cb cb) +{ + int status = 1; + + nl_cb_err(ctx->nl_cb, NL_CB_CUSTOM, ovpn_nl_cb_error, &status); + nl_cb_set(ctx->nl_cb, NL_CB_FINISH, NL_CB_CUSTOM, ovpn_nl_cb_finish, + &status); + nl_cb_set(ctx->nl_cb, NL_CB_ACK, NL_CB_CUSTOM, ovpn_nl_cb_ack, &status); + + if (cb) + nl_cb_set(ctx->nl_cb, NL_CB_VALID, NL_CB_CUSTOM, cb, ctx); + + nl_send_auto_complete(ctx->nl_sock, ctx->nl_msg); + + while (status == 1) + ovpn_nl_recvmsgs(ctx); + + if (status < 0) + fprintf(stderr, "failed to send netlink message: %s (%d)\n", + strerror(-status), status); + + return status; +} + +static int ovpn_parse_key(const char *file, struct ovpn_ctx *ctx) +{ + int idx_enc, idx_dec, ret = -1; + unsigned char *ckey = NULL; + __u8 *bkey = NULL; + size_t olen = 0; + long ckey_len; + FILE *fp; + + fp = fopen(file, "r"); + if (!fp) { + fprintf(stderr, "cannot open: %s\n", file); + return -1; + } + + /* get file size */ + fseek(fp, 0L, SEEK_END); + ckey_len = ftell(fp); + rewind(fp); + + /* if the file is longer, let's just read a portion */ + if (ckey_len > 256) + ckey_len = 256; + + ckey = malloc(ckey_len); + if (!ckey) + goto err; + + ret = fread(ckey, 1, ckey_len, fp); + if (ret != ckey_len) { + fprintf(stderr, + "couldn't read enough data from key file: %dbytes read\n", + ret); + goto err; + } + + olen = 0; + ret = mbedtls_base64_decode(NULL, 0, &olen, ckey, ckey_len); + if (ret != MBEDTLS_ERR_BASE64_BUFFER_TOO_SMALL) { + char buf[256]; + + mbedtls_strerror(ret, buf, sizeof(buf)); + fprintf(stderr, "unexpected base64 error1: %s (%d)\n", buf, + ret); + + goto err; + } + + bkey = malloc(olen); + if (!bkey) { + fprintf(stderr, "cannot allocate binary key buffer\n"); + goto err; + } + + ret = mbedtls_base64_decode(bkey, olen, &olen, ckey, ckey_len); + if (ret) { + char buf[256]; + + mbedtls_strerror(ret, buf, sizeof(buf)); + fprintf(stderr, "unexpected base64 error2: %s (%d)\n", buf, + ret); + + goto err; + } + + if (olen < 2 * KEY_LEN + NONCE_LEN) { + fprintf(stderr, + "not enough data in key file, found %zdB but needs %dB\n", + olen, 2 * KEY_LEN + NONCE_LEN); + goto err; + } + + switch (ctx->key_dir) { + case KEY_DIR_IN: + idx_enc = 0; + idx_dec = 1; + break; + case KEY_DIR_OUT: + idx_enc = 1; + idx_dec = 0; + break; + default: + goto err; + } + + memcpy(ctx->key_enc, bkey + KEY_LEN * idx_enc, KEY_LEN); + memcpy(ctx->key_dec, bkey + KEY_LEN * idx_dec, KEY_LEN); + memcpy(ctx->nonce, bkey + 2 * KEY_LEN, NONCE_LEN); + + ret = 0; + +err: + fclose(fp); + free(bkey); + free(ckey); + + return ret; +} + +static int ovpn_parse_cipher(const char *cipher, struct ovpn_ctx *ctx) +{ + if (strcmp(cipher, "aes") == 0) + ctx->cipher = OVPN_CIPHER_ALG_AES_GCM; + else if (strcmp(cipher, "chachapoly") == 0) + ctx->cipher = OVPN_CIPHER_ALG_CHACHA20_POLY1305; + else if (strcmp(cipher, "none") == 0) + ctx->cipher = OVPN_CIPHER_ALG_NONE; + else + return -ENOTSUP; + + return 0; +} + +static int ovpn_parse_key_direction(const char *dir, struct ovpn_ctx *ctx) +{ + int in_dir; + + in_dir = strtoll(dir, NULL, 10); + switch (in_dir) { + case KEY_DIR_IN: + case KEY_DIR_OUT: + ctx->key_dir = in_dir; + break; + default: + fprintf(stderr, + "invalid key direction provided. Can be 0 or 1 only\n"); + return -1; + } + + return 0; +} + +static int ovpn_socket(struct ovpn_ctx *ctx, sa_family_t family, int proto) +{ + struct sockaddr_storage local_sock = { 0 }; + struct sockaddr_in6 *in6; + struct sockaddr_in *in; + int ret, s, sock_type; + size_t sock_len; + + if (proto == IPPROTO_UDP) + sock_type = SOCK_DGRAM; + else if (proto == IPPROTO_TCP) + sock_type = SOCK_STREAM; + else + return -EINVAL; + + s = socket(family, sock_type, 0); + if (s < 0) { + perror("cannot create socket"); + return -1; + } + + switch (family) { + case AF_INET: + in = (struct sockaddr_in *)&local_sock; + in->sin_family = family; + in->sin_port = htons(ctx->lport); + in->sin_addr.s_addr = htonl(INADDR_ANY); + sock_len = sizeof(*in); + break; + case AF_INET6: + in6 = (struct sockaddr_in6 *)&local_sock; + in6->sin6_family = family; + in6->sin6_port = htons(ctx->lport); + in6->sin6_addr = in6addr_any; + sock_len = sizeof(*in6); + break; + default: + return -1; + } + + int opt = 1; + + ret = setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)); + + if (ret < 0) { + perror("setsockopt for SO_REUSEADDR"); + return ret; + } + + ret = setsockopt(s, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)); + if (ret < 0) { + perror("setsockopt for SO_REUSEPORT"); + return ret; + } + + if (ctx->mark != 0) { + ret = setsockopt(s, SOL_SOCKET, SO_MARK, (void *)&ctx->mark, + sizeof(ctx->mark)); + if (ret < 0) { + perror("setsockopt for SO_MARK"); + return ret; + } + } + + if (family == AF_INET6) { + opt = 0; + if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, &opt, + sizeof(opt))) { + perror("failed to set IPV6_V6ONLY"); + return -1; + } + } + + ret = bind(s, (struct sockaddr *)&local_sock, sock_len); + if (ret < 0) { + perror("cannot bind socket"); + goto err_socket; + } + + ctx->socket = s; + ctx->sa_family = family; + return 0; + +err_socket: + close(s); + return -1; +} + +static int ovpn_udp_socket(struct ovpn_ctx *ctx, sa_family_t family) +{ + return ovpn_socket(ctx, family, IPPROTO_UDP); +} + +static int ovpn_listen(struct ovpn_ctx *ctx, sa_family_t family) +{ + int ret; + + ret = ovpn_socket(ctx, family, IPPROTO_TCP); + if (ret < 0) + return ret; + + ret = listen(ctx->socket, 10); + if (ret < 0) { + perror("listen"); + close(ctx->socket); + return -1; + } + + return 0; +} + +static int ovpn_accept(struct ovpn_ctx *ctx) +{ + socklen_t socklen; + int ret; + + socklen = sizeof(ctx->remote); + ret = accept(ctx->socket, (struct sockaddr *)&ctx->remote, &socklen); + if (ret < 0) { + perror("accept"); + goto err; + } + + fprintf(stderr, "Connection received!\n"); + + switch (socklen) { + case sizeof(struct sockaddr_in): + case sizeof(struct sockaddr_in6): + break; + default: + fprintf(stderr, "error: expecting IPv4 or IPv6 connection\n"); + close(ret); + ret = -EINVAL; + goto err; + } + + return ret; +err: + close(ctx->socket); + return ret; +} + +static int ovpn_connect(struct ovpn_ctx *ovpn) +{ + socklen_t socklen; + int s, ret; + + s = socket(ovpn->remote.in4.sin_family, SOCK_STREAM, 0); + if (s < 0) { + perror("cannot create socket"); + return -1; + } + + switch (ovpn->remote.in4.sin_family) { + case AF_INET: + socklen = sizeof(struct sockaddr_in); + break; + case AF_INET6: + socklen = sizeof(struct sockaddr_in6); + break; + default: + return -EOPNOTSUPP; + } + + ret = connect(s, (struct sockaddr *)&ovpn->remote, socklen); + if (ret < 0) { + perror("connect"); + goto err; + } + + fprintf(stderr, "connected\n"); + + ovpn->socket = s; + + return 0; +err: + close(s); + return ret; +} + +static int ovpn_new_peer(struct ovpn_ctx *ovpn, bool is_tcp) +{ + struct nlattr *attr; + struct nl_ctx *ctx; + int ret = -1; + + ctx = nl_ctx_alloc(ovpn, OVPN_CMD_PEER_NEW); + if (!ctx) + return -ENOMEM; + + attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id); + if (ovpn->asymm_id) + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_TX_ID, ovpn->tx_id); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_SOCKET, ovpn->socket); + + if (!is_tcp) { + switch (ovpn->remote.in4.sin_family) { + case AF_INET: + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_REMOTE_IPV4, + ovpn->remote.in4.sin_addr.s_addr); + NLA_PUT_U16(ctx->nl_msg, OVPN_A_PEER_REMOTE_PORT, + ovpn->remote.in4.sin_port); + break; + case AF_INET6: + NLA_PUT(ctx->nl_msg, OVPN_A_PEER_REMOTE_IPV6, + sizeof(ovpn->remote.in6.sin6_addr), + &ovpn->remote.in6.sin6_addr); + NLA_PUT_U32(ctx->nl_msg, + OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID, + ovpn->remote.in6.sin6_scope_id); + NLA_PUT_U16(ctx->nl_msg, OVPN_A_PEER_REMOTE_PORT, + ovpn->remote.in6.sin6_port); + break; + default: + fprintf(stderr, + "Invalid family for remote socket address\n"); + goto nla_put_failure; + } + } + + if (ovpn->peer_ip_set) { + switch (ovpn->peer_ip.in4.sin_family) { + case AF_INET: + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_VPN_IPV4, + ovpn->peer_ip.in4.sin_addr.s_addr); + break; + case AF_INET6: + NLA_PUT(ctx->nl_msg, OVPN_A_PEER_VPN_IPV6, + sizeof(struct in6_addr), + &ovpn->peer_ip.in6.sin6_addr); + break; + default: + fprintf(stderr, "Invalid family for peer address\n"); + goto nla_put_failure; + } + } + + nla_nest_end(ctx->nl_msg, attr); + + ret = ovpn_nl_msg_send(ctx, NULL); +nla_put_failure: + nl_ctx_free(ctx); + return ret; +} + +static int ovpn_set_peer(struct ovpn_ctx *ovpn) +{ + struct nlattr *attr; + struct nl_ctx *ctx; + int ret = -1; + + ctx = nl_ctx_alloc(ovpn, OVPN_CMD_PEER_SET); + if (!ctx) + return -ENOMEM; + + attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_KEEPALIVE_INTERVAL, + ovpn->keepalive_interval); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_KEEPALIVE_TIMEOUT, + ovpn->keepalive_timeout); + nla_nest_end(ctx->nl_msg, attr); + + ret = ovpn_nl_msg_send(ctx, NULL); +nla_put_failure: + nl_ctx_free(ctx); + return ret; +} + +static int ovpn_del_peer(struct ovpn_ctx *ovpn) +{ + struct nlattr *attr; + struct nl_ctx *ctx; + int ret = -1; + + ctx = nl_ctx_alloc(ovpn, OVPN_CMD_PEER_DEL); + if (!ctx) + return -ENOMEM; + + attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id); + nla_nest_end(ctx->nl_msg, attr); + + ret = ovpn_nl_msg_send(ctx, NULL); +nla_put_failure: + nl_ctx_free(ctx); + return ret; +} + +static int ovpn_handle_peer(struct nl_msg *msg, void (*arg)__always_unused) +{ + struct nlattr *pattrs[OVPN_A_PEER_MAX + 1]; + struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); + struct nlattr *attrs[OVPN_A_MAX + 1]; + __u16 rport = 0, lport = 0; + + nla_parse(attrs, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0), + genlmsg_attrlen(gnlh, 0), NULL); + + if (!attrs[OVPN_A_PEER]) { + fprintf(stderr, "no packet content in netlink message\n"); + return NL_SKIP; + } + + nla_parse(pattrs, OVPN_A_PEER_MAX, nla_data(attrs[OVPN_A_PEER]), + nla_len(attrs[OVPN_A_PEER]), NULL); + + if (pattrs[OVPN_A_PEER_ID]) + fprintf(stderr, "* Peer %u\n", + nla_get_u32(pattrs[OVPN_A_PEER_ID])); + + if (pattrs[OVPN_A_PEER_TX_ID]) + fprintf(stderr, "\tTX peer ID %u\n", + nla_get_u32(pattrs[OVPN_A_PEER_TX_ID])); + + if (pattrs[OVPN_A_PEER_SOCKET_NETNSID]) + fprintf(stderr, "\tsocket NetNS ID: %d\n", + nla_get_s32(pattrs[OVPN_A_PEER_SOCKET_NETNSID])); + + if (pattrs[OVPN_A_PEER_VPN_IPV4]) { + char buf[INET_ADDRSTRLEN]; + + inet_ntop(AF_INET, nla_data(pattrs[OVPN_A_PEER_VPN_IPV4]), + buf, sizeof(buf)); + fprintf(stderr, "\tVPN IPv4: %s\n", buf); + } + + if (pattrs[OVPN_A_PEER_VPN_IPV6]) { + char buf[INET6_ADDRSTRLEN]; + + inet_ntop(AF_INET6, nla_data(pattrs[OVPN_A_PEER_VPN_IPV6]), + buf, sizeof(buf)); + fprintf(stderr, "\tVPN IPv6: %s\n", buf); + } + + if (pattrs[OVPN_A_PEER_LOCAL_PORT]) + lport = ntohs(nla_get_u16(pattrs[OVPN_A_PEER_LOCAL_PORT])); + + if (pattrs[OVPN_A_PEER_REMOTE_PORT]) + rport = ntohs(nla_get_u16(pattrs[OVPN_A_PEER_REMOTE_PORT])); + + if (pattrs[OVPN_A_PEER_REMOTE_IPV6]) { + void *ip = pattrs[OVPN_A_PEER_REMOTE_IPV6]; + char buf[INET6_ADDRSTRLEN]; + int scope_id = -1; + + if (pattrs[OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID]) { + void *p = pattrs[OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID]; + + scope_id = nla_get_u32(p); + } + + inet_ntop(AF_INET6, nla_data(ip), buf, sizeof(buf)); + fprintf(stderr, "\tRemote: %s:%hu (scope-id: %u)\n", buf, rport, + scope_id); + + if (pattrs[OVPN_A_PEER_LOCAL_IPV6]) { + void *ip = pattrs[OVPN_A_PEER_LOCAL_IPV6]; + + inet_ntop(AF_INET6, nla_data(ip), buf, sizeof(buf)); + fprintf(stderr, "\tLocal: %s:%hu\n", buf, lport); + } + } + + if (pattrs[OVPN_A_PEER_REMOTE_IPV4]) { + void *ip = pattrs[OVPN_A_PEER_REMOTE_IPV4]; + char buf[INET_ADDRSTRLEN]; + + inet_ntop(AF_INET, nla_data(ip), buf, sizeof(buf)); + fprintf(stderr, "\tRemote: %s:%hu\n", buf, rport); + + if (pattrs[OVPN_A_PEER_LOCAL_IPV4]) { + void *p = pattrs[OVPN_A_PEER_LOCAL_IPV4]; + + inet_ntop(AF_INET, nla_data(p), buf, sizeof(buf)); + fprintf(stderr, "\tLocal: %s:%hu\n", buf, lport); + } + } + + if (pattrs[OVPN_A_PEER_KEEPALIVE_INTERVAL]) { + void *p = pattrs[OVPN_A_PEER_KEEPALIVE_INTERVAL]; + + fprintf(stderr, "\tKeepalive interval: %u sec\n", + nla_get_u32(p)); + } + + if (pattrs[OVPN_A_PEER_KEEPALIVE_TIMEOUT]) + fprintf(stderr, "\tKeepalive timeout: %u sec\n", + nla_get_u32(pattrs[OVPN_A_PEER_KEEPALIVE_TIMEOUT])); + + if (pattrs[OVPN_A_PEER_VPN_RX_BYTES]) + fprintf(stderr, "\tVPN RX bytes: %" PRIu64 "\n", + ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_RX_BYTES])); + + if (pattrs[OVPN_A_PEER_VPN_TX_BYTES]) + fprintf(stderr, "\tVPN TX bytes: %" PRIu64 "\n", + ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_TX_BYTES])); + + if (pattrs[OVPN_A_PEER_VPN_RX_PACKETS]) + fprintf(stderr, "\tVPN RX packets: %" PRIu64 "\n", + ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_RX_PACKETS])); + + if (pattrs[OVPN_A_PEER_VPN_TX_PACKETS]) + fprintf(stderr, "\tVPN TX packets: %" PRIu64 "\n", + ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_TX_PACKETS])); + + if (pattrs[OVPN_A_PEER_LINK_RX_BYTES]) + fprintf(stderr, "\tLINK RX bytes: %" PRIu64 "\n", + ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_RX_BYTES])); + + if (pattrs[OVPN_A_PEER_LINK_TX_BYTES]) + fprintf(stderr, "\tLINK TX bytes: %" PRIu64 "\n", + ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_TX_BYTES])); + + if (pattrs[OVPN_A_PEER_LINK_RX_PACKETS]) + fprintf(stderr, "\tLINK RX packets: %" PRIu64 "\n", + ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_RX_PACKETS])); + + if (pattrs[OVPN_A_PEER_LINK_TX_PACKETS]) + fprintf(stderr, "\tLINK TX packets: %" PRIu64 "\n", + ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_TX_PACKETS])); + + return NL_SKIP; +} + +static int ovpn_get_peer(struct ovpn_ctx *ovpn) +{ + int flags = 0, ret = -1; + struct nlattr *attr; + struct nl_ctx *ctx; + + if (ovpn->peer_id == PEER_ID_UNDEF) + flags = NLM_F_DUMP; + + ctx = nl_ctx_alloc_flags(ovpn, OVPN_CMD_PEER_GET, flags); + if (!ctx) + return -ENOMEM; + + if (ovpn->peer_id != PEER_ID_UNDEF) { + attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id); + nla_nest_end(ctx->nl_msg, attr); + } + + ret = ovpn_nl_msg_send(ctx, ovpn_handle_peer); +nla_put_failure: + nl_ctx_free(ctx); + return ret; +} + +static int ovpn_new_key(struct ovpn_ctx *ovpn) +{ + struct nlattr *keyconf, *key_dir; + struct nl_ctx *ctx; + int ret = -1; + + ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_NEW); + if (!ctx) + return -ENOMEM; + + keyconf = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_SLOT, ovpn->key_slot); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_KEY_ID, ovpn->key_id); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_CIPHER_ALG, ovpn->cipher); + + key_dir = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF_ENCRYPT_DIR); + NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_CIPHER_KEY, KEY_LEN, ovpn->key_enc); + NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_NONCE_TAIL, NONCE_LEN, ovpn->nonce); + nla_nest_end(ctx->nl_msg, key_dir); + + key_dir = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF_DECRYPT_DIR); + NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_CIPHER_KEY, KEY_LEN, ovpn->key_dec); + NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_NONCE_TAIL, NONCE_LEN, ovpn->nonce); + nla_nest_end(ctx->nl_msg, key_dir); + + nla_nest_end(ctx->nl_msg, keyconf); + + ret = ovpn_nl_msg_send(ctx, NULL); +nla_put_failure: + nl_ctx_free(ctx); + return ret; +} + +static int ovpn_del_key(struct ovpn_ctx *ovpn) +{ + struct nlattr *keyconf; + struct nl_ctx *ctx; + int ret = -1; + + ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_DEL); + if (!ctx) + return -ENOMEM; + + keyconf = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_SLOT, ovpn->key_slot); + nla_nest_end(ctx->nl_msg, keyconf); + + ret = ovpn_nl_msg_send(ctx, NULL); +nla_put_failure: + nl_ctx_free(ctx); + return ret; +} + +static int ovpn_handle_key(struct nl_msg *msg, void (*arg)__always_unused) +{ + struct nlattr *kattrs[OVPN_A_KEYCONF_MAX + 1]; + struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); + struct nlattr *attrs[OVPN_A_MAX + 1]; + + nla_parse(attrs, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0), + genlmsg_attrlen(gnlh, 0), NULL); + + if (!attrs[OVPN_A_KEYCONF]) { + fprintf(stderr, "no packet content in netlink message\n"); + return NL_SKIP; + } + + nla_parse(kattrs, OVPN_A_KEYCONF_MAX, nla_data(attrs[OVPN_A_KEYCONF]), + nla_len(attrs[OVPN_A_KEYCONF]), NULL); + + if (kattrs[OVPN_A_KEYCONF_PEER_ID]) + fprintf(stderr, "* Peer %u\n", + nla_get_u32(kattrs[OVPN_A_KEYCONF_PEER_ID])); + if (kattrs[OVPN_A_KEYCONF_SLOT]) { + fprintf(stderr, "\t- Slot: "); + switch (nla_get_u32(kattrs[OVPN_A_KEYCONF_SLOT])) { + case OVPN_KEY_SLOT_PRIMARY: + fprintf(stderr, "primary\n"); + break; + case OVPN_KEY_SLOT_SECONDARY: + fprintf(stderr, "secondary\n"); + break; + default: + fprintf(stderr, "invalid (%u)\n", + nla_get_u32(kattrs[OVPN_A_KEYCONF_SLOT])); + break; + } + } + if (kattrs[OVPN_A_KEYCONF_KEY_ID]) + fprintf(stderr, "\t- Key ID: %u\n", + nla_get_u32(kattrs[OVPN_A_KEYCONF_KEY_ID])); + if (kattrs[OVPN_A_KEYCONF_CIPHER_ALG]) { + fprintf(stderr, "\t- Cipher: "); + switch (nla_get_u32(kattrs[OVPN_A_KEYCONF_CIPHER_ALG])) { + case OVPN_CIPHER_ALG_NONE: + fprintf(stderr, "none\n"); + break; + case OVPN_CIPHER_ALG_AES_GCM: + fprintf(stderr, "aes-gcm\n"); + break; + case OVPN_CIPHER_ALG_CHACHA20_POLY1305: + fprintf(stderr, "chacha20poly1305\n"); + break; + default: + fprintf(stderr, "invalid (%u)\n", + nla_get_u32(kattrs[OVPN_A_KEYCONF_CIPHER_ALG])); + break; + } + } + + return NL_SKIP; +} + +static int ovpn_get_key(struct ovpn_ctx *ovpn) +{ + struct nlattr *keyconf; + struct nl_ctx *ctx; + int ret = -1; + + ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_GET); + if (!ctx) + return -ENOMEM; + + keyconf = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_SLOT, ovpn->key_slot); + nla_nest_end(ctx->nl_msg, keyconf); + + ret = ovpn_nl_msg_send(ctx, ovpn_handle_key); +nla_put_failure: + nl_ctx_free(ctx); + return ret; +} + +static int ovpn_swap_keys(struct ovpn_ctx *ovpn) +{ + struct nl_ctx *ctx; + struct nlattr *kc; + int ret = -1; + + ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_SWAP); + if (!ctx) + return -ENOMEM; + + kc = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id); + nla_nest_end(ctx->nl_msg, kc); + + ret = ovpn_nl_msg_send(ctx, NULL); +nla_put_failure: + nl_ctx_free(ctx); + return ret; +} + +/* Helper function used to easily add attributes to a rtnl message */ +static int ovpn_addattr(struct nlmsghdr *n, int maxlen, int type, + const void *data, int alen) +{ + int len = RTA_LENGTH(alen); + struct rtattr *rta; + + if ((int)(NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len)) > maxlen) { + fprintf(stderr, "%s: rtnl: message exceeded bound of %d\n", + __func__, maxlen); + return -EMSGSIZE; + } + + rta = nlmsg_tail(n); + rta->rta_type = type; + rta->rta_len = len; + + if (!data) + memset(RTA_DATA(rta), 0, alen); + else + memcpy(RTA_DATA(rta), data, alen); + + n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len); + + return 0; +} + +static struct rtattr *ovpn_nest_start(struct nlmsghdr *msg, size_t max_size, + int attr) +{ + struct rtattr *nest = nlmsg_tail(msg); + + if (ovpn_addattr(msg, max_size, attr, NULL, 0) < 0) + return NULL; + + return nest; +} + +static void ovpn_nest_end(struct nlmsghdr *msg, struct rtattr *nest) +{ + nest->rta_len = (uint8_t *)nlmsg_tail(msg) - (uint8_t *)nest; +} + +#define RT_SNDBUF_SIZE (1024 * 2) +#define RT_RCVBUF_SIZE (1024 * 4) + +/* Open RTNL socket */ +static int ovpn_rt_socket(void) +{ + int sndbuf = RT_SNDBUF_SIZE, rcvbuf = RT_RCVBUF_SIZE, fd; + + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (fd < 0) { + fprintf(stderr, "%s: cannot open netlink socket\n", __func__); + return fd; + } + + if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf, + sizeof(sndbuf)) < 0) { + fprintf(stderr, "%s: SO_SNDBUF\n", __func__); + close(fd); + return -1; + } + + if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf, + sizeof(rcvbuf)) < 0) { + fprintf(stderr, "%s: SO_RCVBUF\n", __func__); + close(fd); + return -1; + } + + return fd; +} + +/* Bind socket to Netlink subsystem */ +static int ovpn_rt_bind(int fd, uint32_t groups) +{ + struct sockaddr_nl local = { 0 }; + socklen_t addr_len; + + local.nl_family = AF_NETLINK; + local.nl_groups = groups; + + if (bind(fd, (struct sockaddr *)&local, sizeof(local)) < 0) { + fprintf(stderr, "%s: cannot bind netlink socket: %d\n", + __func__, errno); + return -errno; + } + + addr_len = sizeof(local); + if (getsockname(fd, (struct sockaddr *)&local, &addr_len) < 0) { + fprintf(stderr, "%s: cannot getsockname: %d\n", __func__, + errno); + return -errno; + } + + if (addr_len != sizeof(local)) { + fprintf(stderr, "%s: wrong address length %d\n", __func__, + addr_len); + return -EINVAL; + } + + if (local.nl_family != AF_NETLINK) { + fprintf(stderr, "%s: wrong address family %d\n", __func__, + local.nl_family); + return -EINVAL; + } + + return 0; +} + +typedef int (*ovpn_parse_reply_cb)(struct nlmsghdr *msg, void *arg); + +/* Send Netlink message and run callback on reply (if specified) */ +static int ovpn_rt_send(struct nlmsghdr *payload, pid_t peer, + unsigned int groups, ovpn_parse_reply_cb cb, + void *arg_cb) +{ + int len, rem_len, fd, ret, rcv_len; + struct sockaddr_nl nladdr = { 0 }; + struct nlmsgerr *err; + struct nlmsghdr *h; + char buf[1024 * 16]; + struct iovec iov = { + .iov_base = payload, + .iov_len = payload->nlmsg_len, + }; + struct msghdr nlmsg = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = &iov, + .msg_iovlen = 1, + }; + + nladdr.nl_family = AF_NETLINK; + nladdr.nl_pid = peer; + nladdr.nl_groups = groups; + + payload->nlmsg_seq = time(NULL); + + /* no need to send reply */ + if (!cb) + payload->nlmsg_flags |= NLM_F_ACK; + + fd = ovpn_rt_socket(); + if (fd < 0) { + fprintf(stderr, "%s: can't open rtnl socket\n", __func__); + return -errno; + } + + ret = ovpn_rt_bind(fd, 0); + if (ret < 0) { + fprintf(stderr, "%s: can't bind rtnl socket\n", __func__); + ret = -errno; + goto out; + } + + ret = sendmsg(fd, &nlmsg, 0); + if (ret < 0) { + fprintf(stderr, "%s: rtnl: error on sendmsg()\n", __func__); + ret = -errno; + goto out; + } + + /* prepare buffer to store RTNL replies */ + memset(buf, 0, sizeof(buf)); + iov.iov_base = buf; + + while (1) { + /* + * iov_len is modified by recvmsg(), therefore has to be initialized before + * using it again + */ + iov.iov_len = sizeof(buf); + rcv_len = recvmsg(fd, &nlmsg, 0); + if (rcv_len < 0) { + if (errno == EINTR || errno == EAGAIN) { + fprintf(stderr, "%s: interrupted call\n", + __func__); + continue; + } + fprintf(stderr, "%s: rtnl: error on recvmsg()\n", + __func__); + ret = -errno; + goto out; + } + + if (rcv_len == 0) { + fprintf(stderr, + "%s: rtnl: socket reached unexpected EOF\n", + __func__); + ret = -EIO; + goto out; + } + + if (nlmsg.msg_namelen != sizeof(nladdr)) { + fprintf(stderr, + "%s: sender address length: %u (expected %zu)\n", + __func__, nlmsg.msg_namelen, sizeof(nladdr)); + ret = -EIO; + goto out; + } + + h = (struct nlmsghdr *)buf; + while (rcv_len >= (int)sizeof(*h)) { + len = h->nlmsg_len; + rem_len = len - sizeof(*h); + + if (rem_len < 0 || len > rcv_len) { + if (nlmsg.msg_flags & MSG_TRUNC) { + fprintf(stderr, "%s: truncated message\n", + __func__); + ret = -EIO; + goto out; + } + fprintf(stderr, "%s: malformed message: len=%d\n", + __func__, len); + ret = -EIO; + goto out; + } + + if (h->nlmsg_type == NLMSG_DONE) { + ret = 0; + goto out; + } + + if (h->nlmsg_type == NLMSG_ERROR) { + err = (struct nlmsgerr *)NLMSG_DATA(h); + if (rem_len < (int)sizeof(struct nlmsgerr)) { + fprintf(stderr, "%s: ERROR truncated\n", + __func__); + ret = -EIO; + goto out; + } + + if (err->error) { + fprintf(stderr, "%s: (%d) %s\n", + __func__, err->error, + strerror(-err->error)); + ret = err->error; + goto out; + } + + ret = 0; + if (cb) { + int r = cb(h, arg_cb); + + if (r <= 0) + ret = r; + } + goto out; + } + + if (cb) { + int r = cb(h, arg_cb); + + if (r <= 0) { + ret = r; + goto out; + } + } else { + fprintf(stderr, "%s: RTNL: unexpected reply\n", + __func__); + } + + rcv_len -= NLMSG_ALIGN(len); + h = (struct nlmsghdr *)((uint8_t *)h + + NLMSG_ALIGN(len)); + } + + if (nlmsg.msg_flags & MSG_TRUNC) { + fprintf(stderr, "%s: message truncated\n", __func__); + continue; + } + + if (rcv_len) { + fprintf(stderr, "%s: rtnl: %d not parsed bytes\n", + __func__, rcv_len); + ret = -1; + goto out; + } + } +out: + close(fd); + + return ret; +} + +struct ovpn_link_req { + struct nlmsghdr n; + struct ifinfomsg i; + char buf[256]; +}; + +static int ovpn_new_iface(struct ovpn_ctx *ovpn) +{ + struct rtattr *linkinfo, *data; + struct ovpn_link_req req = { 0 }; + int ret = -1; + + fprintf(stdout, "Creating interface %s with mode %u\n", ovpn->ifname, + ovpn->mode); + + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(req.i)); + req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL; + req.n.nlmsg_type = RTM_NEWLINK; + + if (ovpn_addattr(&req.n, sizeof(req), IFLA_IFNAME, ovpn->ifname, + strlen(ovpn->ifname) + 1) < 0) + goto err; + + linkinfo = ovpn_nest_start(&req.n, sizeof(req), IFLA_LINKINFO); + if (!linkinfo) + goto err; + + if (ovpn_addattr(&req.n, sizeof(req), IFLA_INFO_KIND, OVPN_FAMILY_NAME, + strlen(OVPN_FAMILY_NAME) + 1) < 0) + goto err; + + if (ovpn->mode_set) { + data = ovpn_nest_start(&req.n, sizeof(req), IFLA_INFO_DATA); + if (!data) + goto err; + + if (ovpn_addattr(&req.n, sizeof(req), IFLA_OVPN_MODE, + &ovpn->mode, sizeof(uint8_t)) < 0) + goto err; + + ovpn_nest_end(&req.n, data); + } + + ovpn_nest_end(&req.n, linkinfo); + + req.i.ifi_family = AF_PACKET; + + ret = ovpn_rt_send(&req.n, 0, 0, NULL, NULL); +err: + return ret; +} + +static int ovpn_del_iface(struct ovpn_ctx *ovpn) +{ + struct ovpn_link_req req = { 0 }; + + fprintf(stdout, "Deleting interface %s ifindex %u\n", ovpn->ifname, + ovpn->ifindex); + + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(req.i)); + req.n.nlmsg_flags = NLM_F_REQUEST; + req.n.nlmsg_type = RTM_DELLINK; + + req.i.ifi_family = AF_PACKET; + req.i.ifi_index = ovpn->ifindex; + + return ovpn_rt_send(&req.n, 0, 0, NULL, NULL); +} + +static int nl_seq_check(struct nl_msg (*msg)__always_unused, + void (*arg)__always_unused) +{ + return NL_OK; +} + +struct mcast_handler_args { + const char *group; + int id; +}; + +static int mcast_family_handler(struct nl_msg *msg, void *arg) +{ + struct mcast_handler_args *grp = arg; + struct nlattr *tb[CTRL_ATTR_MAX + 1]; + struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); + struct nlattr *mcgrp; + int rem_mcgrp; + + nla_parse(tb, CTRL_ATTR_MAX, genlmsg_attrdata(gnlh, 0), + genlmsg_attrlen(gnlh, 0), NULL); + + if (!tb[CTRL_ATTR_MCAST_GROUPS]) + return NL_SKIP; + + nla_for_each_nested(mcgrp, tb[CTRL_ATTR_MCAST_GROUPS], rem_mcgrp) { + struct nlattr *tb_mcgrp[CTRL_ATTR_MCAST_GRP_MAX + 1]; + + nla_parse(tb_mcgrp, CTRL_ATTR_MCAST_GRP_MAX, + nla_data(mcgrp), nla_len(mcgrp), NULL); + + if (!tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME] || + !tb_mcgrp[CTRL_ATTR_MCAST_GRP_ID]) + continue; + if (strncmp(nla_data(tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME]), + grp->group, nla_len(tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME]))) + continue; + grp->id = nla_get_u32(tb_mcgrp[CTRL_ATTR_MCAST_GRP_ID]); + break; + } + + return NL_SKIP; +} + +static int mcast_error_handler(struct sockaddr_nl (*nla)__always_unused, + struct nlmsgerr *err, void *arg) +{ + int *ret = arg; + + *ret = err->error; + return NL_STOP; +} + +static int mcast_ack_handler(struct nl_msg (*msg)__always_unused, void *arg) +{ + int *ret = arg; + + *ret = 0; + return NL_STOP; +} + +static int ovpn_handle_msg(struct nl_msg *msg, void *arg) +{ + struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); + struct nlattr *attrs[OVPN_A_MAX + 1]; + struct nlmsghdr *nlh = nlmsg_hdr(msg); + char ifname[IF_NAMESIZE]; + int *ret = arg; + __u32 ifindex; + + fprintf(stderr, "received message from ovpn-dco\n"); + + *ret = -1; + + if (!genlmsg_valid_hdr(nlh, 0)) { + fprintf(stderr, "invalid header\n"); + return NL_STOP; + } + + if (nla_parse(attrs, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0), + genlmsg_attrlen(gnlh, 0), NULL)) { + fprintf(stderr, "received bogus data from ovpn-dco\n"); + return NL_STOP; + } + + if (!attrs[OVPN_A_IFINDEX]) { + fprintf(stderr, "no ifindex in this message\n"); + return NL_STOP; + } + + ifindex = nla_get_u32(attrs[OVPN_A_IFINDEX]); + if (!if_indextoname(ifindex, ifname)) { + fprintf(stderr, "cannot resolve ifname for ifindex: %u\n", + ifindex); + return NL_STOP; + } + + switch (gnlh->cmd) { + case OVPN_CMD_PEER_DEL_NTF: + fprintf(stdout, "received CMD_PEER_DEL_NTF\n"); + break; + case OVPN_CMD_PEER_FLOAT_NTF: + fprintf(stdout, "received CMD_PEER_FLOAT_NTF\n"); + break; + case OVPN_CMD_KEY_SWAP_NTF: + fprintf(stdout, "received CMD_KEY_SWAP_NTF\n"); + break; + default: + fprintf(stderr, "received unknown command: %d\n", gnlh->cmd); + return NL_STOP; + } + + *ret = 0; + return NL_OK; +} + +static int ovpn_get_mcast_id(struct nl_sock *sock, const char *family, + const char *group) +{ + struct nl_msg *msg; + struct nl_cb *cb; + int ret, ctrlid; + struct mcast_handler_args grp = { + .group = group, + .id = -ENOENT, + }; + + msg = nlmsg_alloc(); + if (!msg) + return -ENOMEM; + + cb = nl_cb_alloc(NL_CB_DEFAULT); + if (!cb) { + ret = -ENOMEM; + goto out_fail_cb; + } + + ctrlid = genl_ctrl_resolve(sock, "nlctrl"); + + genlmsg_put(msg, 0, 0, ctrlid, 0, 0, CTRL_CMD_GETFAMILY, 0); + + ret = -ENOBUFS; + NLA_PUT_STRING(msg, CTRL_ATTR_FAMILY_NAME, family); + + ret = nl_send_auto_complete(sock, msg); + if (ret < 0) + goto nla_put_failure; + + ret = 1; + + nl_cb_err(cb, NL_CB_CUSTOM, mcast_error_handler, &ret); + nl_cb_set(cb, NL_CB_ACK, NL_CB_CUSTOM, mcast_ack_handler, &ret); + nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, mcast_family_handler, &grp); + + while (ret > 0) + nl_recvmsgs(sock, cb); + + if (ret == 0) + ret = grp.id; + nla_put_failure: + nl_cb_put(cb); + out_fail_cb: + nlmsg_free(msg); + return ret; +} + +static int ovpn_listen_mcast(void) +{ + struct nl_sock *sock; + struct nl_cb *cb; + int mcid, ret; + + sock = nl_socket_alloc(); + if (!sock) { + fprintf(stderr, "cannot allocate netlink socket\n"); + ret = -ENOMEM; + goto err_free; + } + + nl_socket_set_buffer_size(sock, 8192, 8192); + + ret = genl_connect(sock); + if (ret < 0) { + fprintf(stderr, "cannot connect to generic netlink: %s\n", + nl_geterror(ret)); + goto err_free; + } + + mcid = ovpn_get_mcast_id(sock, OVPN_FAMILY_NAME, OVPN_MCGRP_PEERS); + if (mcid < 0) { + fprintf(stderr, "cannot get mcast group: %s\n", + nl_geterror(mcid)); + goto err_free; + } + + ret = nl_socket_add_membership(sock, mcid); + if (ret) { + fprintf(stderr, "failed to join mcast group: %d\n", ret); + goto err_free; + } + + ret = 1; + cb = nl_cb_alloc(NL_CB_DEFAULT); + nl_cb_set(cb, NL_CB_SEQ_CHECK, NL_CB_CUSTOM, nl_seq_check, NULL); + nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, ovpn_handle_msg, &ret); + nl_cb_err(cb, NL_CB_CUSTOM, ovpn_nl_cb_error, &ret); + + while (ret == 1) { + int err = nl_recvmsgs(sock, cb); + + if (err < 0) { + fprintf(stderr, + "cannot receive netlink message: (%d) %s\n", + err, nl_geterror(-err)); + ret = -1; + break; + } + } + + nl_cb_put(cb); +err_free: + nl_socket_free(sock); + return ret; +} + +static void usage(const char *cmd) +{ + fprintf(stderr, + "Usage %s <command> <iface> [arguments..]\n", + cmd); + fprintf(stderr, "where <command> can be one of the following\n\n"); + + fprintf(stderr, "* new_iface <iface> [mode]: create new ovpn interface\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tmode:\n"); + fprintf(stderr, "\t\t- P2P for peer-to-peer mode (i.e. client)\n"); + fprintf(stderr, "\t\t- MP for multi-peer mode (i.e. server)\n"); + + fprintf(stderr, "* del_iface <iface>: delete ovpn interface\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + + fprintf(stderr, + "* listen <iface> <lport> <id_type> <peers_file> [ipv6]: listen for incoming peer TCP connections\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tlport: TCP port to listen to\n"); + fprintf(stderr, "\tid_type:\n"); + fprintf(stderr, + "\t\t- SYMM for ignoring the TX peer ID from the peers_file\n"); + fprintf(stderr, + "\t\t- ASYMM for using the TX peer ID from the peers_file\n"); + fprintf(stderr, + "\tpeers_file: file containing one peer per line: Line format:\n"); + fprintf(stderr, "\t\t<peer_id> <tx_id> <vpnaddr>\n"); + fprintf(stderr, + "\tipv6: whether the socket should listen to the IPv6 wildcard address\n"); + + fprintf(stderr, + "* connect <iface> <peer_id> <tx_id> <raddr> <rport> [key_file]: start connecting peer of TCP-based VPN session\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, + "\tpeer_id: peer ID found in data packets received from this peer\n"); + fprintf(stderr, + "\ttx_id: peer ID to be used when sending to this peer, 'none' for symmetric peer ID\n"); + fprintf(stderr, "\traddr: peer IP address to connect to\n"); + fprintf(stderr, "\trport: peer TCP port to connect to\n"); + fprintf(stderr, + "\tkey_file: file containing the symmetric key for encryption\n"); + + fprintf(stderr, + "* new_peer <iface> <peer_id> <tx_id> <lport> <raddr> <rport> [vpnaddr]: add new peer\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, + "\tpeer_id: peer ID found in data packets received from this peer\n"); + fprintf(stderr, + "\ttx_id: peer ID to be used when sending to this peer, 'none' for symmetric peer ID\n"); + fprintf(stderr, "\tlport: local UDP port to bind to\n"); + fprintf(stderr, "\traddr: peer IP address\n"); + fprintf(stderr, "\trport: peer UDP port\n"); + fprintf(stderr, "\tvpnaddr: peer VPN IP\n"); + + fprintf(stderr, + "* new_multi_peer <iface> <lport> <id_type> <peers_file> [mark]: add multiple peers as listed in the file\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tlport: local UDP port to bind to\n"); + fprintf(stderr, "\tid_type:\n"); + fprintf(stderr, + "\t\t- SYMM for ignoring the TX peer ID from the peers_file\n"); + fprintf(stderr, + "\t\t- ASYMM for using the TX peer ID from the peers_file\n"); + fprintf(stderr, + "\tpeers_file: text file containing one peer per line. Line format:\n"); + fprintf(stderr, + "\t\t<peer_id> <tx_id> <raddr> <rport> <laddr> <lport> <vpnaddr>\n"); + fprintf(stderr, "\tmark: socket FW mark value\n"); + + fprintf(stderr, + "* set_peer <iface> <peer_id> <keepalive_interval> <keepalive_timeout>: set peer attributes\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tpeer_id: peer ID of the peer to modify\n"); + fprintf(stderr, + "\tkeepalive_interval: interval for sending ping messages\n"); + fprintf(stderr, + "\tkeepalive_timeout: time after which a peer is timed out\n"); + + fprintf(stderr, "* del_peer <iface> <peer_id>: delete peer\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tpeer_id: peer ID of the peer to delete\n"); + + fprintf(stderr, "* get_peer <iface> [peer_id]: retrieve peer(s) status\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, + "\tpeer_id: peer ID of the peer to query. All peers are returned if omitted\n"); + + fprintf(stderr, + "* new_key <iface> <peer_id> <slot> <key_id> <cipher> <key_dir> <key_file>: set data channel key\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, + "\tpeer_id: peer ID of the peer to configure the key for\n"); + fprintf(stderr, "\tslot: either 1 (primary) or 2 (secondary)\n"); + fprintf(stderr, "\tkey_id: an ID from 0 to 7\n"); + fprintf(stderr, + "\tcipher: cipher to use, supported: aes (AES-GCM), chachapoly (CHACHA20POLY1305)\n"); + fprintf(stderr, + "\tkey_dir: key direction, must 0 on one host and 1 on the other\n"); + fprintf(stderr, "\tkey_file: file containing the pre-shared key\n"); + + fprintf(stderr, + "* del_key <iface> <peer_id> [slot]: erase existing data channel key\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tpeer_id: peer ID of the peer to modify\n"); + fprintf(stderr, "\tslot: slot to erase. PRIMARY if omitted\n"); + + fprintf(stderr, + "* get_key <iface> <peer_id> <slot>: retrieve non sensible key data\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tpeer_id: peer ID of the peer to query\n"); + fprintf(stderr, "\tslot: either 1 (primary) or 2 (secondary)\n"); + + fprintf(stderr, + "* swap_keys <iface> <peer_id>: swap content of primary and secondary key slots\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tpeer_id: peer ID of the peer to modify\n"); + + fprintf(stderr, + "* listen_mcast: listen to ovpn netlink multicast messages\n"); +} + +static int ovpn_parse_remote(struct ovpn_ctx *ovpn, const char *host, + const char *service, const char *vpnip) +{ + int ret; + struct addrinfo *result; + struct addrinfo hints = { + .ai_family = ovpn->sa_family, + .ai_socktype = SOCK_DGRAM, + .ai_protocol = IPPROTO_UDP + }; + + if (host) { + ret = getaddrinfo(host, service, &hints, &result); + if (ret) { + fprintf(stderr, "getaddrinfo on remote error: %s\n", + gai_strerror(ret)); + return -1; + } + + if (!(result->ai_family == AF_INET && + result->ai_addrlen == sizeof(struct sockaddr_in)) && + !(result->ai_family == AF_INET6 && + result->ai_addrlen == sizeof(struct sockaddr_in6))) { + ret = -EINVAL; + goto out; + } + + memcpy(&ovpn->remote, result->ai_addr, result->ai_addrlen); + } + + if (vpnip) { + ret = getaddrinfo(vpnip, NULL, &hints, &result); + if (ret) { + fprintf(stderr, "getaddrinfo on vpnip error: %s\n", + gai_strerror(ret)); + return -1; + } + + if (!(result->ai_family == AF_INET && + result->ai_addrlen == sizeof(struct sockaddr_in)) && + !(result->ai_family == AF_INET6 && + result->ai_addrlen == sizeof(struct sockaddr_in6))) { + ret = -EINVAL; + goto out; + } + + memcpy(&ovpn->peer_ip, result->ai_addr, result->ai_addrlen); + ovpn->sa_family = result->ai_family; + + ovpn->peer_ip_set = true; + } + + ret = 0; +out: + freeaddrinfo(result); + return ret; +} + +static int ovpn_parse_new_peer(struct ovpn_ctx *ovpn, const char *peer_id, + const char *tx_id, const char *raddr, + const char *rport, const char *vpnip) +{ + ovpn->peer_id = strtoul(peer_id, NULL, 10); + if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) { + fprintf(stderr, "rx peer ID value out of range\n"); + return -1; + } + + if (ovpn->asymm_id) { + ovpn->tx_id = strtoul(tx_id, NULL, 10); + if (errno == ERANGE || ovpn->tx_id > PEER_ID_UNDEF) { + fprintf(stderr, "tx peer ID value out of range\n"); + return -1; + } + } + + return ovpn_parse_remote(ovpn, raddr, rport, vpnip); +} + +static int ovpn_parse_key_slot(const char *arg, struct ovpn_ctx *ovpn) +{ + int slot = strtoul(arg, NULL, 10); + + if (errno == ERANGE || slot < 1 || slot > 2) { + fprintf(stderr, "key slot out of range\n"); + return -1; + } + + switch (slot) { + case 1: + ovpn->key_slot = OVPN_KEY_SLOT_PRIMARY; + break; + case 2: + ovpn->key_slot = OVPN_KEY_SLOT_SECONDARY; + break; + } + + return 0; +} + +static int ovpn_send_tcp_data(int socket) +{ + uint16_t len = htons(1000); + uint8_t buf[1002]; + int ret; + + memcpy(buf, &len, sizeof(len)); + memset(buf + sizeof(len), 0x86, sizeof(buf) - sizeof(len)); + + ret = send(socket, buf, sizeof(buf), MSG_NOSIGNAL); + + fprintf(stdout, "Sent %u bytes over TCP socket\n", ret); + + return ret > 0 ? 0 : ret; +} + +static int ovpn_recv_tcp_data(int socket) +{ + uint8_t buf[1002]; + uint16_t len; + int ret; + + ret = recv(socket, buf, sizeof(buf), MSG_NOSIGNAL); + + if (ret < 2) { + fprintf(stderr, ">>>> Error while reading TCP data: %d\n", ret); + return ret; + } + + memcpy(&len, buf, sizeof(len)); + len = ntohs(len); + + fprintf(stdout, ">>>> Received %u bytes over TCP socket, header: %u\n", + ret, len); + + return 0; +} + +static enum ovpn_cmd ovpn_parse_cmd(const char *cmd) +{ + if (!strcmp(cmd, "new_iface")) + return CMD_NEW_IFACE; + + if (!strcmp(cmd, "del_iface")) + return CMD_DEL_IFACE; + + if (!strcmp(cmd, "listen")) + return CMD_LISTEN; + + if (!strcmp(cmd, "connect")) + return CMD_CONNECT; + + if (!strcmp(cmd, "new_peer")) + return CMD_NEW_PEER; + + if (!strcmp(cmd, "new_multi_peer")) + return CMD_NEW_MULTI_PEER; + + if (!strcmp(cmd, "set_peer")) + return CMD_SET_PEER; + + if (!strcmp(cmd, "del_peer")) + return CMD_DEL_PEER; + + if (!strcmp(cmd, "get_peer")) + return CMD_GET_PEER; + + if (!strcmp(cmd, "new_key")) + return CMD_NEW_KEY; + + if (!strcmp(cmd, "del_key")) + return CMD_DEL_KEY; + + if (!strcmp(cmd, "get_key")) + return CMD_GET_KEY; + + if (!strcmp(cmd, "swap_keys")) + return CMD_SWAP_KEYS; + + if (!strcmp(cmd, "listen_mcast")) + return CMD_LISTEN_MCAST; + + return CMD_INVALID; +} + +/* Send process to background and waits for signal. + * + * This helper is called at the end of commands + * creating sockets, so that the latter stay alive + * along with the process that created them. + * + * A signal is expected to be delivered in order to + * terminate the waiting processes + */ +static void ovpn_waitbg(void) +{ + daemon(1, 1); + pause(); +} + +static int ovpn_run_cmd(struct ovpn_ctx *ovpn) +{ + char peer_id[10], tx_id[10], vpnip[INET6_ADDRSTRLEN], laddr[128]; + char lport[10], raddr[128], rport[10]; + int n, ret; + FILE *fp; + + switch (ovpn->cmd) { + case CMD_NEW_IFACE: + ret = ovpn_new_iface(ovpn); + break; + case CMD_DEL_IFACE: + ret = ovpn_del_iface(ovpn); + break; + case CMD_LISTEN: + ret = ovpn_listen(ovpn, ovpn->sa_family); + if (ret < 0) { + fprintf(stderr, "cannot listen on TCP socket\n"); + return ret; + } + + fp = fopen(ovpn->peers_file, "r"); + if (!fp) { + fprintf(stderr, "cannot open file: %s\n", + ovpn->peers_file); + return -1; + } + + int num_peers = 0; + + while ((n = fscanf(fp, "%s %s %s\n", peer_id, tx_id, + vpnip)) == 3) { + struct ovpn_ctx peer_ctx = { 0 }; + + if (num_peers == MAX_PEERS) { + fprintf(stderr, "max peers reached!\n"); + return -E2BIG; + } + + peer_ctx.ifindex = ovpn->ifindex; + peer_ctx.sa_family = ovpn->sa_family; + peer_ctx.asymm_id = ovpn->asymm_id; + + peer_ctx.socket = ovpn_accept(ovpn); + if (peer_ctx.socket < 0) { + fprintf(stderr, "cannot accept connection!\n"); + return -1; + } + + /* store peer sockets to test TCP I/O */ + ovpn->cli_sockets[num_peers] = peer_ctx.socket; + + ret = ovpn_parse_new_peer(&peer_ctx, peer_id, tx_id, + NULL, NULL, vpnip); + if (ret < 0) { + fprintf(stderr, "error while parsing line\n"); + return -1; + } + + ret = ovpn_new_peer(&peer_ctx, true); + if (ret < 0) { + fprintf(stderr, + "cannot add peer to VPN: %s %s\n", + peer_id, vpnip); + return ret; + } + num_peers++; + } + + for (int i = 0; i < num_peers; i++) { + ret = ovpn_recv_tcp_data(ovpn->cli_sockets[i]); + if (ret < 0) + break; + } + ovpn_waitbg(); + break; + case CMD_CONNECT: + ret = ovpn_connect(ovpn); + if (ret < 0) { + fprintf(stderr, "cannot connect TCP socket\n"); + return ret; + } + + ret = ovpn_new_peer(ovpn, true); + if (ret < 0) { + fprintf(stderr, "cannot add peer to VPN\n"); + close(ovpn->socket); + return ret; + } + + if (ovpn->cipher != OVPN_CIPHER_ALG_NONE) { + ret = ovpn_new_key(ovpn); + if (ret < 0) { + fprintf(stderr, "cannot set key\n"); + return ret; + } + } + + ret = ovpn_send_tcp_data(ovpn->socket); + ovpn_waitbg(); + break; + case CMD_NEW_PEER: + ret = ovpn_udp_socket(ovpn, AF_INET6); + if (ret < 0) + return ret; + + ret = ovpn_new_peer(ovpn, false); + ovpn_waitbg(); + break; + case CMD_NEW_MULTI_PEER: + ret = ovpn_udp_socket(ovpn, AF_INET6); + if (ret < 0) + return ret; + + fp = fopen(ovpn->peers_file, "r"); + if (!fp) { + fprintf(stderr, "cannot open file: %s\n", + ovpn->peers_file); + return -1; + } + + while ((n = fscanf(fp, "%s %s %s %s %s %s %s\n", peer_id, tx_id, + laddr, lport, raddr, rport, vpnip)) == 7) { + struct ovpn_ctx peer_ctx = { 0 }; + + peer_ctx.ifindex = ovpn->ifindex; + peer_ctx.socket = ovpn->socket; + peer_ctx.sa_family = AF_UNSPEC; + peer_ctx.asymm_id = ovpn->asymm_id; + + ret = ovpn_parse_new_peer(&peer_ctx, peer_id, tx_id, + raddr, rport, vpnip); + if (ret < 0) { + fprintf(stderr, "error while parsing line\n"); + return -1; + } + + ret = ovpn_new_peer(&peer_ctx, false); + if (ret < 0) { + fprintf(stderr, + "cannot add peer to VPN: %s %s %s %s\n", + peer_id, raddr, rport, vpnip); + return ret; + } + } + ovpn_waitbg(); + break; + case CMD_SET_PEER: + ret = ovpn_set_peer(ovpn); + break; + case CMD_DEL_PEER: + ret = ovpn_del_peer(ovpn); + break; + case CMD_GET_PEER: + if (ovpn->peer_id == PEER_ID_UNDEF) + fprintf(stderr, "List of peers connected to: %s\n", + ovpn->ifname); + + ret = ovpn_get_peer(ovpn); + break; + case CMD_NEW_KEY: + ret = ovpn_new_key(ovpn); + break; + case CMD_DEL_KEY: + ret = ovpn_del_key(ovpn); + break; + case CMD_GET_KEY: + ret = ovpn_get_key(ovpn); + break; + case CMD_SWAP_KEYS: + ret = ovpn_swap_keys(ovpn); + break; + case CMD_LISTEN_MCAST: + ret = ovpn_listen_mcast(); + break; + case CMD_INVALID: + ret = -EINVAL; + break; + } + + return ret; +} + +static int ovpn_parse_cmd_args(struct ovpn_ctx *ovpn, int argc, char *argv[]) +{ + int ret; + + /* no args required for LISTEN_MCAST */ + if (ovpn->cmd == CMD_LISTEN_MCAST) + return 0; + + /* all commands need an ifname */ + if (argc < 3) + return -EINVAL; + + strscpy(ovpn->ifname, argv[2], IFNAMSIZ - 1); + ovpn->ifname[IFNAMSIZ - 1] = '\0'; + + /* all commands, except NEW_IFNAME, needs an ifindex */ + if (ovpn->cmd != CMD_NEW_IFACE) { + ovpn->ifindex = if_nametoindex(ovpn->ifname); + if (!ovpn->ifindex) { + fprintf(stderr, "cannot find interface: %s\n", + strerror(errno)); + return -1; + } + } + + switch (ovpn->cmd) { + case CMD_NEW_IFACE: + if (argc < 4) + break; + + if (!strcmp(argv[3], "P2P")) { + ovpn->mode = OVPN_MODE_P2P; + } else if (!strcmp(argv[3], "MP")) { + ovpn->mode = OVPN_MODE_MP; + } else { + fprintf(stderr, "Cannot parse iface mode: %s\n", + argv[3]); + return -1; + } + ovpn->mode_set = true; + break; + case CMD_DEL_IFACE: + break; + case CMD_LISTEN: + if (argc < 6) + return -EINVAL; + + ovpn->lport = strtoul(argv[3], NULL, 10); + if (errno == ERANGE || ovpn->lport > 65535) { + fprintf(stderr, "lport value out of range\n"); + return -1; + } + + if (strcmp(argv[4], "SYMM") == 0) { + ovpn->asymm_id = false; + } else if (strcmp(argv[4], "ASYMM") == 0) { + ovpn->asymm_id = true; + } else { + fprintf(stderr, "Cannot parse id type: %s\n", argv[4]); + return -1; + } + + ovpn->peers_file = argv[5]; + + ovpn->sa_family = AF_INET; + if (argc > 6 && !strcmp(argv[6], "ipv6")) + ovpn->sa_family = AF_INET6; + break; + case CMD_CONNECT: + if (argc < 7) + return -EINVAL; + + ovpn->sa_family = AF_INET; + ovpn->asymm_id = strcmp(argv[4], "none"); + + ret = ovpn_parse_new_peer(ovpn, argv[3], argv[4], argv[5], + argv[6], NULL); + if (ret < 0) { + fprintf(stderr, "Cannot parse remote peer data\n"); + return -1; + } + + if (argc > 7) { + ovpn->key_slot = OVPN_KEY_SLOT_PRIMARY; + ovpn->key_id = 0; + ovpn->cipher = OVPN_CIPHER_ALG_AES_GCM; + ovpn->key_dir = KEY_DIR_OUT; + + ret = ovpn_parse_key(argv[7], ovpn); + if (ret) + return -1; + } + break; + case CMD_NEW_PEER: + if (argc < 8) + return -EINVAL; + + ovpn->asymm_id = strcmp(argv[4], "none"); + + ovpn->lport = strtoul(argv[5], NULL, 10); + if (errno == ERANGE || ovpn->lport > 65535) { + fprintf(stderr, "lport value out of range\n"); + return -1; + } + + const char *vpnip = (argc > 8) ? argv[8] : NULL; + + ret = ovpn_parse_new_peer(ovpn, argv[3], argv[4], argv[6], + argv[7], vpnip); + if (ret < 0) + return -1; + break; + case CMD_NEW_MULTI_PEER: + if (argc < 6) + return -EINVAL; + + ovpn->lport = strtoul(argv[3], NULL, 10); + if (errno == ERANGE || ovpn->lport > 65535) { + fprintf(stderr, "lport value out of range\n"); + return -1; + } + + if (!strcmp(argv[4], "SYMM")) { + ovpn->asymm_id = false; + } else if (!strcmp(argv[4], "ASYMM")) { + ovpn->asymm_id = true; + } else { + fprintf(stderr, "Cannot parse id type: %s\n", argv[4]); + return -1; + } + + ovpn->peers_file = argv[5]; + + ovpn->mark = 0; + if (argc > 6) { + ovpn->mark = strtoul(argv[6], NULL, 10); + if (errno == ERANGE || ovpn->mark > UINT32_MAX) { + fprintf(stderr, "mark value out of range\n"); + return -1; + } + } + break; + case CMD_SET_PEER: + if (argc < 6) + return -EINVAL; + + ovpn->peer_id = strtoul(argv[3], NULL, 10); + if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) { + fprintf(stderr, "peer ID value out of range\n"); + return -1; + } + + ovpn->keepalive_interval = strtoul(argv[4], NULL, 10); + if (errno == ERANGE) { + fprintf(stderr, + "keepalive interval value out of range\n"); + return -1; + } + + ovpn->keepalive_timeout = strtoul(argv[5], NULL, 10); + if (errno == ERANGE) { + fprintf(stderr, + "keepalive interval value out of range\n"); + return -1; + } + break; + case CMD_DEL_PEER: + if (argc < 4) + return -EINVAL; + + ovpn->peer_id = strtoul(argv[3], NULL, 10); + if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) { + fprintf(stderr, "peer ID value out of range\n"); + return -1; + } + break; + case CMD_GET_PEER: + ovpn->peer_id = PEER_ID_UNDEF; + if (argc > 3) { + ovpn->peer_id = strtoul(argv[3], NULL, 10); + if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) { + fprintf(stderr, "peer ID value out of range\n"); + return -1; + } + } + break; + case CMD_NEW_KEY: + if (argc < 9) + return -EINVAL; + + ovpn->peer_id = strtoul(argv[3], NULL, 10); + if (errno == ERANGE) { + fprintf(stderr, "peer ID value out of range\n"); + return -1; + } + + ret = ovpn_parse_key_slot(argv[4], ovpn); + if (ret) + return -1; + + ovpn->key_id = strtoul(argv[5], NULL, 10); + if (errno == ERANGE || ovpn->key_id > 2) { + fprintf(stderr, "key ID out of range\n"); + return -1; + } + + ret = ovpn_parse_cipher(argv[6], ovpn); + if (ret < 0) + return -1; + + ret = ovpn_parse_key_direction(argv[7], ovpn); + if (ret < 0) + return -1; + + ret = ovpn_parse_key(argv[8], ovpn); + if (ret) + return -1; + break; + case CMD_DEL_KEY: + if (argc < 4) + return -EINVAL; + + ovpn->peer_id = strtoul(argv[3], NULL, 10); + if (errno == ERANGE) { + fprintf(stderr, "peer ID value out of range\n"); + return -1; + } + + ret = ovpn_parse_key_slot(argv[4], ovpn); + if (ret) + return ret; + break; + case CMD_GET_KEY: + if (argc < 5) + return -EINVAL; + + ovpn->peer_id = strtoul(argv[3], NULL, 10); + if (errno == ERANGE) { + fprintf(stderr, "peer ID value out of range\n"); + return -1; + } + + ret = ovpn_parse_key_slot(argv[4], ovpn); + if (ret) + return ret; + break; + case CMD_SWAP_KEYS: + if (argc < 4) + return -EINVAL; + + ovpn->peer_id = strtoul(argv[3], NULL, 10); + if (errno == ERANGE) { + fprintf(stderr, "peer ID value out of range\n"); + return -1; + } + break; + case CMD_LISTEN_MCAST: + break; + case CMD_INVALID: + break; + } + + return 0; +} + +int main(int argc, char *argv[]) +{ + struct ovpn_ctx ovpn; + int ret; + + if (argc < 2) { + usage(argv[0]); + return -1; + } + + memset(&ovpn, 0, sizeof(ovpn)); + ovpn.sa_family = AF_UNSPEC; + ovpn.cipher = OVPN_CIPHER_ALG_NONE; + + ovpn.cmd = ovpn_parse_cmd(argv[1]); + if (ovpn.cmd == CMD_INVALID) { + fprintf(stderr, "Error: unknown command.\n\n"); + usage(argv[0]); + return -1; + } + + ret = ovpn_parse_cmd_args(&ovpn, argc, argv); + if (ret < 0) { + fprintf(stderr, "Error: invalid arguments.\n\n"); + if (ret == -EINVAL) + usage(argv[0]); + return ret; + } + + ret = ovpn_run_cmd(&ovpn); + if (ret) + fprintf(stderr, "Cannot execute command: %s (%d)\n", + strerror(-ret), ret); + + return ret; +} diff --git a/tools/testing/selftests/net/ovpn/tcp_peers.txt b/tools/testing/selftests/net/ovpn/tcp_peers.txt new file mode 100644 index 000000000000..3cb67b560705 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/tcp_peers.txt @@ -0,0 +1,6 @@ +1 10 5.5.5.2 +2 11 5.5.5.3 +3 12 5.5.5.4 +4 13 5.5.5.5 +5 14 5.5.5.6 +6 15 5.5.5.7 diff --git a/tools/testing/selftests/net/ovpn/test-chachapoly.sh b/tools/testing/selftests/net/ovpn/test-chachapoly.sh new file mode 100755 index 000000000000..32504079a2b8 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/test-chachapoly.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2025 OpenVPN, Inc. +# +# Author: Antonio Quartulli <antonio@openvpn.net> + +ALG="chachapoly" + +source test.sh diff --git a/tools/testing/selftests/net/ovpn/test-close-socket-tcp.sh b/tools/testing/selftests/net/ovpn/test-close-socket-tcp.sh new file mode 100755 index 000000000000..093d44772ffd --- /dev/null +++ b/tools/testing/selftests/net/ovpn/test-close-socket-tcp.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2025 OpenVPN, Inc. +# +# Author: Antonio Quartulli <antonio@openvpn.net> + +PROTO="TCP" + +source test-close-socket.sh diff --git a/tools/testing/selftests/net/ovpn/test-close-socket.sh b/tools/testing/selftests/net/ovpn/test-close-socket.sh new file mode 100755 index 000000000000..0d09df14fe8e --- /dev/null +++ b/tools/testing/selftests/net/ovpn/test-close-socket.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2020-2025 OpenVPN, Inc. +# +# Author: Antonio Quartulli <antonio@openvpn.net> + +#set -x +set -e + +source ./common.sh + +cleanup + +modprobe -q ovpn || true + +for p in $(seq 0 ${NUM_PEERS}); do + create_ns ${p} +done + +for p in $(seq 0 ${NUM_PEERS}); do + setup_ns ${p} 5.5.5.$((${p} + 1))/24 +done + +for p in $(seq 0 ${NUM_PEERS}); do + add_peer ${p} +done + +for p in $(seq 1 ${NUM_PEERS}); do + ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 60 120 + ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} $((${p}+9)) 60 120 +done + +sleep 1 + +for p in $(seq 1 ${NUM_PEERS}); do + ip netns exec peer0 ping -qfc 500 -w 3 5.5.5.$((${p} + 1)) +done + +ip netns exec peer0 iperf3 -1 -s & +sleep 1 +ip netns exec peer1 iperf3 -Z -t 3 -c 5.5.5.1 + +cleanup + +modprobe -r ovpn || true diff --git a/tools/testing/selftests/net/ovpn/test-float.sh b/tools/testing/selftests/net/ovpn/test-float.sh new file mode 100755 index 000000000000..ba5d725e18b0 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/test-float.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2025 OpenVPN, Inc. +# +# Author: Antonio Quartulli <antonio@openvpn.net> + +FLOAT="1" + +source test.sh diff --git a/tools/testing/selftests/net/ovpn/test-large-mtu.sh b/tools/testing/selftests/net/ovpn/test-large-mtu.sh new file mode 100755 index 000000000000..ce2a2cb64f72 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/test-large-mtu.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2025 OpenVPN, Inc. +# +# Author: Antonio Quartulli <antonio@openvpn.net> + +MTU="1500" + +source test.sh diff --git a/tools/testing/selftests/net/ovpn/test-mark.sh b/tools/testing/selftests/net/ovpn/test-mark.sh new file mode 100755 index 000000000000..8534428ed3eb --- /dev/null +++ b/tools/testing/selftests/net/ovpn/test-mark.sh @@ -0,0 +1,96 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2020-2025 OpenVPN, Inc. +# +# Author: Ralf Lici <ralf@mandelbit.com> +# Antonio Quartulli <antonio@openvpn.net> + +#set -x +set -e + +MARK=1056 + +source ./common.sh + +cleanup + +modprobe -q ovpn || true + +for p in $(seq 0 "${NUM_PEERS}"); do + create_ns "${p}" +done + +for p in $(seq 0 3); do + setup_ns "${p}" 5.5.5.$((p + 1))/24 +done + +# add peer0 with mark +ip netns exec peer0 "${OVPN_CLI}" new_multi_peer tun0 1 ASYMM \ + "${UDP_PEERS_FILE}" \ + ${MARK} +for p in $(seq 1 3); do + ip netns exec peer0 "${OVPN_CLI}" new_key tun0 "${p}" 1 0 "${ALG}" 0 \ + data64.key +done + +for p in $(seq 1 3); do + add_peer "${p}" +done + +for p in $(seq 1 3); do + ip netns exec peer0 "${OVPN_CLI}" set_peer tun0 "${p}" 60 120 + ip netns exec peer"${p}" "${OVPN_CLI}" set_peer tun"${p}" \ + $((p + 9)) 60 120 +done + +sleep 1 + +for p in $(seq 1 3); do + ip netns exec peer0 ping -qfc 500 -w 3 5.5.5.$((p + 1)) +done + +echo "Adding an nftables drop rule based on mark value ${MARK}" +ip netns exec peer0 nft flush ruleset +ip netns exec peer0 nft 'add table inet filter' +ip netns exec peer0 nft 'add chain inet filter output { + type filter hook output priority 0; + policy accept; +}' +ip netns exec peer0 nft add rule inet filter output \ + meta mark == ${MARK} \ + counter drop + +DROP_COUNTER=$(ip netns exec peer0 nft list chain inet filter output \ + | sed -n 's/.*packets \([0-9]*\).*/\1/p') +sleep 1 + +# ping should fail +for p in $(seq 1 3); do + PING_OUTPUT=$(ip netns exec peer0 ping \ + -qfc 500 -w 1 5.5.5.$((p + 1)) 2>&1) && exit 1 + echo "${PING_OUTPUT}" + LOST_PACKETS=$(echo "$PING_OUTPUT" \ + | awk '/packets transmitted/ { print $1 }') + # increment the drop counter by the amount of lost packets + DROP_COUNTER=$((DROP_COUNTER + LOST_PACKETS)) +done + +# check if the final nft counter matches our counter +TOTAL_COUNT=$(ip netns exec peer0 nft list chain inet filter output \ + | sed -n 's/.*packets \([0-9]*\).*/\1/p') +if [ "${DROP_COUNTER}" -ne "${TOTAL_COUNT}" ]; then + echo "Expected ${TOTAL_COUNT} drops, got ${DROP_COUNTER}" + exit 1 +fi + +echo "Removing the drop rule" +ip netns exec peer0 nft flush ruleset +sleep 1 + +for p in $(seq 1 3); do + ip netns exec peer0 ping -qfc 500 -w 3 5.5.5.$((p + 1)) +done + +cleanup + +modprobe -r ovpn || true diff --git a/tools/testing/selftests/net/ovpn/test-symmetric-id-float.sh b/tools/testing/selftests/net/ovpn/test-symmetric-id-float.sh new file mode 100755 index 000000000000..b3711a81b463 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/test-symmetric-id-float.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2025 OpenVPN, Inc. +# +# Author: Ralf Lici <ralf@mandelbit.com> +# Antonio Quartulli <antonio@openvpn.net> + +SYMMETRIC_ID="1" +FLOAT="1" + +source test.sh diff --git a/tools/testing/selftests/net/ovpn/test-symmetric-id-tcp.sh b/tools/testing/selftests/net/ovpn/test-symmetric-id-tcp.sh new file mode 100755 index 000000000000..188cafb67b2f --- /dev/null +++ b/tools/testing/selftests/net/ovpn/test-symmetric-id-tcp.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2025 OpenVPN, Inc. +# +# Author: Ralf Lici <ralf@mandelbit.com> +# Antonio Quartulli <antonio@openvpn.net> + +PROTO="TCP" +SYMMETRIC_ID=1 + +source test.sh diff --git a/tools/testing/selftests/net/ovpn/test-symmetric-id.sh b/tools/testing/selftests/net/ovpn/test-symmetric-id.sh new file mode 100755 index 000000000000..35b119c72e4f --- /dev/null +++ b/tools/testing/selftests/net/ovpn/test-symmetric-id.sh @@ -0,0 +1,10 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2025 OpenVPN, Inc. +# +# Author: Ralf Lici <ralf@mandelbit.com> +# Antonio Quartulli <antonio@openvpn.net> + +SYMMETRIC_ID="1" + +source test.sh diff --git a/tools/testing/selftests/net/ovpn/test-tcp.sh b/tools/testing/selftests/net/ovpn/test-tcp.sh new file mode 100755 index 000000000000..ba3f1f315a34 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/test-tcp.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2025 OpenVPN, Inc. +# +# Author: Antonio Quartulli <antonio@openvpn.net> + +PROTO="TCP" + +source test.sh diff --git a/tools/testing/selftests/net/ovpn/test.sh b/tools/testing/selftests/net/ovpn/test.sh new file mode 100755 index 000000000000..b60e94a4094e --- /dev/null +++ b/tools/testing/selftests/net/ovpn/test.sh @@ -0,0 +1,165 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2020-2025 OpenVPN, Inc. +# +# Author: Antonio Quartulli <antonio@openvpn.net> + +#set -x +set -e + +source ./common.sh + +cleanup + +modprobe -q ovpn || true + +for p in $(seq 0 ${NUM_PEERS}); do + create_ns ${p} +done + +for p in $(seq 0 ${NUM_PEERS}); do + setup_listener ${p} +done + +for p in $(seq 0 ${NUM_PEERS}); do + setup_ns ${p} 5.5.5.$((${p} + 1))/24 ${MTU} +done + +for p in $(seq 0 ${NUM_PEERS}); do + add_peer ${p} +done + +for p in $(seq 1 ${NUM_PEERS}); do + ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 60 120 + ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} \ + $((${p}+ID_OFFSET)) 60 120 +done + +sleep 1 + +TCPDUMP_TIMEOUT="1.5s" +for p in $(seq 1 ${NUM_PEERS}); do + # The first part of the data packet header consists of: + # - TCP only: 2 bytes for the packet length + # - 5 bits for opcode ("9" for DATA_V2) + # - 3 bits for key-id ("0" at this point) + # - 12 bytes for peer-id: + # - with asymmetric ID: "${p}" one way and "${p} + 9" the other way + # - with symmetric ID: "${p}" both ways + HEADER1=$(printf "0x4800000%x" ${p}) + HEADER2=$(printf "0x4800000%x" $((${p} + ID_OFFSET))) + RADDR="" + if [ "${PROTO}" == "UDP" ]; then + RADDR=$(awk "NR == ${p} {print \$3}" ${UDP_PEERS_FILE}) + fi + + timeout ${TCPDUMP_TIMEOUT} ip netns exec peer${p} \ + tcpdump --immediate-mode -p -ni veth${p} -c 1 \ + "$(build_capture_filter "${HEADER1}" "${RADDR}")" \ + >/dev/null 2>&1 & + TCPDUMP_PID1=$! + timeout ${TCPDUMP_TIMEOUT} ip netns exec peer${p} \ + tcpdump --immediate-mode -p -ni veth${p} -c 1 \ + "$(build_capture_filter "${HEADER2}" "${RADDR}")" \ + >/dev/null 2>&1 & + TCPDUMP_PID2=$! + + sleep 0.3 + ip netns exec peer0 ping -qfc 500 -w 3 5.5.5.$((${p} + 1)) + ip netns exec peer0 ping -qfc 500 -s 3000 -w 3 5.5.5.$((${p} + 1)) + + wait ${TCPDUMP_PID1} + wait ${TCPDUMP_PID2} +done + +# ping LAN behind client 1 +ip netns exec peer0 ping -qfc 500 -w 3 ${LAN_IP} + +if [ "$FLOAT" == "1" ]; then + # make clients float.. + for p in $(seq 1 ${NUM_PEERS}); do + ip -n peer${p} addr del 10.10.${p}.2/24 dev veth${p} + ip -n peer${p} addr add 10.10.${p}.3/24 dev veth${p} + done + for p in $(seq 1 ${NUM_PEERS}); do + ip netns exec peer${p} ping -qfc 500 -w 3 5.5.5.1 + done +fi + +ip netns exec peer0 iperf3 -1 -s & +sleep 1 +ip netns exec peer1 iperf3 -Z -t 3 -c 5.5.5.1 + +echo "Adding secondary key and then swap:" +for p in $(seq 1 ${NUM_PEERS}); do + ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 2 1 ${ALG} 0 \ + data64.key + ip netns exec peer${p} ${OVPN_CLI} new_key tun${p} \ + $((${p} + ID_OFFSET)) 2 1 ${ALG} 1 data64.key + ip netns exec peer${p} ${OVPN_CLI} swap_keys tun${p} \ + $((${p} + ID_OFFSET)) +done + +sleep 1 + +echo "Querying all peers:" +ip netns exec peer0 ${OVPN_CLI} get_peer tun0 +ip netns exec peer1 ${OVPN_CLI} get_peer tun1 + +echo "Querying peer 1:" +ip netns exec peer0 ${OVPN_CLI} get_peer tun0 1 + +echo "Querying non-existent peer 20:" +ip netns exec peer0 ${OVPN_CLI} get_peer tun0 20 || true + +echo "Deleting peer 1:" +ip netns exec peer0 ${OVPN_CLI} del_peer tun0 1 +ip netns exec peer1 ${OVPN_CLI} del_peer tun1 $((1 + ID_OFFSET)) + +echo "Querying keys:" +for p in $(seq 2 ${NUM_PEERS}); do + ip netns exec peer${p} ${OVPN_CLI} get_key tun${p} \ + $((${p} + ID_OFFSET)) 1 + ip netns exec peer${p} ${OVPN_CLI} get_key tun${p} \ + $((${p} + ID_OFFSET)) 2 +done + +echo "Deleting peer while sending traffic:" +(ip netns exec peer2 ping -qf -w 4 5.5.5.1)& +sleep 2 +ip netns exec peer0 ${OVPN_CLI} del_peer tun0 2 +# following command fails in TCP mode +# (both ends get conn reset when one peer disconnects) +ip netns exec peer2 ${OVPN_CLI} del_peer tun2 $((2 + ID_OFFSET)) || true + +echo "Deleting keys:" +for p in $(seq 3 ${NUM_PEERS}); do + ip netns exec peer${p} ${OVPN_CLI} del_key tun${p} \ + $((${p} + ID_OFFSET)) 1 + ip netns exec peer${p} ${OVPN_CLI} del_key tun${p} \ + $((${p} + ID_OFFSET)) 2 +done + +echo "Setting timeout to 3s MP:" +for p in $(seq 3 ${NUM_PEERS}); do + ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 3 3 || true + ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} \ + $((${p} + ID_OFFSET)) 0 0 +done +# wait for peers to timeout +sleep 5 + +echo "Setting timeout to 3s P2P:" +for p in $(seq 3 ${NUM_PEERS}); do + ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} \ + $((${p} + ID_OFFSET)) 3 3 +done +sleep 5 + +for p in $(seq 0 ${NUM_PEERS}); do + compare_ntfs ${p} +done + +cleanup + +modprobe -r ovpn || true diff --git a/tools/testing/selftests/net/ovpn/udp_peers.txt b/tools/testing/selftests/net/ovpn/udp_peers.txt new file mode 100644 index 000000000000..93de6465353c --- /dev/null +++ b/tools/testing/selftests/net/ovpn/udp_peers.txt @@ -0,0 +1,6 @@ +1 10 10.10.1.1 1 10.10.1.2 1 5.5.5.2 +2 11 10.10.2.1 1 10.10.2.2 1 5.5.5.3 +3 12 10.10.3.1 1 10.10.3.2 1 5.5.5.4 +4 13 fd00:0:0:4::1 1 fd00:0:0:4::2 1 5.5.5.5 +5 14 fd00:0:0:5::1 1 fd00:0:0:5::2 1 5.5.5.6 +6 15 fd00:0:0:6::1 1 fd00:0:0:6::2 1 5.5.5.7 diff --git a/tools/testing/selftests/net/packetdrill/Makefile b/tools/testing/selftests/net/packetdrill/Makefile index 31cfb666ba8b..ff54641493e9 100644 --- a/tools/testing/selftests/net/packetdrill/Makefile +++ b/tools/testing/selftests/net/packetdrill/Makefile @@ -1,9 +1,11 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_INCLUDES := ksft_runner.sh \ - defaults.sh \ - set_sysctls.py \ - ../../kselftest/ktap_helpers.sh +TEST_INCLUDES := \ + defaults.sh \ + ksft_runner.sh \ + set_sysctls.py \ + ../../kselftest/ktap_helpers.sh \ +# end of TEST_INCLUDES TEST_PROGS := $(wildcard *.pkt) diff --git a/tools/testing/selftests/net/packetdrill/config b/tools/testing/selftests/net/packetdrill/config index 0237ed98f3c0..c4a19a785521 100644 --- a/tools/testing/selftests/net/packetdrill/config +++ b/tools/testing/selftests/net/packetdrill/config @@ -1,6 +1,6 @@ -CONFIG_IPV6=y -CONFIG_HZ_1000=y CONFIG_HZ=1000 +CONFIG_HZ_1000=y +CONFIG_IPV6=y CONFIG_NET_NS=y CONFIG_NET_SCH_FIFO=y CONFIG_NET_SCH_FQ=y diff --git a/tools/testing/selftests/net/packetdrill/defaults.sh b/tools/testing/selftests/net/packetdrill/defaults.sh index 1095a7b22f44..37edd3dc3b07 100755 --- a/tools/testing/selftests/net/packetdrill/defaults.sh +++ b/tools/testing/selftests/net/packetdrill/defaults.sh @@ -51,7 +51,8 @@ sysctl -q net.ipv4.tcp_pacing_ss_ratio=200 sysctl -q net.ipv4.tcp_pacing_ca_ratio=120 sysctl -q net.ipv4.tcp_notsent_lowat=4294967295 > /dev/null 2>&1 -sysctl -q net.ipv4.tcp_fastopen=0x70403 +sysctl -q net.ipv4.tcp_fastopen=0x3 +# Use TFO_COOKIE in ksft_runner.sh for this key. sysctl -q net.ipv4.tcp_fastopen_key=a1a1a1a1-b2b2b2b2-c3c3c3c3-d4d4d4d4 sysctl -q net.ipv4.tcp_syncookies=1 diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh index ef8b25a606d8..0a97d5ae3469 100755 --- a/tools/testing/selftests/net/packetdrill/ksft_runner.sh +++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh @@ -3,21 +3,35 @@ source "$(dirname $(realpath $0))/../../kselftest/ktap_helpers.sh" -readonly ipv4_args=('--ip_version=ipv4 ' - '--local_ip=192.168.0.1 ' - '--gateway_ip=192.168.0.1 ' - '--netmask_ip=255.255.0.0 ' - '--remote_ip=192.0.2.1 ' - '-D CMSG_LEVEL_IP=SOL_IP ' - '-D CMSG_TYPE_RECVERR=IP_RECVERR ') - -readonly ipv6_args=('--ip_version=ipv6 ' - '--mtu=1520 ' - '--local_ip=fd3d:0a0b:17d6::1 ' - '--gateway_ip=fd3d:0a0b:17d6:8888::1 ' - '--remote_ip=fd3d:fa7b:d17d::1 ' - '-D CMSG_LEVEL_IP=SOL_IPV6 ' - '-D CMSG_TYPE_RECVERR=IPV6_RECVERR ') +declare -A ip_args=( + [ipv4]="--ip_version=ipv4 + --local_ip=192.168.0.1 + --gateway_ip=192.168.0.1 + --netmask_ip=255.255.0.0 + --remote_ip=192.0.2.1 + -D TFO_COOKIE=3021b9d889017eeb + -D TFO_COOKIE_ZERO=b7c12350a90dc8f5 + -D CMSG_LEVEL_IP=SOL_IP + -D CMSG_TYPE_RECVERR=IP_RECVERR" + [ipv4-mapped-ipv6]="--ip_version=ipv4-mapped-ipv6 + --local_ip=192.168.0.1 + --gateway_ip=192.168.0.1 + --netmask_ip=255.255.0.0 + --remote_ip=192.0.2.1 + -D TFO_COOKIE=3021b9d889017eeb + -D TFO_COOKIE_ZERO=b7c12350a90dc8f5 + -D CMSG_LEVEL_IP=SOL_IPV6 + -D CMSG_TYPE_RECVERR=IPV6_RECVERR" + [ipv6]="--ip_version=ipv6 + --mtu=1520 + --local_ip=fd3d:0a0b:17d6::1 + --gateway_ip=fd3d:0a0b:17d6:8888::1 + --remote_ip=fd3d:fa7b:d17d::1 + -D TFO_COOKIE=c1d1e9742a47a9bc + -D TFO_COOKIE_ZERO=82af1a8f9a205c34 + -D CMSG_LEVEL_IP=SOL_IPV6 + -D CMSG_TYPE_RECVERR=IPV6_RECVERR" +) if [ $# -ne 1 ]; then ktap_exit_fail_msg "usage: $0 <script>" @@ -35,28 +49,23 @@ failfunc=ktap_test_fail if [[ -n "${KSFT_MACHINE_SLOW}" ]]; then optargs+=('--tolerance_usecs=14000') + failfunc=ktap_test_xfail +fi - # xfail tests that are known flaky with dbg config, not fixable. - # still run them for coverage (and expect 100% pass without dbg). - declare -ar xfail_list=( - "tcp_eor_no-coalesce-retrans.pkt" - "tcp_fast_recovery_prr-ss.*.pkt" - "tcp_slow_start_slow-start-after-win-update.pkt" - "tcp_timestamping.*.pkt" - "tcp_user_timeout_user-timeout-probe.pkt" - "tcp_zerocopy_epoll_.*.pkt" - "tcp_tcp_info_tcp-info-.*-limited.pkt" - ) - readonly xfail_regex="^($(printf '%s|' "${xfail_list[@]}"))$" - [[ "$script" =~ ${xfail_regex} ]] && failfunc=ktap_test_xfail +ip_versions=$(grep -E '^--ip_version=' $script | cut -d '=' -f 2) +if [[ -z $ip_versions ]]; then + ip_versions="ipv4 ipv6 ipv4-mapped-ipv6" +elif [[ ! "$ip_versions" =~ ^ipv[46]$ ]]; then + ktap_exit_fail_msg "Too many or unsupported --ip_version: $ip_versions" + exit "$KSFT_FAIL" fi ktap_print_header -ktap_set_plan 2 +ktap_set_plan $(echo $ip_versions | wc -w) -unshare -n packetdrill ${ipv4_args[@]} ${optargs[@]} $script > /dev/null \ - && ktap_test_pass "ipv4" || $failfunc "ipv4" -unshare -n packetdrill ${ipv6_args[@]} ${optargs[@]} $script > /dev/null \ - && ktap_test_pass "ipv6" || $failfunc "ipv6" +for ip_version in $ip_versions; do + unshare -n packetdrill ${ip_args[$ip_version]} ${optargs[@]} $script > /dev/null \ + && ktap_test_pass $ip_version || $failfunc $ip_version +done ktap_finished diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first.pkt new file mode 100644 index 000000000000..07e9936e70e6 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first.pkt @@ -0,0 +1,24 @@ +// 3rd ACK + 1st data segment lost, data segments with ce + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + ++0.05 < SEWA 0:0(0) win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> +// 3rd ACK lost +// 1st data segment lost ++0.05 < [ce] EAP. 1001:2001(1000) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 > [ect0] WA. 1:1(0) ack 1 <ECN e1b 1 ceb 1000 e0b 1,nop,nop,nop,sack 1001:2001> ++.002 accept(3, ..., ...) = 4 + ++0.2 < [ce] EAP. 1:1001(1000) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop> ++.001 > [ect0] EWA. 1:1(0) ack 2001 <ECN e1b 1 ceb 2000 e0b 1,nop> + ++0.05 < [ce] EAP. 2001:3001(1000) ack 1 win 264 ++.001 > [ect0] . 1:1(0) ack 3001 <ECN e1b 1 ceb 3000 e0b 1,nop> diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first_connect.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first_connect.pkt new file mode 100644 index 000000000000..76b8422b34dc --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first_connect.pkt @@ -0,0 +1,30 @@ +// 3rd ACK + 1st data segment lost, 2nd data segments with ce + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++0.05 < [noecn] SW. 0:0(0) ack 1 win 32767 <mss 1016,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8> +// 3rd ACK lost ++.002 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> + ++0.01 write(4, ..., 2000) = 2000 +// 1st data segment lost + 2nd gets CE ++.002 > [ect0] .5 1:1005(1004) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> ++.000 > [ect0] P.5 1005:2001(996) ack 1 <ECN e1b 1 ceb 0 e0b 1, nop> ++0.05 < [ect0] .6 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 996 e1b 1,nop,nop,nop,sack 1005:2001> + ++0.01 %{ assert tcpi_delivered_ce == 1, tcpi_delivered_ce }% + ++0.002~+0.1 > [ect0] .5 1:1005(1004) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> ++.05 < [ect0] .6 1:1(0) ack 2001 win 264 <ECN e0b 1005 ceb 996 e1b 1,nop> + ++0.01 write(4, ..., 1000) = 1000 ++0~+0.002 > [ect0] P.5 2001:3001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> + ++0.1 < [ect0] .5 1:1001(1000) ack 3001 win 264 <ECN e0b 1 ceb 0 e1b 1,nop> ++0~+0.01 > [ect0] .5 3001:3001(0) ack 1001 <ECN e1b 1 ceb 0 e0b 1001,nop> diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_after_synack_rxmt.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_after_synack_rxmt.pkt new file mode 100644 index 000000000000..84060e490589 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_after_synack_rxmt.pkt @@ -0,0 +1,19 @@ +// Test 3rd ACK flags when SYN-ACK is rexmitted + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++0.05 < [noecn] SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8> ++.002 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> + ++0.1 < [ect0] S. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +// Our code currently sends a challenge ACK +// when it receives a SYN in ESTABLISHED state +// based on the latest SYN ++.002 > [ect0] A. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_ce_updates_received_ce.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_ce_updates_received_ce.pkt new file mode 100644 index 000000000000..d3fe09d0606f --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_ce_updates_received_ce.pkt @@ -0,0 +1,18 @@ +// Third ACK CE increases r.cep + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + ++0.05 < SEWA 0:0(0) win 32767 <mss 1050,nop,nop,sackOK,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> ++0.05 < [ce] W. 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 accept(3, ..., ...) = 4 + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] WAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_lost_data_ce.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_lost_data_ce.pkt new file mode 100644 index 000000000000..d28722db42b1 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_lost_data_ce.pkt @@ -0,0 +1,22 @@ +// 3rd ACK lost, CE for the first data segment + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + ++0.05 < SEWA 0:0(0) win 32767 <mss 1050,nop,nop,sackOK,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> +// 3rd ACK lost ++0.05 < [ce] EAP. 1:1001(1000) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 > [ect0] WA. 1:1(0) ack 1001 <ECN e1b 1 ceb 1000 e0b 1,nop> ++.002 accept(3, ..., ...) = 4 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.05 < [ce] EAP. 1001:2001(1000) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop> ++.001 > [ect0] EWA. 1:1(0) ack 2001 <ECN e1b 1 ceb 2000 e0b 1 ,nop> diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_dups.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_dups.pkt new file mode 100644 index 000000000000..a4d808116e34 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_dups.pkt @@ -0,0 +1,26 @@ +// Test SYN/ACK rexmit triggered 3rd ACK duplicate + CE on first data seg + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> + +// SYN/ACK rexmitted => two 3rd ACKs in-flight ++1.0~+1.1 > SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> +// Delivered 1st 3rd ACK ++0.05 < [ect0] W. 1:1(0) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 accept(3, ..., ...) = 4 + +// Duplicate 3rd ACK delivered ++1.05 < [ect0] W. 1:1(0) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop> + ++0.05 < [ce] EAP. 1:1001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 > [ect0] WA. 1:1(0) ack 1001 <ECN e1b 1 ceb 1000 e0b 1,nop> + +0 read(4, ..., 1000) = 1000 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_acc_ecn_disabled.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_acc_ecn_disabled.pkt new file mode 100644 index 000000000000..410a303c6d49 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_acc_ecn_disabled.pkt @@ -0,0 +1,13 @@ +// Test that when accurate ECN is disabled, +// client uses RFC3168 ECN for SYN + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=1 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > [noecn] SEW 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++0.05 < [noecn] S. 0:0(0) ack 1 win 32767 <mss 1460,sackOK,nop,nop,nop,wscale 8> ++.002 > [noecn] . 1:1(0) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_then_notecn_syn.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_then_notecn_syn.pkt new file mode 100644 index 000000000000..10728114b11b --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_then_notecn_syn.pkt @@ -0,0 +1,28 @@ +// Test that SYN-ACK with ACE flags and without +// ACE flags got dropped. Although we disable ECN, +// we shouldn't consider this as blackholed as +// these are dropped due to congestion + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=2 +` + ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 ++0 bind(3, ..., ...) = 0 ++0 listen(3, 1) = 0 + ++0 < [ect0] SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++.002 > [noecn] SA. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> + +// Retransmit SYN ++0.1 < [noecn] S 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++.002 > [noecn] SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + ++0.1 < [noecn] W. 1:1(0) ack 1 win 320 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 accept(3, ..., ...) = 4 + +// Write with AccECN option but with ip-noecn since we received one SYN with ACE=0 ++0.01 write(4, ..., 100) = 100 ++.002 > [noecn] P5. 1:101(100) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_to_rfc3168.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_to_rfc3168.pkt new file mode 100644 index 000000000000..04d928f0d44d --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_to_rfc3168.pkt @@ -0,0 +1,18 @@ +// Test AccECN -> RFC3168 fallback when sysctl asks for RFC3168 ECN + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=1 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8> ++.002 > SE. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++0.05 < . 1:1(0) ack 1 win 320 ++.002 accept(3, ..., ...) = 4 + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] P. 1:1001(1000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_drop.pkt new file mode 100644 index 000000000000..788af6bea69c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_drop.pkt @@ -0,0 +1,34 @@ +// Client negotiates AccECN and starts sending +// AccECN option in last ACK and data segments +// Middlebox drops AccECN option and client +// reverts to ACE flags only + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=2 +sysctl -q net.ipv4.tcp_ecn_option_beacon=1 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> ++0.05 < [ect0] W. 1:1(0) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 accept(3, ..., ...) = 4 + ++0.05 < [ect0] EAP. 1:1001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 > [ect0] EA. 1:1(0) ack 1001 <ECN e1b 1 ceb 0 e0b 1001,nop> + +0 read(4, ..., 1000) = 1000 + ++0.05 < [ect0] EAP. 1:1001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 > [ect0] EA. 1:1(0) ack 1001 <ECN e1b 1 ceb 0 e0b 2001,nop,nop,nop,sack 1:1001> + ++0.05 < [ect0] EAP. 1:1001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 > [ect0] EA. 1:1(0) ack 1001 <nop,nop,sack 1:1001> + ++0.05 < [ect0] EAP. 1001:2001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 > [ect0] EA. 1:1(0) ack 2001 + +0 read(4, ..., 1000) = 1000 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_lost.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_lost.pkt new file mode 100644 index 000000000000..f5839c2e682d --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_lost.pkt @@ -0,0 +1,38 @@ +// Client negotiates AccECN and starts sending +// AccECN option in last ACK and data segments +// Middlebox accepts AccECN option but some packets +// are lost due to congestion. Client should +// continue to send AccECN option + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=2 +` + ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.102 connect(4, ..., ...) = 0 + ++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++0.1 < [ect0] SW. 0:0(0) ack 1 win 32767 <mss 1024,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8> ++.002 > [ect0] A. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> + +// Send ++0.01 write(4, ..., 3000) = 3000 ++.002 > [ect0] .5 1:1013(1012) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> ++.002 > [ect0] P.5 1013:2025(1012) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> ++.002 > [ect0] P.5 2025:3001(976) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> + +// First two segments were lost due to congestion as SACK was +// received acknowledging 3rd segment ++0.1 < [ect0] .5 1:1(0) ack 1 win 264 <ECN e1b 1 ceb 0 e0b 977,nop,nop,nop,sack 2025:3001> + +// Since data with option was SACKed, we can +// continue to use AccECN option for the rest of +// the connection. This one is a rexmt ++.02~+0.5 > [ect0] .5 1:1013(1012) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> ++0.1 < [ect0] .5 1:1(0) ack 3001 win 264 <ECN e1b 1 ceb 0 e0b 3000,nop> + +// Send new data, it should contain AccECN option ++0.01 write(4, ..., 2000) = 2000 ++.002 > [ect0] .5 3001:4013(1012) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> ++.002 > [ect0] P.5 4013:5001(988) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_clientside_disabled.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_clientside_disabled.pkt new file mode 100644 index 000000000000..c00b36d6a833 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_clientside_disabled.pkt @@ -0,0 +1,12 @@ +// AccECN sysctl server-side only, no ECN/AccECN + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=5 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++0.05 < S. 0:0(0) ack 1 win 32767 <mss 1460,sackOK,nop,nop,nop,wscale 8> ++.002 > . 1:1(0) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_close_local_close_then_remote_fin.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_close_local_close_then_remote_fin.pkt new file mode 100644 index 000000000000..f9c27f39f354 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_close_local_close_then_remote_fin.pkt @@ -0,0 +1,25 @@ +// Test basic connection teardown where local process closes first: +// the local process calls close() first, so we send a FIN, and receive an ACK. +// Then we receive a FIN and ACK it. + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=0 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +.01...0.011 connect(3, ..., ...) = 0 + +0 > [noecn] SEWA 0:0(0) <...> + +0 < [ect1] SW. 0:0(0) ack 1 win 32768 <mss 1000,nop,wscale 6,nop,nop,sackOK> + +0 > [ect0] EW. 1:1(0) ack 1 + + +0 write(3, ..., 1000) = 1000 + +0 > [ect0] P5. 1:1001(1000) ack 1 + +0 < [ect0] .5 1:1(0) ack 1001 win 257 + + +0 close(3) = 0 + +0 > [ect0] F5. 1001:1001(0) ack 1 + +0 < [ect0] .5 1:1(0) ack 1002 win 257 + + +0 < [ect0] F5. 1:1(0) ack 1002 win 257 + +0 > [ect0] . 1002:1002(0) ack 2 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_2ndlargeack.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_2ndlargeack.pkt new file mode 100644 index 000000000000..6d771234124a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_2ndlargeack.pkt @@ -0,0 +1,25 @@ +// Test a large ACK (> ACE field max) + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=0 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++0.05 < [ect0] W. 1:1(0) ack 1 win 264 ++.002 accept(3, ..., ...) = 4 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 14600) = 14600 ++.002 > [ect0] P.5 1:14601(14600) ack 1 ++0.05 < [ect0] .5 1:1(0) ack 1461 win 264 ++0.05 < [ect0] .5 1:1(0) ack 14601 win 264 + ++0.01 %{ assert tcpi_delivered_ce == 8, tcpi_delivered_ce }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_falseoverflow_detect.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_falseoverflow_detect.pkt new file mode 100644 index 000000000000..76384f52b021 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_falseoverflow_detect.pkt @@ -0,0 +1,31 @@ +// Test false overflow detection with option used to rule out overflow + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> ++0.05 < [ect0] W. 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 accept(3, ..., ...) = 4 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + +// Stop sending option to allow easier testing ++0 `sysctl -q net.ipv4.tcp_ecn_option=0` + ++0.002 write(4, ..., 14600) = 14600 ++.002 > [ect0] P.5 1:14601(14600) ack 1 + ++0.05 < [ect0] .5 1:1(0) ack 1460 win 264 <ECN e0b 1461 ceb 0 e1b 1,nop> ++0.05 < [ect0] .5 1:1(0) ack 14601 win 264 <ECN e0b 14601 ceb 0 e1b 1,nop> + ++0.01 %{ +assert tcpi_delivered_ce == 0, tcpi_delivered_ce +assert tcpi_delivered_e0_bytes == 14600, tcpi_delivered_e0_bytes +}% diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack.pkt new file mode 100644 index 000000000000..8bce5dce35a2 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack.pkt @@ -0,0 +1,24 @@ +// Test a large ACK (> ACE field max) + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=0 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++0.05 < [ect0] W. 1:1(0) ack 1 win 264 ++.002 accept(3, ..., ...) = 4 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 14600) = 14600 ++.002 > [ect0] P.5 1:14601(14600) ack 1 ++0.05 < [ect0] .5 1:1(0) ack 14601 win 264 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack2.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack2.pkt new file mode 100644 index 000000000000..5f2b147214f4 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack2.pkt @@ -0,0 +1,25 @@ +// Test a large ACK (> ACE field max) + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=0 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++0.05 < [ect0] W. 1:1(0) ack 1 win 264 ++.002 accept(3, ..., ...) = 4 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 14600) = 14600 ++.002 > [ect0] P.5 1:14601(14600) ack 1 + // Fake CE ++0.05 < [ect0] .6 1:1(0) ack 14601 win 264 + ++0.01 %{ assert tcpi_delivered_ce == 1, tcpi_delivered_ce }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_maxack.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_maxack.pkt new file mode 100644 index 000000000000..fd07bdc14f37 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_maxack.pkt @@ -0,0 +1,25 @@ +// Test a large ACK (at ACE field max delta) + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=0 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++0.05 < [ect0] W. 1:1(0) ack 1 win 264 ++.002 accept(3, ..., ...) = 4 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 14600) = 14600 ++.002 > [ect0] P.5 1:14601(14600) ack 1 + // Fake CE ++0.05 < [ect0] .4 1:1(0) ack 14601 win 264 + ++0.01 %{ assert tcpi_delivered_ce == 7, tcpi_delivered_ce }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_updates.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_updates.pkt new file mode 100644 index 000000000000..cb1e70ff2d26 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_updates.pkt @@ -0,0 +1,70 @@ +// Test basic AccECN CEP/CEB/E0B/E1B functionality & CEP wrapping + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> ++0.05 < [ect0] W. 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 accept(3, ..., ...) = 4 + ++0.01 %{ +assert tcpi_delivered_ce == 0, tcpi_delivered_ce +assert tcpi_delivered_ce_bytes == 0, tcpi_delivered_ce_bytes +}% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> + // Fake CE ++0.05 < [ect0] WA. 1:1(0) ack 1001 win 264 <ECN e0b 1 ceb 1000 e1b 1,nop> + ++0.01 %{ +assert tcpi_delivered_ce == 1, tcpi_delivered_ce +assert tcpi_delivered_ce_bytes == 1000, tcpi_delivered_ce_bytes +}% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1001:2001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> + // Fake ect0 ++0.05 < [ect0] WA. 1:1(0) ack 2001 win 264 <ECN e0b 1001 ceb 1000 e1b 1,nop> + ++0.01 %{ +assert tcpi_delivered_ce == 1, tcpi_delivered_ce +assert tcpi_delivered_e0_bytes == 1000, tcpi_delivered_e0_bytes +}% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 2001:3001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> + // Fake ce ++0.05 < [ect0] EWA. 1:1(0) ack 3001 win 264 <ECN e0b 1001 ceb 2000 e1b 1,nop> + ++0.01 %{ +assert tcpi_delivered_ce == 2, tcpi_delivered_ce +assert tcpi_delivered_ce_bytes == 2000, tcpi_delivered_ce_bytes +}% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 3001:4001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> + // Fake ect1 ++0.05 < [ect0] EWA. 1:1(0) ack 4001 win 264 <ECN e0b 1001 ceb 2000 e1b 1001,nop> + ++0.01 %{ +assert tcpi_delivered_ce == 2, tcpi_delivered_ce +assert tcpi_delivered_e1_bytes == 1000, tcpi_delivered_e1_bytes +}% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 4001:5001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> + // Fake ce ++0.05 < [ect0] . 1:1(0) ack 5001 win 264 <ECN e0b 1001 ceb 3000 e1b 1001,nop> + ++0.01 %{ +assert tcpi_delivered_ce == 3, tcpi_delivered_ce +assert tcpi_delivered_ce_bytes == 3000, tcpi_delivered_ce_bytes +}% diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_ecn3.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_ecn3.pkt new file mode 100644 index 000000000000..6627c7bb2d26 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_ecn3.pkt @@ -0,0 +1,12 @@ +// Test that tcp_ecn=4 uses RFC3168 ECN for SYN + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=4 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.05 connect(4, ..., ...) = 0 + ++.002 > SEW 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++0.05 < S. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++.002 > . 1:1(0) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_ecn_field_updates_opt.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_ecn_field_updates_opt.pkt new file mode 100644 index 000000000000..51879477bb50 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_ecn_field_updates_opt.pkt @@ -0,0 +1,35 @@ +// Test basic AccECN CEP/CEB/E0B/E1B functionality & CEP wrapping + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> ++0.05 < [ect0] W. 1:1(0) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 accept(3, ..., ...) = 4 + ++0.05 < [ce] EAP. 1:1001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 > [ect0] WA. 1:1(0) ack 1001 <ECN e1b 1 ceb 1000 e0b 1,nop> + +0 read(4, ..., 1000) = 1000 + ++0.05 < [ect0] EAP. 1001:2001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 > [ect0] WA. 1:1(0) ack 2001 <ECN e1b 1 ceb 1000 e0b 1001,nop> + +0 read(4, ..., 1000) = 1000 + ++0.05 < [ce] EAP. 2001:3001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 > [ect0] EWA. 1:1(0) ack 3001 <ECN e1b 1 ceb 2000 e0b 1001,nop> + +0 read(4, ..., 1000) = 1000 + ++0.05 < [ect1] EAP. 3001:4001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 > [ect0] EWA. 1:1(0) ack 4001 <ECN e1b 1001 ceb 2000 e0b 1001,nop> + +0 read(4, ..., 1000) = 1000 + ++0.05 < [ce] EAP. 4001:5001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 > [ect0] . 1:1(0) ack 5001 <ECN e1b 1001 ceb 3000 e0b 1001,nop> + +0 read(4, ..., 1000) = 1000 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_ipflags_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_ipflags_drop.pkt new file mode 100644 index 000000000000..0c72fa4a1251 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_ipflags_drop.pkt @@ -0,0 +1,14 @@ +// Test IP flags drop +--tolerance_usecs=50000 + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 1.1 connect(4, ..., ...) = 0 + ++.002 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++.02 ~ +1.1 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++0.05 < S. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++.002 > [noecn] . 1:1(0) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_listen_opt_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_listen_opt_drop.pkt new file mode 100644 index 000000000000..171f9433e55f --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_listen_opt_drop.pkt @@ -0,0 +1,16 @@ +// SYN/ACK option drop test + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> ++.02 ~+2 > SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++.02 ~+5 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++.02 ~+8 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_ack_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_ack_drop.pkt new file mode 100644 index 000000000000..0f65cf56cd2b --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_ack_drop.pkt @@ -0,0 +1,28 @@ +// Test that SYN-ACK with ACE flags and without +// ACE flags got dropped. Although we disable ECN, +// we shouldn't consider this as blackholed as +// these are dropped due to congestion + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=2 +` + ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 ++0 bind(3, ..., ...) = 0 ++0 listen(3, 1) = 0 + ++0 < [noecn] SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++.002 > [noecn] SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> + +// Retransmit SYN-ACK without option ++1~+1.1 > [noecn] SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +// SYN-ACK maybe getting blackholed, disable ECN ++2~+2.2 > [noecn] S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++4~+4.4 > [noecn] S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +// Received an ACK after sending 3rd retransmission, not a blackhole ++0.1 < [noecn] . 1:1(0) ack 1 win 320 ++.002 accept(3, ..., ...) = 4 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_drop.pkt new file mode 100644 index 000000000000..343181633980 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_drop.pkt @@ -0,0 +1,18 @@ +// Test that SYN with ACE flags and without +// ACE flags got dropped. Although we disable +// ECN, we shouldn't consider this as blackholed +// as these are dropped due to congestion + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 3.1 connect(4, ..., ...) = 0 + ++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++.02~+1.1 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++.02~+1.1 > [noecn] S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++.02~+1.1 > [noecn] S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++0.1 < [noecn] S. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++0~+0.01 > [noecn] . 1:1(0) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_bleach.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_bleach.pkt new file mode 100644 index 000000000000..37dabc4603c8 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_bleach.pkt @@ -0,0 +1,23 @@ +// Test AccECN flags bleach + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> ++0.05 < [ect0] . 1:1(0) ack 1 win 320 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 accept(3, ..., ...) = 4 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [noecn] EAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> ++0.05 < [ect0] EAP. 1:1(0) ack 1001 win 320 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_connect.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_connect.pkt new file mode 100644 index 000000000000..5b14892fda51 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_connect.pkt @@ -0,0 +1,23 @@ +// Test basic AccECN negotiation + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++0.05 < SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8> ++.002 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> ++.05 < [ect0] EAP. 1:1(0) ack 1001 win 256 <ECN e0b 1001 ceb 0 e1b 0,nop> + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1001:2001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_listen.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_listen.pkt new file mode 100644 index 000000000000..25f7cb2feb25 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_listen.pkt @@ -0,0 +1,26 @@ +// Test basic AccECN negotiation + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> ++0.05 < [ect0] W. 1:1(0) ack 1 win 320 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 accept(3, ..., ...) = 4 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> ++0.05 < [ect0] EAP. 1:1(0) ack 1001 win 320 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1001:2001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_noopt_connect.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_noopt_connect.pkt new file mode 100644 index 000000000000..50e08c492a69 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_noopt_connect.pkt @@ -0,0 +1,23 @@ +// Test basic AccECN negotiation without option + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++0.05 < SW. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++.002 > [ect0] W. 1:1(0) ack 1 + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1:1001(1000) ack 1 ++.05 < [ect0] EAP. 1:1(0) ack 1001 win 256 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1001:2001(1000) ack 1 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_optenable.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_optenable.pkt new file mode 100644 index 000000000000..2904f1ba9975 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_optenable.pkt @@ -0,0 +1,23 @@ +// Test basic AccECN negotiation, late option enable + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++0.05 < SW. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++.002 > [ect0] W. 1:1(0) ack 1 + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1:1001(1000) ack 1 ++.05 < [ect0] EAP. 1:1(0) ack 1001 win 256 <ECN e0b 1001 ceb 0 e1b 1,nop> + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1001:2001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_no_ecn_after_accecn.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_no_ecn_after_accecn.pkt new file mode 100644 index 000000000000..64e0fc1c1f14 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_no_ecn_after_accecn.pkt @@ -0,0 +1,20 @@ +// Test client behavior on receiving a non ECN SYN-ACK +// after receiving an AccECN SYN-ACK and moving to +// ESTABLISHED state + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=2 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> +// Receive an AccECN SYN-ACK and move to ESTABLISHED ++0.05 < [noecn] SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8> ++.002 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> + +// Receive a non ECN SYN-ACK and send a challenge ACK with ACE feedback ++0.1 < [noecn] S. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++.002 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_noopt.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_noopt.pkt new file mode 100644 index 000000000000..f407c629a3f7 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_noopt.pkt @@ -0,0 +1,27 @@ +// Test basic AccECN negotiation with option off using sysctl + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=0 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++0.05 < [ect0] W. 1:1(0) ack 1 win 320 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 accept(3, ..., ...) = 4 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1:1001(1000) ack 1 ++0.05 < [ect0] EAP. 1:1(0) ack 1001 win 320 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1001:2001(1000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_noprogress.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_noprogress.pkt new file mode 100644 index 000000000000..32454e7187f9 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_noprogress.pkt @@ -0,0 +1,27 @@ +// Test no progress filtering + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> ++0.05 < [ect0] W. 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 accept(3, ..., ...) = 4 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> + // Fake CE and claim no progress ++0.05 < [ect0] WA. 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 1000 e1b 1,nop> + ++0.01 %{ +assert tcpi_delivered_ce == 0, tcpi_delivered_ce +assert tcpi_delivered_ce_bytes == 0, tcpi_delivered_ce_bytes +}% diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_notecn_then_accecn_syn.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_notecn_then_accecn_syn.pkt new file mode 100644 index 000000000000..6597d5f2d778 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_notecn_then_accecn_syn.pkt @@ -0,0 +1,28 @@ +// Test that SYN-ACK with ACE flags and without +// ACE flags got dropped. Although we disable ECN, +// we shouldn't consider this as blackholed as +// these are dropped due to congestion + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=2 +` + ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 ++0 bind(3, ..., ...) = 0 ++0 listen(3, 1) = 0 + ++0 < [noecn] S 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++.002 > [noecn] S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +// Retransmit SYN ++0.1 < [ect0] SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++.002 > [noecn] S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + ++0.1 < [noecn] . 1:1(0) ack 1 win 320 ++.002 accept(3, ..., ...) = 4 + +// Write with AccECN option but with ip-noecn since we received one SYN with ACE=0 ++0.01 write(4, ..., 100) = 100 ++.002 > [noecn] P. 1:101(100) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_fallback.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_fallback.pkt new file mode 100644 index 000000000000..0f97dfcfa82d --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_fallback.pkt @@ -0,0 +1,18 @@ +// Test RFC3168 fallback when sysctl asks for AccECN + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEW 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8> ++.002 > SE. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++0.05 < . 1:1(0) ack 1 win 320 ++.002 accept(3, ..., ...) = 4 + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] P. 1:1001(1000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_rfc3168.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_rfc3168.pkt new file mode 100644 index 000000000000..9baffdd66fe5 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_rfc3168.pkt @@ -0,0 +1,18 @@ +// Test RFC3168 ECN when sysctl asks for RFC3168 ECN + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=1 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEW 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8> ++.002 > SE. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++0.05 < . 1:1(0) ack 1 win 320 ++.002 accept(3, ..., ...) = 4 + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] P. 1:1001(1000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab.pkt new file mode 100644 index 000000000000..3fc56f9c6a6f --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab.pkt @@ -0,0 +1,28 @@ +// Test SACK space grab to fit AccECN option +--tcp_ts_tick_usecs=1000 + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> ++0.05 < [ect0] W. 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 accept(3, ..., ...) = 4 + ++.01 < [ect1] EAP. 1001:2001(1000) ack 1 win 264 ++0.002 > [ect0] EA. 1:1(0) ack 1 <ECN e1b 1001 ceb 0 e0b 1,nop,nop,nop,sack 1001:2001> ++.01 < [ect0] EAP. 3001:4001(1000) ack 1 win 264 ++0.002 > [ect0] EA. 1:1(0) ack 1 <ECN e1b 1001 ceb 0 e0b 1001,nop,nop,nop,sack 3001:4001 1001:2001> ++.01 < [ce] EAP. 5001:6001(1000) ack 1 win 264 ++0.002 > [ect0] WA. 1:1(0) ack 1 <ECN e1b 1001 ceb 1000 e0b 1001,nop,nop,nop,sack 5001:6001 3001:4001 1001:2001> +// DSACK works? ++.01 < [ect0] EAP. 5001:6001(1000) ack 1 win 264 ++0.002 > [ect0] WA. 1:1(0) ack 1 <ECN e1b 1001 ceb 1000 e0b 2001,nop,nop,nop,sack 5001:6001 5001:6001 3001:4001> ++.01 < [ect1] EAP. 6001:7001(1000) ack 1 win 264 ++0.002 > [ect0] WA. 1:1(0) ack 1 <ECN e1b 2001 ceb 1000 e0b 2001,nop,nop,nop,sack 5001:7001 3001:4001 1001:2001> diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab_with_ts.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab_with_ts.pkt new file mode 100644 index 000000000000..1c075b5d81ae --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab_with_ts.pkt @@ -0,0 +1,39 @@ +// Test SACK space grab to fit AccECN option +--tcp_ts_tick_usecs=1000 + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 <mss 1050,sackOK,TS val 1 ecr 0,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,sackOK,TS val 100 ecr 1,ECN e1b 1 ceb 0 e0b 1,nop,nop,wscale 8> ++0.05 < [ect0] W. 1:1(0) ack 1 win 264 <nop,nop,TS val 2 ecr 100,ECN e0b 1 ceb 0 e1b 1,nop> ++.002 accept(3, ..., ...) = 4 + +// One SACK block should allow all 3 AccECN fields: ++.01 < [ect1] EAP. 1001:2001(1000) ack 1 win 264 <nop,nop,TS val 3 ecr 100> ++0.002 > [ect0] EA. 1:1(0) ack 1 <nop,nop,TS val 160 ecr 2,ECN e1b 1001 ceb 0 e0b 1,nop,nop,nop,sack 1001:2001> + +// Two SACK blocks should fit w/ AccECN if we only need to use 2 AccECN fields: check ect1 arriving. ++.01 < [ect1] EAP. 3001:4001(1000) ack 1 win 264 <nop,nop,TS val 4 ecr 100> ++0.002 > [ect0] EA. 1:1(0) ack 1 <nop,nop,TS val 172 ecr 2,ECN e1b 2001 ceb 0,nop,nop,sack 3001:4001 1001:2001> + +// Two SACK blocks should fit w/ AccECN if we only need to use 2 AccECN fields: check CE arriving. ++.01 < [ce] EAP. 5001:6001(1000) ack 1 win 264 <nop,nop,TS val 5 ecr 100> ++0.002 > [ect0] WA. 1:1(0) ack 1 <nop,nop,TS val 184 ecr 2,ECN e1b 2001 ceb 1000,nop,nop,sack 5001:6001 3001:4001> + +// Check that DSACK works, using 2 SACK blocks in total, if we only need to use 2 AccECN fields: check ect1 arriving. ++.01 < [ect1] EAP. 5001:6001(1000) ack 1 win 264 <nop,nop,TS val 5 ecr 100> ++0.002 > [ect0] WA. 1:1(0) ack 1 <nop,nop,TS val 196 ecr 2,ECN e1b 3001 ceb 1000,nop,nop,sack 5001:6001 5001:6001> + +// Check the case where the AccECN option doesn't fit, because sending ect0 +// with order 1 would rquire 3 AccECN fields, +// and TS (12 bytes) + 2 SACK blocks (20 bytes) + 3 AccECN fields (2 + 3*3 bytes) > 40 bytes. +// That's OK; Linux TCP AccECN is optimized for the ECT1 case, not ECT0. ++.01 < [ect0] EAP. 6001:7001(1000) ack 1 win 264 <nop,nop,TS val 5 ecr 100> ++0.002 > [ect0] WA. 1:1(0) ack 1 <nop,nop,TS val 204 ecr 2,nop,nop,sack 5001:7001 3001:4001 1001:2001> diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled1.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled1.pkt new file mode 100644 index 000000000000..6b88ab78bfce --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled1.pkt @@ -0,0 +1,20 @@ +// Test against classic ECN server +// Not-ECT on SYN and server sets 1|0|1 (AE is unused for classic ECN) + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++0.05 < [noecn] SEA. 0:0(0) ack 1 win 32767 <mss 1460,sackOK,TS val 700 ecr 100,nop,wscale 8> ++.002 > [ect0] W. 1:1(0) ack 1 <nop, nop, TS val 200 ecr 700> + ++0 write(4, ..., 100) = 100 ++.002 > [ect0] P.5 1:101(100) ack 1 <nop,nop,TS val 300 ecr 700> ++0 close(4) = 0 + ++.002 > [ect0] F.5 101:101(0) ack 1 <nop,nop,TS val 400 ecr 700> ++0.1 < [noecn] R. 1:1(0) ack 102 win 4242 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled2.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled2.pkt new file mode 100644 index 000000000000..d24ada008ece --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled2.pkt @@ -0,0 +1,20 @@ +// Test against classic ECN server +// Not-ECT on SYN and server sets 0|0|1 + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++0.05 < [noecn] SE. 0:0(0) ack 1 win 32767 <mss 1460,sackOK,TS val 700 ecr 100,nop,wscale 8> ++.002 > [noecn] . 1:1(0) ack 1 <nop, nop, TS val 200 ecr 700> + ++0 write(4, ..., 100) = 100 ++.002 > [ect0] P. 1:101(100) ack 1 <nop,nop,TS val 300 ecr 700> ++0 close(4) = 0 + ++0 > [noecn] F. 101:101(0) ack 1 <...> ++0.1 < R. 1:1(0) ack 102 win 4242 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_broken.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_broken.pkt new file mode 100644 index 000000000000..a20d7e890ee1 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_broken.pkt @@ -0,0 +1,19 @@ +// Test against broken server (1|1|1) + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++0.05 < [noecn] SEWA. 0:0(0) ack 1 win 32767 <mss 1460,sackOK,TS val 700 ecr 100,nop,wscale 8> ++.002 > [noecn] . 1:1(0) ack 1 <nop, nop, TS val 200 ecr 700> + ++0 write(4, ..., 100) = 100 ++.002 > [noecn] P. 1:101(100) ack 1 <nop,nop,TS val 300 ecr 700> ++0 close(4) = 0 + ++.002 > [noecn] F. 101:101(0) ack 1 <...> ++0.1 < [noecn] R. 1:1(0) ack 102 win 4242 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_ecn_disabled.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_ecn_disabled.pkt new file mode 100644 index 000000000000..428255bedab7 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_ecn_disabled.pkt @@ -0,0 +1,19 @@ +// Test against Non ECN server (0|0|0) + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++0.05 < [noecn] S. 0:0(0) ack 1 win 32767 <mss 1460,sackOK,TS val 700 ecr 100,nop,wscale 8> ++.002 > [noecn] . 1:1(0) ack 1 <nop, nop, TS val 200 ecr 700> + ++0 write(4, ..., 100) = 100 ++.002 > [noecn] P. 1:101(100) ack 1 <nop,nop,TS val 300 ecr 700> ++0 close(4) = 0 + ++.002 > [noecn] F. 101:101(0) ack 1 <nop,nop,TS val 400 ecr 700> ++0.1 < [noecn] R. 1:1(0) ack 102 win 4242 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_only.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_only.pkt new file mode 100644 index 000000000000..e9a5a0d3677c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_only.pkt @@ -0,0 +1,18 @@ +// Test AccECN with sysctl set to server-side only + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=5 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> ++0.05 < [ect0] W. 1:1(0) ack 1 win 320 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 accept(3, ..., ...) = 4 + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_acked_after_retransmit.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_acked_after_retransmit.pkt new file mode 100644 index 000000000000..412fa903105c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_acked_after_retransmit.pkt @@ -0,0 +1,18 @@ +// Test that SYN with ACE flags was Acked +// after 2nd retransmission. In this case, +// since we got SYN-ACK that supports Accurate +// ECN, we consider this as successful negotiation + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 2.1 connect(4, ..., ...) = 0 + ++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++1~+1.1 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++1~+1.1 > [noecn] S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> + ++0.1 < [noecn] SW. 0:0(0) ack 1 win 32767 <mss 1016,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8> ++0~+0.01 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_drop.pkt new file mode 100644 index 000000000000..4622754a2270 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_drop.pkt @@ -0,0 +1,16 @@ +// Test that SYN with ACE flags got dropped +// We retry one more time with ACE and then +// fallback to disabled ECN + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 2.1 connect(4, ..., ...) = 0 + ++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++1~+1.1 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++1~+1.1 > [noecn] S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++0.1 < [noecn] S. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++0~+0.01 > [noecn] . 1:1(0) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_acked_after_retransmit.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_acked_after_retransmit.pkt new file mode 100644 index 000000000000..ee15f108cafe --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_acked_after_retransmit.pkt @@ -0,0 +1,27 @@ +// Test that SYN-ACK with ACE flags was Acked +// after 2nd retransmission. In this case, +// since we got the last ACK that supports Accurate +// ECN, we consider this as successful negotiation + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=2 +` + ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 ++0 bind(3, ..., ...) = 0 ++0 listen(3, 1) = 0 + ++0 < [noecn] SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++.002 > [noecn] SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> + +// Retransmit SYN-ACK without option ++1~+1.1 > [noecn] SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +// SYN-ACK maybe getting blackholed, disable ECN ++2~+2.2 > [noecn] S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +// Received an ACK with ACE flags, state should be set to negotiation succeeded ++0.1 < [noecn] W. 1:1(0) ack 1 win 320 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 accept(3, ..., ...) = 4 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_drop.pkt new file mode 100644 index 000000000000..ccfe353a8ee4 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_drop.pkt @@ -0,0 +1,26 @@ +// Test that SYN-ACK with ACE flags got dropped +// We retry one more time with ACE and then +// fallback to disabled ECN + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=2 +` + ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 ++0 bind(3, ..., ...) = 0 ++0 listen(3, 1) = 0 + ++0 < [noecn] SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++.002 > [noecn] SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> + +// Retransmit SYN-ACK without option ++1~+1.1 > [noecn] SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +// SYN-ACK maybe getting blackholed, disable ECN ++2~+2.2 > [noecn] S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +// Received an ACK with no ACE flags, state should be set to blackholed ++0.1 < [noecn] . 1:1(0) ack 1 win 320 ++0 accept(3, ..., ...) = 4 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ce.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ce.pkt new file mode 100644 index 000000000000..dc83f7a18180 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ce.pkt @@ -0,0 +1,13 @@ +// Test AccECN ECN field reflector in SYNACK + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < [ce] SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8> ++.002 > SWA. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect0.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect0.pkt new file mode 100644 index 000000000000..e63a8d018c37 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect0.pkt @@ -0,0 +1,13 @@ +// Test AccECN ECN field reflector in SYNACK + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < [ect0] SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8> ++.002 > SA. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect1.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect1.pkt new file mode 100644 index 000000000000..23c0e43b3dbe --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect1.pkt @@ -0,0 +1,13 @@ +// Test AccECN ECN field reflector in SYNACK + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < [ect1] SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8> ++.002 > SEW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce.pkt new file mode 100644 index 000000000000..c3497738f680 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce.pkt @@ -0,0 +1,27 @@ +// Test SYNACK CE & received_ce update + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=2 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++0.05 < [ce] SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8> ++.002 > [ect0] WA. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> + ++0.01 write(4, ..., 100) = 100 ++.002 > [ect0] P.6 1:101(100) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> ++0.05 < [ect0] P.5 1:101(100) ack 101 win 256 <ECN e0b 101 ceb 0 e1b 1,nop> ++.002 > [ect0] .6 101:101(0) ack 101 <ECN e1b 1 ceb 0 e0b 101,nop> + ++0.01 write(4, ..., 100) = 100 ++.002 > [ect0] P.6 101:201(100) ack 101 <ECN e1b 1 ceb 0 e0b 101,nop> + ++0.1 < [ect1] P.5 201:301(100) ack 201 win 256 <ECN e0b 101 ceb 0 e1b 1,nop> ++.002 > [ect0] .6 201:201(0) ack 101 <ECN e1b 101 ceb 0 e0b 101,nop,nop,nop,sack 201:301> + ++0.01 < [ce] .6 401:501(100) ack 201 win 256 <ECN e0b 101 ceb 0 e1b 1,nop> ++.002 > [ect0] .7 201:201(0) ack 101 <ECN e1b 101 ceb 100 e0b 101,nop,nop,nop,sack 401:501 201:301> diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce_updates_delivered_ce.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce_updates_delivered_ce.pkt new file mode 100644 index 000000000000..5fd77f466572 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce_updates_delivered_ce.pkt @@ -0,0 +1,22 @@ +// Reflected SYNACK CE mark increases delivered_ce + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_fallback=0 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + ++0.05 < SEWA 0:0(0) win 32767 <mss 1050,nop,nop,sackOK,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> +// Fake ce for prev, ECT validator must be disabled for this to work ++0.05 < [ect0] WA. 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 accept(3, ..., ...) = 4 + ++0.01 %{ assert tcpi_delivered_ce == 1, tcpi_delivered_ce }% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect0.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect0.pkt new file mode 100644 index 000000000000..f6ad1ea5c0c4 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect0.pkt @@ -0,0 +1,24 @@ +// Test SYN=0 reflector + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++0.05 < [ect0] SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8> ++.002 > [ect0] A. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> + ++0.01 write(4, ..., 100) = 100 ++.002 > [ect0] P.5 1:101(100) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> ++0.05 < [ect0] P.5 1:1(0) ack 101 win 256 <ECN e0b 101 ceb 0 e1b 1,nop> + ++0.01 < [ect0] P.5 1:101(100) ack 101 win 256 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 > [ect0] .5 101:101(0) ack 101 <ECN e1b 1 ceb 0 e0b 101,nop> ++0 read(4, ..., 100) = 100 + ++0 close(4) = 0 ++0 > F.5 101:101(0) ack 101 <...> ++0.1 < R. 101:101(0) ack 102 win 4242 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect1.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect1.pkt new file mode 100644 index 000000000000..7ecfc5fb9dbb --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect1.pkt @@ -0,0 +1,24 @@ +// Test SYN=0 reflector + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++0.05 < [ect1] SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8> ++.002 > [ect0] EW. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> + ++0.01 write(4, ..., 100) = 100 ++.002 > [ect0] P.5 1:101(100) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> ++0.05 < [ect1] P.5 1:1(0) ack 101 win 256 <ECN e0b 101 ceb 0 e1b 1,nop> + ++0.01 < [ect1] P.5 1:101(100) ack 101 win 256 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 > [ect0] .5 101:101(0) ack 101 <ECN e1b 101 ceb 0 e0b 1,nop> ++0 read(4, ..., 100) = 100 + ++0 close(4) = 0 ++0 > F5. 101:101(0) ack 101 <...> ++0.1 < R. 101:101(0) ack 102 win 4242 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rexmit.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rexmit.pkt new file mode 100644 index 000000000000..9e0959782ef5 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rexmit.pkt @@ -0,0 +1,15 @@ +// Test 3rd ACK flags when SYN-ACK is rexmitted + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++0.05 < SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8> ++.002 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> + ++0.05 < SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8> ++.002 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rxmt.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rxmt.pkt new file mode 100644 index 000000000000..a5a41633af07 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rxmt.pkt @@ -0,0 +1,25 @@ +// Test that we retransmit SYN-ACK with ACE and without +// AccECN options after +// SYN-ACK was lost and TCP moved to TCPS_SYN_RECEIVED + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=2 +` + ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 ++0 bind(3, ..., ...) = 0 ++0 listen(3, 1) = 0 + ++0 < [noecn] SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++.002 > [noecn] SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> + +// Retransmit SYN-ACK without option ++1~+1.1 > [noecn] SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++0.1 < [noecn] W. 1:1(0) ack 1 win 320 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 accept(3, ..., ...) = 4 + +// We try to write with AccECN option ++0.01 write(4, ..., 100) = 100 ++.002 > [ect0] P5. 1:101(100) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_tsnoprogress.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_tsnoprogress.pkt new file mode 100644 index 000000000000..f3fe2f098966 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_tsnoprogress.pkt @@ -0,0 +1,26 @@ +// Test TS progress filtering +--tcp_ts_tick_usecs=1000 +--tolerance_usecs=7000 + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 <mss 1050,sackOK,TS val 1 ecr 0,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,sackOK,TS val 10 ecr 1,ECN e1b 1 ceb 0 e0b 1,nop,nop,wscale 8> ++0.05 < [ect0] W. 1:1(0) ack 1 win 264 <nop,nop,TS val 2 ecr 10> ++.002 accept(3, ..., ...) = 4 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1:1001(1000) ack 1 <nop,nop,TS val 83 ecr 2> + // Fake CE and claim no progress ++0.05 < [ect0] WA. 1:1(0) ack 1 win 264 <nop,nop,TS val 2 ecr 83> + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_tsprogress.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_tsprogress.pkt new file mode 100644 index 000000000000..1446799d2481 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_tsprogress.pkt @@ -0,0 +1,25 @@ +// Test TS progress filtering +--tcp_ts_tick_usecs=1000 + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 <mss 1050,sackOK,TS val 1 ecr 0,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,sackOK,TS val 10 ecr 1,ECN e1b 1 ceb 0 e0b 1,nop,nop,wscale 8> ++0.05 < [ect0] W. 1:1(0) ack 1 win 264 <nop,nop,TS val 2 ecr 10> ++.002 accept(3, ..., ...) = 4 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1:1001(1000) ack 1 <nop,nop,TS val 83 ecr 2> + // Fake CE and claim no progress ++0.05 < [ect0] WA. 1:1(0) ack 1 win 264 <nop,nop,TS val 3 ecr 83> + ++0.01 %{ assert tcpi_delivered_ce == 1, tcpi_delivered_ce }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_basic_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_basic_client.pkt new file mode 100644 index 000000000000..319f81dd717d --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_basic_client.pkt @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Minimal active open. +// First to close connection. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 + + // Connect to server: active open: three-way handshake + +0...0 connect(4, ..., ...) = 0 + +0 > S 0:0(0) <mss 1460,sackOK,TS val 0 ecr 0,nop,wscale 8> + +0 < S. 0:0(0) ack 1 win 65535 <mss 1460,sackOK,nop,nop,nop,wscale 7> + +0 > . 1:1(0) ack 1 + + // Send data + +0 send(4, ..., 1000, 0) = 1000 + +0 > P. 1:1001(1000) ack 1 + +0 < . 1:1(0) ack 1001 win 257 + + +0 close(4) = 0 + +0 > F. 1001:1001(0) ack 1 + +0 < F. 1:1(0) ack 1002 win 257 + +0 > . 1002:1002(0) ack 2 diff --git a/tools/testing/selftests/net/packetdrill/tcp_basic_server.pkt b/tools/testing/selftests/net/packetdrill/tcp_basic_server.pkt new file mode 100644 index 000000000000..e72a291b666e --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_basic_server.pkt @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Minimal passive open. +// Peer is first to close. + +`./defaults.sh` + + // Open listener socket + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + // Incoming connection: passive open: three-way handshake + +0 < S 0:0(0) win 65535 <mss 1000,sackOK,nop,nop,nop,wscale 8> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + // Open connection socket and close listener socket + +0 accept(3, ..., ...) = 4 + +0 close(3) = 0 + + // Peer sends data: acknowledge and receive + +0 < P. 1:1001(1000) ack 1 win 257 + +0 > . 1:1(0) ack 1001 + +0 recv(4, ..., 1000, 0) = 1000 + + // Peer initiates connection close + +0 < F. 1001:1001(0) ack 1 win 257 + +.04 > . 1:1(0) ack 1002 + + // Local socket also closes its side + +0 close(4) = 0 + +0 > F. 1:1(0) ack 1002 + +0 < . 1002:1002(0) ack 2 win 257 diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt index 914eabab367a..657e42ca65b5 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 // Test for blocking read. + --tolerance_usecs=10000 +--mss=1000 `./defaults.sh` diff --git a/tools/testing/selftests/net/packetdrill/tcp_close_no_rst.pkt b/tools/testing/selftests/net/packetdrill/tcp_close_no_rst.pkt new file mode 100644 index 000000000000..eef01d5f1118 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_close_no_rst.pkt @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 + +--mss=1000 + +`./defaults.sh` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK> + +.1 < . 1:1(0) ack 1 win 32792 + + + +0 accept(3, ..., ...) = 4 + +0 < . 1:1001(1000) ack 1 win 32792 + +0 > . 1:1(0) ack 1001 + +0 read(4, ..., 1000) = 1000 + +// resend the payload + a FIN + +0 < F. 1:1001(1000) ack 1 win 32792 +// Why do we have a delay and no dsack ? + +0~+.04 > . 1:1(0) ack 1002 + + +0 close(4) = 0 + +// According to RFC 2525, section 2.17 +// we should _not_ send an RST here, because there was no data to consume. + +0 > F. 1:1(0) ack 1002 diff --git a/tools/testing/selftests/net/packetdrill/tcp_disorder_fin_in_FIN_WAIT.pkt b/tools/testing/selftests/net/packetdrill/tcp_disorder_fin_in_FIN_WAIT.pkt new file mode 100644 index 000000000000..336cbf7815c8 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_disorder_fin_in_FIN_WAIT.pkt @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Check fix in 795a7dfbc3d9 ("net: tcp: accept old ack during closing") + +// Set up config. +`./defaults.sh` + +// Initialize a server socket. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 65535 <mss 1000,sackOK,nop,nop,nop,wscale 7> + * > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 shutdown(4, SHUT_WR) = 0 + * > F. 1:1(0) ack 1 + +// We expect to receive one ACK. +// But what happens if a FIN was already in transmt and received out-of-order ? + + +0 < . 2:2(0) ack 2 win 257 + +// This FIN packet was sent before the prior ACK (see ack 1). + +0 < F. 1:1(0) ack 1 win 257 + +// Even if the FIN is received out-of-order, we should ACK it. + + * > . 2:2(0) ack 2 diff --git a/tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt b/tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt new file mode 100644 index 000000000000..c790d0af635e --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test various DSACK (RFC 2883) behaviors. + +--mss=1000 + +`./defaults.sh` + + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 1024 + +0 accept(3, ..., ...) = 4 + +// First SACK range. + +0 < P. 1001:2001(1000) ack 1 win 1024 + +0 > . 1:1(0) ack 1 <nop, nop, sack 1001:2001> + +// Check SACK coalescing (contiguous sequence). + +0 < P. 2001:3001(1000) ack 1 win 1024 + +0 > . 1:1(0) ack 1 <nop,nop,sack 1001:3001> + +// Check we have two SACK ranges for non contiguous sequences. + +0 < P. 4001:5001(1000) ack 1 win 1024 + +0 > . 1:1(0) ack 1 <nop,nop,sack 4001:5001 1001:3001> + +// Three ranges. + +0 < P. 7001:8001(1000) ack 1 win 1024 + +0 > . 1:1(0) ack 1 <nop,nop,sack 7001:8001 4001:5001 1001:3001> + +// DSACK (1001:3001) + SACK (6001:7001) + +0 < P. 1:6001(6000) ack 1 win 1024 + +0 > . 1:1(0) ack 6001 <nop,nop,sack 1001:3001 7001:8001> + +// DSACK (7001:8001) + +0 < P. 6001:8001(2000) ack 1 win 1024 + +0 > . 1:1(0) ack 8001 <nop,nop,sack 7001:8001> + +// DSACK for an older segment. + +0 < P. 1:1001(1000) ack 1 win 1024 + +0 > . 1:1(0) ack 8001 <nop,nop,sack 1:1001> diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-cookie-not-reqd.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-cookie-not-reqd.pkt new file mode 100644 index 000000000000..32aff9bc4052 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-cookie-not-reqd.pkt @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Basic TFO server test +// +// Test TFO_SERVER_COOKIE_NOT_REQD flag on receiving +// SYN with data but without Fast Open cookie option. + +`./defaults.sh + ./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen=0x202` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + +// Since TFO_SERVER_COOKIE_NOT_REQD, a TFO socket will be created with +// the data accepted. + +0 < S 0:1000(1000) win 32792 <mss 1460,sackOK,nop,nop> + +0 > S. 0:0(0) ack 1001 <mss 1460,nop,nop,sackOK> + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + +0 read(4, ..., 1024) = 1000 + +// Data After SYN will be accepted too. + +0 < . 1001:2001(1000) ack 1 win 5840 + +0 > . 1:1(0) ack 2001 + +// Should change the implementation later to set the SYN flag as well. + +0 read(4, ..., 1024) = 1000 + +0 write(4, ..., 1000) = 1000 + +0 > P. 1:1001(1000) ack 2001 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-no-setsockopt.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-no-setsockopt.pkt new file mode 100644 index 000000000000..649997a58099 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-no-setsockopt.pkt @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Basic TFO server test +// +// Test TFO_SERVER_WO_SOCKOPT1 without setsockopt(TCP_FASTOPEN) + +`./defaults.sh + ./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen=0x402` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + + +0 read(4, ..., 512) = 10 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-non-tfo-listener.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-non-tfo-listener.pkt new file mode 100644 index 000000000000..4a00e0d994f2 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-non-tfo-listener.pkt @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Basic TFO server test +// +// Server w/o TCP_FASTOPEN socket option + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,FO TFO_COOKIE> + +// Data is ignored since TCP_FASTOPEN is not set on the listener + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK> + + +0 accept(3, ..., ...) = -1 EAGAIN (Resource temporarily unavailable) + +// The above should block until ack comes in below. + +0 < . 1:31(30) ack 1 win 5840 + +0 accept(3, ..., ...) = 4 + + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }% + +0 read(4, ..., 512) = 30 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-pure-syn-data.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-pure-syn-data.pkt new file mode 100644 index 000000000000..345ed26ff7f8 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-pure-syn-data.pkt @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Basic TFO server test +// +// Test that TFO-enabled server would not respond SYN-ACK with any TFO option +// when receiving a pure SYN-data. It should respond a pure SYN-ack. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 999000:999040(40) win 32792 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 6> + +0 > S. 1234:1234(0) ack 999001 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 100 + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }% + +0 close(3) = 0 + +// Test ECN-setup SYN with ECN disabled because this has happened in reality + +0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < SEW 999000:999040(40) win 32792 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 6> + +0 > S. 1234:1234(0) ack 999001 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 100 + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }% + +0 close(3) = 0 + +// Test ECN-setup SYN w/ ECN enabled + +0 `sysctl -q net.ipv4.tcp_ecn=2` + +0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < SEW 999000:999040(40) win 32792 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 6> + +0 > SE. 1234:1234(0) ack 999001 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 100 + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }% + +0 close(3) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-rw.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-rw.pkt new file mode 100644 index 000000000000..98e6f84497cd --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-rw.pkt @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Basic TFO server test +// +// Test TFO server with SYN that has TFO cookie and data. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + + +0 read(4, ..., 512) = 10 + +0 write(4, ..., 100) = 100 + +0 > P. 1:101(100) ack 11 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-zero-payload.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-zero-payload.pkt new file mode 100644 index 000000000000..95b1047ffdd5 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-zero-payload.pkt @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Basic TFO server test +// +// Test zero-payload packet w/ valid TFO cookie - a TFO socket will +// still be created and accepted but read() will not return until a +// later pkt with 10 byte. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:0(0) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK> + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }% + +// A TFO socket is created and is writable. + +0 write(4, ..., 100) = 100 + +0 > P. 1:101(100) ack 1 + +0...0.300 read(4, ..., 512) = 10 + +.3 < P. 1:11(10) ack 1 win 5840 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_client-ack-dropped-then-recovery-ms-timestamps.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_client-ack-dropped-then-recovery-ms-timestamps.pkt new file mode 100644 index 000000000000..f75efd51ed0c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_client-ack-dropped-then-recovery-ms-timestamps.pkt @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// A reproducer case for a TFO SYNACK RTO undo bug in: +// 794200d66273 ("tcp: undo cwnd on Fast Open spurious SYNACK retransmit") +// This sequence that tickles this bug is: +// - Fast Open server receives TFO SYN with data, sends SYNACK +// - (client receives SYNACK and sends ACK, but ACK is lost) +// - server app sends some data packets +// - (N of the first data packets are lost) +// - server receives client ACK that has a TS ECR matching first SYNACK, +// and also SACKs suggesting the first N data packets were lost +// - server performs undo of SYNACK RTO, then immediately enters recovery +// - buggy behavior in 794200d66273 then performed an undo that caused +// the connection to be in a bad state, in CA_Open with retrans_out != 0 + +// Check that outbound TS Val ticks are as we would expect with 1000 usec per +// timestamp tick: +--tcp_ts_tick_usecs=1000 + +`./defaults.sh` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:1000(1000) win 65535 <mss 1012,sackOK,TS val 1000 ecr 0,wscale 7,nop,nop,nop,FO TFO_COOKIE> + +0 > S. 0:0(0) ack 1001 <mss 1460,sackOK,TS val 2000 ecr 1000,nop,wscale 8> + +0 accept(3, ..., ...) = 4 + +// Application writes more data + +.010 write(4, ..., 10000) = 10000 + +0 > P. 1:5001(5000) ack 1001 <nop,nop,TS val 2010 ecr 1000> + +0 > P. 5001:10001(5000) ack 1001 <nop,nop,TS val 2010 ecr 1000> + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + + +0 < . 1001:1001(0) ack 1 win 257 <TS val 1010 ecr 2000,sack 2001:5001> + +0 > P. 1:2001(2000) ack 1001 <nop,nop,TS val 2010 ecr 1010> + +0 %{ assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state }% + +0 %{ assert tcpi_snd_cwnd == 7, tcpi_snd_cwnd }% + + +0 < . 1001:1001(0) ack 1 win 257 <TS val 1011 ecr 2000,sack 2001:6001> + +0 %{ assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state }% + +0 %{ assert tcpi_snd_cwnd == 7, tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_experimental_option.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_experimental_option.pkt new file mode 100644 index 000000000000..c3cb0e8bdcf8 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_experimental_option.pkt @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Test the Experimental Option +// +// SYN w/ FOEXP w/o cookie must generates SYN+ACK w/ FOEXP +// w/ a valid cookie, and the cookie must be the same one +// with one generated by IANA FO + +`./defaults.sh` + +// Request a TFO cookie by Experimental Option +// This must generate the same TFO_COOKIE + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FOEXP> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,FOEXP TFO_COOKIE> + + +0 close(3) = 0 + +// Test if FOEXP with a valid cookie creates a TFO socket + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FOEXP TFO_COOKIE> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + + +0 read(4, ..., 512) = 10 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_fin-close-socket.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_fin-close-socket.pkt new file mode 100644 index 000000000000..dc09f8d9a381 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_fin-close-socket.pkt @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Send a FIN pkt with the ACK bit to a TFO socket. +// The socket will go to TCP_CLOSE_WAIT state and data can be +// read until the socket is closed, at which time a FIN will be sent. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + +// FIN is acked and the socket goes to TCP_CLOSE_WAIT state +// in tcp_fin() called from tcp_data_queue(). + +0 < F. 11:11(0) ack 1 win 32792 + +0 > . 1:1(0) ack 12 + + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + +0 %{ assert tcpi_state == TCP_CLOSE_WAIT, tcpi_state }% + + +0 read(4, ..., 512) = 10 + +0 close(4) = 0 + +0 > F. 1:1(0) ack 12 + * > F. 1:1(0) ack 12 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_icmp-before-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_icmp-before-accept.pkt new file mode 100644 index 000000000000..d5543672e2bd --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_icmp-before-accept.pkt @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Send an ICMP host_unreachable pkt to a pending SYN_RECV req. +// +// If it's a TFO req, the ICMP error will cause it to switch +// to TCP_CLOSE state but remains in the acceptor queue. + +--ip_version=ipv4 + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + +// Out-of-window icmp is ignored but accounted. + +0 `nstat > /dev/null` + +0 < icmp unreachable [5000:6000(1000)] + +0 `nstat | grep TcpExtOutOfWindowIcmps > /dev/null` + +// Valid ICMP unreach. + +0 < icmp unreachable host_unreachable [0:10(10)] + +// Unlike the non-TFO case, the req is still there to be accepted. + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + +// tcp_done_with_error() in tcp_v4_err() sets sk->sk_state +// to TCP_CLOSE + +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }% + +// The 1st read will succeed and return the data in SYN + +0 read(4, ..., 512) = 10 + +// The 2nd read will fail. + +0 read(4, ..., 512) = -1 EHOSTUNREACH (No route to host) + +// But is no longer writable because it's in TCP_CLOSE state. + +0 write(4, ..., 100) = -1 EPIPE (Broken Pipe) + +// inbound pkt will trigger RST because the socket has been moved +// off the TCP hash tables. + +0 < . 1:1(0) ack 1 win 32792 + +0 > R 1:1(0) diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-after-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-after-accept.pkt new file mode 100644 index 000000000000..040d5547ed80 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-after-accept.pkt @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Send a RST to a TFO socket after it has been accepted. +// +// First read() will return all the data and this is consistent +// with the non-TFO case. Second read will return -1 + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + +0 %{ assert tcpi_state == TCP_SYN_RECV, tcpi_state }% + +// 1st read will return the data from SYN. +// tcp_reset() sets sk->sk_err to ECONNRESET for SYN_RECV. + +0 < R. 11:11(0) win 32792 + +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }% + +// This one w/o ACK bit will cause the same effect. +// +0 < R 11:11(0) win 32792 +// See Step 2 in tcp_validate_incoming(). + +// found_ok_skb in tcp_recvmsg_locked() + +0 read(4, ..., 512) = 10 + +// !copied && sk->sk_err -> sock_error(sk) + +0 read(4, ..., 512) = -1 ECONNRESET (Connection reset by peer) + +0 close(4) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-before-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-before-accept.pkt new file mode 100644 index 000000000000..7f9de6c66cbd --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-before-accept.pkt @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Send a RST to a TFO socket before it is accepted. +// +// The socket won't go away and after it's accepted the data +// in the SYN pkt can still be read. But that's about all that +// the acceptor can do with the socket. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,nop,wscale 7,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +// 1st read will return the data from SYN. + +0 < R. 11:11(0) win 257 + +// This one w/o ACK bit will cause the same effect. +// +0 < R 11:11(0) win 257 + + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }% + + +0 read(4, ..., 512) = 10 + +0 read(4, ..., 512) = -1 ECONNRESET (Connection reset by peer) + +0 close(4) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-close-with-unread-data.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-close-with-unread-data.pkt new file mode 100644 index 000000000000..548a87701b5d --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-close-with-unread-data.pkt @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Send a RST to a TFO socket after it is accepted. +// +// The socket will change to TCP_CLOSE state with pending data so +// write() will fail. Pending data can be still be read and close() +// won't trigger RST if data is not read +// +// 565b7b2d2e63 ("tcp: do not send reset to already closed sockets") +// https://lore.kernel.org/netdev/4C1A2502.1030502@openvz.org/ + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop, FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + +0 %{ assert tcpi_state == TCP_SYN_RECV, tcpi_state }% + +// tcp_done() sets sk->sk_state to TCP_CLOSE and clears tp->fastopen_rsk + +0 < R. 11:11(0) win 32792 + +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }% + + +0 write(4, ..., 100) = -1 ECONNRESET(Connection reset by peer) + +0 close(4) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-non-tfo-socket.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-non-tfo-socket.pkt new file mode 100644 index 000000000000..20090bf77655 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-non-tfo-socket.pkt @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Send a RST to a fully established socket with pending data before +// it is accepted. +// +// The socket with pending data won't go away and can still be accepted +// with data read. But it will be in TCP_CLOSE state. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + +// Invalid cookie, so accept() fails. + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO aaaaaaaaaaaaaaaa,nop,nop> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK, FO TFO_COOKIE,nop,nop> + + +0 accept(3, ..., ...) = -1 EAGAIN (Resource temporarily unavailable) + +// Complete 3WHS and send data and RST + +0 < . 1:1(0) ack 1 win 32792 + +0 < . 1:11(10) ack 1 win 32792 + +0 < R. 11:11(0) win 32792 + +// A valid reset won't make the fully-established socket go away. +// It's just that the acceptor will get a dead, unusable socket +// in TCP_CLOSE state. + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }% + +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }% + + +0 write(4, ..., 100) = -1 ECONNRESET(Connection reset by peer) + +0 read(4, ..., 512) = 10 + +0 read(4, ..., 512) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_sockopt-fastopen-key.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_sockopt-fastopen-key.pkt new file mode 100644 index 000000000000..9f52d7de3436 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_sockopt-fastopen-key.pkt @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Test the server cookie is generated by aes64 encoding of remote and local +// IP addresses with a master key specified via sockopt TCP_FASTOPEN_KEY +// +`./defaults.sh + ./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen_key=00000000-00000000-00000000-00000000` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +// Set a key of a1a1a1a1-b2b2b2b2-c3c3c3c3-d4d4d4d4 (big endian). +// This would produce a cookie of TFO_COOKIE like many other +// tests (which the same key but set via sysctl). + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_KEY, + "\xa1\xa1\xa1\xa1\xb2\xb2\xb2\xb2\xc3\xc3\xc3\xc3\xd4\xd4\xd4\xd4", 16) = 0 + + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + +// Request a valid cookie TFO_COOKIE + +0 < S 1428932:1428942(10) win 10000 <mss 1012,nop,nop,FO,sackOK,TS val 1 ecr 0,nop,wscale 7> + +0 > S. 0:0(0) ack 1428933 <mss 1460,sackOK,TS val 10000 ecr 1,nop,wscale 8,FO TFO_COOKIE,nop,nop> + +0 < . 1:1(0) ack 1 win 257 <nop,nop,TS val 2 ecr 10000> + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }% + + +0 close(4) = 0 + +0 > F. 1:1(0) ack 1 <nop,nop,TS val 10001 ecr 2> + +0 < F. 1:1(0) ack 2 win 257 <nop,nop,TS val 3 ecr 10001> + +0 > . 2:2(0) ack 2 <nop,nop,TS val 10002 ecr 3> + + +0 close(3) = 0 + +// Restart the listener + +0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + +// Test setting the key in the listen state, and produces an identical cookie + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_KEY, + "\xa1\xa1\xa1\xa1\xb2\xb2\xb2\xb2\xc3\xc3\xc3\xc3\xd4\xd4\xd4\xd4", 16) = 0 + + +0 < S 6814000:6815000(1000) win 10000 <mss 1012,nop,nop,FO TFO_COOKIE,sackOK,TS val 10 ecr 0,nop,wscale 7> + +0 > S. 0:0(0) ack 6815001 <mss 1460,sackOK,TS val 10000 ecr 10,nop,wscale 8> + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + +0 < . 1001:1001(0) ack 1 win 257 <nop,nop,TS val 12 ecr 10000> + +0 read(4, ..., 8192) = 1000 + + +0 close(4) = 0 + +0 > F. 1:1(0) ack 1001 <nop,nop,TS val 10101 ecr 12> + +0 < F. 1001:1001(0) ack 2 win 257 <nop,nop,TS val 112 ecr 10101> + +0 > . 2:2(0) ack 1002 <nop,nop,TS val 10102 ecr 112> + + +0 close(3) = 0 + +// Restart the listener + +0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + +// Test invalid key length (must be 16 bytes) + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_KEY, "", 0) = -1 (Invalid Argument) + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_KEY, "", 3) = -1 (Invalid Argument) + +// Previous cookie won't be accepted b/c this listener uses the global key (0-0-0-0) + +0 < S 6814000:6815000(1000) win 10000 <mss 1012,nop,nop,FO TFO_COOKIE,sackOK,TS val 10 ecr 0,nop,wscale 7> + +0 > S. 0:0(0) ack 6814001 <mss 1460,sackOK,TS val 10000 ecr 10,nop,wscale 8,FO TFO_COOKIE_ZERO,nop,nop> diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-listener-closed.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-listener-closed.pkt new file mode 100644 index 000000000000..e82e06da44c9 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-listener-closed.pkt @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Close a listener socket with pending TFO child. +// This will trigger RST pkt to go out. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + +// RST pkt is generated for each not-yet-accepted TFO child. +// inet_csk_listen_stop() -> inet_child_forget() -> tcp_disconnect() +// -> tcp_need_reset() is true for SYN_RECV + +0 close(3) = 0 + +0 > R. 1:1(0) ack 11 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-reconnect.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-reconnect.pkt new file mode 100644 index 000000000000..2a148bb14cbf --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-reconnect.pkt @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +`./defaults.sh + ./set_sysctls.py /proc/sys/net/ipv4/tcp_timestamps=0` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,nop,nop,sackOK,nop,nop,FO TFO_COOKIE> + +0 > S. 0:0(0) ack 11 win 65535 <mss 1460,nop,nop,sackOK> + +// sk->sk_state is TCP_SYN_RECV + +0 accept(3, ..., ...) = 4 + +0 %{ assert tcpi_state == TCP_SYN_RECV, tcpi_state }% + +// tcp_disconnect() sets sk->sk_state to TCP_CLOSE + +0 connect(4, AF_UNSPEC, ...) = 0 + +0 > R. 1:1(0) ack 11 win 65535 + +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }% + +// connect() sets sk->sk_state to TCP_SYN_SENT + +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0 connect(4, ..., ...) = -1 EINPROGRESS (Operation is now in progress) + +0 > S 0:0(0) win 65535 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 %{ assert tcpi_state == TCP_SYN_SENT, tcpi_state }% + +// tp->fastopen_rsk must be NULL + +1 > S 0:0(0) win 65535 <mss 1460,nop,nop,sackOK,nop,wscale 8> diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-unread-data-closed.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-unread-data-closed.pkt new file mode 100644 index 000000000000..09fb63f78a0e --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-unread-data-closed.pkt @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Close a TFO socket with unread data. +// This will trigger a RST pkt. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + +0 %{ assert tcpi_state == TCP_SYN_RECV, tcpi_state }% + +// data_was_unread == true in __tcp_close() + +0 close(4) = 0 + +0 > R. 1:1(0) ack 11 diff --git a/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt index df49c67645ac..e13f0eee9795 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt @@ -1,5 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 // Test TCP_INQ and TCP_CM_INQ on the client side. + +--mss=1000 + `./defaults.sh ` diff --git a/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt b/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt index 04a5e2590c62..14dd5f813d50 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt @@ -1,5 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 // Test TCP_INQ and TCP_CM_INQ on the server side. + +--mss=1000 + `./defaults.sh ` diff --git a/tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt new file mode 100644 index 000000000000..09aabc775e80 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 + +--mss=1000 + +`./defaults.sh +sysctl -q net.ipv4.tcp_rmem="4096 131072 $((32*1024*1024))"` + +// Test that a not-yet-accepted socket does not change +// its initial sk_rcvbuf (tcp_rmem[1]) when receiving ooo packets. + + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 10> + +.1 < . 1:1(0) ack 1 win 257 + +0 < . 2001:41001(39000) ack 1 win 257 + +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:41001> + +0 < . 41001:101001(60000) ack 1 win 257 + +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:101001> + +0 < . 1:1001(1000) ack 1 win 257 + +0 > . 1:1(0) ack 1001 <nop,nop,sack 2001:101001> + +0 < . 1001:2001(1000) ack 1 win 257 + +0 > . 1:1(0) ack 101001 + + +0 accept(3, ..., ...) = 4 + + +0 %{ assert SK_MEMINFO_RCVBUF == 131072, SK_MEMINFO_RCVBUF }% + + +0 close(4) = 0 + +0 close(3) = 0 + +// Test that ooo packets for accepted sockets do increase sk_rcvbuf + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 10> + +.1 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 < . 2001:41001(39000) ack 1 win 257 + +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:41001> + +0 < . 41001:101001(60000) ack 1 win 257 + +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:101001> + + +0 %{ assert SK_MEMINFO_RCVBUF > 131072, SK_MEMINFO_RCVBUF }% + diff --git a/tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt b/tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt new file mode 100644 index 000000000000..7e6bc5fb0c8d --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 + +--mss=1000 + +`./defaults.sh +sysctl -q net.ipv4.tcp_rmem="4096 131072 $((32*1024*1024))"` + + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 10> + +.1 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 < . 2001:11001(9000) ack 1 win 257 + +0 > . 1:1(0) ack 1 win 81 <nop,nop,sack 2001:11001> + +// check that ooo packet properly updates tcpi_rcv_mss + +0 %{ assert tcpi_rcv_mss == 1000, tcpi_rcv_mss }% + + +0 < . 11001:21001(10000) ack 1 win 257 + +0 > . 1:1(0) ack 1 win 81 <nop,nop,sack 2001:21001> + diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt new file mode 100644 index 000000000000..12882be10f2e --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 + +--mss=1000 + +`./defaults.sh` + + 0 `nstat -n` + +// Establish a connection. + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [10000], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 0> + +.1 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 < P. 1:4001(4000) ack 1 win 257 + +0 > . 1:1(0) ack 4001 win 5000 + +// packet in sequence : SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE / LINUX_MIB_BEYOND_WINDOW + +0 < P. 4001:54001(50000) ack 1 win 257 + +0 > . 1:1(0) ack 4001 win 5000 + +// ooo packet. : SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE / LINUX_MIB_BEYOND_WINDOW + +1 < P. 5001:55001(50000) ack 1 win 257 + +0 > . 1:1(0) ack 4001 win 5000 + +// SKB_DROP_REASON_TCP_INVALID_SEQUENCE / LINUX_MIB_BEYOND_WINDOW + +0 < P. 70001:80001(10000) ack 1 win 257 + +0 > . 1:1(0) ack 4001 win 5000 + + +0 read(4, ..., 100000) = 4000 + +// If queue is empty, accept a packet even if its end_seq is above rcv_mwnd_seq + +0 < P. 4001:54001(50000) ack 1 win 257 + * > . 1:1(0) ack 54001 win 0 + +// Check LINUX_MIB_BEYOND_WINDOW has been incremented 3 times. ++0 `nstat | grep TcpExtBeyondWindow | grep -q " 3 "` diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_neg_window.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_neg_window.pkt new file mode 100644 index 000000000000..b9ab264b2a11 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_neg_window.pkt @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test maximum advertised window limit when rcv_nxt advances past +// rcv_mwnd_seq. The "usable window" must be properly clamped to zero +// rather than becoming negative. + +--mss=1000 + +`./defaults.sh` + +// Establish a connection. + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [20000], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 win 18980 <mss 1460,nop,wscale 0> + +.1 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + +// A too big packet is accepted if the receive queue is empty. It +// does not trigger an immediate ACK. + +0 < P. 1:20001(20000) ack 1 win 257 + +0 %{ assert tcpi_bytes_received == 20000, tcpi_bytes_received; }% + +// Send a RST immediately so that there is no rcv_wup/rcv_mwnd_seq update yet + +0 < R. 20001:20001(0) ack 1 win 257 + +// Verify that the RST was accepted. Indirectly this also verifies that no +// immediate ACK was sent for the data packet above. + +0 < . 20001:20001(0) ack 1 win 257 + +0 > R 1:1(0) diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_wnd_shrink_allowed.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_wnd_shrink_allowed.pkt new file mode 100644 index 000000000000..6af0e0eb183a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_wnd_shrink_allowed.pkt @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 + +--mss=1000 + +`./defaults.sh +sysctl -q net.ipv4.tcp_shrink_window=1 +sysctl -q net.ipv4.tcp_rmem="4096 32768 $((32*1024*1024))"` + + 0 `nstat -n` + +// Establish a connection. + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 10> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 < P. 1:10001(10000) ack 1 win 257 + * > . 1:1(0) ack 10001 win 15 + + +0 < P. 10001:11024(1023) ack 1 win 257 + * > . 1:1(0) ack 11024 win 13 + +// Max window seq advertised 10001 + 15*1024 = 25361, last advertised: 11024 + 13*1024 = 24336 + +// Segment beyond the max window is dropped + +0 < P. 11024:25362(14338) ack 1 win 257 + * > . 1:1(0) ack 11024 win 13 + +// Segment using the max window is accepted + +0 < P. 11024:25361(14337) ack 1 win 257 + * > . 1:1(0) ack 25361 win 0 + +// Check LINUX_MIB_BEYOND_WINDOW has been incremented once + +0 `nstat | grep TcpExtBeyondWindow | grep -q " 1 "` diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_wnd_shrink_nomem.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_wnd_shrink_nomem.pkt new file mode 100644 index 000000000000..a80eb55dc69a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_wnd_shrink_nomem.pkt @@ -0,0 +1,132 @@ +// SPDX-License-Identifier: GPL-2.0 +// When tcp_receive_window() < tcp_max_receive_window(), tcp_sequence() accepts +// packets that would be dropped under normal conditions (i.e. tcp_receive_window() +// equal to tcp_max_receive_window()). +// Test that such packets are handled as expected for RWIN == 0 and for RWIN > 0. + +--mss=1000 + +`./defaults.sh` + + 0 `nstat -n` + +// Establish a connection. + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [1000000], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,nop,nop,sackOK,nop,wscale 7> + +0 > S. 0:0(0) ack 1 win 65535 <mss 1460,nop,nop,sackOK,nop,wscale 4> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + +// Put 1040000 bytes into the receive buffer + +0 < P. 1:65001(65000) ack 1 win 257 + * > . 1:1(0) ack 65001 + +0 < P. 65001:130001(65000) ack 1 win 257 + * > . 1:1(0) ack 130001 + +0 < P. 130001:195001(65000) ack 1 win 257 + * > . 1:1(0) ack 195001 + +0 < P. 195001:260001(65000) ack 1 win 257 + * > . 1:1(0) ack 260001 + +0 < P. 260001:325001(65000) ack 1 win 257 + * > . 1:1(0) ack 325001 + +0 < P. 325001:390001(65000) ack 1 win 257 + * > . 1:1(0) ack 390001 + +0 < P. 390001:455001(65000) ack 1 win 257 + * > . 1:1(0) ack 455001 + +0 < P. 455001:520001(65000) ack 1 win 257 + * > . 1:1(0) ack 520001 + +0 < P. 520001:585001(65000) ack 1 win 257 + * > . 1:1(0) ack 585001 + +0 < P. 585001:650001(65000) ack 1 win 257 + * > . 1:1(0) ack 650001 + +0 < P. 650001:715001(65000) ack 1 win 257 + * > . 1:1(0) ack 715001 + +0 < P. 715001:780001(65000) ack 1 win 257 + * > . 1:1(0) ack 780001 + +0 < P. 780001:845001(65000) ack 1 win 257 + * > . 1:1(0) ack 845001 + +0 < P. 845001:910001(65000) ack 1 win 257 + * > . 1:1(0) ack 910001 + +0 < P. 910001:975001(65000) ack 1 win 257 + * > . 1:1(0) ack 975001 + +0 < P. 975001:1040001(65000) ack 1 win 257 + * > . 1:1(0) ack 1040001 + +// Trigger an extreme memory squeeze by shrinking SO_RCVBUF + +0 setsockopt(4, SOL_SOCKET, SO_RCVBUF, [16000], 4) = 0 + + +0 < P. 1040001:1105001(65000) ack 1 win 257 + * > . 1:1(0) ack 1040001 win 0 +// Check LINUX_MIB_TCPRCVQDROP has been incremented + +0 `nstat -s | grep TcpExtTCPRcvQDrop| grep -q " 1 "` + +// RWIN == 0: rcv_wup = 1040001, rcv_wnd = 0, rcv_mwnd_seq > 1105001 (significantly larger, typically ~1970000) + +// Accept pure ack with seq in max adv. window + +0 write(4, ..., 1000) = 1000 + +0 > P. 1:1001(1000) ack 1040001 win 0 + +0 < . 1105001:1105001(0) ack 1001 win 257 + +// In order segment, in max adv. window -> drop (SKB_DROP_REASON_TCP_ZEROWINDOW) + +0 < P. 1040001:1041001(1000) ack 1001 win 257 + +0 > . 1001:1001(0) ack 1040001 win 0 +// Ooo partial segment, in max adv. window -> drop (SKB_DROP_REASON_TCP_ZEROWINDOW) + +0 < P. 1039001:1041001(2000) ack 1001 win 257 + +0 > . 1001:1001(0) ack 1040001 win 0 <nop,nop,sack 1039001:1040001> +// Check LINUX_MIB_TCPZEROWINDOWDROP has been incremented twice + +0 `nstat -s | grep TcpExtTCPZeroWindowDrop| grep -q " 2 "` + +// Ooo segment, in max adv. window -> drop (SKB_DROP_REASON_TCP_OVERWINDOW) + +0 < P. 1105001:1106001(1000) ack 1001 win 257 + +0 > . 1001:1001(0) ack 1040001 win 0 +// Ooo segment, beyond max adv. window -> drop (SKB_DROP_REASON_TCP_INVALID_SEQUENCE) + +0 < P. 2000001:2001001(1000) ack 1001 win 257 + +0 > . 1001:1001(0) ack 1040001 win 0 +// Check LINUX_MIB_BEYOND_WINDOW has been incremented twice + +0 `nstat -s | grep TcpExtBeyondWindow | grep -q " 2 "` + +// Read all data + +0 read(4, ..., 2000000) = 1040000 + * > . 1001:1001(0) ack 1040001 + +// RWIN > 0: rcv_wup = 1040001, 0 < rcv_wnd < 32000, rcv_mwnd_seq > 1105001 (significantly larger, typically ~1970000) + +// Accept pure ack with seq in max adv. window, beyond adv. window + +0 write(4, ..., 1000) = 1000 + +0 > P. 1001:2001(1000) ack 1040001 + +0 < . 1105001:1105001(0) ack 2001 win 257 + +// In order segment, in max adv. window, in adv. window -> accept +// Note: This also ensures that we cannot hit the empty queue exception in tcp_sequence() in the following tests + +0 < P. 1040001:1041001(1000) ack 2001 win 257 + * > . 2001:2001(0) ack 1041001 + +// Ooo partial segment, in adv. window -> accept + +0 < P. 1040001:1042001(2000) ack 2001 win 257 + +0 > . 2001:2001(0) ack 1042001 <nop,nop,sack 1040001:1041001> + +// Ooo segment, in max adv. window, beyond adv. window -> drop (SKB_DROP_REASON_TCP_OVERWINDOW) + +0 < P. 1105001:1106001(1000) ack 2001 win 257 + +0 > . 2001:2001(0) ack 1042001 +// Ooo segment, beyond max adv. window, beyond adv. window -> drop (SKB_DROP_REASON_TCP_INVALID_SEQUENCE) + +0 < P. 2000001:2001001(1000) ack 2001 win 257 + +0 > . 2001:2001(0) ack 1042001 +// Check LINUX_MIB_BEYOND_WINDOW has been incremented twice + +0 `nstat -s | grep TcpExtBeyondWindow | grep -q " 4 "` + +// We are allowed to go beyond the window and buffer with one packet + +0 < P. 1042001:1062001(20000) ack 2001 win 257 + * > . 2001:2001(0) ack 1062001 + +0 < P. 1062001:1082001(20000) ack 2001 win 257 + * > . 2001:2001(0) ack 1082001 win 0 + +// But not more: In order segment, in max adv. window -> drop (SKB_DROP_REASON_TCP_ZEROWINDOW) + +0 < P. 1082001:1083001(1000) ack 2001 win 257 + * > . 2001:2001(0) ack 1082001 +// Check LINUX_MIB_TCPZEROWINDOWDROP has been incremented again + +0 `nstat -s | grep TcpExtTCPZeroWindowDrop| grep -q " 3 "` diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_zero_wnd_fin.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_zero_wnd_fin.pkt new file mode 100644 index 000000000000..e245359a1a91 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_zero_wnd_fin.pkt @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Some TCP stacks send FINs even though the window is closed. We break +// a possible FIN/ACK loop by accepting the FIN. + +--mss=1000 + +`./defaults.sh` + +// Establish a connection. + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [20000], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 0> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 < P. 1:60001(60000) ack 1 win 257 + * > . 1:1(0) ack 60001 win 0 + + +0 < F. 60001:60001(0) ack 1 win 257 + +0 > . 1:1(0) ack 60002 win 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_rto_synack_rto_max.pkt b/tools/testing/selftests/net/packetdrill/tcp_rto_synack_rto_max.pkt new file mode 100644 index 000000000000..47550df124ce --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_rto_synack_rto_max.pkt @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Test SYN+ACK RTX with 1s RTO. +// +`./defaults.sh + ./set_sysctls.py /proc/sys/net/ipv4/tcp_rto_max_ms=1000` + +// +// Test 1: TFO SYN+ACK +// + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 1000 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + +// RTO must be capped to 1s + +1 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + +1 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + +1 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + + +0 < . 11:11(0) ack 1 win 1000 <mss 1460,nop,nop,sackOK> + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + + +0 close(4) = 0 + +0 close(3) = 0 + + +// +// Test 2: non-TFO SYN+ACK +// + +0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 1000 <mss 1460,sackOK,nop,nop> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK> + +// RTO must be capped to 1s + +1 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK> + +1 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK> + +1 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK> + + +0 < . 1:1(0) ack 1 win 1000 <mss 1460,nop,nop,sackOK> + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }% + + +0 close(4) = 0 + +0 close(3) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt index b2b2cdf27e20..454441e7ecff 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt @@ -1,6 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 // Test that we correctly skip zero-length IOVs. + +--send_omit_free // do not reuse send buffers with zerocopy + `./defaults.sh` + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_timestamping_tcp_tx_timestamp_bug.pkt b/tools/testing/selftests/net/packetdrill/tcp_timestamping_tcp_tx_timestamp_bug.pkt new file mode 100644 index 000000000000..95a1957a2cf9 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_timestamping_tcp_tx_timestamp_bug.pkt @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test after "tcp: tcp_tx_timestamp() must look at the rtx queue" + +// This test is about receiving the SCM_TSTAMP_ACK, +// we do not care about its SCM_TIMESTAMPING precision. +--tolerance_usecs=1000000 + +`./defaults.sh +sysctl -q net.ipv4.tcp_min_tso_segs=70 +` + +// Create a socket and set it to non-blocking. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +// Establish connection and verify that there was no error. + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++.010 < S. 0:0(0) ack 1 win 65535 <mss 1000,sackOK,TS val 700 ecr 100,nop,wscale 7> + +0 > . 1:1(0) ack 1 <nop,nop,TS val 200 ecr 700> + +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_SNDBUF, [30000], 4) = 0 + + +0 write(3, ..., 9880) = 9880 + +0 > P. 1:9881(9880) ack 1 <nop,nop,TS val 200 ecr 700> ++.010 < . 1:1(0) ack 9881 win 10000 <nop,nop,TS val 701 ecr 200> + + +0 write(3, ..., 19760) = 19760 + +0 > P. 9881:29641(19760) ack 1 <nop,nop,TS val 201 ecr 701> ++.010 < . 1:1(0) ack 29641 win 10000 <nop,nop,TS val 702 ecr 201> + + +0 write(3, ..., 39520) = 39520 + +0 > P. 29641:69161(39520) ack 1 <nop,nop,TS val 202 ecr 702> ++.010 < . 1:1(0) ack 69161 win 10000 <nop,nop,TS val 703 ecr 202> + +// One more write to increase cwnd + +0 write(3, ..., 79040) = 79040 + +0 > P. 69161:108681(39520) ack 1 <nop,nop,TS val 203 ecr 703> + +0 > P. 108681:148201(39520) ack 1 <nop,nop,TS val 203 ecr 703> ++.010 < . 1:1(0) ack 148201 win 1000 <nop,nop,TS val 704 ecr 203> + + +0 setsockopt(3, SOL_SOCKET, SO_TIMESTAMPING, + [SOF_TIMESTAMPING_TX_ACK | SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_OPT_ID], 4) = 0 + +// We have one write filling one skb +// last byte can not be stored because of our small SO_SNDBUF + +0 write(3, ..., 65209) = 65208 + +0 > P. 148201:213409(65208) ack 1 <nop,nop,TS val 204 ecr 704> ++.010 < . 1:1(0) ack 213409 win 1000 <nop,nop,TS val 705 ecr 204> + +// SCM_TSTAMP_ACK should be received after the last ack at +// t=60ms. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=60000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_ACK, + ee_data=65207}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt index 183051ba0cae..6882b8240a8a 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt @@ -23,14 +23,16 @@ // install a qdisc dropping all packets +0 `tc qdisc delete dev tun0 root 2>/dev/null ; tc qdisc add dev tun0 root pfifo limit 0` + +0 write(4, ..., 24) = 24 // When qdisc is congested we retry every 500ms // (TCP_RESOURCE_PROBE_INTERVAL) and therefore // we retry 6 times before hitting 3s timeout. // First verify that the connection is alive: -+3.250 write(4, ..., 24) = 24 ++3 write(4, ..., 24) = 24 + // Now verify that shortly after that the socket is dead: - +.100 write(4, ..., 24) = -1 ETIMEDOUT (Connection timed out) ++1 write(4, ..., 24) = -1 ETIMEDOUT (Connection timed out) +0 %{ assert tcpi_probes == 6, tcpi_probes; \ assert tcpi_backoff == 0, tcpi_backoff }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt index a82c8899d36b..0a0700afdaa3 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt @@ -4,6 +4,8 @@ // send a packet with MSG_ZEROCOPY and receive the notification ID // repeat and verify IDs are consecutive +--send_omit_free // do not reuse send buffers with zerocopy + `./defaults.sh` 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt index c01915e7f4a1..df91675d2991 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt @@ -3,6 +3,8 @@ // // send multiple packets, then read one range of all notifications. +--send_omit_free // do not reuse send buffers with zerocopy + `./defaults.sh` 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt index 6509882932e9..2963cfcb14df 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 // Minimal client-side zerocopy test +--send_omit_free // do not reuse send buffers with zerocopy + `./defaults.sh` 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt index 2cd78755cb2a..ea0c2fa73c2d 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt @@ -7,6 +7,8 @@ // First send on a closed socket and wait for (absent) notification. // Then connect and send and verify that notification nr. is zero. +--send_omit_free // do not reuse send buffers with zerocopy + `./defaults.sh` 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt index 7671c20e01cf..4df978a9b82e 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt @@ -7,6 +7,9 @@ // fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR // is correctly fired only once, when EPOLLET is set. send another packet with // MSG_ZEROCOPY. confirm that EPOLLERR is correctly fired again only once. + +--send_omit_free // do not reuse send buffers with zerocopy + `./defaults.sh` 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt index fadc480fdb7f..36b6edc4858c 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt @@ -8,6 +8,9 @@ // fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR // is correctly fired only once, when EPOLLET is set. send another packet with // MSG_ZEROCOPY. confirm that EPOLLERR is correctly fired again only once. + +--send_omit_free // do not reuse send buffers with zerocopy + `./defaults.sh` 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt index 5bfa0d1d2f4a..1bea6f3b4558 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt @@ -8,6 +8,9 @@ // is correctly fired only once, when EPOLLONESHOT is set. send another packet // with MSG_ZEROCOPY. confirm that EPOLLERR is not fired. Rearm the FD and // confirm that EPOLLERR is correctly set. + +--send_omit_free // do not reuse send buffers with zerocopy + `./defaults.sh` 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt index 4a73bbf46961..e27c21ff5d18 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt @@ -8,6 +8,8 @@ // one will have no data in the initial send. On return 0 the // zerocopy notification counter is not incremented. Verify this too. +--send_omit_free // do not reuse send buffers with zerocopy + `./defaults.sh` // Send a FastOpen request, no cookie yet so no data in SYN diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt index 36086c5877ce..b1fa77c77dfa 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt @@ -4,6 +4,8 @@ // send data with MSG_FASTOPEN | MSG_ZEROCOPY and verify that the // kernel returns the notification ID. +--send_omit_free // do not reuse send buffers with zerocopy + `./defaults.sh ./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen=0x207` diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt index 672f817faca0..2f5317d0a9fa 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt @@ -7,6 +7,8 @@ // because each iovec element becomes a frag // 3) the PSH bit is set on an skb when it runs out of fragments +--send_omit_free // do not reuse send buffers with zerocopy + `./defaults.sh` 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt index a9a1ac0aea4f..9d5272c6b207 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt @@ -4,6 +4,8 @@ // verify that SO_EE_CODE_ZEROCOPY_COPIED is set on zerocopy // packets of all sizes, including the smallest payload, 1B. +--send_omit_free // do not reuse send buffers with zerocopy + `./defaults.sh` 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 66be7699c72c..a3323c21f001 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -205,7 +205,6 @@ # Check that PMTU exceptions are created for both paths. source lib.sh -source net_helper.sh PAUSE_ON_FAIL=no VERBOSE=0 @@ -1090,10 +1089,11 @@ cleanup() { cleanup_all_ns - ip link del veth_A-C 2>/dev/null - ip link del veth_A-R1 2>/dev/null - cleanup_del_ovs_internal - cleanup_del_ovs_vswitchd + [ -e "/sys/class/net/veth_A-C" ] && ip link del veth_A-C + [ -e "/sys/class/net/veth_A-R1" ] && ip link del veth_A-R1 + [ -e "/sys/class/net/ovs_br0" ] && cleanup_del_ovs_internal + [ -e "/sys/class/net/ovs_br0" ] && cleanup_del_ovs_vswitchd + rm -f "$tmpoutfile" } diff --git a/tools/testing/selftests/net/ppp/Makefile b/tools/testing/selftests/net/ppp/Makefile new file mode 100644 index 000000000000..b39b0abadde6 --- /dev/null +++ b/tools/testing/selftests/net/ppp/Makefile @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0 + +top_srcdir = ../../../../.. + +TEST_PROGS := \ + ppp_async.sh \ + pppoe.sh \ +# end of TEST_PROGS + +TEST_FILES := \ + ppp_common.sh \ + pppoe-server-options \ +# end of TEST_FILES + +include ../../lib.mk diff --git a/tools/testing/selftests/net/ppp/config b/tools/testing/selftests/net/ppp/config new file mode 100644 index 000000000000..b45d25c5b970 --- /dev/null +++ b/tools/testing/selftests/net/ppp/config @@ -0,0 +1,9 @@ +CONFIG_IPV6=y +CONFIG_PACKET=y +CONFIG_PPP=m +CONFIG_PPP_ASYNC=m +CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m +CONFIG_PPPOE=m +CONFIG_PPPOE_HASH_BITS_4=y +CONFIG_VETH=y diff --git a/tools/testing/selftests/net/ppp/ppp_async.sh b/tools/testing/selftests/net/ppp/ppp_async.sh new file mode 100755 index 000000000000..10f54c8dd0bc --- /dev/null +++ b/tools/testing/selftests/net/ppp/ppp_async.sh @@ -0,0 +1,43 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ppp_common.sh + +# Temporary files for PTY symlinks +TTY_DIR=$(mktemp -d /tmp/ppp.XXXXXX) +TTY_SERVER="$TTY_DIR"/server +TTY_CLIENT="$TTY_DIR"/client + +# shellcheck disable=SC2329 +cleanup() { + cleanup_all_ns + [ -n "$SOCAT_PID" ] && kill_process "$SOCAT_PID" + rm -fr "$TTY_DIR" +} + +trap cleanup EXIT + +ppp_common_init +modprobe -q ppp_async + +# Create the virtual serial device +socat -d PTY,link="$TTY_SERVER",rawer PTY,link="$TTY_CLIENT",rawer & +SOCAT_PID=$! + +# Wait for symlinks to be created +slowwait 5 [ -L "$TTY_SERVER" ] + +# Start the PPP Server +ip netns exec "$NS_SERVER" pppd "$TTY_SERVER" 115200 \ + "$IP_SERVER":"$IP_CLIENT" \ + local noauth nodefaultroute debug + +# Start the PPP Client +ip netns exec "$NS_CLIENT" pppd "$TTY_CLIENT" 115200 \ + local noauth updetach nodefaultroute debug + +ppp_test_connectivity + +log_test "PPP async" + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/net/ppp/ppp_common.sh b/tools/testing/selftests/net/ppp/ppp_common.sh new file mode 100644 index 000000000000..40bbec317039 --- /dev/null +++ b/tools/testing/selftests/net/ppp/ppp_common.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# shellcheck disable=SC2153 + +source ../lib.sh + +IP_SERVER="192.168.200.1" +IP_CLIENT="192.168.200.2" + +ppp_common_init() { + # Package requirements + require_command socat + require_command pppd + require_command iperf3 + + # Check for root privileges + if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit "$ksft_skip" + fi + + # Namespaces + setup_ns NS_SERVER NS_CLIENT +} + +ppp_check_addr() { + dev=$1 + addr=$2 + ns=$3 + ip -netns "$ns" -4 addr show dev "$dev" 2>/dev/null | grep -q "$addr" + return $? +} + +ppp_test_connectivity() { + slowwait 10 ppp_check_addr "ppp0" "$IP_CLIENT" "$NS_CLIENT" + + ip netns exec "$NS_CLIENT" ping -c 3 "$IP_SERVER" + check_err $? + + ip netns exec "$NS_SERVER" iperf3 -s -1 -D + wait_local_port_listen "$NS_SERVER" 5201 tcp + + ip netns exec "$NS_CLIENT" iperf3 -c "$IP_SERVER" -Z -t 2 + check_err $? +} diff --git a/tools/testing/selftests/net/ppp/pppoe-server-options b/tools/testing/selftests/net/ppp/pppoe-server-options new file mode 100644 index 000000000000..66c8c9d319e9 --- /dev/null +++ b/tools/testing/selftests/net/ppp/pppoe-server-options @@ -0,0 +1,2 @@ +noauth +noipdefault diff --git a/tools/testing/selftests/net/ppp/pppoe.sh b/tools/testing/selftests/net/ppp/pppoe.sh new file mode 100755 index 000000000000..f67b51df7490 --- /dev/null +++ b/tools/testing/selftests/net/ppp/pppoe.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ppp_common.sh + +VETH_SERVER="veth-server" +VETH_CLIENT="veth-client" +PPPOE_LOG=$(mktemp /tmp/pppoe.XXXXXX) + +# shellcheck disable=SC2329 +cleanup() { + cleanup_all_ns + [ -n "$SOCAT_PID" ] && kill_process "$SOCAT_PID" + rm -f "$PPPOE_LOG" +} + +trap cleanup EXIT + +require_command pppoe-server +ppp_common_init +modprobe -q pppoe + +# Try to locate pppoe.so plugin +PPPOE_PLUGIN=$(find /usr/{lib,lib64,lib32}/pppd/ -name pppoe.so -type f -print -quit) +if [ -z "$PPPOE_PLUGIN" ]; then + log_test_skip "PPPoE: pppoe.so plugin not found" + exit "$EXIT_STATUS" +fi + +# Create the veth pair +ip link add "$VETH_SERVER" type veth peer name "$VETH_CLIENT" +ip link set "$VETH_SERVER" netns "$NS_SERVER" +ip link set "$VETH_CLIENT" netns "$NS_CLIENT" +ip -netns "$NS_SERVER" link set "$VETH_SERVER" up +ip -netns "$NS_CLIENT" link set "$VETH_CLIENT" up + +# Start socat as syslog listener +socat -v -u UNIX-RECV:/dev/log OPEN:/dev/null > "$PPPOE_LOG" 2>&1 & +SOCAT_PID=$! + +# Start the PPP Server. Note that versions before 4.0 ignore -g option and +# instead use a hardcoded plugin path, so they may fail to find the plugin. +ip netns exec "$NS_SERVER" pppoe-server -I "$VETH_SERVER" \ + -L "$IP_SERVER" -R "$IP_CLIENT" -N 1 -q "$(command -v pppd)" \ + -k -O "$(pwd)/pppoe-server-options" -g "$PPPOE_PLUGIN" + +# Start the PPP Client +ip netns exec "$NS_CLIENT" pppd \ + local debug updetach noipdefault noauth nodefaultroute \ + plugin "$PPPOE_PLUGIN" nic-"$VETH_CLIENT" + +ppp_test_connectivity + +log_test "PPPoE" + +# Dump syslog messages if the test failed +if [ "$RET" -ne 0 ]; then + while read -r _sign _date _time len _from _to + do len=${len##*=} + read -n "$len" -r LINE + echo "$LINE" + done < "$PPPOE_LOG" +fi + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/net/proc_net_pktgen.c b/tools/testing/selftests/net/proc_net_pktgen.c new file mode 100644 index 000000000000..fab3b5c2e25d --- /dev/null +++ b/tools/testing/selftests/net/proc_net_pktgen.c @@ -0,0 +1,690 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * proc_net_pktgen: kselftest for /proc/net/pktgen interface + * + * Copyright (c) 2025 Peter Seiderer <ps.report@gmx.net> + * + */ +#include <errno.h> +#include <fcntl.h> +#include <stdlib.h> +#include <unistd.h> + +#include "kselftest_harness.h" + +static const char ctrl_cmd_stop[] = "stop"; +static const char ctrl_cmd_start[] = "start"; +static const char ctrl_cmd_reset[] = "reset"; + +static const char wrong_ctrl_cmd[] = "0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789"; + +static const char thr_cmd_add_loopback_0[] = "add_device lo@0"; +static const char thr_cmd_rm_loopback_0[] = "rem_device_all"; + +static const char wrong_thr_cmd[] = "forsureawrongcommand"; +static const char legacy_thr_cmd[] = "max_before_softirq"; + +static const char wrong_dev_cmd[] = "forsurewrongcommand"; +static const char dev_cmd_min_pkt_size_0[] = "min_pkt_size"; +static const char dev_cmd_min_pkt_size_1[] = "min_pkt_size "; +static const char dev_cmd_min_pkt_size_2[] = "min_pkt_size 0"; +static const char dev_cmd_min_pkt_size_3[] = "min_pkt_size 1"; +static const char dev_cmd_min_pkt_size_4[] = "min_pkt_size 100"; +static const char dev_cmd_min_pkt_size_5[] = "min_pkt_size=1001"; +static const char dev_cmd_min_pkt_size_6[] = "min_pkt_size =2002"; +static const char dev_cmd_min_pkt_size_7[] = "min_pkt_size= 3003"; +static const char dev_cmd_min_pkt_size_8[] = "min_pkt_size = 4004"; +static const char dev_cmd_max_pkt_size_0[] = "max_pkt_size 200"; +static const char dev_cmd_pkt_size_0[] = "pkt_size 300"; +static const char dev_cmd_imix_weights_0[] = "imix_weights 0,7 576,4 1500,1"; +static const char dev_cmd_imix_weights_1[] = "imix_weights 101,1 102,2 103,3 104,4 105,5 106,6 107,7 108,8 109,9 110,10 111,11 112,12 113,13 114,14 115,15 116,16 117,17 118,18 119,19 120,20"; +static const char dev_cmd_imix_weights_2[] = "imix_weights 100,1 102,2 103,3 104,4 105,5 106,6 107,7 108,8 109,9 110,10 111,11 112,12 113,13 114,14 115,15 116,16 117,17 118,18 119,19 120,20 121,21"; +static const char dev_cmd_imix_weights_3[] = "imix_weights"; +static const char dev_cmd_imix_weights_4[] = "imix_weights "; +static const char dev_cmd_imix_weights_5[] = "imix_weights 0"; +static const char dev_cmd_imix_weights_6[] = "imix_weights 0,"; +static const char dev_cmd_debug_0[] = "debug 1"; +static const char dev_cmd_debug_1[] = "debug 0"; +static const char dev_cmd_frags_0[] = "frags 100"; +static const char dev_cmd_delay_0[] = "delay 100"; +static const char dev_cmd_delay_1[] = "delay 2147483647"; +static const char dev_cmd_rate_0[] = "rate 0"; +static const char dev_cmd_rate_1[] = "rate 100"; +static const char dev_cmd_ratep_0[] = "ratep 0"; +static const char dev_cmd_ratep_1[] = "ratep 200"; +static const char dev_cmd_udp_src_min_0[] = "udp_src_min 1"; +static const char dev_cmd_udp_dst_min_0[] = "udp_dst_min 2"; +static const char dev_cmd_udp_src_max_0[] = "udp_src_max 3"; +static const char dev_cmd_udp_dst_max_0[] = "udp_dst_max 4"; +static const char dev_cmd_clone_skb_0[] = "clone_skb 1"; +static const char dev_cmd_clone_skb_1[] = "clone_skb 0"; +static const char dev_cmd_count_0[] = "count 100"; +static const char dev_cmd_src_mac_count_0[] = "src_mac_count 100"; +static const char dev_cmd_dst_mac_count_0[] = "dst_mac_count 100"; +static const char dev_cmd_burst_0[] = "burst 0"; +static const char dev_cmd_node_0[] = "node 100"; +static const char dev_cmd_xmit_mode_0[] = "xmit_mode start_xmit"; +static const char dev_cmd_xmit_mode_1[] = "xmit_mode netif_receive"; +static const char dev_cmd_xmit_mode_2[] = "xmit_mode queue_xmit"; +static const char dev_cmd_xmit_mode_3[] = "xmit_mode nonsense"; +static const char dev_cmd_flag_0[] = "flag UDPCSUM"; +static const char dev_cmd_flag_1[] = "flag !UDPCSUM"; +static const char dev_cmd_flag_2[] = "flag nonsense"; +static const char dev_cmd_dst_min_0[] = "dst_min 101.102.103.104"; +static const char dev_cmd_dst_0[] = "dst 101.102.103.104"; +static const char dev_cmd_dst_max_0[] = "dst_max 201.202.203.204"; +static const char dev_cmd_dst6_0[] = "dst6 2001:db38:1234:0000:0000:0000:0000:0000"; +static const char dev_cmd_dst6_min_0[] = "dst6_min 2001:db8:1234:0000:0000:0000:0000:0000"; +static const char dev_cmd_dst6_max_0[] = "dst6_max 2001:db8:1234:0000:0000:0000:0000:0000"; +static const char dev_cmd_src6_0[] = "src6 2001:db38:1234:0000:0000:0000:0000:0000"; +static const char dev_cmd_src_min_0[] = "src_min 101.102.103.104"; +static const char dev_cmd_src_max_0[] = "src_max 201.202.203.204"; +static const char dev_cmd_dst_mac_0[] = "dst_mac 01:02:03:04:05:06"; +static const char dev_cmd_src_mac_0[] = "src_mac 11:12:13:14:15:16"; +static const char dev_cmd_clear_counters_0[] = "clear_counters"; +static const char dev_cmd_flows_0[] = "flows 100"; +static const char dev_cmd_spi_0[] = "spi 100"; +static const char dev_cmd_flowlen_0[] = "flowlen 100"; +static const char dev_cmd_queue_map_min_0[] = "queue_map_min 1"; +static const char dev_cmd_queue_map_max_0[] = "queue_map_max 2"; +static const char dev_cmd_mpls_0[] = "mpls 00000001"; +static const char dev_cmd_mpls_1[] = "mpls 00000001,000000f2"; +static const char dev_cmd_mpls_2[] = "mpls 00000f00,00000f01,00000f02,00000f03,00000f04,00000f05,00000f06,00000f07,00000f08,00000f09,00000f0a,00000f0b,00000f0c,00000f0d,00000f0e,00000f0f"; +static const char dev_cmd_mpls_3[] = "mpls 00000f00,00000f01,00000f02,00000f03,00000f04,00000f05,00000f06,00000f07,00000f08,00000f09,00000f0a,00000f0b,00000f0c,00000f0d,00000f0e,00000f0f,00000f10"; +static const char dev_cmd_vlan_id_0[] = "vlan_id 1"; +static const char dev_cmd_vlan_p_0[] = "vlan_p 1"; +static const char dev_cmd_vlan_cfi_0[] = "vlan_cfi 1"; +static const char dev_cmd_vlan_id_1[] = "vlan_id 4096"; +static const char dev_cmd_svlan_id_0[] = "svlan_id 1"; +static const char dev_cmd_svlan_p_0[] = "svlan_p 1"; +static const char dev_cmd_svlan_cfi_0[] = "svlan_cfi 1"; +static const char dev_cmd_svlan_id_1[] = "svlan_id 4096"; +static const char dev_cmd_tos_0[] = "tos 0"; +static const char dev_cmd_tos_1[] = "tos 0f"; +static const char dev_cmd_tos_2[] = "tos 0ff"; +static const char dev_cmd_traffic_class_0[] = "traffic_class f0"; +static const char dev_cmd_skb_priority_0[] = "skb_priority 999"; + +FIXTURE(proc_net_pktgen) { + int ctrl_fd; + int thr_fd; + int dev_fd; +}; + +FIXTURE_SETUP(proc_net_pktgen) { + int r; + ssize_t len; + + r = system("modprobe pktgen"); + ASSERT_EQ(r, 0) TH_LOG("CONFIG_NET_PKTGEN not enabled, module pktgen not loaded?"); + + self->ctrl_fd = open("/proc/net/pktgen/pgctrl", O_RDWR); + ASSERT_GE(self->ctrl_fd, 0) TH_LOG("CONFIG_NET_PKTGEN not enabled, module pktgen not loaded?"); + + self->thr_fd = open("/proc/net/pktgen/kpktgend_0", O_RDWR); + ASSERT_GE(self->thr_fd, 0) TH_LOG("CONFIG_NET_PKTGEN not enabled, module pktgen not loaded?"); + + len = write(self->thr_fd, thr_cmd_add_loopback_0, sizeof(thr_cmd_add_loopback_0)); + ASSERT_EQ(len, sizeof(thr_cmd_add_loopback_0)) TH_LOG("device lo@0 already registered?"); + + self->dev_fd = open("/proc/net/pktgen/lo@0", O_RDWR); + ASSERT_GE(self->dev_fd, 0) TH_LOG("device entry for lo@0 missing?"); +} + +FIXTURE_TEARDOWN(proc_net_pktgen) { + int ret; + ssize_t len; + + ret = close(self->dev_fd); + EXPECT_EQ(ret, 0); + + len = write(self->thr_fd, thr_cmd_rm_loopback_0, sizeof(thr_cmd_rm_loopback_0)); + EXPECT_EQ(len, sizeof(thr_cmd_rm_loopback_0)); + + ret = close(self->thr_fd); + EXPECT_EQ(ret, 0); + + ret = close(self->ctrl_fd); + EXPECT_EQ(ret, 0); +} + +TEST_F(proc_net_pktgen, wrong_ctrl_cmd) { + for (int i = 0; i <= sizeof(wrong_ctrl_cmd); i++) { + ssize_t len; + + len = write(self->ctrl_fd, wrong_ctrl_cmd, i); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + } +} + +TEST_F(proc_net_pktgen, ctrl_cmd) { + ssize_t len; + + len = write(self->ctrl_fd, ctrl_cmd_stop, sizeof(ctrl_cmd_stop)); + EXPECT_EQ(len, sizeof(ctrl_cmd_stop)); + + len = write(self->ctrl_fd, ctrl_cmd_stop, sizeof(ctrl_cmd_stop) - 1); + EXPECT_EQ(len, sizeof(ctrl_cmd_stop) - 1); + + len = write(self->ctrl_fd, ctrl_cmd_start, sizeof(ctrl_cmd_start)); + EXPECT_EQ(len, sizeof(ctrl_cmd_start)); + + len = write(self->ctrl_fd, ctrl_cmd_start, sizeof(ctrl_cmd_start) - 1); + EXPECT_EQ(len, sizeof(ctrl_cmd_start) - 1); + + len = write(self->ctrl_fd, ctrl_cmd_reset, sizeof(ctrl_cmd_reset)); + EXPECT_EQ(len, sizeof(ctrl_cmd_reset)); + + len = write(self->ctrl_fd, ctrl_cmd_reset, sizeof(ctrl_cmd_reset) - 1); + EXPECT_EQ(len, sizeof(ctrl_cmd_reset) - 1); +} + +TEST_F(proc_net_pktgen, wrong_thr_cmd) { + for (int i = 0; i <= sizeof(wrong_thr_cmd); i++) { + ssize_t len; + + len = write(self->thr_fd, wrong_thr_cmd, i); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + } +} + +TEST_F(proc_net_pktgen, legacy_thr_cmd) { + for (int i = 0; i <= sizeof(legacy_thr_cmd); i++) { + ssize_t len; + + len = write(self->thr_fd, legacy_thr_cmd, i); + if (i < (sizeof(legacy_thr_cmd) - 1)) { + /* incomplete command string */ + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + } else { + /* complete command string without/with trailing '\0' */ + EXPECT_EQ(len, i); + } + } +} + +TEST_F(proc_net_pktgen, wrong_dev_cmd) { + for (int i = 0; i <= sizeof(wrong_dev_cmd); i++) { + ssize_t len; + + len = write(self->dev_fd, wrong_dev_cmd, i); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + } +} + +TEST_F(proc_net_pktgen, dev_cmd_min_pkt_size) { + ssize_t len; + + /* with trailing '\0' */ + len = write(self->dev_fd, dev_cmd_min_pkt_size_0, sizeof(dev_cmd_min_pkt_size_0)); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_0)); + + /* without trailing '\0' */ + len = write(self->dev_fd, dev_cmd_min_pkt_size_0, sizeof(dev_cmd_min_pkt_size_0) - 1); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_0) - 1); + + /* with trailing '\0' */ + len = write(self->dev_fd, dev_cmd_min_pkt_size_1, sizeof(dev_cmd_min_pkt_size_1)); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_1)); + + /* without trailing '\0' */ + len = write(self->dev_fd, dev_cmd_min_pkt_size_1, sizeof(dev_cmd_min_pkt_size_1) - 1); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_1) - 1); + + /* with trailing '\0' */ + len = write(self->dev_fd, dev_cmd_min_pkt_size_2, sizeof(dev_cmd_min_pkt_size_2)); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_2)); + + /* without trailing '\0' */ + len = write(self->dev_fd, dev_cmd_min_pkt_size_2, sizeof(dev_cmd_min_pkt_size_2) - 1); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_2) - 1); + + len = write(self->dev_fd, dev_cmd_min_pkt_size_3, sizeof(dev_cmd_min_pkt_size_3)); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_3)); + + len = write(self->dev_fd, dev_cmd_min_pkt_size_4, sizeof(dev_cmd_min_pkt_size_4)); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_4)); + + len = write(self->dev_fd, dev_cmd_min_pkt_size_5, sizeof(dev_cmd_min_pkt_size_5)); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_5)); + + len = write(self->dev_fd, dev_cmd_min_pkt_size_6, sizeof(dev_cmd_min_pkt_size_6)); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_6)); + + len = write(self->dev_fd, dev_cmd_min_pkt_size_7, sizeof(dev_cmd_min_pkt_size_7)); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_7)); + + len = write(self->dev_fd, dev_cmd_min_pkt_size_8, sizeof(dev_cmd_min_pkt_size_8)); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_8)); +} + +TEST_F(proc_net_pktgen, dev_cmd_max_pkt_size) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_max_pkt_size_0, sizeof(dev_cmd_max_pkt_size_0)); + EXPECT_EQ(len, sizeof(dev_cmd_max_pkt_size_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_pkt_size) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_pkt_size_0, sizeof(dev_cmd_pkt_size_0)); + EXPECT_EQ(len, sizeof(dev_cmd_pkt_size_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_imix_weights) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_imix_weights_0, sizeof(dev_cmd_imix_weights_0)); + EXPECT_EQ(len, sizeof(dev_cmd_imix_weights_0)); + + len = write(self->dev_fd, dev_cmd_imix_weights_1, sizeof(dev_cmd_imix_weights_1)); + EXPECT_EQ(len, sizeof(dev_cmd_imix_weights_1)); + + len = write(self->dev_fd, dev_cmd_imix_weights_2, sizeof(dev_cmd_imix_weights_2)); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, E2BIG); + + /* with trailing '\0' */ + len = write(self->dev_fd, dev_cmd_imix_weights_3, sizeof(dev_cmd_imix_weights_3)); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + + /* without trailing '\0' */ + len = write(self->dev_fd, dev_cmd_imix_weights_3, sizeof(dev_cmd_imix_weights_3) - 1); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + + /* with trailing '\0' */ + len = write(self->dev_fd, dev_cmd_imix_weights_4, sizeof(dev_cmd_imix_weights_4)); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + + /* without trailing '\0' */ + len = write(self->dev_fd, dev_cmd_imix_weights_4, sizeof(dev_cmd_imix_weights_4) - 1); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + + /* with trailing '\0' */ + len = write(self->dev_fd, dev_cmd_imix_weights_5, sizeof(dev_cmd_imix_weights_5)); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + + /* without trailing '\0' */ + len = write(self->dev_fd, dev_cmd_imix_weights_5, sizeof(dev_cmd_imix_weights_5) - 1); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + + /* with trailing '\0' */ + len = write(self->dev_fd, dev_cmd_imix_weights_6, sizeof(dev_cmd_imix_weights_6)); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + + /* without trailing '\0' */ + len = write(self->dev_fd, dev_cmd_imix_weights_6, sizeof(dev_cmd_imix_weights_6) - 1); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); +} + +TEST_F(proc_net_pktgen, dev_cmd_debug) { + ssize_t len; + + /* debug on */ + len = write(self->dev_fd, dev_cmd_debug_0, sizeof(dev_cmd_debug_0)); + EXPECT_EQ(len, sizeof(dev_cmd_debug_0)); + + /* debug off */ + len = write(self->dev_fd, dev_cmd_debug_1, sizeof(dev_cmd_debug_1)); + EXPECT_EQ(len, sizeof(dev_cmd_debug_1)); +} + +TEST_F(proc_net_pktgen, dev_cmd_frags) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_frags_0, sizeof(dev_cmd_frags_0)); + EXPECT_EQ(len, sizeof(dev_cmd_frags_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_delay) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_delay_0, sizeof(dev_cmd_delay_0)); + EXPECT_EQ(len, sizeof(dev_cmd_delay_0)); + + len = write(self->dev_fd, dev_cmd_delay_1, sizeof(dev_cmd_delay_1)); + EXPECT_EQ(len, sizeof(dev_cmd_delay_1)); +} + +TEST_F(proc_net_pktgen, dev_cmd_rate) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_rate_0, sizeof(dev_cmd_rate_0)); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + + len = write(self->dev_fd, dev_cmd_rate_1, sizeof(dev_cmd_rate_1)); + EXPECT_EQ(len, sizeof(dev_cmd_rate_1)); +} + +TEST_F(proc_net_pktgen, dev_cmd_ratep) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_ratep_0, sizeof(dev_cmd_ratep_0)); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + + len = write(self->dev_fd, dev_cmd_ratep_1, sizeof(dev_cmd_ratep_1)); + EXPECT_EQ(len, sizeof(dev_cmd_ratep_1)); +} + +TEST_F(proc_net_pktgen, dev_cmd_udp_src_min) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_udp_src_min_0, sizeof(dev_cmd_udp_src_min_0)); + EXPECT_EQ(len, sizeof(dev_cmd_udp_src_min_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_udp_dst_min) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_udp_dst_min_0, sizeof(dev_cmd_udp_dst_min_0)); + EXPECT_EQ(len, sizeof(dev_cmd_udp_dst_min_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_udp_src_max) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_udp_src_max_0, sizeof(dev_cmd_udp_src_max_0)); + EXPECT_EQ(len, sizeof(dev_cmd_udp_src_max_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_udp_dst_max) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_udp_dst_max_0, sizeof(dev_cmd_udp_dst_max_0)); + EXPECT_EQ(len, sizeof(dev_cmd_udp_dst_max_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_clone_skb) { + ssize_t len; + + /* clone_skb on (gives EOPNOTSUPP on lo device) */ + len = write(self->dev_fd, dev_cmd_clone_skb_0, sizeof(dev_cmd_clone_skb_0)); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EOPNOTSUPP); + + /* clone_skb off */ + len = write(self->dev_fd, dev_cmd_clone_skb_1, sizeof(dev_cmd_clone_skb_1)); + EXPECT_EQ(len, sizeof(dev_cmd_clone_skb_1)); +} + +TEST_F(proc_net_pktgen, dev_cmd_count) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_count_0, sizeof(dev_cmd_count_0)); + EXPECT_EQ(len, sizeof(dev_cmd_count_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_src_mac_count) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_src_mac_count_0, sizeof(dev_cmd_src_mac_count_0)); + EXPECT_EQ(len, sizeof(dev_cmd_src_mac_count_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_dst_mac_count) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_dst_mac_count_0, sizeof(dev_cmd_dst_mac_count_0)); + EXPECT_EQ(len, sizeof(dev_cmd_dst_mac_count_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_burst) { + ssize_t len; + + /* burst off */ + len = write(self->dev_fd, dev_cmd_burst_0, sizeof(dev_cmd_burst_0)); + EXPECT_EQ(len, sizeof(dev_cmd_burst_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_node) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_node_0, sizeof(dev_cmd_node_0)); + EXPECT_EQ(len, sizeof(dev_cmd_node_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_xmit_mode) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_xmit_mode_0, sizeof(dev_cmd_xmit_mode_0)); + EXPECT_EQ(len, sizeof(dev_cmd_xmit_mode_0)); + + len = write(self->dev_fd, dev_cmd_xmit_mode_1, sizeof(dev_cmd_xmit_mode_1)); + EXPECT_EQ(len, sizeof(dev_cmd_xmit_mode_1)); + + len = write(self->dev_fd, dev_cmd_xmit_mode_2, sizeof(dev_cmd_xmit_mode_2)); + EXPECT_EQ(len, sizeof(dev_cmd_xmit_mode_2)); + + len = write(self->dev_fd, dev_cmd_xmit_mode_3, sizeof(dev_cmd_xmit_mode_3)); + EXPECT_EQ(len, sizeof(dev_cmd_xmit_mode_3)); +} + +TEST_F(proc_net_pktgen, dev_cmd_flag) { + ssize_t len; + + /* flag UDPCSUM on */ + len = write(self->dev_fd, dev_cmd_flag_0, sizeof(dev_cmd_flag_0)); + EXPECT_EQ(len, sizeof(dev_cmd_flag_0)); + + /* flag UDPCSUM off */ + len = write(self->dev_fd, dev_cmd_flag_1, sizeof(dev_cmd_flag_1)); + EXPECT_EQ(len, sizeof(dev_cmd_flag_1)); + + /* flag invalid */ + len = write(self->dev_fd, dev_cmd_flag_2, sizeof(dev_cmd_flag_2)); + EXPECT_EQ(len, sizeof(dev_cmd_flag_2)); +} + +TEST_F(proc_net_pktgen, dev_cmd_dst_min) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_dst_min_0, sizeof(dev_cmd_dst_min_0)); + EXPECT_EQ(len, sizeof(dev_cmd_dst_min_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_dst) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_dst_0, sizeof(dev_cmd_dst_0)); + EXPECT_EQ(len, sizeof(dev_cmd_dst_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_dst_max) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_dst_max_0, sizeof(dev_cmd_dst_max_0)); + EXPECT_EQ(len, sizeof(dev_cmd_dst_max_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_dst6) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_dst6_0, sizeof(dev_cmd_dst6_0)); + EXPECT_EQ(len, sizeof(dev_cmd_dst6_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_dst6_min) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_dst6_min_0, sizeof(dev_cmd_dst6_min_0)); + EXPECT_EQ(len, sizeof(dev_cmd_dst6_min_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_dst6_max) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_dst6_max_0, sizeof(dev_cmd_dst6_max_0)); + EXPECT_EQ(len, sizeof(dev_cmd_dst6_max_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_src6) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_src6_0, sizeof(dev_cmd_src6_0)); + EXPECT_EQ(len, sizeof(dev_cmd_src6_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_src_min) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_src_min_0, sizeof(dev_cmd_src_min_0)); + EXPECT_EQ(len, sizeof(dev_cmd_src_min_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_src_max) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_src_max_0, sizeof(dev_cmd_src_max_0)); + EXPECT_EQ(len, sizeof(dev_cmd_src_max_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_dst_mac) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_dst_mac_0, sizeof(dev_cmd_dst_mac_0)); + EXPECT_EQ(len, sizeof(dev_cmd_dst_mac_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_src_mac) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_src_mac_0, sizeof(dev_cmd_src_mac_0)); + EXPECT_EQ(len, sizeof(dev_cmd_src_mac_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_clear_counters) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_clear_counters_0, sizeof(dev_cmd_clear_counters_0)); + EXPECT_EQ(len, sizeof(dev_cmd_clear_counters_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_flows) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_flows_0, sizeof(dev_cmd_flows_0)); + EXPECT_EQ(len, sizeof(dev_cmd_flows_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_spi) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_spi_0, sizeof(dev_cmd_spi_0)); + EXPECT_EQ(len, sizeof(dev_cmd_spi_0)) TH_LOG("CONFIG_XFRM not enabled?"); +} + +TEST_F(proc_net_pktgen, dev_cmd_flowlen) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_flowlen_0, sizeof(dev_cmd_flowlen_0)); + EXPECT_EQ(len, sizeof(dev_cmd_flowlen_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_queue_map_min) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_queue_map_min_0, sizeof(dev_cmd_queue_map_min_0)); + EXPECT_EQ(len, sizeof(dev_cmd_queue_map_min_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_queue_map_max) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_queue_map_max_0, sizeof(dev_cmd_queue_map_max_0)); + EXPECT_EQ(len, sizeof(dev_cmd_queue_map_max_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_mpls) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_mpls_0, sizeof(dev_cmd_mpls_0)); + EXPECT_EQ(len, sizeof(dev_cmd_mpls_0)); + + len = write(self->dev_fd, dev_cmd_mpls_1, sizeof(dev_cmd_mpls_1)); + EXPECT_EQ(len, sizeof(dev_cmd_mpls_1)); + + len = write(self->dev_fd, dev_cmd_mpls_2, sizeof(dev_cmd_mpls_2)); + EXPECT_EQ(len, sizeof(dev_cmd_mpls_2)); + + len = write(self->dev_fd, dev_cmd_mpls_3, sizeof(dev_cmd_mpls_3)); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, E2BIG); +} + +TEST_F(proc_net_pktgen, dev_cmd_vlan_id) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_vlan_id_0, sizeof(dev_cmd_vlan_id_0)); + EXPECT_EQ(len, sizeof(dev_cmd_vlan_id_0)); + + len = write(self->dev_fd, dev_cmd_vlan_p_0, sizeof(dev_cmd_vlan_p_0)); + EXPECT_EQ(len, sizeof(dev_cmd_vlan_p_0)); + + len = write(self->dev_fd, dev_cmd_vlan_cfi_0, sizeof(dev_cmd_vlan_cfi_0)); + EXPECT_EQ(len, sizeof(dev_cmd_vlan_cfi_0)); + + len = write(self->dev_fd, dev_cmd_vlan_id_1, sizeof(dev_cmd_vlan_id_1)); + EXPECT_EQ(len, sizeof(dev_cmd_vlan_id_1)); +} + +TEST_F(proc_net_pktgen, dev_cmd_svlan_id) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_svlan_id_0, sizeof(dev_cmd_svlan_id_0)); + EXPECT_EQ(len, sizeof(dev_cmd_svlan_id_0)); + + len = write(self->dev_fd, dev_cmd_svlan_p_0, sizeof(dev_cmd_svlan_p_0)); + EXPECT_EQ(len, sizeof(dev_cmd_svlan_p_0)); + + len = write(self->dev_fd, dev_cmd_svlan_cfi_0, sizeof(dev_cmd_svlan_cfi_0)); + EXPECT_EQ(len, sizeof(dev_cmd_svlan_cfi_0)); + + len = write(self->dev_fd, dev_cmd_svlan_id_1, sizeof(dev_cmd_svlan_id_1)); + EXPECT_EQ(len, sizeof(dev_cmd_svlan_id_1)); +} + + +TEST_F(proc_net_pktgen, dev_cmd_tos) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_tos_0, sizeof(dev_cmd_tos_0)); + EXPECT_EQ(len, sizeof(dev_cmd_tos_0)); + + len = write(self->dev_fd, dev_cmd_tos_1, sizeof(dev_cmd_tos_1)); + EXPECT_EQ(len, sizeof(dev_cmd_tos_1)); + + len = write(self->dev_fd, dev_cmd_tos_2, sizeof(dev_cmd_tos_2)); + EXPECT_EQ(len, sizeof(dev_cmd_tos_2)); +} + + +TEST_F(proc_net_pktgen, dev_cmd_traffic_class) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_traffic_class_0, sizeof(dev_cmd_traffic_class_0)); + EXPECT_EQ(len, sizeof(dev_cmd_traffic_class_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_skb_priority) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_skb_priority_0, sizeof(dev_cmd_skb_priority_0)); + EXPECT_EQ(len, sizeof(dev_cmd_skb_priority_0)); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c index 84c524357075..ab8d8b7e6cb0 100644 --- a/tools/testing/selftests/net/psock_fanout.c +++ b/tools/testing/selftests/net/psock_fanout.c @@ -54,7 +54,7 @@ #include <unistd.h> #include "psock_lib.h" -#include "../kselftest.h" +#include "kselftest.h" #define RING_NUM_FRAMES 20 diff --git a/tools/testing/selftests/net/psock_lib.h b/tools/testing/selftests/net/psock_lib.h index 6e4fef560873..067265b0a554 100644 --- a/tools/testing/selftests/net/psock_lib.h +++ b/tools/testing/selftests/net/psock_lib.h @@ -22,10 +22,6 @@ #define PORT_BASE 8000 -#ifndef __maybe_unused -# define __maybe_unused __attribute__ ((__unused__)) -#endif - static __maybe_unused void pair_udp_setfilter(int fd) { /* the filter below checks for all of the following conditions that diff --git a/tools/testing/selftests/net/psock_tpacket.c b/tools/testing/selftests/net/psock_tpacket.c index 404a2ce759ab..7caf3135448d 100644 --- a/tools/testing/selftests/net/psock_tpacket.c +++ b/tools/testing/selftests/net/psock_tpacket.c @@ -12,7 +12,7 @@ * * Datapath: * Open a pair of packet sockets and send resp. receive an a priori known - * packet pattern accross the sockets and check if it was received resp. + * packet pattern across the sockets and check if it was received resp. * sent correctly. Fanout in combination with RX_RING is currently not * tested here. * @@ -22,6 +22,7 @@ * - TPACKET_V3: RX_RING */ +#undef NDEBUG #include <stdio.h> #include <stdlib.h> #include <sys/types.h> @@ -33,7 +34,6 @@ #include <ctype.h> #include <fcntl.h> #include <unistd.h> -#include <bits/wordsize.h> #include <net/ethernet.h> #include <netinet/ip.h> #include <arpa/inet.h> @@ -46,7 +46,7 @@ #include "psock_lib.h" -#include "../kselftest.h" +#include "kselftest.h" #ifndef bug_on # define bug_on(cond) assert(!(cond)) @@ -785,7 +785,7 @@ static int test_kernel_bit_width(void) static int test_user_bit_width(void) { - return __WORDSIZE; + return sizeof(long) * 8; } static const char *tpacket_str[] = { diff --git a/tools/testing/selftests/net/rds/Makefile b/tools/testing/selftests/net/rds/Makefile index 612a7219990e..fe363be8e358 100644 --- a/tools/testing/selftests/net/rds/Makefile +++ b/tools/testing/selftests/net/rds/Makefile @@ -5,8 +5,15 @@ all: TEST_PROGS := run.sh -TEST_FILES := include.sh test.py +TEST_FILES := \ + include.sh \ + settings \ + test.py \ +# end of TEST_FILES -EXTRA_CLEAN := /tmp/rds_logs include.sh +EXTRA_CLEAN := \ + include.sh \ + /tmp/rds_logs \ +# end of EXTRA_CLEAN include ../../lib.mk diff --git a/tools/testing/selftests/net/rds/README.txt b/tools/testing/selftests/net/rds/README.txt index cbde2951ab13..c6fe003d503b 100644 --- a/tools/testing/selftests/net/rds/README.txt +++ b/tools/testing/selftests/net/rds/README.txt @@ -31,8 +31,11 @@ EXAMPLE: # Alternatly create a gcov disabled .config tools/testing/selftests/net/rds/config.sh + # Config paths may also be specified with the -c flag + tools/testing/selftests/net/rds/config.sh -c .config.local + # build the kernel - vng --build --config tools/testing/selftests/net/config + vng --build --config .config # launch the tests in a VM vng -v --rwdir ./ --run . --user root --cpus 4 -- \ diff --git a/tools/testing/selftests/net/rds/config b/tools/testing/selftests/net/rds/config new file mode 100644 index 000000000000..97db7ecb892a --- /dev/null +++ b/tools/testing/selftests/net/rds/config @@ -0,0 +1,5 @@ +CONFIG_NET_NS=y +CONFIG_NET_SCH_NETEM=y +CONFIG_RDS=y +CONFIG_RDS_TCP=y +CONFIG_VETH=y diff --git a/tools/testing/selftests/net/rds/config.sh b/tools/testing/selftests/net/rds/config.sh index 791c8dbe1095..29a79314dd60 100755 --- a/tools/testing/selftests/net/rds/config.sh +++ b/tools/testing/selftests/net/rds/config.sh @@ -6,15 +6,20 @@ set -u set -x unset KBUILD_OUTPUT +CONF_FILE="" +FLAGS=() GENERATE_GCOV_REPORT=0 -while getopts "g" opt; do +while getopts "gc:" opt; do case ${opt} in g) GENERATE_GCOV_REPORT=1 ;; + c) + CONF_FILE=$OPTARG + ;; :) - echo "USAGE: config.sh [-g]" + echo "USAGE: config.sh [-g] [-c config]" exit 1 ;; ?) @@ -24,30 +29,32 @@ while getopts "g" opt; do esac done -CONF_FILE="tools/testing/selftests/net/config" +if [[ "$CONF_FILE" != "" ]]; then + FLAGS=(--file "$CONF_FILE") +fi # no modules -scripts/config --file "$CONF_FILE" --disable CONFIG_MODULES +scripts/config "${FLAGS[@]}" --disable CONFIG_MODULES # enable RDS -scripts/config --file "$CONF_FILE" --enable CONFIG_RDS -scripts/config --file "$CONF_FILE" --enable CONFIG_RDS_TCP +scripts/config "${FLAGS[@]}" --enable CONFIG_RDS +scripts/config "${FLAGS[@]}" --enable CONFIG_RDS_TCP if [ "$GENERATE_GCOV_REPORT" -eq 1 ]; then # instrument RDS and only RDS - scripts/config --file "$CONF_FILE" --enable CONFIG_GCOV_KERNEL - scripts/config --file "$CONF_FILE" --disable GCOV_PROFILE_ALL - scripts/config --file "$CONF_FILE" --enable GCOV_PROFILE_RDS + scripts/config "${FLAGS[@]}" --enable CONFIG_GCOV_KERNEL + scripts/config "${FLAGS[@]}" --disable GCOV_PROFILE_ALL + scripts/config "${FLAGS[@]}" --enable GCOV_PROFILE_RDS else - scripts/config --file "$CONF_FILE" --disable CONFIG_GCOV_KERNEL - scripts/config --file "$CONF_FILE" --disable GCOV_PROFILE_ALL - scripts/config --file "$CONF_FILE" --disable GCOV_PROFILE_RDS + scripts/config "${FLAGS[@]}" --disable CONFIG_GCOV_KERNEL + scripts/config "${FLAGS[@]}" --disable GCOV_PROFILE_ALL + scripts/config "${FLAGS[@]}" --disable GCOV_PROFILE_RDS fi # need network namespaces to run tests with veth network interfaces -scripts/config --file "$CONF_FILE" --enable CONFIG_NET_NS -scripts/config --file "$CONF_FILE" --enable CONFIG_VETH +scripts/config "${FLAGS[@]}" --enable CONFIG_NET_NS +scripts/config "${FLAGS[@]}" --enable CONFIG_VETH # simulate packet loss -scripts/config --file "$CONF_FILE" --enable CONFIG_NET_SCH_NETEM +scripts/config "${FLAGS[@]}" --enable CONFIG_NET_SCH_NETEM diff --git a/tools/testing/selftests/net/rds/run.sh b/tools/testing/selftests/net/rds/run.sh index 8aee244f582a..897d17d1b8db 100755 --- a/tools/testing/selftests/net/rds/run.sh +++ b/tools/testing/selftests/net/rds/run.sh @@ -19,6 +19,9 @@ if test -f "$build_include"; then build_dir="$mk_build_dir" fi +# Source settings for timeout value (also used by ksft runner) +source "$current_dir"/settings + # This test requires kernel source and the *.gcda data therein # Locate the top level of the kernel source, and the net/rds # subfolder with the appropriate *.gcno object files @@ -194,8 +197,8 @@ set +e echo running RDS tests... echo Traces will be logged to "$TRACE_FILE" rm -f "$TRACE_FILE" -strace -T -tt -o "$TRACE_FILE" python3 "$(dirname "$0")/test.py" --timeout 400 -d "$LOG_DIR" \ - -l "$PLOSS" -c "$PCORRUPT" -u "$PDUP" +strace -T -tt -o "$TRACE_FILE" python3 "$(dirname "$0")/test.py" \ + --timeout "$timeout" -d "$LOG_DIR" -l "$PLOSS" -c "$PCORRUPT" -u "$PDUP" test_rc=$? dmesg > "${LOG_DIR}/dmesg.out" diff --git a/tools/testing/selftests/net/rds/settings b/tools/testing/selftests/net/rds/settings new file mode 100644 index 000000000000..d2009a64589c --- /dev/null +++ b/tools/testing/selftests/net/rds/settings @@ -0,0 +1 @@ +timeout=400 diff --git a/tools/testing/selftests/net/rds/test.py b/tools/testing/selftests/net/rds/test.py index 4a7178d11193..93e23e8b256c 100755 --- a/tools/testing/selftests/net/rds/test.py +++ b/tools/testing/selftests/net/rds/test.py @@ -11,9 +11,8 @@ import signal import socket import subprocess import sys -import atexit -from pwd import getpwuid -from os import stat +import tempfile +import shutil # Allow utils module to be imported from different directory this_dir = os.path.dirname(os.path.realpath(__file__)) @@ -23,45 +22,54 @@ from lib.py.utils import ip libc = ctypes.cdll.LoadLibrary('libc.so.6') setns = libc.setns -net0 = 'net0' -net1 = 'net1' +NET0 = 'net0' +NET1 = 'net1' -veth0 = 'veth0' -veth1 = 'veth1' +VETH0 = 'veth0' +VETH1 = 'veth1' # Helper function for creating a socket inside a network namespace. # We need this because otherwise RDS will detect that the two TCP # sockets are on the same interface and use the loop transport instead # of the TCP transport. -def netns_socket(netns, *args): +def netns_socket(netns, *sock_args): + """ + Creates sockets inside of network namespace + + :param netns: the name of the network namespace + :param sock_args: socket family and type + """ u0, u1 = socket.socketpair(socket.AF_UNIX, socket.SOCK_SEQPACKET) child = os.fork() if child == 0: # change network namespace - with open(f'/var/run/netns/{netns}') as f: + with open(f'/var/run/netns/{netns}', encoding='utf-8') as f: try: - ret = setns(f.fileno(), 0) + setns(f.fileno(), 0) except IOError as e: print(e.errno) print(e) # create socket in target namespace - s = socket.socket(*args) + sock = socket.socket(*sock_args) # send resulting socket to parent - socket.send_fds(u0, [], [s.fileno()]) + socket.send_fds(u0, [], [sock.fileno()]) sys.exit(0) # receive socket from child - _, s, _, _ = socket.recv_fds(u1, 0, 1) + _, fds, _, _ = socket.recv_fds(u1, 0, 1) os.waitpid(child, 0) u0.close() u1.close() - return socket.fromfd(s[0], *args) + return socket.fromfd(fds[0], *sock_args) -def signal_handler(sig, frame): +def signal_handler(_sig, _frame): + """ + Test timed out signal handler + """ print('Test timed out') sys.exit(1) @@ -81,13 +89,13 @@ parser.add_argument('-u', '--duplicate', help="Simulate tcp packet duplication", type=int, default=0) args = parser.parse_args() logdir=args.logdir -packet_loss=str(args.loss)+'%' -packet_corruption=str(args.corruption)+'%' -packet_duplicate=str(args.duplicate)+'%' +PACKET_LOSS=str(args.loss)+'%' +PACKET_CORRUPTION=str(args.corruption)+'%' +PACKET_DUPLICATE=str(args.duplicate)+'%' -ip(f"netns add {net0}") -ip(f"netns add {net1}") -ip(f"link add type veth") +ip(f"netns add {NET0}") +ip(f"netns add {NET1}") +ip("link add type veth") addrs = [ # we technically don't need different port numbers, but this will @@ -99,38 +107,38 @@ addrs = [ # move interfaces to separate namespaces so they can no longer be # bound directly; this prevents rds from switching over from the tcp # transport to the loop transport. -ip(f"link set {veth0} netns {net0} up") -ip(f"link set {veth1} netns {net1} up") +ip(f"link set {VETH0} netns {NET0} up") +ip(f"link set {VETH1} netns {NET1} up") # add addresses -ip(f"-n {net0} addr add {addrs[0][0]}/32 dev {veth0}") -ip(f"-n {net1} addr add {addrs[1][0]}/32 dev {veth1}") +ip(f"-n {NET0} addr add {addrs[0][0]}/32 dev {VETH0}") +ip(f"-n {NET1} addr add {addrs[1][0]}/32 dev {VETH1}") # add routes -ip(f"-n {net0} route add {addrs[1][0]}/32 dev {veth0}") -ip(f"-n {net1} route add {addrs[0][0]}/32 dev {veth1}") +ip(f"-n {NET0} route add {addrs[1][0]}/32 dev {VETH0}") +ip(f"-n {NET1} route add {addrs[0][0]}/32 dev {VETH1}") # sanity check that our two interfaces/addresses are correctly set up # and communicating by doing a single ping -ip(f"netns exec {net0} ping -c 1 {addrs[1][0]}") +ip(f"netns exec {NET0} ping -c 1 {addrs[1][0]}") # Start a packet capture on each network -for net in [net0, net1]: - tcpdump_pid = os.fork() - if tcpdump_pid == 0: - pcap = logdir+'/'+net+'.pcap' - subprocess.check_call(['touch', pcap]) - user = getpwuid(stat(pcap).st_uid).pw_name - ip(f"netns exec {net} /usr/sbin/tcpdump -Z {user} -i any -w {pcap}") - sys.exit(0) +tcpdump_procs = [] +for net in [NET0, NET1]: + pcap = logdir+'/'+net+'.pcap' + fd, pcap_tmp = tempfile.mkstemp(suffix=".pcap", prefix=f"{net}-", dir="/tmp") + p = subprocess.Popen( + ['ip', 'netns', 'exec', net, + '/usr/sbin/tcpdump', '-i', 'any', '-w', pcap_tmp]) + tcpdump_procs.append((p, pcap_tmp, pcap, fd)) # simulate packet loss, duplication and corruption -for net, iface in [(net0, veth0), (net1, veth1)]: +for net, iface in [(NET0, VETH0), (NET1, VETH1)]: ip(f"netns exec {net} /usr/sbin/tc qdisc add dev {iface} root netem \ - corrupt {packet_corruption} loss {packet_loss} duplicate \ - {packet_duplicate}") + corrupt {PACKET_CORRUPTION} loss {PACKET_LOSS} duplicate \ + {PACKET_DUPLICATE}") # add a timeout if args.timeout > 0: @@ -138,8 +146,8 @@ if args.timeout > 0: signal.signal(signal.SIGALRM, signal_handler) sockets = [ - netns_socket(net0, socket.AF_RDS, socket.SOCK_SEQPACKET), - netns_socket(net1, socket.AF_RDS, socket.SOCK_SEQPACKET), + netns_socket(NET0, socket.AF_RDS, socket.SOCK_SEQPACKET), + netns_socket(NET1, socket.AF_RDS, socket.SOCK_SEQPACKET), ] for s, addr in zip(sockets, addrs): @@ -150,9 +158,7 @@ fileno_to_socket = { s.fileno(): s for s in sockets } -addr_to_socket = { - addr: s for addr, s in zip(addrs, sockets) -} +addr_to_socket = dict(zip(addrs, sockets)) socket_to_addr = { s: addr for addr, s in zip(addrs, sockets) @@ -166,14 +172,14 @@ ep = select.epoll() for s in sockets: ep.register(s, select.EPOLLRDNORM) -n = 50000 +NUM_PACKETS = 50000 nr_send = 0 nr_recv = 0 -while nr_send < n: +while nr_send < NUM_PACKETS: # Send as much as we can without blocking print("sending...", nr_send, nr_recv) - while nr_send < n: + while nr_send < NUM_PACKETS: send_data = hashlib.sha256( f'packet {nr_send}'.encode('utf-8')).hexdigest().encode('utf-8') @@ -212,7 +218,7 @@ while nr_send < n: break # exercise net/rds/tcp.c:rds_tcp_sysctl_reset() - for net in [net0, net1]: + for net in [NET0, NET1]: ip(f"netns exec {net} /usr/sbin/sysctl net.rds.tcp.rds_tcp_rcvbuf=10000") ip(f"netns exec {net} /usr/sbin/sysctl net.rds.tcp.rds_tcp_sndbuf=10000") @@ -242,7 +248,11 @@ for s in sockets: print(f"getsockopt(): {nr_success}/{nr_error}") print("Stopping network packet captures") -subprocess.check_call(['killall', '-q', 'tcpdump']) +for p, pcap_tmp, pcap, fd in tcpdump_procs: + p.terminate() + p.wait() + os.close(fd) + shutil.move(pcap_tmp, pcap) # We're done sending and receiving stuff, now let's check if what # we received is what we sent. diff --git a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c index 066efd30e294..5aad27a0d13a 100644 --- a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c +++ b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c @@ -22,7 +22,7 @@ #include <sys/socket.h> #include <sys/types.h> #include <unistd.h> -#include "../kselftest_harness.h" +#include "kselftest_harness.h" struct reuse_opts { int reuseaddr[2]; @@ -112,7 +112,7 @@ TEST(reuseaddr_ports_exhausted_reusable_same_euid) ASSERT_NE(-1, fd[0]) TH_LOG("failed to bind."); if (opts->reuseport[0] && opts->reuseport[1]) { - EXPECT_EQ(-1, fd[1]) TH_LOG("should fail to bind because both sockets succeed to be listened."); + EXPECT_EQ(-1, fd[1]) TH_LOG("should fail to bind because both sockets successfully listened."); } else { EXPECT_NE(-1, fd[1]) TH_LOG("should succeed to bind to connect to different destinations."); } diff --git a/tools/testing/selftests/net/reuseport_addr_any.c b/tools/testing/selftests/net/reuseport_addr_any.c index b8475cb29be7..1c43401a1c80 100644 --- a/tools/testing/selftests/net/reuseport_addr_any.c +++ b/tools/testing/selftests/net/reuseport_addr_any.c @@ -9,7 +9,6 @@ #include <arpa/inet.h> #include <errno.h> #include <error.h> -#include <linux/dccp.h> #include <linux/in.h> #include <linux/unistd.h> #include <stdbool.h> @@ -21,10 +20,6 @@ #include <sys/socket.h> #include <unistd.h> -#ifndef SOL_DCCP -#define SOL_DCCP 269 -#endif - static const char *IP4_ADDR = "127.0.0.1"; static const char *IP6_ADDR = "::1"; static const char *IP4_MAPPED6 = "::ffff:127.0.0.1"; @@ -86,15 +81,6 @@ static void build_rcv_fd(int family, int proto, int *rcv_fds, int count, if (proto == SOCK_STREAM && listen(rcv_fds[i], 10)) error(1, errno, "tcp: failed to listen on receive port"); - else if (proto == SOCK_DCCP) { - if (setsockopt(rcv_fds[i], SOL_DCCP, - DCCP_SOCKOPT_SERVICE, - &(int) {htonl(42)}, sizeof(int))) - error(1, errno, "failed to setsockopt"); - - if (listen(rcv_fds[i], 10)) - error(1, errno, "dccp: failed to listen on receive port"); - } } } @@ -148,11 +134,6 @@ static int connect_and_send(int family, int proto) if (fd < 0) error(1, errno, "failed to create send socket"); - if (proto == SOCK_DCCP && - setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE, - &(int){htonl(42)}, sizeof(int))) - error(1, errno, "failed to setsockopt"); - if (bind(fd, saddr, sz)) error(1, errno, "failed to bind send socket"); @@ -175,7 +156,7 @@ static int receive_once(int epfd, int proto) if (i < 0) error(1, errno, "epoll_wait failed"); - if (proto == SOCK_STREAM || proto == SOCK_DCCP) { + if (proto == SOCK_STREAM) { fd = accept(ev.data.fd, NULL, NULL); if (fd < 0) error(1, errno, "failed to accept"); @@ -243,20 +224,6 @@ static void run_one_test(int fam_send, int fam_rcv, int proto, static void test_proto(int proto, const char *proto_str) { - if (proto == SOCK_DCCP) { - int test_fd; - - test_fd = socket(AF_INET, proto, 0); - if (test_fd < 0) { - if (errno == ESOCKTNOSUPPORT) { - fprintf(stderr, "DCCP not supported: skipping DCCP tests\n"); - return; - } else - error(1, errno, "failed to create a DCCP socket"); - } - close(test_fd); - } - fprintf(stderr, "%s IPv4 ... ", proto_str); run_one_test(AF_INET, AF_INET, proto, IP4_ADDR); @@ -271,7 +238,6 @@ int main(void) { test_proto(SOCK_DGRAM, "UDP"); test_proto(SOCK_STREAM, "TCP"); - test_proto(SOCK_DCCP, "DCCP"); fprintf(stderr, "SUCCESS\n"); return 0; diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c index 65aea27d761c..12e48b97b862 100644 --- a/tools/testing/selftests/net/reuseport_bpf.c +++ b/tools/testing/selftests/net/reuseport_bpf.c @@ -23,8 +23,9 @@ #include <sys/socket.h> #include <sys/resource.h> #include <unistd.h> +#include <sched.h> -#include "../kselftest.h" +#include "kselftest.h" struct test_params { int recv_family; @@ -455,8 +456,18 @@ static __attribute__((destructor)) void main_dtor(void) setrlimit(RLIMIT_MEMLOCK, &rlim_old); } +static void setup_netns(void) +{ + if (unshare(CLONE_NEWNET)) + error(1, errno, "failed to unshare netns"); + if (system("ip link set lo up")) + error(1, 0, "failed to bring up lo interface in netns"); +} + int main(void) { + setup_netns(); + fprintf(stderr, "---- IPv4 UDP ----\n"); /* NOTE: UDP socket lookups traverse a different code path when there * are > 10 sockets in a group. Run the bpf test through both paths. diff --git a/tools/testing/selftests/net/reuseport_bpf_cpu.c b/tools/testing/selftests/net/reuseport_bpf_cpu.c index 2d646174729f..ddfe92f6597a 100644 --- a/tools/testing/selftests/net/reuseport_bpf_cpu.c +++ b/tools/testing/selftests/net/reuseport_bpf_cpu.c @@ -228,10 +228,20 @@ static void test(int *rcv_fd, int len, int family, int proto) close(rcv_fd[cpu]); } +static void setup_netns(void) +{ + if (unshare(CLONE_NEWNET)) + error(1, errno, "failed to unshare netns"); + if (system("ip link set lo up")) + error(1, 0, "failed to bring up lo interface in netns"); +} + int main(void) { int *rcv_fd, cpus; + setup_netns(); + cpus = sysconf(_SC_NPROCESSORS_ONLN); if (cpus <= 0) error(1, errno, "failed counting cpus"); diff --git a/tools/testing/selftests/net/reuseport_bpf_numa.c b/tools/testing/selftests/net/reuseport_bpf_numa.c index c9ba36aa688e..8ec52fc5ef41 100644 --- a/tools/testing/selftests/net/reuseport_bpf_numa.c +++ b/tools/testing/selftests/net/reuseport_bpf_numa.c @@ -23,7 +23,7 @@ #include <unistd.h> #include <numa.h> -#include "../kselftest.h" +#include "kselftest.h" static const int PORT = 8888; @@ -230,10 +230,20 @@ static void test(int *rcv_fd, int len, int family, int proto) close(rcv_fd[node]); } +static void setup_netns(void) +{ + if (unshare(CLONE_NEWNET)) + error(1, errno, "failed to unshare netns"); + if (system("ip link set lo up")) + error(1, 0, "failed to bring up lo interface in netns"); +} + int main(void) { int *rcv_fd, nodes; + setup_netns(); + if (numa_available() < 0) ksft_exit_skip("no numa api support\n"); diff --git a/tools/testing/selftests/net/reuseport_dualstack.c b/tools/testing/selftests/net/reuseport_dualstack.c index fb7a59ed759e..0eaf739d0c85 100644 --- a/tools/testing/selftests/net/reuseport_dualstack.c +++ b/tools/testing/selftests/net/reuseport_dualstack.c @@ -25,6 +25,7 @@ #include <sys/types.h> #include <sys/socket.h> #include <unistd.h> +#include <sched.h> static const int PORT = 8888; @@ -156,10 +157,20 @@ static void test(int *rcv_fds, int count, int proto) close(epfd); } +static void setup_netns(void) +{ + if (unshare(CLONE_NEWNET)) + error(1, errno, "failed to unshare netns"); + if (system("ip link set lo up")) + error(1, 0, "failed to bring up lo interface in netns"); +} + int main(void) { int rcv_fds[32], i; + setup_netns(); + fprintf(stderr, "---- UDP IPv4 created before IPv6 ----\n"); build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds, 5); build_rcv_fd(AF_INET6, SOCK_DGRAM, &(rcv_fds[5]), 5); diff --git a/tools/testing/selftests/net/route_hint.sh b/tools/testing/selftests/net/route_hint.sh new file mode 100755 index 000000000000..2db01ece0cc1 --- /dev/null +++ b/tools/testing/selftests/net/route_hint.sh @@ -0,0 +1,79 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test ensures directed broadcast routes use dst hint mechanism + +source lib.sh + +CLIENT_IP4="192.168.0.1" +SERVER_IP4="192.168.0.2" +BROADCAST_ADDRESS="192.168.0.255" + +setup() { + setup_ns CLIENT_NS SERVER_NS + + ip -net "${SERVER_NS}" link add link1 type veth peer name link0 netns "${CLIENT_NS}" + + ip -net "${CLIENT_NS}" link set link0 up + ip -net "${CLIENT_NS}" addr add "${CLIENT_IP4}/24" dev link0 + + ip -net "${SERVER_NS}" link set link1 up + ip -net "${SERVER_NS}" addr add "${SERVER_IP4}/24" dev link1 + + ip netns exec "${CLIENT_NS}" ethtool -K link0 tcp-segmentation-offload off + ip netns exec "${SERVER_NS}" sh -c "echo 500000000 > /sys/class/net/link1/gro_flush_timeout" + ip netns exec "${SERVER_NS}" sh -c "echo 1 > /sys/class/net/link1/napi_defer_hard_irqs" + ip netns exec "${SERVER_NS}" ethtool -K link1 generic-receive-offload on +} + +cleanup() { + ip -net "${SERVER_NS}" link del link1 + cleanup_ns "${CLIENT_NS}" "${SERVER_NS}" +} + +directed_bcast_hint_test() +{ + local rc=0 + + echo "Testing for directed broadcast route hint" + + orig_in_brd=$(ip netns exec "${SERVER_NS}" lnstat -j -i1 -c1 | jq '.in_brd') + ip netns exec "${CLIENT_NS}" mausezahn link0 -a own -b bcast -A "${CLIENT_IP4}" \ + -B "${BROADCAST_ADDRESS}" -c1 -t tcp "sp=1-100,dp=1234,s=1,a=0" -p 5 -q + sleep 1 + new_in_brd=$(ip netns exec "${SERVER_NS}" lnstat -j -i1 -c1 | jq '.in_brd') + + res=$(echo "${new_in_brd} - ${orig_in_brd}" | bc) + + if [ "${res}" -lt 100 ]; then + echo "[ OK ]" + rc="${ksft_pass}" + else + echo "[FAIL] expected in_brd to be under 100, got ${res}" + rc="${ksft_fail}" + fi + + return "${rc}" +} + +if [ ! -x "$(command -v mausezahn)" ]; then + echo "SKIP: Could not run test without mausezahn tool" + exit "${ksft_skip}" +fi + +if [ ! -x "$(command -v jq)" ]; then + echo "SKIP: Could not run test without jq tool" + exit "${ksft_skip}" +fi + +if [ ! -x "$(command -v bc)" ]; then + echo "SKIP: Could not run test without bc tool" + exit "${ksft_skip}" +fi + +trap cleanup EXIT + +setup + +directed_bcast_hint_test +exit $? diff --git a/tools/testing/selftests/net/rps_default_mask.sh b/tools/testing/selftests/net/rps_default_mask.sh index 4287a8529890..b200019b3c80 100755 --- a/tools/testing/selftests/net/rps_default_mask.sh +++ b/tools/testing/selftests/net/rps_default_mask.sh @@ -54,16 +54,16 @@ cleanup echo 1 > /proc/sys/net/core/rps_default_mask setup -chk_rps "changing rps_default_mask dont affect existing devices" "" lo $INITIAL_RPS_DEFAULT_MASK +chk_rps "changing rps_default_mask doesn't affect existing devices" "" lo $INITIAL_RPS_DEFAULT_MASK echo 3 > /proc/sys/net/core/rps_default_mask -chk_rps "changing rps_default_mask dont affect existing netns" $NETNS lo 0 +chk_rps "changing rps_default_mask doesn't affect existing netns" $NETNS lo 0 ip link add name $VETH type veth peer netns $NETNS name $VETH ip link set dev $VETH up ip -n $NETNS link set dev $VETH up -chk_rps "changing rps_default_mask affect newly created devices" "" $VETH 3 -chk_rps "changing rps_default_mask don't affect newly child netns[II]" $NETNS $VETH 0 +chk_rps "changing rps_default_mask affects newly created devices" "" $VETH 3 +chk_rps "changing rps_default_mask doesn't affect newly child netns[II]" $NETNS $VETH 0 ip link del dev $VETH ip netns del $NETNS @@ -72,8 +72,8 @@ chk_rps "rps_default_mask is 0 by default in child netns" "$NETNS" lo 0 ip netns exec $NETNS sysctl -qw net.core.rps_default_mask=1 ip link add name $VETH type veth peer netns $NETNS name $VETH -chk_rps "changing rps_default_mask in child ns don't affect the main one" "" lo $INITIAL_RPS_DEFAULT_MASK +chk_rps "changing rps_default_mask in child ns doesn't affect the main one" "" lo $INITIAL_RPS_DEFAULT_MASK chk_rps "changing rps_default_mask in child ns affects new childns devices" $NETNS $VETH 1 -chk_rps "changing rps_default_mask in child ns don't affect existing devices" $NETNS lo 0 +chk_rps "changing rps_default_mask in child ns doesn't affect existing devices" $NETNS lo 0 exit $ret diff --git a/tools/testing/selftests/net/rtnetlink.py b/tools/testing/selftests/net/rtnetlink.py new file mode 100755 index 000000000000..e9ad5e88da97 --- /dev/null +++ b/tools/testing/selftests/net/rtnetlink.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +from lib.py import ksft_exit, ksft_run, ksft_ge, RtnlAddrFamily +import socket + +IPV4_ALL_HOSTS_MULTICAST = b'\xe0\x00\x00\x01' + +def dump_mcaddr_check(rtnl: RtnlAddrFamily) -> None: + """ + Verify that at least one interface has the IPv4 all-hosts multicast address. + At least the loopback interface should have this address. + """ + + addresses = rtnl.getmulticast({"ifa-family": socket.AF_INET}, dump=True) + + all_host_multicasts = [ + addr for addr in addresses if addr['multicast'] == IPV4_ALL_HOSTS_MULTICAST + ] + + ksft_ge(len(all_host_multicasts), 1, + "No interface found with the IPv4 all-hosts multicast address") + +def main() -> None: + rtnl = RtnlAddrFamily() + ksft_run([dump_mcaddr_check], args=(rtnl, )) + ksft_exit() + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 2e8243a65b50..5a5ff88321d5 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -8,6 +8,7 @@ ALL_TESTS=" kci_test_polrouting kci_test_route_get kci_test_addrlft + kci_test_addrlft_route_cleanup kci_test_promote_secondaries kci_test_tc kci_test_gre @@ -21,15 +22,18 @@ ALL_TESTS=" kci_test_vrf kci_test_encap kci_test_macsec + kci_test_macsec_vlan kci_test_ipsec kci_test_ipsec_offload kci_test_fdb_get kci_test_fdb_del kci_test_neigh_get + kci_test_neigh_update kci_test_bridge_parent_id kci_test_address_proto kci_test_enslave_bonding kci_test_mngtmpaddr + kci_test_operstate " devdummy="test-dummy0" @@ -291,6 +295,17 @@ kci_test_route_get() end_test "PASS: route get" } +check_addr_not_exist() +{ + dev=$1 + addr=$2 + if ip addr show dev $dev | grep -q $addr; then + return 1 + else + return 0 + fi +} + kci_test_addrlft() { for i in $(seq 10 100) ;do @@ -298,9 +313,10 @@ kci_test_addrlft() run_cmd ip addr add 10.23.11.$i/32 dev "$devdummy" preferred_lft $lft valid_lft $((lft+1)) done - sleep 5 - run_cmd_grep_fail "10.23.11." ip addr show dev "$devdummy" - if [ $? -eq 0 ]; then + slowwait 5 check_addr_not_exist "$devdummy" "10.23.11." + if [ $? -eq 1 ]; then + # troubleshoot the reason for our failure + run_cmd ip addr show dev "$devdummy" check_err 1 end_test "FAIL: preferred_lft addresses remaining" return @@ -309,8 +325,32 @@ kci_test_addrlft() end_test "PASS: preferred_lft addresses have expired" } +kci_test_addrlft_route_cleanup() +{ + local ret=0 + local test_addr="2001:db8:99::1/64" + local test_prefix="2001:db8:99::/64" + + run_cmd ip -6 addr add $test_addr dev "$devdummy" valid_lft 300 preferred_lft 300 + run_cmd_grep "$test_prefix proto kernel" ip -6 route show dev "$devdummy" + run_cmd ip -6 addr del $test_addr dev "$devdummy" + run_cmd_grep_fail "$test_prefix" ip -6 route show dev "$devdummy" + + if [ $ret -ne 0 ]; then + end_test "FAIL: route not cleaned up when address with valid_lft deleted" + return 1 + fi + + end_test "PASS: route cleaned up when address with valid_lft deleted" +} + kci_test_promote_secondaries() { + run_cmd ifconfig "$devdummy" + if [ $ret -ne 0 ]; then + end_test "SKIP: ifconfig not installed" + return $ksft_skip + fi promote=$(sysctl -n net.ipv4.conf.$devdummy.promote_secondaries) sysctl -q net.ipv4.conf.$devdummy.promote_secondaries=1 @@ -507,7 +547,7 @@ kci_test_encap_fou() run_cmd_fail ip -netns "$testns" fou del port 9999 run_cmd ip -netns "$testns" fou del port 7777 if [ $ret -ne 0 ]; then - end_test "FAIL: fou"s + end_test "FAIL: fou" return 1 fi @@ -561,6 +601,41 @@ kci_test_macsec() end_test "PASS: macsec" } +# Test __dev_set_rx_mode call from dev_uc_add under addr_list_lock spinlock. +# Make sure __dev_set_promiscuity is not grabbing (sleeping) netdev instance +# lock. +# https://lore.kernel.org/netdev/2aff4342b0f5b1539c02ffd8df4c7e58dd9746e7.camel@nvidia.com/ +kci_test_macsec_vlan() +{ + msname="test_macsec1" + vlanname="test_vlan1" + local ret=0 + run_cmd_grep "^Usage: ip macsec" ip macsec help + if [ $? -ne 0 ]; then + end_test "SKIP: macsec: iproute2 too old" + return $ksft_skip + fi + run_cmd ip link add link "$devdummy" "$msname" type macsec port 42 encrypt on + if [ $ret -ne 0 ];then + end_test "FAIL: can't add macsec interface, skipping test" + return 1 + fi + + run_cmd ip link set dev "$msname" up + ip link add link "$msname" name "$vlanname" type vlan id 1 + ip link set dev "$vlanname" address 00:11:22:33:44:88 + ip link set dev "$vlanname" up + run_cmd ip link del dev "$vlanname" + run_cmd ip link del dev "$msname" + + if [ $ret -ne 0 ];then + end_test "FAIL: macsec_vlan" + return 1 + fi + + end_test "PASS: macsec_vlan" +} + #------------------------------------------------------------------- # Example commands # ip x s add proto esp src 14.0.0.52 dst 14.0.0.70 \ @@ -673,6 +748,11 @@ kci_test_ipsec_offload() sysfsf=$sysfsd/ipsec sysfsnet=/sys/bus/netdevsim/devices/netdevsim0/net/ probed=false + esp4_offload_probed_default=false + + if lsmod | grep -q esp4_offload; then + esp4_offload_probed_default=true + fi if ! mount | grep -q debugfs; then mount -t debugfs none /sys/kernel/debug/ &> /dev/null @@ -766,6 +846,7 @@ EOF fi # clean up any leftovers + ! "$esp4_offload_probed_default" && lsmod | grep -q esp4_offload && rmmod esp4_offload echo 0 > /sys/bus/netdevsim/del_device $probed && rmmod netdevsim @@ -1080,6 +1161,60 @@ kci_test_neigh_get() end_test "PASS: neigh get" } +kci_test_neigh_update() +{ + dstip=10.0.2.4 + dstmac=de:ad:be:ef:13:37 + local ret=0 + + for proxy in "" "proxy" ; do + # add a neighbour entry without any flags + run_cmd ip neigh add $proxy $dstip dev "$devdummy" lladdr $dstmac nud permanent + run_cmd_grep $dstip ip neigh show $proxy + run_cmd_grep_fail "$dstip dev $devdummy .*\(managed\|use\|router\|extern\)" ip neigh show $proxy + + # set the extern_learn flag, but no other + run_cmd ip neigh change $proxy $dstip dev "$devdummy" extern_learn + run_cmd_grep "$dstip dev $devdummy .* extern_learn" ip neigh show $proxy + run_cmd_grep_fail "$dstip dev $devdummy .* \(managed\|use\|router\)" ip neigh show $proxy + + # flags are reset when not provided + run_cmd ip neigh change $proxy $dstip dev "$devdummy" + run_cmd_grep $dstip ip neigh show $proxy + run_cmd_grep_fail "$dstip dev $devdummy .* extern_learn" ip neigh show $proxy + + # add a protocol + run_cmd ip neigh change $proxy $dstip dev "$devdummy" protocol boot + run_cmd_grep "$dstip dev $devdummy .* proto boot" ip neigh show $proxy + + # protocol is retained when not provided + run_cmd ip neigh change $proxy $dstip dev "$devdummy" + run_cmd_grep "$dstip dev $devdummy .* proto boot" ip neigh show $proxy + + # change protocol + run_cmd ip neigh change $proxy $dstip dev "$devdummy" protocol static + run_cmd_grep "$dstip dev $devdummy .* proto static" ip neigh show $proxy + + # also check an extended flag for non-proxy neighs + if [ "$proxy" = "" ]; then + run_cmd ip neigh change $proxy $dstip dev "$devdummy" managed + run_cmd_grep "$dstip dev $devdummy managed" ip neigh show $proxy + + run_cmd ip neigh change $proxy $dstip dev "$devdummy" lladdr $dstmac + run_cmd_grep_fail "$dstip dev $devdummy managed" ip neigh show $proxy + fi + + run_cmd ip neigh del $proxy $dstip dev "$devdummy" + done + + if [ $ret -ne 0 ];then + end_test "FAIL: neigh update" + return 1 + fi + + end_test "PASS: neigh update" +} + kci_test_bridge_parent_id() { local ret=0 @@ -1148,6 +1283,12 @@ do_test_address_proto() local ret=0 local err + run_cmd_grep 'proto' ip address help + if [ $? -ne 0 ];then + end_test "SKIP: addr proto ${what}: iproute2 too old" + return $ksft_skip + fi + ip address add dev "$devdummy" "$addr3" check_err $? proto=$(address_get_proto "$addr3") @@ -1334,6 +1475,39 @@ kci_test_mngtmpaddr() return $ret } +kci_test_operstate() +{ + local ret=0 + + # Check that it is possible to set operational state during device + # creation and that it is preserved when the administrative state of + # the device is toggled. + run_cmd ip link add name vx0 up state up type vxlan id 10010 dstport 4789 + run_cmd_grep "state UP" ip link show dev vx0 + run_cmd ip link set dev vx0 down + run_cmd_grep "state DOWN" ip link show dev vx0 + run_cmd ip link set dev vx0 up + run_cmd_grep "state UP" ip link show dev vx0 + + run_cmd ip link del dev vx0 + + # Check that it is possible to set the operational state of the device + # after creation. + run_cmd ip link add name vx0 up type vxlan id 10010 dstport 4789 + run_cmd_grep "state UNKNOWN" ip link show dev vx0 + run_cmd ip link set dev vx0 state up + run_cmd_grep "state UP" ip link show dev vx0 + + run_cmd ip link del dev vx0 + + if [ "$ret" -ne 0 ]; then + end_test "FAIL: operstate" + return 1 + fi + + end_test "PASS: operstate" +} + kci_test_rtnl() { local current_test @@ -1367,6 +1541,8 @@ usage: ${0##*/} OPTS EOF } +require_command jq + #check for needed privileges if [ "$(id -u)" -ne 0 ];then end_test "SKIP: Need root privileges" diff --git a/tools/testing/selftests/net/rtnetlink_notification.sh b/tools/testing/selftests/net/rtnetlink_notification.sh new file mode 100755 index 000000000000..3f9780232bd6 --- /dev/null +++ b/tools/testing/selftests/net/rtnetlink_notification.sh @@ -0,0 +1,112 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test is for checking rtnetlink notification callpaths, and get as much +# coverage as possible. +# +# set -e + +ALL_TESTS=" + kci_test_mcast_addr_notification + kci_test_anycast_addr_notification +" + +source lib.sh +test_dev="test-dummy1" + +kci_test_mcast_addr_notification() +{ + RET=0 + local tmpfile + local monitor_pid + local match_result + + tmpfile=$(mktemp) + defer rm "$tmpfile" + + ip monitor maddr > $tmpfile & + monitor_pid=$! + defer kill_process "$monitor_pid" + + sleep 1 + + if [ ! -e "/proc/$monitor_pid" ]; then + RET=$ksft_skip + log_test "mcast addr notification: iproute2 too old" + return $RET + fi + + ip link add name "$test_dev" type dummy + check_err $? "failed to add dummy interface" + ip link set "$test_dev" up + check_err $? "failed to set dummy interface up" + ip link del dev "$test_dev" + check_err $? "Failed to delete dummy interface" + sleep 1 + + # There should be 4 line matches as follows. + # 13: test-dummy1 inet6 mcast ff02::1 scope global + # 13: test-dummy1 inet mcast 224.0.0.1 scope global + # Deleted 13: test-dummy1 inet mcast 224.0.0.1 scope global + # Deleted 13: test-dummy1 inet6 mcast ff02::1 scope global + match_result=$(grep -cE "$test_dev.*(224.0.0.1|ff02::1)" "$tmpfile") + if [ "$match_result" -ne 4 ]; then + RET=$ksft_fail + fi + log_test "mcast addr notification: Expected 4 matches, got $match_result" + return $RET +} + +kci_test_anycast_addr_notification() +{ + RET=0 + local tmpfile + local monitor_pid + local match_result + + tmpfile=$(mktemp) + defer rm "$tmpfile" + + ip monitor acaddress > "$tmpfile" & + monitor_pid=$! + defer kill_process "$monitor_pid" + sleep 1 + + if [ ! -e "/proc/$monitor_pid" ]; then + RET=$ksft_skip + log_test "anycast addr notification: iproute2 too old" + return "$RET" + fi + + ip link add name "$test_dev" type dummy + check_err $? "failed to add dummy interface" + ip link set "$test_dev" up + check_err $? "failed to set dummy interface up" + sysctl -qw net.ipv6.conf."$test_dev".forwarding=1 + ip link del dev "$test_dev" + check_err $? "Failed to delete dummy interface" + sleep 1 + + # There should be 2 line matches as follows. + # 9: dummy2 inet6 any fe80:: scope global + # Deleted 9: dummy2 inet6 any fe80:: scope global + match_result=$(grep -cE "$test_dev.*(fe80::)" "$tmpfile") + if [ "$match_result" -ne 2 ]; then + RET=$ksft_fail + fi + log_test "anycast addr notification: Expected 2 matches, got $match_result" + return "$RET" +} + +#check for needed privileges +if [ "$(id -u)" -ne 0 ];then + RET=$ksft_skip + log_test "need root privileges" + exit $RET +fi + +require_command ip + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/rxtimestamp.c b/tools/testing/selftests/net/rxtimestamp.c index 16ac4df55fdb..b81ed0352d6c 100644 --- a/tools/testing/selftests/net/rxtimestamp.c +++ b/tools/testing/selftests/net/rxtimestamp.c @@ -18,7 +18,7 @@ #include <linux/net_tstamp.h> #include <linux/errqueue.h> -#include "../kselftest.h" +#include "kselftest.h" struct options { int so_timestamp; diff --git a/tools/testing/selftests/net/sctp_hello.c b/tools/testing/selftests/net/sctp_hello.c index f02f1f95d227..a04dac0b8027 100644 --- a/tools/testing/selftests/net/sctp_hello.c +++ b/tools/testing/selftests/net/sctp_hello.c @@ -29,7 +29,6 @@ static void set_addr(struct sockaddr_storage *ss, char *ip, char *port, int *len static int do_client(int argc, char *argv[]) { struct sockaddr_storage ss; - char buf[] = "hello"; int csk, ret, len; if (argc < 5) { @@ -56,16 +55,10 @@ static int do_client(int argc, char *argv[]) set_addr(&ss, argv[3], argv[4], &len); ret = connect(csk, (struct sockaddr *)&ss, len); - if (ret < 0) { - printf("failed to connect to peer\n"); + if (ret < 0) return -1; - } - ret = send(csk, buf, strlen(buf) + 1, 0); - if (ret < 0) { - printf("failed to send msg %d\n", ret); - return -1; - } + recv(csk, NULL, 0, 0); close(csk); return 0; @@ -75,7 +68,6 @@ int main(int argc, char *argv[]) { struct sockaddr_storage ss; int lsk, csk, ret, len; - char buf[20]; if (argc < 2 || (strcmp(argv[1], "server") && strcmp(argv[1], "client"))) { printf("%s server|client ...\n", argv[0]); @@ -125,11 +117,6 @@ int main(int argc, char *argv[]) return -1; } - ret = recv(csk, buf, sizeof(buf), 0); - if (ret <= 0) { - printf("failed to recv msg %d\n", ret); - return -1; - } close(csk); close(lsk); diff --git a/tools/testing/selftests/net/sctp_vrf.sh b/tools/testing/selftests/net/sctp_vrf.sh index c854034b6aa1..667b211aa8a1 100755 --- a/tools/testing/selftests/net/sctp_vrf.sh +++ b/tools/testing/selftests/net/sctp_vrf.sh @@ -20,9 +20,9 @@ setup() { modprobe sctp_diag setup_ns CLIENT_NS1 CLIENT_NS2 SERVER_NS - ip net exec $CLIENT_NS1 sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null - ip net exec $CLIENT_NS2 sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null - ip net exec $SERVER_NS sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null + ip net exec $CLIENT_NS1 sysctl -wq net.ipv6.conf.default.accept_dad=0 + ip net exec $CLIENT_NS2 sysctl -wq net.ipv6.conf.default.accept_dad=0 + ip net exec $SERVER_NS sysctl -wq net.ipv6.conf.default.accept_dad=0 ip -n $SERVER_NS link add veth1 type veth peer name veth1 netns $CLIENT_NS1 ip -n $SERVER_NS link add veth2 type veth peer name veth1 netns $CLIENT_NS2 @@ -62,17 +62,40 @@ setup() { } cleanup() { - ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null + wait_client $CLIENT_NS1 + wait_client $CLIENT_NS2 + stop_server cleanup_ns $CLIENT_NS1 $CLIENT_NS2 $SERVER_NS } -wait_server() { +start_server() { local IFACE=$1 local CNT=0 - until ip netns exec $SERVER_NS ss -lS src $SERVER_IP:$SERVER_PORT | \ - grep LISTEN | grep "$IFACE" 2>&1 >/dev/null; do - [ $((CNT++)) = "20" ] && { RET=3; return $RET; } + ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP $SERVER_PORT $IFACE & + disown + until ip netns exec $SERVER_NS ss -SlH | grep -q "$IFACE"; do + [ $((CNT++)) -eq 30 ] && { RET=3; return $RET; } + sleep 0.1 + done +} + +stop_server() { + local CNT=0 + + ip netns exec $SERVER_NS pkill sctp_hello + while ip netns exec $SERVER_NS ss -SaH | grep -q .; do + [ $((CNT++)) -eq 30 ] && break + sleep 0.1 + done +} + +wait_client() { + local CLIENT_NS=$1 + local CNT=0 + + while ip netns exec $CLIENT_NS ss -SaH | grep -q .; do + [ $((CNT++)) -eq 30 ] && break sleep 0.1 done } @@ -81,14 +104,12 @@ do_test() { local CLIENT_NS=$1 local IFACE=$2 - ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null - ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \ - $SERVER_PORT $IFACE 2>&1 >/dev/null & - disown - wait_server $IFACE || return $RET + start_server $IFACE || return $RET timeout 3 ip netns exec $CLIENT_NS ./sctp_hello client $AF \ - $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null + $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT RET=$? + wait_client $CLIENT_NS + stop_server return $RET } @@ -96,25 +117,21 @@ do_testx() { local IFACE1=$1 local IFACE2=$2 - ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null - ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \ - $SERVER_PORT $IFACE1 2>&1 >/dev/null & - disown - wait_server $IFACE1 || return $RET - ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \ - $SERVER_PORT $IFACE2 2>&1 >/dev/null & - disown - wait_server $IFACE2 || return $RET + start_server $IFACE1 || return $RET + start_server $IFACE2 || return $RET timeout 3 ip netns exec $CLIENT_NS1 ./sctp_hello client $AF \ - $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null && \ + $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT && \ timeout 3 ip netns exec $CLIENT_NS2 ./sctp_hello client $AF \ - $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null + $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT RET=$? + wait_client $CLIENT_NS1 + wait_client $CLIENT_NS2 + stop_server return $RET } testup() { - ip netns exec $SERVER_NS sysctl -w net.sctp.l3mdev_accept=1 2>&1 >/dev/null + ip netns exec $SERVER_NS sysctl -wq net.sctp.l3mdev_accept=1 echo -n "TEST 01: nobind, connect from client 1, l3mdev_accept=1, Y " do_test $CLIENT_NS1 || { echo "[FAIL]"; return $RET; } echo "[PASS]" @@ -123,7 +140,7 @@ testup() { do_test $CLIENT_NS2 && { echo "[FAIL]"; return $RET; } echo "[PASS]" - ip netns exec $SERVER_NS sysctl -w net.sctp.l3mdev_accept=0 2>&1 >/dev/null + ip netns exec $SERVER_NS sysctl -wq net.sctp.l3mdev_accept=0 echo -n "TEST 03: nobind, connect from client 1, l3mdev_accept=0, N " do_test $CLIENT_NS1 && { echo "[FAIL]"; return $RET; } echo "[PASS]" @@ -160,7 +177,7 @@ testup() { do_testx vrf-1 vrf-2 || { echo "[FAIL]"; return $RET; } echo "[PASS]" - echo -n "TEST 12: bind vrf-2 & 1 in server, connect from client 1 & 2, N " + echo -n "TEST 12: bind vrf-2 & 1 in server, connect from client 1 & 2, Y " do_testx vrf-2 vrf-1 || { echo "[FAIL]"; return $RET; } echo "[PASS]" } diff --git a/tools/testing/selftests/net/setup_loopback.sh b/tools/testing/selftests/net/setup_loopback.sh deleted file mode 100644 index 2070b57849de..000000000000 --- a/tools/testing/selftests/net/setup_loopback.sh +++ /dev/null @@ -1,120 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -readonly FLUSH_PATH="/sys/class/net/${dev}/gro_flush_timeout" -readonly IRQ_PATH="/sys/class/net/${dev}/napi_defer_hard_irqs" -readonly FLUSH_TIMEOUT="$(< ${FLUSH_PATH})" -readonly HARD_IRQS="$(< ${IRQ_PATH})" -readonly server_ns=$(mktemp -u server-XXXXXXXX) -readonly client_ns=$(mktemp -u client-XXXXXXXX) - -netdev_check_for_carrier() { - local -r dev="$1" - - for i in {1..5}; do - carrier="$(cat /sys/class/net/${dev}/carrier)" - if [[ "${carrier}" -ne 1 ]] ; then - echo "carrier not ready yet..." >&2 - sleep 1 - else - echo "carrier ready" >&2 - break - fi - done - echo "${carrier}" -} - -# Assumes that there is no existing ipvlan device on the physical device -setup_loopback_environment() { - local dev="$1" - - # Fail hard if cannot turn on loopback mode for current NIC - ethtool -K "${dev}" loopback on || exit 1 - sleep 1 - - # Check for the carrier - carrier=$(netdev_check_for_carrier ${dev}) - if [[ "${carrier}" -ne 1 ]] ; then - echo "setup_loopback_environment failed" - exit 1 - fi -} - -setup_macvlan_ns(){ - local -r link_dev="$1" - local -r ns_name="$2" - local -r ns_dev="$3" - local -r ns_mac="$4" - local -r addr="$5" - - ip link add link "${link_dev}" dev "${ns_dev}" \ - address "${ns_mac}" type macvlan - exit_code=$? - if [[ "${exit_code}" -ne 0 ]]; then - echo "setup_macvlan_ns failed" - exit $exit_code - fi - - [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}" - ip link set dev "${ns_dev}" netns "${ns_name}" - ip -netns "${ns_name}" link set dev "${ns_dev}" up - if [[ -n "${addr}" ]]; then - ip -netns "${ns_name}" addr add dev "${ns_dev}" "${addr}" - fi - - sleep 1 -} - -cleanup_macvlan_ns(){ - while (( $# >= 2 )); do - ns_name="$1" - ns_dev="$2" - ip -netns "${ns_name}" link del dev "${ns_dev}" - ip netns del "${ns_name}" - shift 2 - done -} - -cleanup_loopback(){ - local -r dev="$1" - - ethtool -K "${dev}" loopback off - sleep 1 - - # Check for the carrier - carrier=$(netdev_check_for_carrier ${dev}) - if [[ "${carrier}" -ne 1 ]] ; then - echo "setup_loopback_environment failed" - exit 1 - fi -} - -setup_interrupt() { - # Use timer on host to trigger the network stack - # Also disable device interrupt to not depend on NIC interrupt - # Reduce test flakiness caused by unexpected interrupts - echo 100000 >"${FLUSH_PATH}" - echo 50 >"${IRQ_PATH}" -} - -setup_ns() { - # Set up server_ns namespace and client_ns namespace - setup_macvlan_ns "${dev}" ${server_ns} server "${SERVER_MAC}" - setup_macvlan_ns "${dev}" ${client_ns} client "${CLIENT_MAC}" -} - -cleanup_ns() { - cleanup_macvlan_ns ${server_ns} server ${client_ns} client -} - -setup() { - setup_loopback_environment "${dev}" - setup_interrupt -} - -cleanup() { - cleanup_loopback "${dev}" - - echo "${FLUSH_TIMEOUT}" >"${FLUSH_PATH}" - echo "${HARD_IRQS}" >"${IRQ_PATH}" -} diff --git a/tools/testing/selftests/net/setup_veth.sh b/tools/testing/selftests/net/setup_veth.sh deleted file mode 100644 index 1f78a87f6f37..000000000000 --- a/tools/testing/selftests/net/setup_veth.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -readonly server_ns=$(mktemp -u server-XXXXXXXX) -readonly client_ns=$(mktemp -u client-XXXXXXXX) - -setup_veth_ns() { - local -r link_dev="$1" - local -r ns_name="$2" - local -r ns_dev="$3" - local -r ns_mac="$4" - - [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}" - echo 1000000 > "/sys/class/net/${ns_dev}/gro_flush_timeout" - ip link set dev "${ns_dev}" netns "${ns_name}" mtu 65535 - ip -netns "${ns_name}" link set dev "${ns_dev}" up - - ip netns exec "${ns_name}" ethtool -K "${ns_dev}" gro on tso off -} - -setup_ns() { - # Set up server_ns namespace and client_ns namespace - ip link add name server type veth peer name client - - setup_veth_ns "${dev}" ${server_ns} server "${SERVER_MAC}" - setup_veth_ns "${dev}" ${client_ns} client "${CLIENT_MAC}" -} - -cleanup_ns() { - local ns_name - - for ns_name in ${client_ns} ${server_ns}; do - [[ -e /var/run/netns/"${ns_name}" ]] && ip netns del "${ns_name}" - done -} - -setup() { - # no global init setup step needed - : -} - -cleanup() { - cleanup_ns -} diff --git a/tools/testing/selftests/net/sk_so_peek_off.c b/tools/testing/selftests/net/sk_so_peek_off.c index d87dd8d8d491..2a3f5c604f52 100644 --- a/tools/testing/selftests/net/sk_so_peek_off.c +++ b/tools/testing/selftests/net/sk_so_peek_off.c @@ -8,7 +8,7 @@ #include <sys/types.h> #include <netinet/in.h> #include <arpa/inet.h> -#include "../kselftest.h" +#include "kselftest.h" static char *afstr(int af, int proto) { diff --git a/tools/testing/selftests/net/skf_net_off.c b/tools/testing/selftests/net/skf_net_off.c new file mode 100644 index 000000000000..1fdf61d6cd7f --- /dev/null +++ b/tools/testing/selftests/net/skf_net_off.c @@ -0,0 +1,244 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Open a tun device. + * + * [modifications: use IFF_NAPI_FRAGS, add sk filter] + * + * Expects the device to have been configured previously, e.g.: + * sudo ip tuntap add name tap1 mode tap + * sudo ip link set tap1 up + * sudo ip link set dev tap1 addr 02:00:00:00:00:01 + * sudo ip -6 addr add fdab::1 peer fdab::2 dev tap1 nodad + * + * And to avoid premature pskb_may_pull: + * + * sudo ethtool -K tap1 gro off + * sudo bash -c 'echo 0 > /proc/sys/net/ipv4/ip_early_demux' + */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <errno.h> +#include <error.h> +#include <fcntl.h> +#include <getopt.h> +#include <linux/filter.h> +#include <linux/if.h> +#include <linux/if_packet.h> +#include <linux/if_tun.h> +#include <linux/ipv6.h> +#include <netinet/if_ether.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/udp.h> +#include <poll.h> +#include <signal.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/poll.h> +#include <sys/types.h> +#include <sys/uio.h> +#include <unistd.h> + +static bool cfg_do_filter; +static bool cfg_do_frags; +static int cfg_dst_port = 8000; +static char *cfg_ifname; + +static int tun_open(const char *tun_name) +{ + struct ifreq ifr = {0}; + int fd, ret; + + fd = open("/dev/net/tun", O_RDWR); + if (fd == -1) + error(1, errno, "open /dev/net/tun"); + + ifr.ifr_flags = IFF_TAP; + if (cfg_do_frags) + ifr.ifr_flags |= IFF_NAPI | IFF_NAPI_FRAGS; + + strncpy(ifr.ifr_name, tun_name, IFNAMSIZ - 1); + + ret = ioctl(fd, TUNSETIFF, &ifr); + if (ret) + error(1, ret, "ioctl TUNSETIFF"); + + return fd; +} + +static void sk_set_filter(int fd) +{ + const int offset_proto = offsetof(struct ip6_hdr, ip6_nxt); + const int offset_dport = sizeof(struct ip6_hdr) + offsetof(struct udphdr, dest); + + /* Filter UDP packets with destination port cfg_dst_port */ + struct sock_filter filter_code[] = { + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4), + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_NET_OFF + offset_proto), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_UDP, 0, 2), + BPF_STMT(BPF_LD + BPF_H + BPF_ABS, SKF_NET_OFF + offset_dport), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_dst_port, 1, 0), + BPF_STMT(BPF_RET + BPF_K, 0), + BPF_STMT(BPF_RET + BPF_K, 0xFFFF), + }; + + struct sock_fprog filter = { + sizeof(filter_code) / sizeof(filter_code[0]), + filter_code, + }; + + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &filter, sizeof(filter))) + error(1, errno, "setsockopt attach filter"); +} + +static int raw_open(void) +{ + int fd; + + fd = socket(PF_INET6, SOCK_RAW, IPPROTO_UDP); + if (fd == -1) + error(1, errno, "socket raw (udp)"); + + if (cfg_do_filter) + sk_set_filter(fd); + + return fd; +} + +static void tun_write(int fd) +{ + const char eth_src[] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x02 }; + const char eth_dst[] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 }; + struct tun_pi pi = {0}; + struct ipv6hdr ip6h = {0}; + struct udphdr uh = {0}; + struct ethhdr eth = {0}; + uint32_t payload; + struct iovec iov[5]; + int ret; + + pi.proto = htons(ETH_P_IPV6); + + memcpy(eth.h_source, eth_src, sizeof(eth_src)); + memcpy(eth.h_dest, eth_dst, sizeof(eth_dst)); + eth.h_proto = htons(ETH_P_IPV6); + + ip6h.version = 6; + ip6h.payload_len = htons(sizeof(uh) + sizeof(uint32_t)); + ip6h.nexthdr = IPPROTO_UDP; + ip6h.hop_limit = 8; + if (inet_pton(AF_INET6, "fdab::2", &ip6h.saddr) != 1) + error(1, errno, "inet_pton src"); + if (inet_pton(AF_INET6, "fdab::1", &ip6h.daddr) != 1) + error(1, errno, "inet_pton src"); + + uh.source = htons(8000); + uh.dest = htons(cfg_dst_port); + uh.len = ip6h.payload_len; + uh.check = 0; + + payload = htonl(0xABABABAB); /* Covered in IPv6 length */ + + iov[0].iov_base = π + iov[0].iov_len = sizeof(pi); + iov[1].iov_base = ð + iov[1].iov_len = sizeof(eth); + iov[2].iov_base = &ip6h; + iov[2].iov_len = sizeof(ip6h); + iov[3].iov_base = &uh; + iov[3].iov_len = sizeof(uh); + iov[4].iov_base = &payload; + iov[4].iov_len = sizeof(payload); + + ret = writev(fd, iov, sizeof(iov) / sizeof(iov[0])); + if (ret <= 0) + error(1, errno, "writev"); +} + +static void raw_read(int fd) +{ + struct timeval tv = { .tv_usec = 100 * 1000 }; + struct msghdr msg = {0}; + struct iovec iov[2]; + struct udphdr uh; + uint32_t payload[2]; + int ret; + + if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv))) + error(1, errno, "setsockopt rcvtimeo udp"); + + iov[0].iov_base = &uh; + iov[0].iov_len = sizeof(uh); + + iov[1].iov_base = payload; + iov[1].iov_len = sizeof(payload); + + msg.msg_iov = iov; + msg.msg_iovlen = sizeof(iov) / sizeof(iov[0]); + + ret = recvmsg(fd, &msg, 0); + if (ret <= 0) + error(1, errno, "read raw"); + if (ret != sizeof(uh) + sizeof(payload[0])) + error(1, errno, "read raw: len=%d\n", ret); + + fprintf(stderr, "raw recv: 0x%x\n", payload[0]); +} + +static void parse_opts(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, "fFi:")) != -1) { + switch (c) { + case 'f': + cfg_do_filter = true; + printf("bpf filter enabled\n"); + break; + case 'F': + cfg_do_frags = true; + printf("napi frags mode enabled\n"); + break; + case 'i': + cfg_ifname = optarg; + break; + default: + error(1, 0, "unknown option %c", optopt); + break; + } + } + + if (!cfg_ifname) + error(1, 0, "must specify tap interface name (-i)"); +} + +int main(int argc, char **argv) +{ + int fdt, fdr; + + parse_opts(argc, argv); + + fdr = raw_open(); + fdt = tun_open(cfg_ifname); + + tun_write(fdt); + raw_read(fdr); + + if (close(fdt)) + error(1, errno, "close tun"); + if (close(fdr)) + error(1, errno, "close udp"); + + fprintf(stderr, "OK\n"); + return 0; +} + diff --git a/tools/testing/selftests/net/skf_net_off.sh b/tools/testing/selftests/net/skf_net_off.sh new file mode 100755 index 000000000000..5da5066fb465 --- /dev/null +++ b/tools/testing/selftests/net/skf_net_off.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +readonly NS="ns-$(mktemp -u XXXXXX)" + +cleanup() { + ip netns del $NS +} + +ip netns add $NS +trap cleanup EXIT + +ip -netns $NS link set lo up +ip -netns $NS tuntap add name tap1 mode tap +ip -netns $NS link set tap1 up +ip -netns $NS link set dev tap1 addr 02:00:00:00:00:01 +ip -netns $NS -6 addr add fdab::1 peer fdab::2 dev tap1 nodad +ip netns exec $NS ethtool -K tap1 gro off + +# disable early demux, else udp_v6_early_demux pulls udp header into linear +ip netns exec $NS sysctl -w net.ipv4.ip_early_demux=0 + +echo "no filter" +ip netns exec $NS ./skf_net_off -i tap1 + +echo "filter, linear skb (-f)" +ip netns exec $NS ./skf_net_off -i tap1 -f + +echo "filter, fragmented skb (-f) (-F)" +ip netns exec $NS ./skf_net_off -i tap1 -f -F diff --git a/tools/testing/selftests/net/so_incoming_cpu.c b/tools/testing/selftests/net/so_incoming_cpu.c index e9fa14e10732..4740701f1a9a 100644 --- a/tools/testing/selftests/net/so_incoming_cpu.c +++ b/tools/testing/selftests/net/so_incoming_cpu.c @@ -9,7 +9,7 @@ #include <sys/socket.h> #include <sys/sysinfo.h> -#include "../kselftest_harness.h" +#include "kselftest_harness.h" FIXTURE(so_incoming_cpu) { diff --git a/tools/testing/selftests/net/so_rcv_listener.c b/tools/testing/selftests/net/so_rcv_listener.c new file mode 100644 index 000000000000..bc5841192aa6 --- /dev/null +++ b/tools/testing/selftests/net/so_rcv_listener.c @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <errno.h> +#include <netdb.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <linux/types.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> + +#ifndef SO_RCVPRIORITY +#define SO_RCVPRIORITY 82 +#endif + +struct options { + __u32 val; + int name; + int rcvname; + const char *host; + const char *service; +} opt; + +static void __attribute__((noreturn)) usage(const char *bin) +{ + printf("Usage: %s [opts] <dst host> <dst port / service>\n", bin); + printf("Options:\n" + "\t\t-M val Test SO_RCVMARK\n" + "\t\t-P val Test SO_RCVPRIORITY\n" + ""); + exit(EXIT_FAILURE); +} + +static void parse_args(int argc, char *argv[]) +{ + int o; + + while ((o = getopt(argc, argv, "M:P:")) != -1) { + switch (o) { + case 'M': + opt.val = atoi(optarg); + opt.name = SO_MARK; + opt.rcvname = SO_RCVMARK; + break; + case 'P': + opt.val = atoi(optarg); + opt.name = SO_PRIORITY; + opt.rcvname = SO_RCVPRIORITY; + break; + default: + usage(argv[0]); + break; + } + } + + if (optind != argc - 2) + usage(argv[0]); + + opt.host = argv[optind]; + opt.service = argv[optind + 1]; +} + +int main(int argc, char *argv[]) +{ + int err = 0; + int recv_fd = -1; + int ret_value = 0; + __u32 recv_val; + struct cmsghdr *cmsg; + char cbuf[CMSG_SPACE(sizeof(__u32))]; + char recv_buf[CMSG_SPACE(sizeof(__u32))]; + struct iovec iov[1]; + struct msghdr msg; + struct sockaddr_in recv_addr4; + struct sockaddr_in6 recv_addr6; + + parse_args(argc, argv); + + int family = strchr(opt.host, ':') ? AF_INET6 : AF_INET; + + recv_fd = socket(family, SOCK_DGRAM, IPPROTO_UDP); + if (recv_fd < 0) { + perror("Can't open recv socket"); + ret_value = -errno; + goto cleanup; + } + + err = setsockopt(recv_fd, SOL_SOCKET, opt.rcvname, &opt.val, sizeof(opt.val)); + if (err < 0) { + perror("Recv setsockopt error"); + ret_value = -errno; + goto cleanup; + } + + if (family == AF_INET) { + memset(&recv_addr4, 0, sizeof(recv_addr4)); + recv_addr4.sin_family = family; + recv_addr4.sin_port = htons(atoi(opt.service)); + + if (inet_pton(family, opt.host, &recv_addr4.sin_addr) <= 0) { + perror("Invalid IPV4 address"); + ret_value = -errno; + goto cleanup; + } + + err = bind(recv_fd, (struct sockaddr *)&recv_addr4, sizeof(recv_addr4)); + } else { + memset(&recv_addr6, 0, sizeof(recv_addr6)); + recv_addr6.sin6_family = family; + recv_addr6.sin6_port = htons(atoi(opt.service)); + + if (inet_pton(family, opt.host, &recv_addr6.sin6_addr) <= 0) { + perror("Invalid IPV6 address"); + ret_value = -errno; + goto cleanup; + } + + err = bind(recv_fd, (struct sockaddr *)&recv_addr6, sizeof(recv_addr6)); + } + + if (err < 0) { + perror("Recv bind error"); + ret_value = -errno; + goto cleanup; + } + + iov[0].iov_base = recv_buf; + iov[0].iov_len = sizeof(recv_buf); + + memset(&msg, 0, sizeof(msg)); + msg.msg_iov = iov; + msg.msg_iovlen = 1; + msg.msg_control = cbuf; + msg.msg_controllen = sizeof(cbuf); + + err = recvmsg(recv_fd, &msg, 0); + if (err < 0) { + perror("Message receive error"); + ret_value = -errno; + goto cleanup; + } + + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; cmsg = CMSG_NXTHDR(&msg, cmsg)) { + if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == opt.name) { + recv_val = *(__u32 *)CMSG_DATA(cmsg); + printf("Received value: %u\n", recv_val); + + if (recv_val != opt.val) { + fprintf(stderr, "Error: expected value: %u, got: %u\n", + opt.val, recv_val); + ret_value = -EINVAL; + } + goto cleanup; + } + } + + fprintf(stderr, "Error: No matching cmsg received\n"); + ret_value = -ENOMSG; + +cleanup: + if (recv_fd >= 0) + close(recv_fd); + + return ret_value; +} diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c index 8457b7ccbc09..b76df1efc2ef 100644 --- a/tools/testing/selftests/net/so_txtime.c +++ b/tools/testing/selftests/net/so_txtime.c @@ -174,7 +174,7 @@ static int do_recv_errqueue_timeout(int fdt) msg.msg_controllen = sizeof(control); while (1) { - const char *reason; + const char *reason = NULL; ret = recvmsg(fdt, &msg, MSG_ERRQUEUE); if (ret == -1 && errno == EAGAIN) diff --git a/tools/testing/selftests/net/socket.c b/tools/testing/selftests/net/socket.c index db1aeb8c5d1e..9e270548dad8 100644 --- a/tools/testing/selftests/net/socket.c +++ b/tools/testing/selftests/net/socket.c @@ -7,7 +7,7 @@ #include <sys/socket.h> #include <netinet/in.h> -#include "../kselftest.h" +#include "kselftest.h" struct socket_testcase { int domain; @@ -39,6 +39,7 @@ static int run_tests(void) { char err_string1[ERR_STRING_SZ]; char err_string2[ERR_STRING_SZ]; + const char *msg1, *msg2; int i, err; err = 0; @@ -56,13 +57,13 @@ static int run_tests(void) errno == -s->expect) continue; - strerror_r(-s->expect, err_string1, ERR_STRING_SZ); - strerror_r(errno, err_string2, ERR_STRING_SZ); + msg1 = strerror_r(-s->expect, err_string1, ERR_STRING_SZ); + msg2 = strerror_r(errno, err_string2, ERR_STRING_SZ); fprintf(stderr, "socket(%d, %d, %d) expected " "err (%s) got (%s)\n", s->domain, s->type, s->protocol, - err_string1, err_string2); + msg1, msg2); err = -1; break; @@ -70,12 +71,12 @@ static int run_tests(void) close(fd); if (s->expect < 0) { - strerror_r(errno, err_string1, ERR_STRING_SZ); + msg1 = strerror_r(errno, err_string1, ERR_STRING_SZ); fprintf(stderr, "socket(%d, %d, %d) expected " "success got err (%s)\n", s->domain, s->type, s->protocol, - err_string1); + msg1); err = -1; break; diff --git a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh index 02d617040793..a5e959a080bb 100755 --- a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh @@ -285,11 +285,6 @@ setup_hs() ip netns exec ${hsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec ${hsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 - # disable the rp_filter otherwise the kernel gets confused about how - # to route decap ipv4 packets. - ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec ${rtname} sysctl -wq net.ipv4.conf.default.rp_filter=0 - ip -netns ${hsname} link add veth0 type veth peer name ${rtveth} ip -netns ${hsname} link set ${rtveth} netns ${rtname} ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hid}/64 dev veth0 nodad diff --git a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh index 79fb81e63c59..a649dba3cb77 100755 --- a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh @@ -250,11 +250,6 @@ setup_hs() eval local rtname=\${rt_${rid}} local rtveth=veth-t${tid} - # disable the rp_filter otherwise the kernel gets confused about how - # to route decap ipv4 packets. - ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec ${rtname} sysctl -wq net.ipv4.conf.default.rp_filter=0 - ip -netns ${hsname} link add veth0 type veth peer name ${rtveth} ip -netns ${hsname} link set ${rtveth} netns ${rtname} ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hid}/24 dev veth0 diff --git a/tools/testing/selftests/net/srv6_end_flavors_test.sh b/tools/testing/selftests/net/srv6_end_flavors_test.sh index 50563443a4ad..318487eda671 100755 --- a/tools/testing/selftests/net/srv6_end_flavors_test.sh +++ b/tools/testing/selftests/net/srv6_end_flavors_test.sh @@ -399,7 +399,7 @@ __get_srv6_rtcfg_id() # Given the description of a router <id:op> as an input, the function returns # the <op> token which represents the operation (e.g. End behavior with or -# withouth flavors) configured for the node. +# without flavors) configured for the node. # Note that when the operation represents an End behavior with a list of # flavors, the output is the ordered version of that list. @@ -480,7 +480,7 @@ setup_rt_local_sids() # all SIDs start with a common locator. Routes and SRv6 Endpoint - # behavior instaces are grouped together in the 'localsid' table. + # behavior instances are grouped together in the 'localsid' table. ip -netns "${nsname}" -6 rule \ add to "${LOCATOR_SERVICE}::/16" \ lookup "${LOCALSID_TABLE_ID}" prio 999 diff --git a/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh index 87e414cc417c..4bc135e5c22c 100755 --- a/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh @@ -245,10 +245,8 @@ # that adopted in the use cases already examined (of course, it is necessary to # consider the different SIDs/C-SIDs). -# Kselftest framework requirement - SKIP code is 4. -readonly ksft_skip=4 +source lib.sh -readonly RDMSUFF="$(mktemp -u XXXXXXXX)" readonly DUMMY_DEVNAME="dum0" readonly VRF_TID=100 readonly VRF_DEVNAME="vrf-${VRF_TID}" @@ -376,32 +374,18 @@ test_command_or_ksft_skip() fi } -get_nodename() -{ - local name="$1" - - echo "${name}-${RDMSUFF}" -} - get_rtname() { local rtid="$1" - get_nodename "rt-${rtid}" + echo "rt_${rtid}" } get_hsname() { local hsid="$1" - get_nodename "hs-${hsid}" -} - -__create_namespace() -{ - local name="$1" - - ip netns add "${name}" + echo "hs_${hsid}" } create_router() @@ -410,8 +394,7 @@ create_router() local nsname nsname="$(get_rtname "${rtid}")" - - __create_namespace "${nsname}" + setup_ns "${nsname}" } create_host() @@ -420,28 +403,12 @@ create_host() local nsname nsname="$(get_hsname "${hsid}")" - - __create_namespace "${nsname}" + setup_ns "${nsname}" } cleanup() { - local nsname - local i - - # destroy routers - for i in ${ROUTERS}; do - nsname="$(get_rtname "${i}")" - - ip netns del "${nsname}" &>/dev/null || true - done - - # destroy hosts - for i in ${HOSTS}; do - nsname="$(get_hsname "${i}")" - - ip netns del "${nsname}" &>/dev/null || true - done + cleanup_all_ns # check whether the setup phase was completed successfully or not. In # case of an error during the setup phase of the testing environment, @@ -462,10 +429,10 @@ add_link_rt_pairs() local nsname local neigh_nsname - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do - neigh_nsname="$(get_rtname "${neigh}")" + eval neigh_nsname=\${$(get_rtname "${neigh}")} ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \ type veth peer name "veth-rt-${neigh}-${rt}" \ @@ -497,7 +464,7 @@ setup_rt_networking() local devname local neigh - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do devname="veth-rt-${rt}-${neigh}" @@ -518,9 +485,6 @@ setup_rt_networking() ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1 - - ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0 ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1 } @@ -596,7 +560,7 @@ setup_rt_local_sids() local lcnode_func_prefix local lcblock_prefix - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do devname="veth-rt-${rt}-${neigh}" @@ -630,7 +594,7 @@ setup_rt_local_sids() dev "${DUMMY_DEVNAME}" # all SIDs for VPNs start with a common locator. Routes and SRv6 - # Endpoint behavior instaces are grouped together in the 'localsid' + # Endpoint behavior instances are grouped together in the 'localsid' # table. ip -netns "${nsname}" -6 rule \ add to "${VPN_LOCATOR_SERVICE}::/16" \ @@ -668,8 +632,8 @@ __setup_l3vpn() local rtsrc_nsname local rtdst_nsname - rtsrc_nsname="$(get_rtname "${src}")" - rtdst_nsname="$(get_rtname "${dst}")" + eval rtsrc_nsname=\${$(get_rtname "${src}")} + eval rtdst_nsname=\${$(get_rtname "${dst}")} container="${LCBLOCK_ADDR}" @@ -744,8 +708,8 @@ setup_hs() local hsname local rtname - hsname="$(get_hsname "${hs}")" - rtname="$(get_rtname "${rt}")" + eval hsname=\${$(get_hsname "${hs}")} + eval rtname=\${$(get_rtname "${rt}")} ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 @@ -791,11 +755,6 @@ setup_hs() ip netns exec "${rtname}" \ sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1 - # disable the rp_filter otherwise the kernel gets confused about how - # to route decap ipv4 packets. - ip netns exec "${rtname}" \ - sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0 - ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode" } @@ -880,7 +839,7 @@ check_rt_connectivity() local prefix local rtsrc_nsname - rtsrc_nsname="$(get_rtname "${rtsrc}")" + eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")} prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")" @@ -903,7 +862,7 @@ check_hs_ipv6_connectivity() local hsdst="$2" local hssrc_nsname - hssrc_nsname="$(get_hsname "${hssrc}")" + eval hssrc_nsname=\${$(get_hsname "${hssrc}")} ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ "${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1 @@ -915,7 +874,7 @@ check_hs_ipv4_connectivity() local hsdst="$2" local hssrc_nsname - hssrc_nsname="$(get_hsname "${hssrc}")" + eval hssrc_nsname=\${$(get_hsname "${hssrc}")} ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ "${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1 @@ -1025,7 +984,7 @@ rt_x_nextcsid_end_behavior_test() local nsname local ret - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} __nextcsid_end_behavior_test "${nsname}" "add" "${blen}" "${flen}" ret="$?" diff --git a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh index c79cb8ede17f..34b781a2ae74 100755 --- a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh @@ -72,6 +72,9 @@ # Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y in # the selftest network. # +# In addition, every router interface connecting rt-x to rt-y is assigned an +# IPv6 link-local address fe80::x:y/64. +# # Local SID/C-SID table # ===================== # @@ -287,10 +290,8 @@ # packet using the SRv6 End.DT46 behavior (associated with the SID fcff:1::d46) # and sends it to the host hs-1. -# Kselftest framework requirement - SKIP code is 4. -readonly ksft_skip=4 +source lib.sh -readonly RDMSUFF="$(mktemp -u XXXXXXXX)" readonly DUMMY_DEVNAME="dum0" readonly VRF_TID=100 readonly VRF_DEVNAME="vrf-${VRF_TID}" @@ -418,32 +419,18 @@ test_command_or_ksft_skip() fi } -get_nodename() -{ - local name="$1" - - echo "${name}-${RDMSUFF}" -} - get_rtname() { local rtid="$1" - get_nodename "rt-${rtid}" + echo "rt_${rtid}" } get_hsname() { local hsid="$1" - get_nodename "hs-${hsid}" -} - -__create_namespace() -{ - local name="$1" - - ip netns add "${name}" + echo "hs_${hsid}" } create_router() @@ -452,15 +439,12 @@ create_router() local nsname nsname="$(get_rtname "${rtid}")" + setup_ns "${nsname}" - __create_namespace "${nsname}" - + eval nsname=\${$(get_rtname "${rtid}")} ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1 - - ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0 ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1 } @@ -470,29 +454,12 @@ create_host() local nsname nsname="$(get_hsname "${hsid}")" - - __create_namespace "${nsname}" + setup_ns "${nsname}" } cleanup() { - local nsname - local i - - # destroy routers - for i in ${ROUTERS}; do - nsname="$(get_rtname "${i}")" - - ip netns del "${nsname}" &>/dev/null || true - done - - # destroy hosts - for i in ${HOSTS}; do - nsname="$(get_hsname "${i}")" - - ip netns del "${nsname}" &>/dev/null || true - done - + cleanup_all_ns # check whether the setup phase was completed successfully or not. In # case of an error during the setup phase of the testing environment, # the selftest is considered as "skipped". @@ -512,10 +479,10 @@ add_link_rt_pairs() local nsname local neigh_nsname - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do - neigh_nsname="$(get_rtname "${neigh}")" + eval neigh_nsname=\${$(get_rtname "${neigh}")} ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \ type veth peer name "veth-rt-${neigh}-${rt}" \ @@ -547,7 +514,7 @@ setup_rt_networking() local devname local neigh - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do devname="veth-rt-${rt}-${neigh}" @@ -557,6 +524,9 @@ setup_rt_networking() ip -netns "${nsname}" addr \ add "${net_prefix}::${rt}/64" dev "${devname}" nodad + ip -netns "${nsname}" addr \ + add "fe80::${rt}:${neigh}/64" dev "${devname}" nodad + ip -netns "${nsname}" link set "${devname}" up done @@ -631,7 +601,7 @@ set_end_x_nextcsid() local rt="$1" local adj="$2" - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} net_prefix="$(get_network_prefix "${rt}" "${adj}")" lcnode_func_prefix="$(build_lcnode_func_prefix "${rt}")" @@ -645,12 +615,33 @@ set_end_x_nextcsid() nflen "${LCNODEFUNC_BLEN}" dev "${DUMMY_DEVNAME}" } +set_end_x_ll_nextcsid() +{ + local rt="$1" + local adj="$2" + + eval nsname=\${$(get_rtname "${rt}")} + lcnode_func_prefix="$(build_lcnode_func_prefix "${rt}")" + nh6_ll_addr="fe80::${adj}:${rt}" + oifname="veth-rt-${rt}-${adj}" + + # enabled NEXT-C-SID SRv6 End.X behavior via an IPv6 link-local nexthop + # address (note that "dev" is the dummy dum0 device chosen for the sake + # of simplicity). + ip -netns "${nsname}" -6 route \ + replace "${lcnode_func_prefix}" \ + table "${LOCALSID_TABLE_ID}" \ + encap seg6local action End.X nh6 "${nh6_ll_addr}" \ + oif "${oifname}" flavors next-csid lblen "${LCBLOCK_BLEN}" \ + nflen "${LCNODEFUNC_BLEN}" dev "${DUMMY_DEVNAME}" +} + set_underlay_sids_reachability() { local rt="$1" local rt_neighs="$2" - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do devname="veth-rt-${rt}-${neigh}" @@ -685,12 +676,12 @@ setup_rt_local_sids() local lcnode_func_prefix local lcblock_prefix - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} set_underlay_sids_reachability "${rt}" "${rt_neighs}" # all SIDs for VPNs start with a common locator. Routes and SRv6 - # Endpoint behavior instaces are grouped together in the 'localsid' + # Endpoint behavior instances are grouped together in the 'localsid' # table. ip -netns "${nsname}" -6 rule \ add to "${VPN_LOCATOR_SERVICE}::/16" \ @@ -728,8 +719,8 @@ __setup_l3vpn() local rtsrc_nsname local rtdst_nsname - rtsrc_nsname="$(get_rtname "${src}")" - rtdst_nsname="$(get_rtname "${dst}")" + eval rtsrc_nsname=\${$(get_rtname "${src}")} + eval rtdst_nsname=\${$(get_rtname "${dst}")} container="${LCBLOCK_ADDR}" @@ -804,8 +795,8 @@ setup_hs() local hsname local rtname - hsname="$(get_hsname "${hs}")" - rtname="$(get_rtname "${rt}")" + eval hsname=\${$(get_hsname "${hs}")} + eval rtname=\${$(get_rtname "${rt}")} ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 @@ -851,11 +842,6 @@ setup_hs() ip netns exec "${rtname}" \ sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1 - # disable the rp_filter otherwise the kernel gets confused about how - # to route decap ipv4 packets. - ip netns exec "${rtname}" \ - sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0 - ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode" } @@ -947,7 +933,7 @@ check_rt_connectivity() local prefix local rtsrc_nsname - rtsrc_nsname="$(get_rtname "${rtsrc}")" + eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")} prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")" @@ -970,7 +956,7 @@ check_hs_ipv6_connectivity() local hsdst="$2" local hssrc_nsname - hssrc_nsname="$(get_hsname "${hssrc}")" + eval hssrc_nsname=\${$(get_hsname "${hssrc}")} ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ "${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1 @@ -982,7 +968,7 @@ check_hs_ipv4_connectivity() local hsdst="$2" local hssrc_nsname - hssrc_nsname="$(get_hsname "${hssrc}")" + eval hssrc_nsname=\${$(get_hsname "${hssrc}")} ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ "${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1 @@ -1057,6 +1043,27 @@ host_vpn_tests() check_and_log_hs_ipv4_connectivity 1 2 check_and_log_hs_ipv4_connectivity 2 1 + + # Setup the adjacencies in the SRv6 aware routers using IPv6 link-local + # addresses. + # - rt-3 SRv6 End.X adjacency with rt-4 + # - rt-4 SRv6 End.X adjacency with rt-1 + set_end_x_ll_nextcsid 3 4 + set_end_x_ll_nextcsid 4 1 + + log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv6), link-local" + + check_and_log_hs_ipv6_connectivity 1 2 + check_and_log_hs_ipv6_connectivity 2 1 + + log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv4), link-local" + + check_and_log_hs_ipv4_connectivity 1 2 + check_and_log_hs_ipv4_connectivity 2 1 + + # Restore the previous adjacencies. + set_end_x_nextcsid 3 4 + set_end_x_nextcsid 4 1 } __nextcsid_end_x_behavior_test() @@ -1093,7 +1100,7 @@ rt_x_nextcsid_end_x_behavior_test() local nsname local ret - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} __nextcsid_end_x_behavior_test "${nsname}" "add" "${blen}" "${flen}" ret="$?" diff --git a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh index 28a775654b92..cd7d061e21f8 100755 --- a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh @@ -166,10 +166,8 @@ # hs-4->hs-3 |IPv6 DA=fcff:1::e|SRH SIDs=fcff:3::d46|IPv6|...| (i.d) # -# Kselftest framework requirement - SKIP code is 4. -readonly ksft_skip=4 +source lib.sh -readonly RDMSUFF="$(mktemp -u XXXXXXXX)" readonly VRF_TID=100 readonly VRF_DEVNAME="vrf-${VRF_TID}" readonly RT2HS_DEVNAME="veth-t${VRF_TID}" @@ -195,6 +193,8 @@ ret=${ksft_skip} nsuccess=0 nfail=0 +HAS_TUNSRC=false + log_test() { local rc="$1" @@ -248,32 +248,18 @@ test_command_or_ksft_skip() fi } -get_nodename() -{ - local name="$1" - - echo "${name}-${RDMSUFF}" -} - get_rtname() { local rtid="$1" - get_nodename "rt-${rtid}" + echo "rt_${rtid}" } get_hsname() { local hsid="$1" - get_nodename "hs-${hsid}" -} - -__create_namespace() -{ - local name="$1" - - ip netns add "${name}" + echo "hs_${hsid}" } create_router() @@ -282,8 +268,7 @@ create_router() local nsname nsname="$(get_rtname "${rtid}")" - - __create_namespace "${nsname}" + setup_ns "${nsname}" } create_host() @@ -292,29 +277,12 @@ create_host() local nsname nsname="$(get_hsname "${hsid}")" - - __create_namespace "${nsname}" + setup_ns "${nsname}" } cleanup() { - local nsname - local i - - # destroy routers - for i in ${ROUTERS}; do - nsname="$(get_rtname "${i}")" - - ip netns del "${nsname}" &>/dev/null || true - done - - # destroy hosts - for i in ${HOSTS}; do - nsname="$(get_hsname "${i}")" - - ip netns del "${nsname}" &>/dev/null || true - done - + cleanup_all_ns # check whether the setup phase was completed successfully or not. In # case of an error during the setup phase of the testing environment, # the selftest is considered as "skipped". @@ -334,10 +302,10 @@ add_link_rt_pairs() local nsname local neigh_nsname - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do - neigh_nsname="$(get_rtname "${neigh}")" + eval neigh_nsname=\${$(get_rtname "${neigh}")} ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \ type veth peer name "veth-rt-${neigh}-${rt}" \ @@ -369,7 +337,7 @@ setup_rt_networking() local devname local neigh - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do devname="veth-rt-${rt}-${neigh}" @@ -379,6 +347,17 @@ setup_rt_networking() ip -netns "${nsname}" addr \ add "${net_prefix}::${rt}/64" dev "${devname}" nodad + # A dedicated ::dead:<rt> address (with preferred_lft 0, i.e., + # deprecated) is added when there is support for tunsrc. Because + # it is deprecated, the kernel should never auto-select it as + # source with current config. Only an explicit tunsrc can place + # it in the outer header. + if $HAS_TUNSRC; then + ip -netns "${nsname}" addr \ + add "${net_prefix}::dead:${rt}/64" \ + dev "${devname}" nodad preferred_lft 0 + fi + ip -netns "${nsname}" link set "${devname}" up done @@ -387,9 +366,6 @@ setup_rt_networking() ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1 - - ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0 ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1 } @@ -403,7 +379,7 @@ setup_rt_local_sids() local nsname local neigh - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do devname="veth-rt-${rt}-${neigh}" @@ -432,7 +408,7 @@ setup_rt_local_sids() dev "${VRF_DEVNAME}" # all SIDs for VPNs start with a common locator. Routes and SRv6 - # Endpoint behavior instaces are grouped together in the 'localsid' + # Endpoint behavior instances are grouped together in the 'localsid' # table. ip -netns "${nsname}" -6 rule \ add to "${VPN_LOCATOR_SERVICE}::/16" \ @@ -457,6 +433,7 @@ setup_rt_local_sids() # to the destination host) # $5 - encap mode (full or red) # $6 - traffic type (IPv6 or IPv4) +# $7 - force tunsrc (true or false) __setup_rt_policy() { local dst="$1" @@ -465,11 +442,47 @@ __setup_rt_policy() local dec_rt="$4" local mode="$5" local traffic="$6" + local with_tunsrc="$7" local nsname local policy='' + local tunsrc='' local n - nsname="$(get_rtname "${encap_rt}")" + # Verify the per-route tunnel source address ("tunsrc") feature. + # If it is not supported, fallback on encap config without tunsrc. + if $with_tunsrc && $HAS_TUNSRC; then + local net_prefix + local drule + local nxt + + eval nsname=\${$(get_rtname "${dec_rt}")} + + # Next SRv6 hop: first End router if any, or the decap router + [ -z "${end_rts}" ] && nxt="${dec_rt}" || nxt="${end_rts%% *}" + + # Use the right prefix for tunsrc depending on the next SRv6 hop + net_prefix="$(get_network_prefix "${encap_rt}" "${nxt}")" + tunsrc="tunsrc ${net_prefix}::dead:${encap_rt}" + + # To verify that the outer source address matches the one + # configured with tunsrc, the decap router discards packets + # with any other source address. + ip netns exec "${nsname}" ip6tables -t raw -I PREROUTING 1 \ + -s "${net_prefix}::dead:${encap_rt}" \ + -d "${VPN_LOCATOR_SERVICE}:${dec_rt}::${DT46_FUNC}" \ + -j ACCEPT + + drule="PREROUTING \ + -d ${VPN_LOCATOR_SERVICE}:${dec_rt}::${DT46_FUNC} \ + -j DROP" + + if ! ip netns exec "${nsname}" \ + ip6tables -t raw -C ${drule} &>/dev/null; then + ip netns exec "${nsname}" ip6tables -t raw -A ${drule} + fi + fi + + eval nsname=\${$(get_rtname "${encap_rt}")} for n in ${end_rts}; do policy="${policy}${VPN_LOCATOR_SERVICE}:${n}::${END_FUNC}," @@ -481,7 +494,7 @@ __setup_rt_policy() if [ "${traffic}" -eq 6 ]; then ip -netns "${nsname}" -6 route \ add "${IPv6_HS_NETWORK}::${dst}" vrf "${VRF_DEVNAME}" \ - encap seg6 mode "${mode}" segs "${policy}" \ + encap seg6 mode "${mode}" ${tunsrc} segs "${policy}" \ dev "${VRF_DEVNAME}" ip -netns "${nsname}" -6 neigh \ @@ -492,7 +505,7 @@ __setup_rt_policy() # received, otherwise the proxy arp does not work. ip -netns "${nsname}" -4 route \ add "${IPv4_HS_NETWORK}.${dst}" vrf "${VRF_DEVNAME}" \ - encap seg6 mode "${mode}" segs "${policy}" \ + encap seg6 mode "${mode}" ${tunsrc} segs "${policy}" \ dev "${VRF_DEVNAME}" fi } @@ -500,13 +513,13 @@ __setup_rt_policy() # see __setup_rt_policy setup_rt_policy_ipv6() { - __setup_rt_policy "$1" "$2" "$3" "$4" "$5" 6 + __setup_rt_policy "$1" "$2" "$3" "$4" "$5" 6 "$6" } #see __setup_rt_policy setup_rt_policy_ipv4() { - __setup_rt_policy "$1" "$2" "$3" "$4" "$5" 4 + __setup_rt_policy "$1" "$2" "$3" "$4" "$5" 4 "$6" } setup_hs() @@ -516,8 +529,8 @@ setup_hs() local hsname local rtname - hsname="$(get_hsname "${hs}")" - rtname="$(get_rtname "${rt}")" + eval hsname=\${$(get_hsname "${hs}")} + eval rtname=\${$(get_rtname "${rt}")} ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 @@ -555,11 +568,6 @@ setup_hs() ip netns exec "${rtname}" \ sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1 - # disable the rp_filter otherwise the kernel gets confused about how - # to route decap ipv4 packets. - ip netns exec "${rtname}" \ - sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0 - ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode" } @@ -609,41 +617,41 @@ setup() # the network path between hs-1 and hs-2 traverses several routers # depending on the direction of traffic. # - # Direction hs-1 -> hs-2 (H.Encaps.Red) + # Direction hs-1 -> hs-2 (H.Encaps.Red + tunsrc) # - rt-3,rt-4 (SRv6 End behaviors) # - rt-2 (SRv6 End.DT46 behavior) # # Direction hs-2 -> hs-1 (H.Encaps.Red) # - rt-1 (SRv6 End.DT46 behavior) - setup_rt_policy_ipv6 2 1 "3 4" 2 encap.red - setup_rt_policy_ipv6 1 2 "" 1 encap.red + setup_rt_policy_ipv6 2 1 "3 4" 2 encap.red true + setup_rt_policy_ipv6 1 2 "" 1 encap.red false # create an IPv4 VPN between hosts hs-1 and hs-2 # the network path between hs-1 and hs-2 traverses several routers # depending on the direction of traffic. # - # Direction hs-1 -> hs-2 (H.Encaps.Red) + # Direction hs-1 -> hs-2 (H.Encaps.Red + tunsrc) # - rt-2 (SRv6 End.DT46 behavior) # # Direction hs-2 -> hs-1 (H.Encaps.Red) # - rt-4,rt-3 (SRv6 End behaviors) # - rt-1 (SRv6 End.DT46 behavior) - setup_rt_policy_ipv4 2 1 "" 2 encap.red - setup_rt_policy_ipv4 1 2 "4 3" 1 encap.red + setup_rt_policy_ipv4 2 1 "" 2 encap.red true + setup_rt_policy_ipv4 1 2 "4 3" 1 encap.red false # create an IPv6 VPN between hosts hs-3 and hs-4 # the network path between hs-3 and hs-4 traverses several routers # depending on the direction of traffic. # - # Direction hs-3 -> hs-4 (H.Encaps.Red) + # Direction hs-3 -> hs-4 (H.Encaps.Red + tunsrc) # - rt-2 (SRv6 End Behavior) # - rt-4 (SRv6 End.DT46 behavior) # # Direction hs-4 -> hs-3 (H.Encaps.Red) # - rt-1 (SRv6 End behavior) # - rt-3 (SRv6 End.DT46 behavior) - setup_rt_policy_ipv6 4 3 "2" 4 encap.red - setup_rt_policy_ipv6 3 4 "1" 3 encap.red + setup_rt_policy_ipv6 4 3 "2" 4 encap.red true + setup_rt_policy_ipv6 3 4 "1" 3 encap.red false # testing environment was set up successfully SETUP_ERR=0 @@ -656,7 +664,7 @@ check_rt_connectivity() local prefix local rtsrc_nsname - rtsrc_nsname="$(get_rtname "${rtsrc}")" + eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")} prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")" @@ -679,7 +687,7 @@ check_hs_ipv6_connectivity() local hsdst="$2" local hssrc_nsname - hssrc_nsname="$(get_hsname "${hssrc}")" + eval hssrc_nsname=\${$(get_hsname "${hssrc}")} ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ "${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1 @@ -691,7 +699,7 @@ check_hs_ipv4_connectivity() local hsdst="$2" local hssrc_nsname - hssrc_nsname="$(get_hsname "${hssrc}")" + eval hssrc_nsname=\${$(get_hsname "${hssrc}")} ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ "${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1 @@ -851,6 +859,38 @@ test_vrf_or_ksft_skip() fi } +# Before enabling tunsrc tests, make sure tunsrc and ip6tables are supported. +check_tunsrc_support() +{ + setup_ns tunsrc_ns + + ip -netns "${tunsrc_ns}" link add veth0 type veth \ + peer name veth1 netns "${tunsrc_ns}" + + ip -netns "${tunsrc_ns}" link set veth0 up + + if ! ip -netns "${tunsrc_ns}" -6 route add fc00::dead:beef/128 \ + encap seg6 mode encap.red tunsrc fc00::1 segs fc00::2 \ + dev veth0 &>/dev/null; then + cleanup_ns "${tunsrc_ns}" + return + fi + + if ! ip -netns "${tunsrc_ns}" -6 route show | grep -q "tunsrc"; then + cleanup_ns "${tunsrc_ns}" + return + fi + + if ! ip netns exec "${tunsrc_ns}" ip6tables -t raw -A PREROUTING \ + -d fc00::dead:beef -j DROP &>/dev/null; then + cleanup_ns "${tunsrc_ns}" + return + fi + + cleanup_ns "${tunsrc_ns}" + HAS_TUNSRC=true +} + if [ "$(id -u)" -ne 0 ]; then echo "SKIP: Need root privileges" exit "${ksft_skip}" @@ -868,6 +908,7 @@ test_vrf_or_ksft_skip set -e trap cleanup EXIT +check_tunsrc_support setup set +e diff --git a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh index cb4177d41b21..0979b5316fdf 100755 --- a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh +++ b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh @@ -116,10 +116,8 @@ # hs-2->hs-1 |IPv6 DA=fcff:4::e|SRH SIDs=fcff:3::e,fcff:1::d2|eth|...| (i.b) # -# Kselftest framework requirement - SKIP code is 4. -readonly ksft_skip=4 +source lib.sh -readonly RDMSUFF="$(mktemp -u XXXXXXXX)" readonly DUMMY_DEVNAME="dum0" readonly RT2HS_DEVNAME="veth-hs" readonly HS_VETH_NAME="veth0" @@ -199,32 +197,18 @@ test_command_or_ksft_skip() fi } -get_nodename() -{ - local name="$1" - - echo "${name}-${RDMSUFF}" -} - get_rtname() { local rtid="$1" - get_nodename "rt-${rtid}" + echo "rt_${rtid}" } get_hsname() { local hsid="$1" - get_nodename "hs-${hsid}" -} - -__create_namespace() -{ - local name="$1" - - ip netns add "${name}" + echo "hs_${hsid}" } create_router() @@ -233,8 +217,7 @@ create_router() local nsname nsname="$(get_rtname "${rtid}")" - - __create_namespace "${nsname}" + setup_ns "${nsname}" } create_host() @@ -243,28 +226,12 @@ create_host() local nsname nsname="$(get_hsname "${hsid}")" - - __create_namespace "${nsname}" + setup_ns "${nsname}" } cleanup() { - local nsname - local i - - # destroy routers - for i in ${ROUTERS}; do - nsname="$(get_rtname "${i}")" - - ip netns del "${nsname}" &>/dev/null || true - done - - # destroy hosts - for i in ${HOSTS}; do - nsname="$(get_hsname "${i}")" - - ip netns del "${nsname}" &>/dev/null || true - done + cleanup_all_ns # check whether the setup phase was completed successfully or not. In # case of an error during the setup phase of the testing environment, @@ -285,10 +252,10 @@ add_link_rt_pairs() local nsname local neigh_nsname - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do - neigh_nsname="$(get_rtname "${neigh}")" + eval neigh_nsname=\${$(get_rtname "${neigh}")} ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \ type veth peer name "veth-rt-${neigh}-${rt}" \ @@ -320,7 +287,7 @@ setup_rt_networking() local devname local neigh - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do devname="veth-rt-${rt}-${neigh}" @@ -341,9 +308,6 @@ setup_rt_networking() ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1 - - ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0 ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1 } @@ -357,7 +321,7 @@ setup_rt_local_sids() local nsname local neigh - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do devname="veth-rt-${rt}-${neigh}" @@ -379,7 +343,7 @@ setup_rt_local_sids() encap seg6local action End dev "${DUMMY_DEVNAME}" # all SIDs for VPNs start with a common locator. Routes and SRv6 - # Endpoint behaviors instaces are grouped together in the 'localsid' + # Endpoint behaviors instances are grouped together in the 'localsid' # table. ip -netns "${nsname}" -6 rule add \ to "${VPN_LOCATOR_SERVICE}::/16" \ @@ -407,7 +371,7 @@ __setup_rt_policy() local policy='' local n - nsname="$(get_rtname "${encap_rt}")" + eval nsname=\${$(get_rtname "${encap_rt}")} for n in ${end_rts}; do policy="${policy}${VPN_LOCATOR_SERVICE}:${n}::${END_FUNC}," @@ -446,7 +410,7 @@ setup_decap() local rt="$1" local nsname - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} # Local End.DX2 behavior ip -netns "${nsname}" -6 route \ @@ -463,8 +427,8 @@ setup_hs() local hsname local rtname - hsname="$(get_hsname "${hs}")" - rtname="$(get_rtname "${rt}")" + eval hsname=\${$(get_hsname "${hs}")} + eval rtname=\${$(get_rtname "${rt}")} ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 @@ -486,11 +450,6 @@ setup_hs() add "${IPv4_HS_NETWORK}.254/24" dev "${RT2HS_DEVNAME}" ip -netns "${rtname}" link set "${RT2HS_DEVNAME}" up - - # disable the rp_filter otherwise the kernel gets confused about how - # to route decap ipv4 packets. - ip netns exec "${rtname}" \ - sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0 } # set an auto-generated mac address @@ -508,7 +467,7 @@ set_mac_address() local ifname="$4" local nsname - nsname=$(get_nodename "${nodename}") + eval nsname=\${${nodename}} ip -netns "${nsname}" link set dev "${ifname}" down @@ -532,7 +491,7 @@ set_host_l2peer() local hssrc_name local ipaddr - hssrc_name="$(get_hsname "${hssrc}")" + eval hssrc_name=\${$(get_hsname "${hssrc}")} if [ "${proto}" -eq 6 ]; then ipaddr="${ipprefix}::${hsdst}" @@ -562,7 +521,7 @@ setup_l2vpn() local rtdst="${hsdst}" # set fixed mac for source node and the neigh MAC address - set_mac_address "hs-${hssrc}" "${hssrc}" "${hssrc}" "${HS_VETH_NAME}" + set_mac_address "hs_${hssrc}" "${hssrc}" "${hssrc}" "${HS_VETH_NAME}" set_host_l2peer "${hssrc}" "${hsdst}" "${IPv6_HS_NETWORK}" 6 set_host_l2peer "${hssrc}" "${hsdst}" "${IPv4_HS_NETWORK}" 4 @@ -570,7 +529,7 @@ setup_l2vpn() # to the mac address of the remote peer (L2 VPN destination host). # Otherwise, traffic coming from the source host is dropped at the # ingress router. - set_mac_address "rt-${rtsrc}" "${hsdst}" 254 "${RT2HS_DEVNAME}" + set_mac_address "rt_${rtsrc}" "${hsdst}" 254 "${RT2HS_DEVNAME}" # set the SRv6 Policies at the ingress router setup_rt_policy_ipv6 "${hsdst}" "${rtsrc}" "${end_rts}" "${rtdst}" \ @@ -647,7 +606,7 @@ check_rt_connectivity() local prefix local rtsrc_nsname - rtsrc_nsname="$(get_rtname "${rtsrc}")" + eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")} prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")" @@ -670,7 +629,7 @@ check_hs_ipv6_connectivity() local hsdst="$2" local hssrc_nsname - hssrc_nsname="$(get_hsname "${hssrc}")" + eval hssrc_nsname=\${$(get_hsname "${hssrc}")} ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ "${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1 @@ -682,7 +641,7 @@ check_hs_ipv4_connectivity() local hsdst="$2" local hssrc_nsname - hssrc_nsname="$(get_hsname "${hssrc}")" + eval hssrc_nsname=\${$(get_hsname "${hssrc}")} ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ "${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1 diff --git a/tools/testing/selftests/net/srv6_iptunnel_cache.sh b/tools/testing/selftests/net/srv6_iptunnel_cache.sh new file mode 100755 index 000000000000..62638ab679d9 --- /dev/null +++ b/tools/testing/selftests/net/srv6_iptunnel_cache.sh @@ -0,0 +1,197 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# author: Andrea Mayer <andrea.mayer@uniroma2.it> + +# This test verifies that the seg6 lwtunnel does not share the dst_cache +# between the input (forwarding) and output (locally generated) paths. +# +# A shared dst_cache allows a forwarded packet to populate the cache and a +# subsequent locally generated packet to silently reuse that entry, bypassing +# its own route lookup. To expose this, the SID is made reachable only for +# forwarded traffic (via an ip rule matching iif) and blackholed for everything +# else. A local ping on ns_router must always hit the blackhole; +# if it succeeds after a forwarded packet has populated the +# cache, the bug is confirmed. +# +# Both forwarded and local packets are pinned to the same CPU with taskset, +# since dst_cache is per-cpu. +# +# +# +--------------------+ +--------------------+ +# | ns_src | | ns_dst | +# | | | | +# | veth-s0 | | veth-d0 | +# | fd00::1/64 | | fd01::2/64 | +# +-------+------------+ +----------+---------+ +# | | +# | +--------------------+ | +# | | ns_router | | +# | | | | +# +------------+ veth-r0 veth-r1 +--------------+ +# | fd00::2 fd01::1 | +# +--------------------+ +# +# +# ns_router: encap (main table) +# +---------+---------------------------------------+ +# | dst | action | +# +---------+---------------------------------------+ +# | cafe::1 | encap seg6 mode encap segs fc00::100 | +# +---------+---------------------------------------+ +# +# ns_router: post-encap SID resolution +# +-------+------------+----------------------------+ +# | table | dst | action | +# +-------+------------+----------------------------+ +# | 100 | fc00::100 | via fd01::2 dev veth-r1 | +# +-------+------------+----------------------------+ +# | main | fc00::100 | blackhole | +# +-------+------------+----------------------------+ +# +# ns_router: ip rule +# +------------------+------------------------------+ +# | match | action | +# +------------------+------------------------------+ +# | iif veth-r0 | lookup 100 | +# +------------------+------------------------------+ +# +# ns_dst: SRv6 decap (main table) +# +--------------+----------------------------------+ +# | SID | action | +# +--------------+----------------------------------+ +# | fc00::100 | End.DT6 table 255 (local) | +# +--------------+----------------------------------+ + +source lib.sh + +readonly SID="fc00::100" +readonly DEST="cafe::1" + +readonly SRC_MAC="02:00:00:00:00:01" +readonly RTR_R0_MAC="02:00:00:00:00:02" +readonly RTR_R1_MAC="02:00:00:00:00:03" +readonly DST_MAC="02:00:00:00:00:04" + +cleanup() +{ + cleanup_ns "${NS_SRC}" "${NS_RTR}" "${NS_DST}" +} + +check_prerequisites() +{ + if ! command -v ip &>/dev/null; then + echo "SKIP: ip tool not found" + exit "${ksft_skip}" + fi + + if ! command -v ping &>/dev/null; then + echo "SKIP: ping not found" + exit "${ksft_skip}" + fi + + if ! command -v sysctl &>/dev/null; then + echo "SKIP: sysctl not found" + exit "${ksft_skip}" + fi + + if ! command -v taskset &>/dev/null; then + echo "SKIP: taskset not found" + exit "${ksft_skip}" + fi +} + +setup() +{ + setup_ns NS_SRC NS_RTR NS_DST + + ip link add veth-s0 netns "${NS_SRC}" type veth \ + peer name veth-r0 netns "${NS_RTR}" + ip link add veth-r1 netns "${NS_RTR}" type veth \ + peer name veth-d0 netns "${NS_DST}" + + ip -n "${NS_SRC}" link set veth-s0 address "${SRC_MAC}" + ip -n "${NS_RTR}" link set veth-r0 address "${RTR_R0_MAC}" + ip -n "${NS_RTR}" link set veth-r1 address "${RTR_R1_MAC}" + ip -n "${NS_DST}" link set veth-d0 address "${DST_MAC}" + + # ns_src + ip -n "${NS_SRC}" link set veth-s0 up + ip -n "${NS_SRC}" addr add fd00::1/64 dev veth-s0 nodad + ip -n "${NS_SRC}" -6 route add "${DEST}"/128 via fd00::2 + + # ns_router + ip -n "${NS_RTR}" link set veth-r0 up + ip -n "${NS_RTR}" addr add fd00::2/64 dev veth-r0 nodad + ip -n "${NS_RTR}" link set veth-r1 up + ip -n "${NS_RTR}" addr add fd01::1/64 dev veth-r1 nodad + ip netns exec "${NS_RTR}" sysctl -qw net.ipv6.conf.all.forwarding=1 + + ip -n "${NS_RTR}" -6 route add "${DEST}"/128 \ + encap seg6 mode encap segs "${SID}" dev veth-r0 + ip -n "${NS_RTR}" -6 route add "${SID}"/128 table 100 \ + via fd01::2 dev veth-r1 + ip -n "${NS_RTR}" -6 route add blackhole "${SID}"/128 + ip -n "${NS_RTR}" -6 rule add iif veth-r0 lookup 100 + + # ns_dst + ip -n "${NS_DST}" link set veth-d0 up + ip -n "${NS_DST}" addr add fd01::2/64 dev veth-d0 nodad + ip -n "${NS_DST}" addr add "${DEST}"/128 dev lo nodad + ip -n "${NS_DST}" -6 route add "${SID}"/128 \ + encap seg6local action End.DT6 table 255 dev veth-d0 + ip -n "${NS_DST}" -6 route add fd00::/64 via fd01::1 + + # static neighbors + ip -n "${NS_SRC}" -6 neigh add fd00::2 dev veth-s0 \ + lladdr "${RTR_R0_MAC}" nud permanent + ip -n "${NS_RTR}" -6 neigh add fd00::1 dev veth-r0 \ + lladdr "${SRC_MAC}" nud permanent + ip -n "${NS_RTR}" -6 neigh add fd01::2 dev veth-r1 \ + lladdr "${DST_MAC}" nud permanent + ip -n "${NS_DST}" -6 neigh add fd01::1 dev veth-d0 \ + lladdr "${RTR_R1_MAC}" nud permanent +} + +test_cache_isolation() +{ + RET=0 + + # local ping with empty cache: must fail (SID is blackholed) + if ip netns exec "${NS_RTR}" taskset -c 0 \ + ping -c 1 -W 2 "${DEST}" &>/dev/null; then + echo "SKIP: local ping succeeded, topology broken" + exit "${ksft_skip}" + fi + + # forward from ns_src to populate the input cache + if ! ip netns exec "${NS_SRC}" taskset -c 0 \ + ping -c 1 -W 2 "${DEST}" &>/dev/null; then + echo "SKIP: forwarded ping failed, topology broken" + exit "${ksft_skip}" + fi + + # local ping again: must still fail; if the output path reuses + # the input cache, it bypasses the blackhole and the ping succeeds + if ip netns exec "${NS_RTR}" taskset -c 0 \ + ping -c 1 -W 2 "${DEST}" &>/dev/null; then + echo "FAIL: output path used dst cached by input path" + RET="${ksft_fail}" + else + echo "PASS: output path dst_cache is independent" + fi + + return "${RET}" +} + +if [ "$(id -u)" -ne 0 ]; then + echo "SKIP: Need root privileges" + exit "${ksft_skip}" +fi + +trap cleanup EXIT + +check_prerequisites +setup +test_cache_isolation +exit "${RET}" diff --git a/tools/testing/selftests/net/tap.c b/tools/testing/selftests/net/tap.c index 247c3b3ac1c9..a0c9418132c8 100644 --- a/tools/testing/selftests/net/tap.c +++ b/tools/testing/selftests/net/tap.c @@ -17,7 +17,7 @@ #include <linux/virtio_net.h> #include <netinet/ip.h> #include <netinet/udp.h> -#include "../kselftest_harness.h" +#include "kselftest_harness.h" static const char param_dev_tap_name[] = "xmacvtap0"; static const char param_dev_dummy_name[] = "xdummy0"; @@ -56,18 +56,12 @@ static void rtattr_end(struct nlmsghdr *nh, struct rtattr *attr) static struct rtattr *rtattr_add_str(struct nlmsghdr *nh, unsigned short type, const char *s) { - struct rtattr *rta = rtattr_add(nh, type, strlen(s)); + unsigned int strsz = strlen(s) + 1; + struct rtattr *rta; - memcpy(RTA_DATA(rta), s, strlen(s)); - return rta; -} - -static struct rtattr *rtattr_add_strsz(struct nlmsghdr *nh, unsigned short type, - const char *s) -{ - struct rtattr *rta = rtattr_add(nh, type, strlen(s) + 1); + rta = rtattr_add(nh, type, strsz); - strcpy(RTA_DATA(rta), s); + memcpy(RTA_DATA(rta), s, strsz); return rta; } @@ -119,7 +113,7 @@ static int dev_create(const char *dev, const char *link_type, link_info = rtattr_begin(&req.nh, IFLA_LINKINFO); - rtattr_add_strsz(&req.nh, IFLA_INFO_KIND, link_type); + rtattr_add_str(&req.nh, IFLA_INFO_KIND, link_type); if (fill_info_data) { info_data = rtattr_begin(&req.nh, IFLA_INFO_DATA); diff --git a/tools/testing/selftests/net/tcp_ao/config b/tools/testing/selftests/net/tcp_ao/config index 3605e38711cb..971cb6fa2d63 100644 --- a/tools/testing/selftests/net/tcp_ao/config +++ b/tools/testing/selftests/net/tcp_ao/config @@ -1,8 +1,8 @@ CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_RMD160=y CONFIG_CRYPTO_SHA1=y -CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_IPV6=y +CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_NET_L3_MASTER_DEV=y CONFIG_NET_VRF=y CONFIG_TCP_AO=y diff --git a/tools/testing/selftests/net/tcp_ao/connect-deny.c b/tools/testing/selftests/net/tcp_ao/connect-deny.c index d418162d335f..93b61e9a36f1 100644 --- a/tools/testing/selftests/net/tcp_ao/connect-deny.c +++ b/tools/testing/selftests/net/tcp_ao/connect-deny.c @@ -4,6 +4,7 @@ #include "aolib.h" #define fault(type) (inj == FAULT_ ## type) +static volatile int sk_pair; static inline int test_add_key_maclen(int sk, const char *key, uint8_t maclen, union tcp_addr in_addr, uint8_t prefix, @@ -34,10 +35,10 @@ static void try_accept(const char *tst_name, unsigned int port, const char *pwd, const char *cnt_name, test_cnt cnt_expected, fault_t inj) { - struct tcp_ao_counters ao_cnt1, ao_cnt2; + struct tcp_counters cnt1, cnt2; uint64_t before_cnt = 0, after_cnt = 0; /* silence GCC */ + test_cnt poll_cnt = (cnt_expected == TEST_CNT_GOOD) ? 0 : cnt_expected; int lsk, err, sk = 0; - time_t timeout; lsk = test_listen_socket(this_ip_addr, port, 1); @@ -46,21 +47,24 @@ static void try_accept(const char *tst_name, unsigned int port, const char *pwd, if (cnt_name) before_cnt = netstat_get_one(cnt_name, NULL); - if (pwd && test_get_tcp_ao_counters(lsk, &ao_cnt1)) - test_error("test_get_tcp_ao_counters()"); + if (pwd && test_get_tcp_counters(lsk, &cnt1)) + test_error("test_get_tcp_counters()"); synchronize_threads(); /* preparations done */ - timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; - err = test_wait_fd(lsk, timeout, 0); + err = test_skpair_wait_poll(lsk, 0, poll_cnt, &sk_pair); if (err == -ETIMEDOUT) { + sk_pair = err; if (!fault(TIMEOUT)) - test_fail("timed out for accept()"); + test_fail("%s: timed out for accept()", tst_name); + } else if (err == -EKEYREJECTED) { + if (!fault(KEYREJECT)) + test_fail("%s: key was rejected", tst_name); } else if (err < 0) { - test_error("test_wait_fd()"); + test_error("test_skpair_wait_poll()"); } else { if (fault(TIMEOUT)) - test_fail("ready to accept"); + test_fail("%s: ready to accept", tst_name); sk = accept(lsk, NULL, NULL); if (sk < 0) { @@ -72,13 +76,13 @@ static void try_accept(const char *tst_name, unsigned int port, const char *pwd, } synchronize_threads(); /* before counter checks */ - if (pwd && test_get_tcp_ao_counters(lsk, &ao_cnt2)) - test_error("test_get_tcp_ao_counters()"); + if (pwd && test_get_tcp_counters(lsk, &cnt2)) + test_error("test_get_tcp_counters()"); close(lsk); if (pwd) - test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected); + test_assert_counters(tst_name, &cnt1, &cnt2, cnt_expected); if (!cnt_name) goto out; @@ -109,7 +113,7 @@ static void *server_fn(void *arg) try_accept("Non-AO server + AO client", port++, NULL, this_ip_dest, -1, 100, 100, 0, - "TCPAOKeyNotFound", 0, FAULT_TIMEOUT); + "TCPAOKeyNotFound", TEST_CNT_NS_KEY_NOT_FOUND, FAULT_TIMEOUT); try_accept("AO server + Non-AO client", port++, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100, 0, @@ -135,8 +139,9 @@ static void *server_fn(void *arg) wrong_addr, -1, 100, 100, 0, "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND, FAULT_TIMEOUT); + /* Key rejected by the other side, failing short through skpair */ try_accept("Client: Wrong addr", port++, NULL, - this_ip_dest, -1, 100, 100, 0, NULL, 0, FAULT_TIMEOUT); + this_ip_dest, -1, 100, 100, 0, NULL, 0, FAULT_KEYREJECT); try_accept("rcv id != snd id", port++, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 200, 100, 0, @@ -163,8 +168,7 @@ static void try_connect(const char *tst_name, unsigned int port, uint8_t sndid, uint8_t rcvid, test_cnt cnt_expected, fault_t inj) { - struct tcp_ao_counters ao_cnt1, ao_cnt2; - time_t timeout; + struct tcp_counters cnt1, cnt2; int sk, ret; sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); @@ -174,16 +178,15 @@ static void try_connect(const char *tst_name, unsigned int port, if (pwd && test_add_key(sk, pwd, addr, prefix, sndid, rcvid)) test_error("setsockopt(TCP_AO_ADD_KEY)"); - if (pwd && test_get_tcp_ao_counters(sk, &ao_cnt1)) - test_error("test_get_tcp_ao_counters()"); + if (pwd && test_get_tcp_counters(sk, &cnt1)) + test_error("test_get_tcp_counters()"); synchronize_threads(); /* preparations done */ - timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; - ret = _test_connect_socket(sk, this_ip_dest, port, timeout); - + ret = test_skpair_connect_poll(sk, this_ip_dest, port, cnt_expected, &sk_pair); synchronize_threads(); /* before counter checks */ if (ret < 0) { + sk_pair = ret; if (fault(KEYREJECT) && ret == -EKEYREJECTED) { test_ok("%s: connect() was prevented", tst_name); } else if (ret == -ETIMEDOUT && fault(TIMEOUT)) { @@ -202,9 +205,11 @@ static void try_connect(const char *tst_name, unsigned int port, else test_ok("%s: connected", tst_name); if (pwd && ret > 0) { - if (test_get_tcp_ao_counters(sk, &ao_cnt2)) - test_error("test_get_tcp_ao_counters()"); - test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected); + if (test_get_tcp_counters(sk, &cnt2)) + test_error("test_get_tcp_counters()"); + test_assert_counters(tst_name, &cnt1, &cnt2, cnt_expected); + } else if (pwd) { + test_tcp_counters_free(&cnt1); } out: synchronize_threads(); /* close() */ @@ -241,6 +246,11 @@ static void *client_fn(void *arg) try_connect("Wrong rcv id", port++, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + /* + * XXX: The test doesn't increase any counters, see tcp_make_synack(). + * Potentially, it can be speed up by setting sk_pair = -ETIMEDOUT + * but the price would be increased complexity of the tracer thread. + */ trace_ao_event_sk_expect(TCP_AO_SYNACK_NO_KEY, this_ip_dest, addr_any, port, 0, 100, 100); try_connect("Wrong snd id", port++, DEFAULT_TEST_PASSWORD, diff --git a/tools/testing/selftests/net/tcp_ao/connect.c b/tools/testing/selftests/net/tcp_ao/connect.c index f1d8d29e393f..340f00e979ea 100644 --- a/tools/testing/selftests/net/tcp_ao/connect.c +++ b/tools/testing/selftests/net/tcp_ao/connect.c @@ -35,7 +35,7 @@ static void *client_fn(void *arg) uint64_t before_aogood, after_aogood; const size_t nr_packets = 20; struct netstat *ns_before, *ns_after; - struct tcp_ao_counters ao1, ao2; + struct tcp_counters ao1, ao2; if (sk < 0) test_error("socket()"); @@ -50,18 +50,18 @@ static void *client_fn(void *arg) ns_before = netstat_read(); before_aogood = netstat_get(ns_before, "TCPAOGood", NULL); - if (test_get_tcp_ao_counters(sk, &ao1)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &ao1)) + test_error("test_get_tcp_counters()"); - if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) { + if (test_client_verify(sk, 100, nr_packets)) { test_fail("verify failed"); return NULL; } ns_after = netstat_read(); after_aogood = netstat_get(ns_after, "TCPAOGood", NULL); - if (test_get_tcp_ao_counters(sk, &ao2)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &ao2)) + test_error("test_get_tcp_counters()"); netstat_print_diff(ns_before, ns_after); netstat_free(ns_before); netstat_free(ns_after); @@ -71,14 +71,14 @@ static void *client_fn(void *arg) nr_packets, after_aogood, before_aogood); return NULL; } - if (test_tcp_ao_counters_cmp("connect", &ao1, &ao2, TEST_CNT_GOOD)) + if (test_assert_counters("connect", &ao1, &ao2, TEST_CNT_GOOD)) return NULL; test_ok("connect TCPAOGood %" PRIu64 "/%" PRIu64 "/%" PRIu64 " => %" PRIu64 "/%" PRIu64 "/%" PRIu64 ", sent %zu", - before_aogood, ao1.ao_info_pkt_good, - ao1.key_cnts[0].pkt_good, - after_aogood, ao2.ao_info_pkt_good, - ao2.key_cnts[0].pkt_good, + before_aogood, ao1.ao.ao_info_pkt_good, + ao1.ao.key_cnts[0].pkt_good, + after_aogood, ao2.ao.ao_info_pkt_good, + ao2.ao.key_cnts[0].pkt_good, nr_packets); return NULL; } diff --git a/tools/testing/selftests/net/tcp_ao/icmps-discard.c b/tools/testing/selftests/net/tcp_ao/icmps-discard.c index a1614f0d8c44..85c1a1e958c6 100644 --- a/tools/testing/selftests/net/tcp_ao/icmps-discard.c +++ b/tools/testing/selftests/net/tcp_ao/icmps-discard.c @@ -53,7 +53,7 @@ static void serve_interfered(int sk) ssize_t test_quota = packet_size * packets_nr * 10; uint64_t dest_unreach_a, dest_unreach_b; uint64_t icmp_ignored_a, icmp_ignored_b; - struct tcp_ao_counters ao_cnt1, ao_cnt2; + struct tcp_counters cnt1, cnt2; bool counter_not_found; struct netstat *ns_after, *ns_before; ssize_t bytes; @@ -61,16 +61,16 @@ static void serve_interfered(int sk) ns_before = netstat_read(); dest_unreach_a = netstat_get(ns_before, dst_unreach, NULL); icmp_ignored_a = netstat_get(ns_before, tcpao_icmps, NULL); - if (test_get_tcp_ao_counters(sk, &ao_cnt1)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &cnt1)) + test_error("test_get_tcp_counters()"); bytes = test_server_run(sk, test_quota, 0); ns_after = netstat_read(); netstat_print_diff(ns_before, ns_after); dest_unreach_b = netstat_get(ns_after, dst_unreach, NULL); icmp_ignored_b = netstat_get(ns_after, tcpao_icmps, &counter_not_found); - if (test_get_tcp_ao_counters(sk, &ao_cnt2)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &cnt2)) + test_error("test_get_tcp_counters()"); netstat_free(ns_before); netstat_free(ns_after); @@ -91,9 +91,9 @@ static void serve_interfered(int sk) return; } #ifdef TEST_ICMPS_ACCEPT - test_tcp_ao_counters_cmp(NULL, &ao_cnt1, &ao_cnt2, TEST_CNT_GOOD); + test_assert_counters(NULL, &cnt1, &cnt2, TEST_CNT_GOOD); #else - test_tcp_ao_counters_cmp(NULL, &ao_cnt1, &ao_cnt2, TEST_CNT_GOOD | TEST_CNT_AO_DROPPED_ICMP); + test_assert_counters(NULL, &cnt1, &cnt2, TEST_CNT_GOOD | TEST_CNT_AO_DROPPED_ICMP); #endif if (icmp_ignored_a >= icmp_ignored_b) { test_icmps_fail("%s counter didn't change: %" PRIu64 " >= %" PRIu64, @@ -395,7 +395,6 @@ static void icmp_interfere(const size_t nr, uint32_t rcv_nxt, void *src, void *d static void send_interfered(int sk) { - const unsigned int timeout = TEST_TIMEOUT_SEC; struct sockaddr_in6 src, dst; socklen_t addr_sz; @@ -409,7 +408,7 @@ static void send_interfered(int sk) while (1) { uint32_t rcv_nxt; - if (test_client_verify(sk, packet_size, packets_nr, timeout)) { + if (test_client_verify(sk, packet_size, packets_nr)) { test_fail("client: connection is broken"); return; } diff --git a/tools/testing/selftests/net/tcp_ao/key-management.c b/tools/testing/selftests/net/tcp_ao/key-management.c index d4385b52c10b..69d9a7a05d5c 100644 --- a/tools/testing/selftests/net/tcp_ao/key-management.c +++ b/tools/testing/selftests/net/tcp_ao/key-management.c @@ -629,11 +629,11 @@ static int key_collection_socket(bool server, unsigned int port) } static void verify_counters(const char *tst_name, bool is_listen_sk, bool server, - struct tcp_ao_counters *a, struct tcp_ao_counters *b) + struct tcp_counters *a, struct tcp_counters *b) { unsigned int i; - __test_tcp_ao_counters_cmp(tst_name, a, b, TEST_CNT_GOOD); + test_assert_counters_sk(tst_name, a, b, TEST_CNT_GOOD); for (i = 0; i < collection.nr_keys; i++) { struct test_key *key = &collection.keys[i]; @@ -652,12 +652,12 @@ static void verify_counters(const char *tst_name, bool is_listen_sk, bool server rx_cnt_expected = key->used_on_server_tx; } - test_tcp_ao_key_counters_cmp(tst_name, a, b, - rx_cnt_expected ? TEST_CNT_KEY_GOOD : 0, - sndid, rcvid); + test_assert_counters_key(tst_name, &a->ao, &b->ao, + rx_cnt_expected ? TEST_CNT_KEY_GOOD : 0, + sndid, rcvid); } - test_tcp_ao_counters_free(a); - test_tcp_ao_counters_free(b); + test_tcp_counters_free(a); + test_tcp_counters_free(b); test_ok("%s: passed counters checks", tst_name); } @@ -791,17 +791,17 @@ out: } static int start_server(const char *tst_name, unsigned int port, size_t quota, - struct tcp_ao_counters *begin, + struct tcp_counters *begin, unsigned int current_index, unsigned int rnext_index) { - struct tcp_ao_counters lsk_c1, lsk_c2; + struct tcp_counters lsk_c1, lsk_c2; ssize_t bytes; int sk, lsk; synchronize_threads(); /* 1: key collection initialized */ lsk = key_collection_socket(true, port); - if (test_get_tcp_ao_counters(lsk, &lsk_c1)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(lsk, &lsk_c1)) + test_error("test_get_tcp_counters()"); synchronize_threads(); /* 2: MKTs added => connect() */ if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0)) test_error("test_wait_fd()"); @@ -809,12 +809,12 @@ static int start_server(const char *tst_name, unsigned int port, size_t quota, sk = accept(lsk, NULL, NULL); if (sk < 0) test_error("accept()"); - if (test_get_tcp_ao_counters(sk, begin)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, begin)) + test_error("test_get_tcp_counters()"); synchronize_threads(); /* 3: accepted => send data */ - if (test_get_tcp_ao_counters(lsk, &lsk_c2)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(lsk, &lsk_c2)) + test_error("test_get_tcp_counters()"); verify_keys(tst_name, lsk, true, true); close(lsk); @@ -830,12 +830,12 @@ static int start_server(const char *tst_name, unsigned int port, size_t quota, } static void end_server(const char *tst_name, int sk, - struct tcp_ao_counters *begin) + struct tcp_counters *begin) { - struct tcp_ao_counters end; + struct tcp_counters end; - if (test_get_tcp_ao_counters(sk, &end)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &end)) + test_error("test_get_tcp_counters()"); verify_keys(tst_name, sk, false, true); synchronize_threads(); /* 4: verified => closed */ @@ -848,7 +848,7 @@ static void end_server(const char *tst_name, int sk, static void try_server_run(const char *tst_name, unsigned int port, size_t quota, unsigned int current_index, unsigned int rnext_index) { - struct tcp_ao_counters tmp; + struct tcp_counters tmp; int sk; sk = start_server(tst_name, port, quota, &tmp, @@ -860,7 +860,7 @@ static void server_rotations(const char *tst_name, unsigned int port, size_t quota, unsigned int rotations, unsigned int current_index, unsigned int rnext_index) { - struct tcp_ao_counters tmp; + struct tcp_counters tmp; unsigned int i; int sk; @@ -886,7 +886,7 @@ static void server_rotations(const char *tst_name, unsigned int port, static int run_client(const char *tst_name, unsigned int port, unsigned int nr_keys, int current_index, int rnext_index, - struct tcp_ao_counters *before, + struct tcp_counters *before, const size_t msg_sz, const size_t msg_nr) { int sk; @@ -904,8 +904,8 @@ static int run_client(const char *tst_name, unsigned int port, if (test_set_key(sk, sndid, rcvid)) test_error("failed to set current/rnext keys"); } - if (before && test_get_tcp_ao_counters(sk, before)) - test_error("test_get_tcp_ao_counters()"); + if (before && test_get_tcp_counters(sk, before)) + test_error("test_get_tcp_counters()"); synchronize_threads(); /* 2: MKTs added => connect() */ if (test_connect_socket(sk, this_ip_dest, port++) <= 0) @@ -918,11 +918,11 @@ static int run_client(const char *tst_name, unsigned int port, collection.keys[rnext_index].used_on_server_tx = 1; synchronize_threads(); /* 3: accepted => send data */ - if (test_client_verify(sk, msg_sz, msg_nr, TEST_TIMEOUT_SEC)) { + if (test_client_verify(sk, msg_sz, msg_nr)) { test_fail("verify failed"); close(sk); if (before) - test_tcp_ao_counters_free(before); + test_tcp_counters_free(before); return -1; } @@ -931,7 +931,7 @@ static int run_client(const char *tst_name, unsigned int port, static int start_client(const char *tst_name, unsigned int port, unsigned int nr_keys, int current_index, int rnext_index, - struct tcp_ao_counters *before, + struct tcp_counters *before, const size_t msg_sz, const size_t msg_nr) { if (init_default_key_collection(nr_keys, true)) @@ -943,9 +943,9 @@ static int start_client(const char *tst_name, unsigned int port, static void end_client(const char *tst_name, int sk, unsigned int nr_keys, int current_index, int rnext_index, - struct tcp_ao_counters *start) + struct tcp_counters *start) { - struct tcp_ao_counters end; + struct tcp_counters end; /* Some application may become dependent on this kernel choice */ if (current_index < 0) @@ -955,8 +955,8 @@ static void end_client(const char *tst_name, int sk, unsigned int nr_keys, verify_current_rnext(tst_name, sk, collection.keys[current_index].client_keyid, collection.keys[rnext_index].server_keyid); - if (start && test_get_tcp_ao_counters(sk, &end)) - test_error("test_get_tcp_ao_counters()"); + if (start && test_get_tcp_counters(sk, &end)) + test_error("test_get_tcp_counters()"); verify_keys(tst_name, sk, false, false); synchronize_threads(); /* 4: verify => closed */ close(sk); @@ -1016,7 +1016,7 @@ static void try_unmatched_keys(int sk, int *rnext_index, unsigned int port) trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, this_ip_addr, this_ip_dest, -1, port, 0, -1, -1, -1, -1, -1, -1, key->server_keyid, -1); - if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) + if (test_client_verify(sk, msg_len, nr_packets)) test_fail("verify failed"); *rnext_index = i; } @@ -1048,7 +1048,7 @@ static void check_current_back(const char *tst_name, unsigned int port, unsigned int current_index, unsigned int rnext_index, unsigned int rotate_to_index) { - struct tcp_ao_counters tmp; + struct tcp_counters tmp; int sk; sk = start_client(tst_name, port, nr_keys, current_index, rnext_index, @@ -1061,7 +1061,7 @@ static void check_current_back(const char *tst_name, unsigned int port, port, -1, 0, -1, -1, -1, -1, -1, collection.keys[rotate_to_index].client_keyid, collection.keys[current_index].client_keyid, -1); - if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) + if (test_client_verify(sk, msg_len, nr_packets)) test_fail("verify failed"); /* There is a race here: between setting the current_key with * setsockopt(TCP_AO_INFO) and starting to send some data - there @@ -1081,7 +1081,7 @@ static void roll_over_keys(const char *tst_name, unsigned int port, unsigned int nr_keys, unsigned int rotations, unsigned int current_index, unsigned int rnext_index) { - struct tcp_ao_counters tmp; + struct tcp_counters tmp; unsigned int i; int sk; @@ -1099,10 +1099,10 @@ static void roll_over_keys(const char *tst_name, unsigned int port, collection.keys[i].server_keyid, -1); if (test_set_key(sk, -1, collection.keys[i].server_keyid)) test_error("Can't change the Rnext key"); - if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) { + if (test_client_verify(sk, msg_len, nr_packets)) { test_fail("verify failed"); close(sk); - test_tcp_ao_counters_free(&tmp); + test_tcp_counters_free(&tmp); return; } verify_current_rnext(tst_name, sk, -1, @@ -1116,7 +1116,7 @@ static void roll_over_keys(const char *tst_name, unsigned int port, static void try_client_run(const char *tst_name, unsigned int port, unsigned int nr_keys, int current_index, int rnext_index) { - struct tcp_ao_counters tmp; + struct tcp_counters tmp; int sk; sk = start_client(tst_name, port, nr_keys, current_index, rnext_index, diff --git a/tools/testing/selftests/net/tcp_ao/lib/aolib.h b/tools/testing/selftests/net/tcp_ao/lib/aolib.h index 5db2f65cddc4..ebb2899c12fe 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/aolib.h +++ b/tools/testing/selftests/net/tcp_ao/lib/aolib.h @@ -289,7 +289,7 @@ extern int link_set_up(const char *intf); extern const unsigned int test_server_port; extern int test_wait_fd(int sk, time_t sec, bool write); extern int __test_connect_socket(int sk, const char *device, - void *addr, size_t addr_sz, time_t timeout); + void *addr, size_t addr_sz, bool async); extern int __test_listen_socket(int backlog, void *addr, size_t addr_sz); static inline int test_listen_socket(const union tcp_addr taddr, @@ -331,25 +331,26 @@ static inline int test_listen_socket(const union tcp_addr taddr, * If set to 0 - kernel will try to retransmit SYN number of times, set in * /proc/sys/net/ipv4/tcp_syn_retries * By default set to 1 to make tests pass faster on non-busy machine. + * [in process of removal, don't use in new tests] */ #ifndef TEST_RETRANSMIT_SEC #define TEST_RETRANSMIT_SEC 1 #endif static inline int _test_connect_socket(int sk, const union tcp_addr taddr, - unsigned int port, time_t timeout) + unsigned int port, bool async) { sockaddr_af addr; tcp_addr_to_sockaddr_in(&addr, &taddr, htons(port)); return __test_connect_socket(sk, veth_name, - (void *)&addr, sizeof(addr), timeout); + (void *)&addr, sizeof(addr), async); } static inline int test_connect_socket(int sk, const union tcp_addr taddr, unsigned int port) { - return _test_connect_socket(sk, taddr, port, TEST_TIMEOUT_SEC); + return _test_connect_socket(sk, taddr, port, false); } extern int __test_set_md5(int sk, void *addr, size_t addr_sz, @@ -483,10 +484,7 @@ static inline int test_set_ao_flags(int sk, bool ao_required, bool accept_icmps) } extern ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec); -extern ssize_t test_client_loop(int sk, char *buf, size_t buf_sz, - const size_t msg_len, time_t timeout_sec); -extern int test_client_verify(int sk, const size_t msg_len, const size_t nr, - time_t timeout_sec); +extern int test_client_verify(int sk, const size_t msg_len, const size_t nr); struct tcp_ao_key_counters { uint8_t sndid; @@ -512,7 +510,15 @@ struct tcp_ao_counters { size_t nr_keys; struct tcp_ao_key_counters *key_cnts; }; -extern int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out); + +struct tcp_counters { + struct tcp_ao_counters ao; + uint64_t netns_md5_notfound; + uint64_t netns_md5_unexpected; + uint64_t netns_md5_failure; +}; + +extern int test_get_tcp_counters(int sk, struct tcp_counters *out); #define TEST_CNT_KEY_GOOD BIT(0) #define TEST_CNT_KEY_BAD BIT(1) @@ -526,8 +532,31 @@ extern int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out); #define TEST_CNT_NS_KEY_NOT_FOUND BIT(9) #define TEST_CNT_NS_AO_REQUIRED BIT(10) #define TEST_CNT_NS_DROPPED_ICMP BIT(11) +#define TEST_CNT_NS_MD5_NOT_FOUND BIT(12) +#define TEST_CNT_NS_MD5_UNEXPECTED BIT(13) +#define TEST_CNT_NS_MD5_FAILURE BIT(14) typedef uint16_t test_cnt; +#define _for_each_counter(f) \ +do { \ + /* per-netns */ \ + f(ao.netns_ao_good, TEST_CNT_NS_GOOD); \ + f(ao.netns_ao_bad, TEST_CNT_NS_BAD); \ + f(ao.netns_ao_key_not_found, TEST_CNT_NS_KEY_NOT_FOUND); \ + f(ao.netns_ao_required, TEST_CNT_NS_AO_REQUIRED); \ + f(ao.netns_ao_dropped_icmp, TEST_CNT_NS_DROPPED_ICMP); \ + /* per-socket */ \ + f(ao.ao_info_pkt_good, TEST_CNT_SOCK_GOOD); \ + f(ao.ao_info_pkt_bad, TEST_CNT_SOCK_BAD); \ + f(ao.ao_info_pkt_key_not_found, TEST_CNT_SOCK_KEY_NOT_FOUND); \ + f(ao.ao_info_pkt_ao_required, TEST_CNT_SOCK_AO_REQUIRED); \ + f(ao.ao_info_pkt_dropped_icmp, TEST_CNT_SOCK_DROPPED_ICMP); \ + /* non-AO */ \ + f(netns_md5_notfound, TEST_CNT_NS_MD5_NOT_FOUND); \ + f(netns_md5_unexpected, TEST_CNT_NS_MD5_UNEXPECTED); \ + f(netns_md5_failure, TEST_CNT_NS_MD5_FAILURE); \ +} while (0) + #define TEST_CNT_AO_GOOD (TEST_CNT_SOCK_GOOD | TEST_CNT_NS_GOOD) #define TEST_CNT_AO_BAD (TEST_CNT_SOCK_BAD | TEST_CNT_NS_BAD) #define TEST_CNT_AO_KEY_NOT_FOUND (TEST_CNT_SOCK_KEY_NOT_FOUND | \ @@ -539,34 +568,71 @@ typedef uint16_t test_cnt; #define TEST_CNT_GOOD (TEST_CNT_KEY_GOOD | TEST_CNT_AO_GOOD) #define TEST_CNT_BAD (TEST_CNT_KEY_BAD | TEST_CNT_AO_BAD) -extern int __test_tcp_ao_counters_cmp(const char *tst_name, - struct tcp_ao_counters *before, struct tcp_ao_counters *after, +extern test_cnt test_cmp_counters(struct tcp_counters *before, + struct tcp_counters *after); +extern int test_assert_counters_sk(const char *tst_name, + struct tcp_counters *before, struct tcp_counters *after, test_cnt expected); -extern int test_tcp_ao_key_counters_cmp(const char *tst_name, +extern int test_assert_counters_key(const char *tst_name, struct tcp_ao_counters *before, struct tcp_ao_counters *after, test_cnt expected, int sndid, int rcvid); -extern void test_tcp_ao_counters_free(struct tcp_ao_counters *cnts); +extern void test_tcp_counters_free(struct tcp_counters *cnts); + +/* + * Polling for netns and socket counters during select()/connect() and also + * client/server messaging. Instead of constant timeout on underlying select(), + * check the counters and return early. This allows to pass the tests where + * timeout is expected without waiting for that fixing timeout (tests speed-up). + * Previously shorter timeouts were used for tests expecting to time out, + * but that leaded to sporadic false positives on counter checks failures, + * as one second timeouts aren't enough for TCP retransmit. + * + * Two sides of the socketpair (client/server) should synchronize failures + * using a shared variable *err, so that they can detect the other side's + * failure. + */ +extern int test_skpair_wait_poll(int sk, bool write, test_cnt cond, + volatile int *err); +extern int _test_skpair_connect_poll(int sk, const char *device, + void *addr, size_t addr_sz, + test_cnt cond, volatile int *err); +static inline int test_skpair_connect_poll(int sk, const union tcp_addr taddr, + unsigned int port, + test_cnt cond, volatile int *err) +{ + sockaddr_af addr; + + tcp_addr_to_sockaddr_in(&addr, &taddr, htons(port)); + return _test_skpair_connect_poll(sk, veth_name, + (void *)&addr, sizeof(addr), cond, err); +} + +extern int test_skpair_client(int sk, const size_t msg_len, const size_t nr, + test_cnt cond, volatile int *err); +extern int test_skpair_server(int sk, ssize_t quota, + test_cnt cond, volatile int *err); + /* - * Frees buffers allocated in test_get_tcp_ao_counters(). + * Frees buffers allocated in test_get_tcp_counters(). * The function doesn't expect new keys or keys removed between calls - * to test_get_tcp_ao_counters(). Check key counters manually if they + * to test_get_tcp_counters(). Check key counters manually if they * may change. */ -static inline int test_tcp_ao_counters_cmp(const char *tst_name, - struct tcp_ao_counters *before, - struct tcp_ao_counters *after, - test_cnt expected) +static inline int test_assert_counters(const char *tst_name, + struct tcp_counters *before, + struct tcp_counters *after, + test_cnt expected) { int ret; - ret = __test_tcp_ao_counters_cmp(tst_name, before, after, expected); + ret = test_assert_counters_sk(tst_name, before, after, expected); if (ret) goto out; - ret = test_tcp_ao_key_counters_cmp(tst_name, before, after, - expected, -1, -1); + ret = test_assert_counters_key(tst_name, &before->ao, &after->ao, + expected, -1, -1); out: - test_tcp_ao_counters_free(before); - test_tcp_ao_counters_free(after); + test_tcp_counters_free(before); + test_tcp_counters_free(after); return ret; } diff --git a/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c b/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c index 24380c68fec6..27403f875054 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c +++ b/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c @@ -427,11 +427,8 @@ static void dump_trace_event(struct expected_trace_point *e) test_print("trace event filter %s [%s:%d => %s:%d, L3index %d, flags: %s%s%s%s%s, keyid: %d, rnext: %d, maclen: %d, sne: %d] = %zu", trace_event_names[e->type], src, e->src_port, dst, e->dst_port, e->L3index, - (e->fin > 0) ? "F" : (e->fin == 0) ? "!F" : "", - (e->syn > 0) ? "S" : (e->syn == 0) ? "!S" : "", - (e->rst > 0) ? "R" : (e->rst == 0) ? "!R" : "", - (e->psh > 0) ? "P" : (e->psh == 0) ? "!P" : "", - (e->ack > 0) ? "." : (e->ack == 0) ? "!." : "", + e->fin ? "F" : "", e->syn ? "S" : "", e->rst ? "R" : "", + e->psh ? "P" : "", e->ack ? "." : "", e->keyid, e->rnext, e->maclen, e->sne, e->matched); } diff --git a/tools/testing/selftests/net/tcp_ao/lib/setup.c b/tools/testing/selftests/net/tcp_ao/lib/setup.c index a27cc03c9fbd..49aec2922a31 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/setup.c +++ b/tools/testing/selftests/net/tcp_ao/lib/setup.c @@ -9,7 +9,7 @@ * Can't be included in the header: it defines static variables which * will be unique to every object. Let's include it only once here. */ -#include "../../../kselftest.h" +#include "kselftest.h" /* Prevent overriding of one thread's output by another */ static pthread_mutex_t ksft_print_lock = PTHREAD_MUTEX_INITIALIZER; diff --git a/tools/testing/selftests/net/tcp_ao/lib/sock.c b/tools/testing/selftests/net/tcp_ao/lib/sock.c index 0ffda966c677..ef8e9031d47a 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/sock.c +++ b/tools/testing/selftests/net/tcp_ao/lib/sock.c @@ -34,10 +34,8 @@ int __test_listen_socket(int backlog, void *addr, size_t addr_sz) return sk; } -int test_wait_fd(int sk, time_t sec, bool write) +static int __test_wait_fd(int sk, struct timeval *tv, bool write) { - struct timeval tv = { .tv_sec = sec }; - struct timeval *ptv = NULL; fd_set fds, efds; int ret; socklen_t slen = sizeof(ret); @@ -47,14 +45,11 @@ int test_wait_fd(int sk, time_t sec, bool write) FD_ZERO(&efds); FD_SET(sk, &efds); - if (sec) - ptv = &tv; - errno = 0; if (write) - ret = select(sk + 1, NULL, &fds, &efds, ptv); + ret = select(sk + 1, NULL, &fds, &efds, tv); else - ret = select(sk + 1, &fds, NULL, &efds, ptv); + ret = select(sk + 1, &fds, NULL, &efds, tv); if (ret < 0) return -errno; if (ret == 0) { @@ -69,8 +64,54 @@ int test_wait_fd(int sk, time_t sec, bool write) return 0; } +int test_wait_fd(int sk, time_t sec, bool write) +{ + struct timeval tv = { .tv_sec = sec, }; + + return __test_wait_fd(sk, sec ? &tv : NULL, write); +} + +static bool __skpair_poll_should_stop(int sk, struct tcp_counters *c, + test_cnt condition) +{ + struct tcp_counters c2; + test_cnt diff; + + if (test_get_tcp_counters(sk, &c2)) + test_error("test_get_tcp_counters()"); + + diff = test_cmp_counters(c, &c2); + test_tcp_counters_free(&c2); + return (diff & condition) == condition; +} + +/* How often wake up and check netns counters & paired (*err) */ +#define POLL_USEC 150 +static int __test_skpair_poll(int sk, bool write, uint64_t timeout, + struct tcp_counters *c, test_cnt cond, + volatile int *err) +{ + uint64_t t; + + for (t = 0; t <= timeout * 1000000; t += POLL_USEC) { + struct timeval tv = { .tv_usec = POLL_USEC, }; + int ret; + + ret = __test_wait_fd(sk, &tv, write); + if (ret != -ETIMEDOUT) + return ret; + if (c && cond && __skpair_poll_should_stop(sk, c, cond)) + break; + if (err && *err) + return *err; + } + if (err) + *err = -ETIMEDOUT; + return -ETIMEDOUT; +} + int __test_connect_socket(int sk, const char *device, - void *addr, size_t addr_sz, time_t timeout) + void *addr, size_t addr_sz, bool async) { long flags; int err; @@ -82,15 +123,6 @@ int __test_connect_socket(int sk, const char *device, test_error("setsockopt(SO_BINDTODEVICE, %s)", device); } - if (!timeout) { - err = connect(sk, addr, addr_sz); - if (err) { - err = -errno; - goto out; - } - return 0; - } - flags = fcntl(sk, F_GETFL); if ((flags < 0) || (fcntl(sk, F_SETFL, flags | O_NONBLOCK) < 0)) test_error("fcntl()"); @@ -100,9 +132,9 @@ int __test_connect_socket(int sk, const char *device, err = -errno; goto out; } - if (timeout < 0) + if (async) return sk; - err = test_wait_fd(sk, timeout, 1); + err = test_wait_fd(sk, TEST_TIMEOUT_SEC, 1); if (err) goto out; } @@ -113,6 +145,45 @@ out: return err; } +int test_skpair_wait_poll(int sk, bool write, + test_cnt cond, volatile int *err) +{ + struct tcp_counters c; + int ret; + + *err = 0; + if (test_get_tcp_counters(sk, &c)) + test_error("test_get_tcp_counters()"); + synchronize_threads(); /* 1: init skpair & read nscounters */ + + ret = __test_skpair_poll(sk, write, TEST_TIMEOUT_SEC, &c, cond, err); + test_tcp_counters_free(&c); + return ret; +} + +int _test_skpair_connect_poll(int sk, const char *device, + void *addr, size_t addr_sz, + test_cnt condition, volatile int *err) +{ + struct tcp_counters c; + int ret; + + *err = 0; + if (test_get_tcp_counters(sk, &c)) + test_error("test_get_tcp_counters()"); + synchronize_threads(); /* 1: init skpair & read nscounters */ + ret = __test_connect_socket(sk, device, addr, addr_sz, true); + if (ret < 0) { + test_tcp_counters_free(&c); + return (*err = ret); + } + ret = __test_skpair_poll(sk, 1, TEST_TIMEOUT_SEC, &c, condition, err); + if (ret < 0) + close(sk); + test_tcp_counters_free(&c); + return ret; +} + int __test_set_md5(int sk, void *addr, size_t addr_sz, uint8_t prefix, int vrf, const char *password) { @@ -333,12 +404,12 @@ do { \ return 0; } -int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out) +int test_get_tcp_counters(int sk, struct tcp_counters *out) { struct tcp_ao_getsockopt *key_dump; socklen_t key_dump_sz = sizeof(*key_dump); struct tcp_ao_info_opt info = {}; - bool c1, c2, c3, c4, c5; + bool c1, c2, c3, c4, c5, c6, c7, c8; struct netstat *ns; int err, nr_keys; @@ -346,25 +417,30 @@ int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out) /* per-netns */ ns = netstat_read(); - out->netns_ao_good = netstat_get(ns, "TCPAOGood", &c1); - out->netns_ao_bad = netstat_get(ns, "TCPAOBad", &c2); - out->netns_ao_key_not_found = netstat_get(ns, "TCPAOKeyNotFound", &c3); - out->netns_ao_required = netstat_get(ns, "TCPAORequired", &c4); - out->netns_ao_dropped_icmp = netstat_get(ns, "TCPAODroppedIcmps", &c5); + out->ao.netns_ao_good = netstat_get(ns, "TCPAOGood", &c1); + out->ao.netns_ao_bad = netstat_get(ns, "TCPAOBad", &c2); + out->ao.netns_ao_key_not_found = netstat_get(ns, "TCPAOKeyNotFound", &c3); + out->ao.netns_ao_required = netstat_get(ns, "TCPAORequired", &c4); + out->ao.netns_ao_dropped_icmp = netstat_get(ns, "TCPAODroppedIcmps", &c5); + out->netns_md5_notfound = netstat_get(ns, "TCPMD5NotFound", &c6); + out->netns_md5_unexpected = netstat_get(ns, "TCPMD5Unexpected", &c7); + out->netns_md5_failure = netstat_get(ns, "TCPMD5Failure", &c8); netstat_free(ns); - if (c1 || c2 || c3 || c4 || c5) + if (c1 || c2 || c3 || c4 || c5 || c6 || c7 || c8) return -EOPNOTSUPP; err = test_get_ao_info(sk, &info); + if (err == -ENOENT) + return 0; if (err) return err; /* per-socket */ - out->ao_info_pkt_good = info.pkt_good; - out->ao_info_pkt_bad = info.pkt_bad; - out->ao_info_pkt_key_not_found = info.pkt_key_not_found; - out->ao_info_pkt_ao_required = info.pkt_ao_required; - out->ao_info_pkt_dropped_icmp = info.pkt_dropped_icmp; + out->ao.ao_info_pkt_good = info.pkt_good; + out->ao.ao_info_pkt_bad = info.pkt_bad; + out->ao.ao_info_pkt_key_not_found = info.pkt_key_not_found; + out->ao.ao_info_pkt_ao_required = info.pkt_ao_required; + out->ao.ao_info_pkt_dropped_icmp = info.pkt_dropped_icmp; /* per-key */ nr_keys = test_get_ao_keys_nr(sk); @@ -372,7 +448,7 @@ int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out) return nr_keys; if (nr_keys == 0) test_error("test_get_ao_keys_nr() == 0"); - out->nr_keys = (size_t)nr_keys; + out->ao.nr_keys = (size_t)nr_keys; key_dump = calloc(nr_keys, key_dump_sz); if (!key_dump) return -errno; @@ -386,72 +462,84 @@ int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out) return -errno; } - out->key_cnts = calloc(nr_keys, sizeof(out->key_cnts[0])); - if (!out->key_cnts) { + out->ao.key_cnts = calloc(nr_keys, sizeof(out->ao.key_cnts[0])); + if (!out->ao.key_cnts) { free(key_dump); return -errno; } while (nr_keys--) { - out->key_cnts[nr_keys].sndid = key_dump[nr_keys].sndid; - out->key_cnts[nr_keys].rcvid = key_dump[nr_keys].rcvid; - out->key_cnts[nr_keys].pkt_good = key_dump[nr_keys].pkt_good; - out->key_cnts[nr_keys].pkt_bad = key_dump[nr_keys].pkt_bad; + out->ao.key_cnts[nr_keys].sndid = key_dump[nr_keys].sndid; + out->ao.key_cnts[nr_keys].rcvid = key_dump[nr_keys].rcvid; + out->ao.key_cnts[nr_keys].pkt_good = key_dump[nr_keys].pkt_good; + out->ao.key_cnts[nr_keys].pkt_bad = key_dump[nr_keys].pkt_bad; } free(key_dump); return 0; } -int __test_tcp_ao_counters_cmp(const char *tst_name, - struct tcp_ao_counters *before, - struct tcp_ao_counters *after, - test_cnt expected) +test_cnt test_cmp_counters(struct tcp_counters *before, + struct tcp_counters *after) +{ +#define __cmp(cnt, e_cnt) \ +do { \ + if (before->cnt > after->cnt) \ + test_error("counter " __stringify(cnt) " decreased"); \ + if (before->cnt != after->cnt) \ + ret |= e_cnt; \ +} while (0) + + test_cnt ret = 0; + size_t i; + + if (before->ao.nr_keys != after->ao.nr_keys) + test_error("the number of keys has changed"); + + _for_each_counter(__cmp); + + i = before->ao.nr_keys; + while (i--) { + __cmp(ao.key_cnts[i].pkt_good, TEST_CNT_KEY_GOOD); + __cmp(ao.key_cnts[i].pkt_bad, TEST_CNT_KEY_BAD); + } +#undef __cmp + return ret; +} + +int test_assert_counters_sk(const char *tst_name, + struct tcp_counters *before, + struct tcp_counters *after, + test_cnt expected) { -#define __cmp_ao(cnt, expecting_inc) \ +#define __cmp_ao(cnt, e_cnt) \ do { \ if (before->cnt > after->cnt) { \ test_fail("%s: Decreased counter " __stringify(cnt) " %" PRIu64 " > %" PRIu64, \ - tst_name ?: "", before->cnt, after->cnt); \ + tst_name ?: "", before->cnt, after->cnt); \ return -1; \ } \ - if ((before->cnt != after->cnt) != (expecting_inc)) { \ + if ((before->cnt != after->cnt) != !!(expected & e_cnt)) { \ test_fail("%s: Counter " __stringify(cnt) " was %sexpected to increase %" PRIu64 " => %" PRIu64, \ - tst_name ?: "", (expecting_inc) ? "" : "not ", \ + tst_name ?: "", (expected & e_cnt) ? "" : "not ", \ before->cnt, after->cnt); \ return -1; \ } \ -} while(0) +} while (0) errno = 0; - /* per-netns */ - __cmp_ao(netns_ao_good, !!(expected & TEST_CNT_NS_GOOD)); - __cmp_ao(netns_ao_bad, !!(expected & TEST_CNT_NS_BAD)); - __cmp_ao(netns_ao_key_not_found, - !!(expected & TEST_CNT_NS_KEY_NOT_FOUND)); - __cmp_ao(netns_ao_required, !!(expected & TEST_CNT_NS_AO_REQUIRED)); - __cmp_ao(netns_ao_dropped_icmp, - !!(expected & TEST_CNT_NS_DROPPED_ICMP)); - /* per-socket */ - __cmp_ao(ao_info_pkt_good, !!(expected & TEST_CNT_SOCK_GOOD)); - __cmp_ao(ao_info_pkt_bad, !!(expected & TEST_CNT_SOCK_BAD)); - __cmp_ao(ao_info_pkt_key_not_found, - !!(expected & TEST_CNT_SOCK_KEY_NOT_FOUND)); - __cmp_ao(ao_info_pkt_ao_required, !!(expected & TEST_CNT_SOCK_AO_REQUIRED)); - __cmp_ao(ao_info_pkt_dropped_icmp, - !!(expected & TEST_CNT_SOCK_DROPPED_ICMP)); + _for_each_counter(__cmp_ao); return 0; #undef __cmp_ao } -int test_tcp_ao_key_counters_cmp(const char *tst_name, - struct tcp_ao_counters *before, - struct tcp_ao_counters *after, - test_cnt expected, - int sndid, int rcvid) +int test_assert_counters_key(const char *tst_name, + struct tcp_ao_counters *before, + struct tcp_ao_counters *after, + test_cnt expected, int sndid, int rcvid) { size_t i; -#define __cmp_ao(i, cnt, expecting_inc) \ +#define __cmp_ao(i, cnt, e_cnt) \ do { \ if (before->key_cnts[i].cnt > after->key_cnts[i].cnt) { \ test_fail("%s: Decreased counter " __stringify(cnt) " %" PRIu64 " > %" PRIu64 " for key %u:%u", \ @@ -461,16 +549,16 @@ do { \ before->key_cnts[i].rcvid); \ return -1; \ } \ - if ((before->key_cnts[i].cnt != after->key_cnts[i].cnt) != (expecting_inc)) { \ + if ((before->key_cnts[i].cnt != after->key_cnts[i].cnt) != !!(expected & e_cnt)) { \ test_fail("%s: Counter " __stringify(cnt) " was %sexpected to increase %" PRIu64 " => %" PRIu64 " for key %u:%u", \ - tst_name ?: "", (expecting_inc) ? "" : "not ",\ + tst_name ?: "", (expected & e_cnt) ? "" : "not ",\ before->key_cnts[i].cnt, \ after->key_cnts[i].cnt, \ before->key_cnts[i].sndid, \ before->key_cnts[i].rcvid); \ return -1; \ } \ -} while(0) +} while (0) if (before->nr_keys != after->nr_keys) { test_fail("%s: Keys changed on the socket %zu != %zu", @@ -485,20 +573,22 @@ do { \ continue; if (rcvid >= 0 && before->key_cnts[i].rcvid != rcvid) continue; - __cmp_ao(i, pkt_good, !!(expected & TEST_CNT_KEY_GOOD)); - __cmp_ao(i, pkt_bad, !!(expected & TEST_CNT_KEY_BAD)); + __cmp_ao(i, pkt_good, TEST_CNT_KEY_GOOD); + __cmp_ao(i, pkt_bad, TEST_CNT_KEY_BAD); } return 0; #undef __cmp_ao } -void test_tcp_ao_counters_free(struct tcp_ao_counters *cnts) +void test_tcp_counters_free(struct tcp_counters *cnts) { - free(cnts->key_cnts); + free(cnts->ao.key_cnts); } #define TEST_BUF_SIZE 4096 -ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec) +static ssize_t _test_server_run(int sk, ssize_t quota, struct tcp_counters *c, + test_cnt cond, volatile int *err, + time_t timeout_sec) { ssize_t total = 0; @@ -507,7 +597,7 @@ ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec) ssize_t bytes, sent; int ret; - ret = test_wait_fd(sk, timeout_sec, 0); + ret = __test_skpair_poll(sk, 0, timeout_sec, c, cond, err); if (ret) return ret; @@ -518,7 +608,7 @@ ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec) if (bytes == 0) break; - ret = test_wait_fd(sk, timeout_sec, 1); + ret = __test_skpair_poll(sk, 1, timeout_sec, c, cond, err); if (ret) return ret; @@ -533,13 +623,41 @@ ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec) return total; } -ssize_t test_client_loop(int sk, char *buf, size_t buf_sz, - const size_t msg_len, time_t timeout_sec) +ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec) +{ + return _test_server_run(sk, quota, NULL, 0, NULL, + timeout_sec ?: TEST_TIMEOUT_SEC); +} + +int test_skpair_server(int sk, ssize_t quota, test_cnt cond, volatile int *err) +{ + struct tcp_counters c; + ssize_t ret; + + *err = 0; + if (test_get_tcp_counters(sk, &c)) + test_error("test_get_tcp_counters()"); + synchronize_threads(); /* 1: init skpair & read nscounters */ + + ret = _test_server_run(sk, quota, &c, cond, err, TEST_TIMEOUT_SEC); + test_tcp_counters_free(&c); + return ret; +} + +static ssize_t test_client_loop(int sk, size_t buf_sz, const size_t msg_len, + struct tcp_counters *c, test_cnt cond, + volatile int *err) { char msg[msg_len]; int nodelay = 1; + char *buf; size_t i; + buf = alloca(buf_sz); + if (!buf) + return -ENOMEM; + randomize_buffer(buf, buf_sz); + if (setsockopt(sk, IPPROTO_TCP, TCP_NODELAY, &nodelay, sizeof(nodelay))) test_error("setsockopt(TCP_NODELAY)"); @@ -547,7 +665,7 @@ ssize_t test_client_loop(int sk, char *buf, size_t buf_sz, size_t sent, bytes = min(msg_len, buf_sz - i); int ret; - ret = test_wait_fd(sk, timeout_sec, 1); + ret = __test_skpair_poll(sk, 1, TEST_TIMEOUT_SEC, c, cond, err); if (ret) return ret; @@ -561,7 +679,8 @@ ssize_t test_client_loop(int sk, char *buf, size_t buf_sz, do { ssize_t got; - ret = test_wait_fd(sk, timeout_sec, 0); + ret = __test_skpair_poll(sk, 0, TEST_TIMEOUT_SEC, + c, cond, err); if (ret) return ret; @@ -580,15 +699,31 @@ ssize_t test_client_loop(int sk, char *buf, size_t buf_sz, return i; } -int test_client_verify(int sk, const size_t msg_len, const size_t nr, - time_t timeout_sec) +int test_client_verify(int sk, const size_t msg_len, const size_t nr) { size_t buf_sz = msg_len * nr; - char *buf = alloca(buf_sz); ssize_t ret; - randomize_buffer(buf, buf_sz); - ret = test_client_loop(sk, buf, buf_sz, msg_len, timeout_sec); + ret = test_client_loop(sk, buf_sz, msg_len, NULL, 0, NULL); + if (ret < 0) + return (int)ret; + return ret != buf_sz ? -1 : 0; +} + +int test_skpair_client(int sk, const size_t msg_len, const size_t nr, + test_cnt cond, volatile int *err) +{ + struct tcp_counters c; + size_t buf_sz = msg_len * nr; + ssize_t ret; + + *err = 0; + if (test_get_tcp_counters(sk, &c)) + test_error("test_get_tcp_counters()"); + synchronize_threads(); /* 1: init skpair & read nscounters */ + + ret = test_client_loop(sk, buf_sz, msg_len, &c, cond, err); + test_tcp_counters_free(&c); if (ret < 0) return (int)ret; return ret != buf_sz ? -1 : 0; diff --git a/tools/testing/selftests/net/tcp_ao/restore.c b/tools/testing/selftests/net/tcp_ao/restore.c index ecc6f1e3a414..9a059b6c4523 100644 --- a/tools/testing/selftests/net/tcp_ao/restore.c +++ b/tools/testing/selftests/net/tcp_ao/restore.c @@ -16,11 +16,11 @@ const size_t quota = nr_packets * msg_len; static void try_server_run(const char *tst_name, unsigned int port, fault_t inj, test_cnt cnt_expected) { + test_cnt poll_cnt = (cnt_expected == TEST_CNT_GOOD) ? 0 : cnt_expected; const char *cnt_name = "TCPAOGood"; - struct tcp_ao_counters ao1, ao2; + struct tcp_counters cnt1, cnt2; uint64_t before_cnt, after_cnt; - int sk, lsk; - time_t timeout; + int sk, lsk, dummy; ssize_t bytes; if (fault(TIMEOUT)) @@ -48,11 +48,10 @@ static void try_server_run(const char *tst_name, unsigned int port, } before_cnt = netstat_get_one(cnt_name, NULL); - if (test_get_tcp_ao_counters(sk, &ao1)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &cnt1)) + test_error("test_get_tcp_counters()"); - timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; - bytes = test_server_run(sk, quota, timeout); + bytes = test_skpair_server(sk, quota, poll_cnt, &dummy); if (fault(TIMEOUT)) { if (bytes > 0) test_fail("%s: server served: %zd", tst_name, bytes); @@ -65,17 +64,17 @@ static void try_server_run(const char *tst_name, unsigned int port, test_ok("%s: server alive", tst_name); } synchronize_threads(); /* 3: counters checks */ - if (test_get_tcp_ao_counters(sk, &ao2)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &cnt2)) + test_error("test_get_tcp_counters()"); after_cnt = netstat_get_one(cnt_name, NULL); - test_tcp_ao_counters_cmp(tst_name, &ao1, &ao2, cnt_expected); + test_assert_counters(tst_name, &cnt1, &cnt2, cnt_expected); if (after_cnt <= before_cnt) { - test_fail("%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64, - tst_name, cnt_name, after_cnt, before_cnt); + test_fail("%s(server): %s counter did not increase: %" PRIu64 " <= %" PRIu64, + tst_name, cnt_name, after_cnt, before_cnt); } else { - test_ok("%s: counter %s increased %" PRIu64 " => %" PRIu64, + test_ok("%s(server): counter %s increased %" PRIu64 " => %" PRIu64, tst_name, cnt_name, before_cnt, after_cnt); } @@ -92,16 +91,16 @@ static void *server_fn(void *arg) { unsigned int port = test_server_port; - try_server_run("TCP-AO migrate to another socket", port++, + try_server_run("TCP-AO migrate to another socket (server)", port++, 0, TEST_CNT_GOOD); - try_server_run("TCP-AO with wrong send ISN", port++, + try_server_run("TCP-AO with wrong send ISN (server)", port++, FAULT_TIMEOUT, TEST_CNT_BAD); - try_server_run("TCP-AO with wrong receive ISN", port++, + try_server_run("TCP-AO with wrong receive ISN (server)", port++, FAULT_TIMEOUT, TEST_CNT_BAD); - try_server_run("TCP-AO with wrong send SEQ ext number", port++, + try_server_run("TCP-AO with wrong send SEQ ext number (server)", port++, FAULT_TIMEOUT, TEST_CNT_BAD); - try_server_run("TCP-AO with wrong receive SEQ ext number", port++, - FAULT_TIMEOUT, TEST_CNT_NS_BAD | TEST_CNT_GOOD); + try_server_run("TCP-AO with wrong receive SEQ ext number (server)", + port++, FAULT_TIMEOUT, TEST_CNT_NS_BAD | TEST_CNT_GOOD); synchronize_threads(); /* don't race to exit: client exits */ return NULL; @@ -125,7 +124,7 @@ static void test_get_sk_checkpoint(unsigned int server_port, sockaddr_af *saddr, test_error("failed to connect()"); synchronize_threads(); /* 2: accepted => send data */ - if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) + if (test_client_verify(sk, msg_len, nr_packets)) test_fail("pre-migrate verify failed"); test_enable_repair(sk); @@ -139,11 +138,11 @@ static void test_sk_restore(const char *tst_name, unsigned int server_port, struct tcp_ao_repair *ao_img, fault_t inj, test_cnt cnt_expected) { + test_cnt poll_cnt = (cnt_expected == TEST_CNT_GOOD) ? 0 : cnt_expected; const char *cnt_name = "TCPAOGood"; - struct tcp_ao_counters ao1, ao2; + struct tcp_counters cnt1, cnt2; uint64_t before_cnt, after_cnt; - time_t timeout; - int sk; + int sk, dummy; if (fault(TIMEOUT)) cnt_name = "TCPAOBad"; @@ -159,30 +158,30 @@ static void test_sk_restore(const char *tst_name, unsigned int server_port, test_error("setsockopt(TCP_AO_ADD_KEY)"); test_ao_restore(sk, ao_img); - if (test_get_tcp_ao_counters(sk, &ao1)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &cnt1)) + test_error("test_get_tcp_counters()"); test_disable_repair(sk); test_sock_state_free(img); - timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; - if (test_client_verify(sk, msg_len, nr_packets, timeout)) { + if (test_skpair_client(sk, msg_len, nr_packets, poll_cnt, &dummy)) { if (fault(TIMEOUT)) test_ok("%s: post-migrate connection is broken", tst_name); else test_fail("%s: post-migrate connection is working", tst_name); } else { if (fault(TIMEOUT)) - test_fail("%s: post-migrate connection still working", tst_name); + test_fail("%s: post-migrate connection is working", tst_name); else test_ok("%s: post-migrate connection is alive", tst_name); } + synchronize_threads(); /* 3: counters checks */ - if (test_get_tcp_ao_counters(sk, &ao2)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &cnt2)) + test_error("test_get_tcp_counters()"); after_cnt = netstat_get_one(cnt_name, NULL); - test_tcp_ao_counters_cmp(tst_name, &ao1, &ao2, cnt_expected); + test_assert_counters(tst_name, &cnt1, &cnt2, cnt_expected); if (after_cnt <= before_cnt) { test_fail("%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64, @@ -203,7 +202,7 @@ static void *client_fn(void *arg) sockaddr_af saddr; test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); - test_sk_restore("TCP-AO migrate to another socket", port++, + test_sk_restore("TCP-AO migrate to another socket (client)", port++, &saddr, &tcp_img, &ao_img, 0, TEST_CNT_GOOD); test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); @@ -212,7 +211,7 @@ static void *client_fn(void *arg) -1, port, 0, -1, -1, -1, -1, -1, 100, 100, -1); trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_dest, this_ip_addr, port, -1, 0, -1, -1, -1, -1, -1, 100, 100, -1); - test_sk_restore("TCP-AO with wrong send ISN", port++, + test_sk_restore("TCP-AO with wrong send ISN (client)", port++, &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_BAD); test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); @@ -221,7 +220,7 @@ static void *client_fn(void *arg) -1, port, 0, -1, -1, -1, -1, -1, 100, 100, -1); trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_dest, this_ip_addr, port, -1, 0, -1, -1, -1, -1, -1, 100, 100, -1); - test_sk_restore("TCP-AO with wrong receive ISN", port++, + test_sk_restore("TCP-AO with wrong receive ISN (client)", port++, &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_BAD); test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); @@ -229,8 +228,8 @@ static void *client_fn(void *arg) trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_addr, this_ip_dest, -1, port, 0, -1, -1, -1, -1, -1, 100, 100, -1); /* not expecting server => client mismatches as only snd sne is broken */ - test_sk_restore("TCP-AO with wrong send SEQ ext number", port++, - &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, + test_sk_restore("TCP-AO with wrong send SEQ ext number (client)", + port++, &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_NS_BAD | TEST_CNT_GOOD); test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); @@ -238,8 +237,8 @@ static void *client_fn(void *arg) /* not expecting client => server mismatches as only rcv sne is broken */ trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_dest, this_ip_addr, port, -1, 0, -1, -1, -1, -1, -1, 100, 100, -1); - test_sk_restore("TCP-AO with wrong receive SEQ ext number", port++, - &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, + test_sk_restore("TCP-AO with wrong receive SEQ ext number (client)", + port++, &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_NS_GOOD | TEST_CNT_BAD); return NULL; diff --git a/tools/testing/selftests/net/tcp_ao/rst.c b/tools/testing/selftests/net/tcp_ao/rst.c index 6364facaa63e..883cddf377cf 100644 --- a/tools/testing/selftests/net/tcp_ao/rst.c +++ b/tools/testing/selftests/net/tcp_ao/rst.c @@ -84,15 +84,15 @@ static void close_forced(int sk) static void test_server_active_rst(unsigned int port) { - struct tcp_ao_counters cnt1, cnt2; + struct tcp_counters cnt1, cnt2; ssize_t bytes; int sk, lsk; lsk = test_listen_socket(this_ip_addr, port, backlog); if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) test_error("setsockopt(TCP_AO_ADD_KEY)"); - if (test_get_tcp_ao_counters(lsk, &cnt1)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(lsk, &cnt1)) + test_error("test_get_tcp_counters()"); synchronize_threads(); /* 1: MKT added */ if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0)) @@ -103,8 +103,8 @@ static void test_server_active_rst(unsigned int port) test_error("accept()"); synchronize_threads(); /* 2: connection accept()ed, another queued */ - if (test_get_tcp_ao_counters(lsk, &cnt2)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(lsk, &cnt2)) + test_error("test_get_tcp_counters()"); synchronize_threads(); /* 3: close listen socket */ close(lsk); @@ -120,7 +120,7 @@ static void test_server_active_rst(unsigned int port) synchronize_threads(); /* 5: closed active sk */ synchronize_threads(); /* 6: counters checks */ - if (test_tcp_ao_counters_cmp("active RST server", &cnt1, &cnt2, TEST_CNT_GOOD)) + if (test_assert_counters("active RST server", &cnt1, &cnt2, TEST_CNT_GOOD)) test_fail("MKT counters (server) have not only good packets"); else test_ok("MKT counters are good on server"); @@ -128,7 +128,7 @@ static void test_server_active_rst(unsigned int port) static void test_server_passive_rst(unsigned int port) { - struct tcp_ao_counters ao1, ao2; + struct tcp_counters cnt1, cnt2; int sk, lsk; ssize_t bytes; @@ -147,8 +147,8 @@ static void test_server_passive_rst(unsigned int port) synchronize_threads(); /* 2: accepted => send data */ close(lsk); - if (test_get_tcp_ao_counters(sk, &ao1)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &cnt1)) + test_error("test_get_tcp_counters()"); bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC); if (bytes != quota) { @@ -160,12 +160,12 @@ static void test_server_passive_rst(unsigned int port) synchronize_threads(); /* 3: checkpoint the client */ synchronize_threads(); /* 4: close the server, creating twsk */ - if (test_get_tcp_ao_counters(sk, &ao2)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &cnt2)) + test_error("test_get_tcp_counters()"); close(sk); synchronize_threads(); /* 5: restore the socket, send more data */ - test_tcp_ao_counters_cmp("passive RST server", &ao1, &ao2, TEST_CNT_GOOD); + test_assert_counters("passive RST server", &cnt1, &cnt2, TEST_CNT_GOOD); synchronize_threads(); /* 6: server exits */ } @@ -271,8 +271,7 @@ static void test_client_active_rst(unsigned int port) synchronize_threads(); /* 1: MKT added */ for (i = 0; i < last; i++) { - err = _test_connect_socket(sk[i], this_ip_dest, port, - (i == 0) ? TEST_TIMEOUT_SEC : -1); + err = _test_connect_socket(sk[i], this_ip_dest, port, i != 0); if (err < 0) test_error("failed to connect()"); } @@ -283,12 +282,12 @@ static void test_client_active_rst(unsigned int port) test_error("test_wait_fds(): %d", err); /* async connect() with third sk to get into request_sock_queue */ - err = _test_connect_socket(sk[last], this_ip_dest, port, -1); + err = _test_connect_socket(sk[last], this_ip_dest, port, 1); if (err < 0) test_error("failed to connect()"); synchronize_threads(); /* 3: close listen socket */ - if (test_client_verify(sk[0], packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC)) + if (test_client_verify(sk[0], packet_sz, quota / packet_sz)) test_fail("Failed to send data on connected socket"); else test_ok("Verified established tcp connection"); @@ -323,7 +322,7 @@ static void test_client_active_rst(unsigned int port) static void test_client_passive_rst(unsigned int port) { - struct tcp_ao_counters ao1, ao2; + struct tcp_counters cnt1, cnt2; struct tcp_ao_repair ao_img; struct tcp_sock_state img; sockaddr_af saddr; @@ -341,7 +340,7 @@ static void test_client_passive_rst(unsigned int port) test_error("failed to connect()"); synchronize_threads(); /* 2: accepted => send data */ - if (test_client_verify(sk, packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC)) + if (test_client_verify(sk, packet_sz, quota / packet_sz)) test_fail("Failed to send data on connected socket"); else test_ok("Verified established tcp connection"); @@ -397,8 +396,8 @@ static void test_client_passive_rst(unsigned int port) test_error("setsockopt(TCP_AO_ADD_KEY)"); test_ao_restore(sk, &ao_img); - if (test_get_tcp_ao_counters(sk, &ao1)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &cnt1)) + test_error("test_get_tcp_counters()"); test_disable_repair(sk); test_sock_state_free(&img); @@ -417,7 +416,7 @@ static void test_client_passive_rst(unsigned int port) * IP 10.0.254.1.7011 > 10.0.1.1.59772: Flags [R], seq 3215596252, win 0, * options [tcp-ao keyid 100 rnextkeyid 100 mac 0x0bcfbbf497bce844312304b2], length 0 */ - err = test_client_verify(sk, packet_sz, quota / packet_sz, 2 * TEST_TIMEOUT_SEC); + err = test_client_verify(sk, packet_sz, quota / packet_sz); /* Make sure that the connection was reset, not timeouted */ if (err && err == -ECONNRESET) test_ok("client sock was passively reset post-seq-adjust"); @@ -426,12 +425,12 @@ static void test_client_passive_rst(unsigned int port) else test_fail("client sock is yet connected post-seq-adjust"); - if (test_get_tcp_ao_counters(sk, &ao2)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &cnt2)) + test_error("test_get_tcp_counters()"); synchronize_threads(); /* 6: server exits */ close(sk); - test_tcp_ao_counters_cmp("client passive RST", &ao1, &ao2, TEST_CNT_GOOD); + test_assert_counters("client passive RST", &cnt1, &cnt2, TEST_CNT_GOOD); } static void *client_fn(void *arg) diff --git a/tools/testing/selftests/net/tcp_ao/self-connect.c b/tools/testing/selftests/net/tcp_ao/self-connect.c index 3ecd2b58de6a..2c73bea698a6 100644 --- a/tools/testing/selftests/net/tcp_ao/self-connect.c +++ b/tools/testing/selftests/net/tcp_ao/self-connect.c @@ -16,6 +16,9 @@ static void __setup_lo_intf(const char *lo_intf, if (link_set_up(lo_intf)) test_error("Failed to bring %s up", lo_intf); + + if (ip_route_add(lo_intf, TEST_FAMILY, local_addr, local_addr)) + test_error("Failed to add a local route %s", lo_intf); } static void setup_lo_intf(const char *lo_intf) @@ -30,7 +33,7 @@ static void setup_lo_intf(const char *lo_intf) static void tcp_self_connect(const char *tst, unsigned int port, bool different_keyids, bool check_restore) { - struct tcp_ao_counters before_ao, after_ao; + struct tcp_counters before, after; uint64_t before_aogood, after_aogood; struct netstat *ns_before, *ns_after; const size_t nr_packets = 20; @@ -60,17 +63,17 @@ static void tcp_self_connect(const char *tst, unsigned int port, ns_before = netstat_read(); before_aogood = netstat_get(ns_before, "TCPAOGood", NULL); - if (test_get_tcp_ao_counters(sk, &before_ao)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &before)) + test_error("test_get_tcp_counters()"); if (__test_connect_socket(sk, "lo", (struct sockaddr *)&addr, - sizeof(addr), TEST_TIMEOUT_SEC) < 0) { + sizeof(addr), 0) < 0) { ns_after = netstat_read(); netstat_print_diff(ns_before, ns_after); test_error("failed to connect()"); } - if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) { + if (test_client_verify(sk, 100, nr_packets)) { test_fail("%s: tcp connection verify failed", tst); close(sk); return; @@ -78,8 +81,8 @@ static void tcp_self_connect(const char *tst, unsigned int port, ns_after = netstat_read(); after_aogood = netstat_get(ns_after, "TCPAOGood", NULL); - if (test_get_tcp_ao_counters(sk, &after_ao)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &after)) + test_error("test_get_tcp_counters()"); if (!check_restore) { /* to debug: netstat_print_diff(ns_before, ns_after); */ netstat_free(ns_before); @@ -93,7 +96,7 @@ static void tcp_self_connect(const char *tst, unsigned int port, return; } - if (test_tcp_ao_counters_cmp(tst, &before_ao, &after_ao, TEST_CNT_GOOD)) { + if (test_assert_counters(tst, &before, &after, TEST_CNT_GOOD)) { close(sk); return; } @@ -136,7 +139,7 @@ static void tcp_self_connect(const char *tst, unsigned int port, test_ao_restore(sk, &ao_img); test_disable_repair(sk); test_sock_state_free(&img); - if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) { + if (test_client_verify(sk, 100, nr_packets)) { test_fail("%s: tcp connection verify failed", tst); close(sk); return; diff --git a/tools/testing/selftests/net/tcp_ao/seq-ext.c b/tools/testing/selftests/net/tcp_ao/seq-ext.c index 8901a6785dc8..6478da6a71c3 100644 --- a/tools/testing/selftests/net/tcp_ao/seq-ext.c +++ b/tools/testing/selftests/net/tcp_ao/seq-ext.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Check that after SEQ number wrap-around: * 1. SEQ-extension has upper bytes set - * 2. TCP conneciton is alive and no TCPAOBad segments + * 2. TCP connection is alive and no TCPAOBad segments * In order to test (2), the test doesn't just adjust seq number for a queue * on a connected socket, but migrates it to another sk+port number, so * that there won't be any delayed packets that will fail to verify @@ -40,7 +40,7 @@ static void test_adjust_seqs(struct tcp_sock_state *img, static int test_sk_restore(struct tcp_sock_state *img, struct tcp_ao_repair *ao_img, sockaddr_af *saddr, const union tcp_addr daddr, unsigned int dport, - struct tcp_ao_counters *cnt) + struct tcp_counters *cnt) { int sk; @@ -54,8 +54,8 @@ static int test_sk_restore(struct tcp_sock_state *img, test_error("setsockopt(TCP_AO_ADD_KEY)"); test_ao_restore(sk, ao_img); - if (test_get_tcp_ao_counters(sk, cnt)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, cnt)) + test_error("test_get_tcp_counters()"); test_disable_repair(sk); test_sock_state_free(img); @@ -65,7 +65,7 @@ static int test_sk_restore(struct tcp_sock_state *img, static void *server_fn(void *arg) { uint64_t before_good, after_good, after_bad; - struct tcp_ao_counters ao1, ao2; + struct tcp_counters cnt1, cnt2; struct tcp_sock_state img; struct tcp_ao_repair ao_img; sockaddr_af saddr; @@ -114,7 +114,7 @@ static void *server_fn(void *arg) test_adjust_seqs(&img, &ao_img, true); synchronize_threads(); /* 4: dump finished */ sk = test_sk_restore(&img, &ao_img, &saddr, this_ip_dest, - client_new_port, &ao1); + client_new_port, &cnt1); trace_ao_event_sne_expect(TCP_AO_SND_SNE_UPDATE, this_ip_addr, this_ip_dest, test_server_port + 1, client_new_port, 1); @@ -136,11 +136,11 @@ static void *server_fn(void *arg) } synchronize_threads(); /* 6: verify counters after SEQ-number rollover */ - if (test_get_tcp_ao_counters(sk, &ao2)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &cnt2)) + test_error("test_get_tcp_counters()"); after_good = netstat_get_one("TCPAOGood", NULL); - test_tcp_ao_counters_cmp(NULL, &ao1, &ao2, TEST_CNT_GOOD); + test_assert_counters(NULL, &cnt1, &cnt2, TEST_CNT_GOOD); if (after_good <= before_good) { test_fail("TCPAOGood counter did not increase: %" PRIu64 " <= %" PRIu64, @@ -173,7 +173,7 @@ out: static void *client_fn(void *arg) { uint64_t before_good, after_good, after_bad; - struct tcp_ao_counters ao1, ao2; + struct tcp_counters cnt1, cnt2; struct tcp_sock_state img; struct tcp_ao_repair ao_img; sockaddr_af saddr; @@ -191,7 +191,7 @@ static void *client_fn(void *arg) test_error("failed to connect()"); synchronize_threads(); /* 2: accepted => send data */ - if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) { + if (test_client_verify(sk, msg_len, nr_packets)) { test_fail("pre-migrate verify failed"); return NULL; } @@ -213,20 +213,20 @@ static void *client_fn(void *arg) test_adjust_seqs(&img, &ao_img, false); synchronize_threads(); /* 4: dump finished */ sk = test_sk_restore(&img, &ao_img, &saddr, this_ip_dest, - test_server_port + 1, &ao1); + test_server_port + 1, &cnt1); synchronize_threads(); /* 5: verify the connection during SEQ-number rollover */ - if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) + if (test_client_verify(sk, msg_len, nr_packets)) test_fail("post-migrate verify failed"); else test_ok("post-migrate connection alive"); synchronize_threads(); /* 5: verify counters after SEQ-number rollover */ - if (test_get_tcp_ao_counters(sk, &ao2)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &cnt2)) + test_error("test_get_tcp_counters()"); after_good = netstat_get_one("TCPAOGood", NULL); - test_tcp_ao_counters_cmp(NULL, &ao1, &ao2, TEST_CNT_GOOD); + test_assert_counters(NULL, &cnt1, &cnt2, TEST_CNT_GOOD); if (after_good <= before_good) { test_fail("TCPAOGood counter did not increase: %" PRIu64 " <= %" PRIu64, diff --git a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c index f779e5892bc1..a1467b64390a 100644 --- a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c +++ b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c @@ -6,6 +6,7 @@ #define fault(type) (inj == FAULT_ ## type) static const char *md5_password = "Some evil genius, enemy to mankind, must have been the first contriver."; static const char *ao_password = DEFAULT_TEST_PASSWORD; +static volatile int sk_pair; static union tcp_addr client2; static union tcp_addr client3; @@ -41,10 +42,10 @@ static void try_accept(const char *tst_name, unsigned int port, const char *cnt_name, test_cnt cnt_expected, int needs_tcp_md5, fault_t inj) { - struct tcp_ao_counters ao_cnt1, ao_cnt2; + struct tcp_counters cnt1, cnt2; uint64_t before_cnt = 0, after_cnt = 0; /* silence GCC */ - int lsk, err, sk = 0; - time_t timeout; + test_cnt poll_cnt = (cnt_expected == TEST_CNT_GOOD) ? 0 : cnt_expected; + int lsk, err, sk = -1; if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5)) return; @@ -63,22 +64,25 @@ static void try_accept(const char *tst_name, unsigned int port, if (cnt_name) before_cnt = netstat_get_one(cnt_name, NULL); - if (ao_addr && test_get_tcp_ao_counters(lsk, &ao_cnt1)) - test_error("test_get_tcp_ao_counters()"); + if (ao_addr && test_get_tcp_counters(lsk, &cnt1)) + test_error("test_get_tcp_counters()"); synchronize_threads(); /* preparations done */ - timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; - err = test_wait_fd(lsk, timeout, 0); + err = test_skpair_wait_poll(lsk, 0, poll_cnt, &sk_pair); synchronize_threads(); /* connect()/accept() timeouts */ if (err == -ETIMEDOUT) { + sk_pair = err; if (!fault(TIMEOUT)) - test_fail("timed out for accept()"); + test_fail("%s: timed out for accept()", tst_name); + } else if (err == -EKEYREJECTED) { + if (!fault(KEYREJECT)) + test_fail("%s: key was rejected", tst_name); } else if (err < 0) { - test_error("test_wait_fd()"); + test_error("test_skpair_wait_poll()"); } else { if (fault(TIMEOUT)) - test_fail("ready to accept"); + test_fail("%s: ready to accept", tst_name); sk = accept(lsk, NULL, NULL); if (sk < 0) { @@ -89,8 +93,8 @@ static void try_accept(const char *tst_name, unsigned int port, } } - if (ao_addr && test_get_tcp_ao_counters(lsk, &ao_cnt2)) - test_error("test_get_tcp_ao_counters()"); + if (ao_addr && test_get_tcp_counters(lsk, &cnt2)) + test_error("test_get_tcp_counters()"); close(lsk); if (!cnt_name) { @@ -108,11 +112,11 @@ static void try_accept(const char *tst_name, unsigned int port, tst_name, cnt_name, before_cnt, after_cnt); } if (ao_addr) - test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected); + test_assert_counters(tst_name, &cnt1, &cnt2, cnt_expected); out: synchronize_threads(); /* test_kill_sk() */ - if (sk > 0) + if (sk >= 0) test_kill_sk(sk); } @@ -153,78 +157,82 @@ static void *server_fn(void *arg) server_add_routes(); - try_accept("AO server (INADDR_ANY): AO client", port++, NULL, 0, + try_accept("[server] AO server (INADDR_ANY): AO client", port++, NULL, 0, &addr_any, 0, 0, 100, 100, 0, "TCPAOGood", TEST_CNT_GOOD, 0, 0); - try_accept("AO server (INADDR_ANY): MD5 client", port++, NULL, 0, + try_accept("[server] AO server (INADDR_ANY): MD5 client", port++, NULL, 0, &addr_any, 0, 0, 100, 100, 0, "TCPMD5Unexpected", - 0, 1, FAULT_TIMEOUT); - try_accept("AO server (INADDR_ANY): no sign client", port++, NULL, 0, + TEST_CNT_NS_MD5_UNEXPECTED, 1, FAULT_TIMEOUT); + try_accept("[server] AO server (INADDR_ANY): no sign client", port++, NULL, 0, &addr_any, 0, 0, 100, 100, 0, "TCPAORequired", TEST_CNT_AO_REQUIRED, 0, FAULT_TIMEOUT); - try_accept("AO server (AO_REQUIRED): AO client", port++, NULL, 0, + try_accept("[server] AO server (AO_REQUIRED): AO client", port++, NULL, 0, &this_ip_dest, TEST_PREFIX, true, 100, 100, 0, "TCPAOGood", TEST_CNT_GOOD, 0, 0); - try_accept("AO server (AO_REQUIRED): unsigned client", port++, NULL, 0, + try_accept("[server] AO server (AO_REQUIRED): unsigned client", port++, NULL, 0, &this_ip_dest, TEST_PREFIX, true, 100, 100, 0, "TCPAORequired", TEST_CNT_AO_REQUIRED, 0, FAULT_TIMEOUT); - try_accept("MD5 server (INADDR_ANY): AO client", port++, &addr_any, 0, + try_accept("[server] MD5 server (INADDR_ANY): AO client", port++, &addr_any, 0, NULL, 0, 0, 0, 0, 0, "TCPAOKeyNotFound", - 0, 1, FAULT_TIMEOUT); - try_accept("MD5 server (INADDR_ANY): MD5 client", port++, &addr_any, 0, + TEST_CNT_NS_KEY_NOT_FOUND, 1, FAULT_TIMEOUT); + try_accept("[server] MD5 server (INADDR_ANY): MD5 client", port++, &addr_any, 0, NULL, 0, 0, 0, 0, 0, NULL, 0, 1, 0); - try_accept("MD5 server (INADDR_ANY): no sign client", port++, &addr_any, + try_accept("[server] MD5 server (INADDR_ANY): no sign client", port++, &addr_any, 0, NULL, 0, 0, 0, 0, 0, "TCPMD5NotFound", - 0, 1, FAULT_TIMEOUT); + TEST_CNT_NS_MD5_NOT_FOUND, 1, FAULT_TIMEOUT); - try_accept("no sign server: AO client", port++, NULL, 0, + try_accept("[server] no sign server: AO client", port++, NULL, 0, NULL, 0, 0, 0, 0, 0, "TCPAOKeyNotFound", - TEST_CNT_AO_KEY_NOT_FOUND, 0, FAULT_TIMEOUT); - try_accept("no sign server: MD5 client", port++, NULL, 0, + TEST_CNT_NS_KEY_NOT_FOUND, 0, FAULT_TIMEOUT); + try_accept("[server] no sign server: MD5 client", port++, NULL, 0, NULL, 0, 0, 0, 0, 0, "TCPMD5Unexpected", - 0, 1, FAULT_TIMEOUT); - try_accept("no sign server: no sign client", port++, NULL, 0, + TEST_CNT_NS_MD5_UNEXPECTED, 1, FAULT_TIMEOUT); + try_accept("[server] no sign server: no sign client", port++, NULL, 0, NULL, 0, 0, 0, 0, 0, "CurrEstab", 0, 0, 0); - try_accept("AO+MD5 server: AO client (matching)", port++, + try_accept("[server] AO+MD5 server: AO client (matching)", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, 100, 100, 0, "TCPAOGood", TEST_CNT_GOOD, 1, 0); - try_accept("AO+MD5 server: AO client (misconfig, matching MD5)", port++, + try_accept("[server] AO+MD5 server: AO client (misconfig, matching MD5)", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, 100, 100, 0, "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND, 1, FAULT_TIMEOUT); - try_accept("AO+MD5 server: AO client (misconfig, non-matching)", port++, + try_accept("[server] AO+MD5 server: AO client (misconfig, non-matching)", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, 100, 100, 0, "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND, 1, FAULT_TIMEOUT); - try_accept("AO+MD5 server: MD5 client (matching)", port++, + try_accept("[server] AO+MD5 server: MD5 client (matching)", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, 100, 100, 0, NULL, 0, 1, 0); - try_accept("AO+MD5 server: MD5 client (misconfig, matching AO)", port++, + try_accept("[server] AO+MD5 server: MD5 client (misconfig, matching AO)", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, - 100, 100, 0, "TCPMD5Unexpected", 0, 1, FAULT_TIMEOUT); - try_accept("AO+MD5 server: MD5 client (misconfig, non-matching)", port++, + 100, 100, 0, "TCPMD5Unexpected", + TEST_CNT_NS_MD5_UNEXPECTED, 1, FAULT_TIMEOUT); + try_accept("[server] AO+MD5 server: MD5 client (misconfig, non-matching)", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, - 100, 100, 0, "TCPMD5Unexpected", 0, 1, FAULT_TIMEOUT); - try_accept("AO+MD5 server: no sign client (unmatched)", port++, + 100, 100, 0, "TCPMD5Unexpected", + TEST_CNT_NS_MD5_UNEXPECTED, 1, FAULT_TIMEOUT); + try_accept("[server] AO+MD5 server: no sign client (unmatched)", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, 100, 100, 0, "CurrEstab", 0, 1, 0); - try_accept("AO+MD5 server: no sign client (misconfig, matching AO)", + try_accept("[server] AO+MD5 server: no sign client (misconfig, matching AO)", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, 100, 100, 0, "TCPAORequired", TEST_CNT_AO_REQUIRED, 1, FAULT_TIMEOUT); - try_accept("AO+MD5 server: no sign client (misconfig, matching MD5)", + try_accept("[server] AO+MD5 server: no sign client (misconfig, matching MD5)", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, - 100, 100, 0, "TCPMD5NotFound", 0, 1, FAULT_TIMEOUT); + 100, 100, 0, "TCPMD5NotFound", + TEST_CNT_NS_MD5_NOT_FOUND, 1, FAULT_TIMEOUT); - try_accept("AO+MD5 server: client with both [TCP-MD5] and TCP-AO keys", + /* Key rejected by the other side, failing short through skpair */ + try_accept("[server] AO+MD5 server: client with both [TCP-MD5] and TCP-AO keys", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, - 100, 100, 0, NULL, 0, 1, FAULT_TIMEOUT); - try_accept("AO+MD5 server: client with both TCP-MD5 and [TCP-AO] keys", + 100, 100, 0, NULL, 0, 1, FAULT_KEYREJECT); + try_accept("[server] AO+MD5 server: client with both TCP-MD5 and [TCP-AO] keys", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, - 100, 100, 0, NULL, 0, 1, FAULT_TIMEOUT); + 100, 100, 0, NULL, 0, 1, FAULT_KEYREJECT); server_add_fail_tests(&port); @@ -259,7 +267,6 @@ static void try_connect(const char *tst_name, unsigned int port, uint8_t sndid, uint8_t rcvid, uint8_t vrf, fault_t inj, int needs_tcp_md5, union tcp_addr *bind_addr) { - time_t timeout; int sk, ret; if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5)) @@ -281,11 +288,10 @@ static void try_connect(const char *tst_name, unsigned int port, synchronize_threads(); /* preparations done */ - timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; - ret = _test_connect_socket(sk, this_ip_dest, port, timeout); - + ret = test_skpair_connect_poll(sk, this_ip_dest, port, 0, &sk_pair); synchronize_threads(); /* connect()/accept() timeouts */ if (ret < 0) { + sk_pair = ret; if (fault(KEYREJECT) && ret == -EKEYREJECTED) test_ok("%s: connect() was prevented", tst_name); else if (ret == -ETIMEDOUT && fault(TIMEOUT)) @@ -305,8 +311,7 @@ static void try_connect(const char *tst_name, unsigned int port, out: synchronize_threads(); /* test_kill_sk() */ - /* _test_connect_socket() cleans up on failure */ - if (ret > 0) + if (ret > 0) /* test_skpair_connect_poll() cleans up on failure */ test_kill_sk(sk); } @@ -437,7 +442,6 @@ static void try_to_add(const char *tst_name, unsigned int port, int ao_vrf, uint8_t sndid, uint8_t rcvid, int needs_tcp_md5, fault_t inj) { - time_t timeout; int sk, ret; if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5)) @@ -450,11 +454,10 @@ static void try_to_add(const char *tst_name, unsigned int port, synchronize_threads(); /* preparations done */ - timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; - ret = _test_connect_socket(sk, this_ip_dest, port, timeout); + ret = test_skpair_connect_poll(sk, this_ip_dest, port, 0, &sk_pair); synchronize_threads(); /* connect()/accept() timeouts */ - if (ret <= 0) { + if (ret < 0) { test_error("%s: connect() returned %d", tst_name, ret); goto out; } @@ -490,8 +493,7 @@ static void try_to_add(const char *tst_name, unsigned int port, out: synchronize_threads(); /* test_kill_sk() */ - /* _test_connect_socket() cleans up on failure */ - if (ret > 0) + if (ret > 0) /* test_skpair_connect_poll() cleans up on failure */ test_kill_sk(sk); } diff --git a/tools/testing/selftests/net/tcp_fastopen_backup_key.c b/tools/testing/selftests/net/tcp_fastopen_backup_key.c index c1cb0c75156a..4b3f9b5e50fe 100644 --- a/tools/testing/selftests/net/tcp_fastopen_backup_key.c +++ b/tools/testing/selftests/net/tcp_fastopen_backup_key.c @@ -26,7 +26,7 @@ #include <fcntl.h> #include <time.h> -#include "../kselftest.h" +#include "kselftest.h" #ifndef TCP_FASTOPEN_KEY #define TCP_FASTOPEN_KEY 33 diff --git a/tools/testing/selftests/net/tcp_port_share.c b/tools/testing/selftests/net/tcp_port_share.c new file mode 100644 index 000000000000..6146b62610df --- /dev/null +++ b/tools/testing/selftests/net/tcp_port_share.c @@ -0,0 +1,258 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +// Copyright (c) 2025 Cloudflare, Inc. + +/* Tests for TCP port sharing (bind bucket reuse). */ + +#include <arpa/inet.h> +#include <net/if.h> +#include <sys/ioctl.h> +#include <fcntl.h> +#include <sched.h> +#include <stdlib.h> + +#include "kselftest_harness.h" + +#define DST_PORT 30000 +#define SRC_PORT 40000 + +struct sockaddr_inet { + union { + struct sockaddr_storage ss; + struct sockaddr_in6 v6; + struct sockaddr_in v4; + struct sockaddr sa; + }; + socklen_t len; + char str[INET6_ADDRSTRLEN + __builtin_strlen("[]:65535") + 1]; +}; + +const int one = 1; + +static int disconnect(int fd) +{ + return connect(fd, &(struct sockaddr){ AF_UNSPEC }, sizeof(struct sockaddr)); +} + +static int getsockname_port(int fd) +{ + struct sockaddr_inet addr = {}; + int err; + + addr.len = sizeof(addr); + err = getsockname(fd, &addr.sa, &addr.len); + if (err) + return -1; + + switch (addr.sa.sa_family) { + case AF_INET: + return ntohs(addr.v4.sin_port); + case AF_INET6: + return ntohs(addr.v6.sin6_port); + default: + errno = EAFNOSUPPORT; + return -1; + } +} + +static void make_inet_addr(int af, const char *ip, __u16 port, + struct sockaddr_inet *addr) +{ + const char *fmt = ""; + + memset(addr, 0, sizeof(*addr)); + + switch (af) { + case AF_INET: + addr->len = sizeof(addr->v4); + addr->v4.sin_family = af; + addr->v4.sin_port = htons(port); + inet_pton(af, ip, &addr->v4.sin_addr); + fmt = "%s:%hu"; + break; + case AF_INET6: + addr->len = sizeof(addr->v6); + addr->v6.sin6_family = af; + addr->v6.sin6_port = htons(port); + inet_pton(af, ip, &addr->v6.sin6_addr); + fmt = "[%s]:%hu"; + break; + } + + snprintf(addr->str, sizeof(addr->str), fmt, ip, port); +} + +FIXTURE(tcp_port_share) {}; + +FIXTURE_VARIANT(tcp_port_share) { + int domain; + /* IP to listen on and connect to */ + const char *dst_ip; + /* Primary IP to connect from */ + const char *src1_ip; + /* Secondary IP to connect from */ + const char *src2_ip; + /* IP to bind to in order to block the source port */ + const char *bind_ip; +}; + +FIXTURE_VARIANT_ADD(tcp_port_share, ipv4) { + .domain = AF_INET, + .dst_ip = "127.0.0.1", + .src1_ip = "127.1.1.1", + .src2_ip = "127.2.2.2", + .bind_ip = "127.3.3.3", +}; + +FIXTURE_VARIANT_ADD(tcp_port_share, ipv6) { + .domain = AF_INET6, + .dst_ip = "::1", + .src1_ip = "2001:db8::1", + .src2_ip = "2001:db8::2", + .bind_ip = "2001:db8::3", +}; + +FIXTURE_SETUP(tcp_port_share) +{ + int sc; + + ASSERT_EQ(unshare(CLONE_NEWNET), 0); + ASSERT_EQ(system("ip link set dev lo up"), 0); + ASSERT_EQ(system("ip addr add dev lo 2001:db8::1/32 nodad"), 0); + ASSERT_EQ(system("ip addr add dev lo 2001:db8::2/32 nodad"), 0); + ASSERT_EQ(system("ip addr add dev lo 2001:db8::3/32 nodad"), 0); + + sc = open("/proc/sys/net/ipv4/ip_local_port_range", O_WRONLY); + ASSERT_GE(sc, 0); + ASSERT_GT(dprintf(sc, "%hu %hu\n", SRC_PORT, SRC_PORT), 0); + ASSERT_EQ(close(sc), 0); +} + +FIXTURE_TEARDOWN(tcp_port_share) {} + +/* Verify that an ephemeral port becomes available again after the socket + * bound to it and blocking it from reuse is closed. + */ +TEST_F(tcp_port_share, can_reuse_port_after_bind_and_close) +{ + const typeof(variant) v = variant; + struct sockaddr_inet addr; + int c1, c2, ln, pb; + + /* Listen on <dst_ip>:<DST_PORT> */ + ln = socket(v->domain, SOCK_STREAM, 0); + ASSERT_GE(ln, 0) TH_LOG("socket(): %m"); + ASSERT_EQ(setsockopt(ln, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr); + ASSERT_EQ(bind(ln, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + ASSERT_EQ(listen(ln, 2), 0); + + /* Connect from <src1_ip>:<SRC_PORT> */ + c1 = socket(v->domain, SOCK_STREAM, 0); + ASSERT_GE(c1, 0) TH_LOG("socket(): %m"); + ASSERT_EQ(setsockopt(c1, SOL_IP, IP_BIND_ADDRESS_NO_PORT, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->src1_ip, 0, &addr); + ASSERT_EQ(bind(c1, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + + make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr); + ASSERT_EQ(connect(c1, &addr.sa, addr.len), 0) TH_LOG("connect(%s): %m", addr.str); + ASSERT_EQ(getsockname_port(c1), SRC_PORT); + + /* Bind to <bind_ip>:<SRC_PORT>. Block the port from reuse. */ + pb = socket(v->domain, SOCK_STREAM, 0); + ASSERT_GE(pb, 0) TH_LOG("socket(): %m"); + ASSERT_EQ(setsockopt(pb, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->bind_ip, SRC_PORT, &addr); + ASSERT_EQ(bind(pb, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + + /* Try to connect from <src2_ip>:<SRC_PORT>. Expect failure. */ + c2 = socket(v->domain, SOCK_STREAM, 0); + ASSERT_GE(c2, 0) TH_LOG("socket"); + ASSERT_EQ(setsockopt(c2, SOL_IP, IP_BIND_ADDRESS_NO_PORT, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->src2_ip, 0, &addr); + ASSERT_EQ(bind(c2, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + + make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr); + ASSERT_EQ(connect(c2, &addr.sa, addr.len), -1) TH_LOG("connect(%s)", addr.str); + ASSERT_EQ(errno, EADDRNOTAVAIL) TH_LOG("%m"); + + /* Unbind from <bind_ip>:<SRC_PORT>. Unblock the port for reuse. */ + ASSERT_EQ(close(pb), 0); + + /* Connect again from <src2_ip>:<SRC_PORT> */ + EXPECT_EQ(connect(c2, &addr.sa, addr.len), 0) TH_LOG("connect(%s): %m", addr.str); + EXPECT_EQ(getsockname_port(c2), SRC_PORT); + + ASSERT_EQ(close(c2), 0); + ASSERT_EQ(close(c1), 0); + ASSERT_EQ(close(ln), 0); +} + +/* Verify that a socket auto-bound during connect() blocks port reuse after + * disconnect (connect(AF_UNSPEC)) followed by an explicit port bind(). + */ +TEST_F(tcp_port_share, port_block_after_disconnect) +{ + const typeof(variant) v = variant; + struct sockaddr_inet addr; + int c1, c2, ln, pb; + + /* Listen on <dst_ip>:<DST_PORT> */ + ln = socket(v->domain, SOCK_STREAM, 0); + ASSERT_GE(ln, 0) TH_LOG("socket(): %m"); + ASSERT_EQ(setsockopt(ln, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr); + ASSERT_EQ(bind(ln, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + ASSERT_EQ(listen(ln, 2), 0); + + /* Connect from <src1_ip>:<SRC_PORT> */ + c1 = socket(v->domain, SOCK_STREAM, 0); + ASSERT_GE(c1, 0) TH_LOG("socket(): %m"); + ASSERT_EQ(setsockopt(c1, SOL_IP, IP_BIND_ADDRESS_NO_PORT, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->src1_ip, 0, &addr); + ASSERT_EQ(bind(c1, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + + make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr); + ASSERT_EQ(connect(c1, &addr.sa, addr.len), 0) TH_LOG("connect(%s): %m", addr.str); + ASSERT_EQ(getsockname_port(c1), SRC_PORT); + + /* Disconnect the socket and bind it to <bind_ip>:<SRC_PORT> to block the port */ + ASSERT_EQ(disconnect(c1), 0) TH_LOG("disconnect: %m"); + ASSERT_EQ(setsockopt(c1, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->bind_ip, SRC_PORT, &addr); + ASSERT_EQ(bind(c1, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + + /* Trigger port-addr bucket state update with another bind() and close() */ + pb = socket(v->domain, SOCK_STREAM, 0); + ASSERT_GE(pb, 0) TH_LOG("socket(): %m"); + ASSERT_EQ(setsockopt(pb, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->bind_ip, SRC_PORT, &addr); + ASSERT_EQ(bind(pb, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + + ASSERT_EQ(close(pb), 0); + + /* Connect from <src2_ip>:<SRC_PORT>. Expect failure. */ + c2 = socket(v->domain, SOCK_STREAM, 0); + ASSERT_GE(c2, 0) TH_LOG("socket: %m"); + ASSERT_EQ(setsockopt(c2, SOL_IP, IP_BIND_ADDRESS_NO_PORT, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->src2_ip, 0, &addr); + ASSERT_EQ(bind(c2, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + + make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr); + EXPECT_EQ(connect(c2, &addr.sa, addr.len), -1) TH_LOG("connect(%s)", addr.str); + EXPECT_EQ(errno, EADDRNOTAVAIL) TH_LOG("%m"); + + ASSERT_EQ(close(c2), 0); + ASSERT_EQ(close(c1), 0); + ASSERT_EQ(close(ln), 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/test_blackhole_dev.sh b/tools/testing/selftests/net/test_blackhole_dev.sh deleted file mode 100755 index 3119b80e711f..000000000000 --- a/tools/testing/selftests/net/test_blackhole_dev.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 -# Runs blackhole-dev test using blackhole-dev kernel module - -if /sbin/modprobe -q test_blackhole_dev ; then - /sbin/modprobe -q -r test_blackhole_dev; - echo "test_blackhole_dev: ok"; -else - echo "test_blackhole_dev: [FAIL]"; - exit 1; -fi diff --git a/tools/testing/selftests/net/test_bridge_backup_port.sh b/tools/testing/selftests/net/test_bridge_backup_port.sh index 1b3f89e2b86e..2a7224fe74f2 100755 --- a/tools/testing/selftests/net/test_bridge_backup_port.sh +++ b/tools/testing/selftests/net/test_bridge_backup_port.sh @@ -315,6 +315,29 @@ backup_port() tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "No forwarding out of vx0" + # Check that packets are forwarded out of vx0 when swp1 is + # administratively down and out of swp1 when it is administratively up + # again. + run_cmd "ip -n $sw1 link set dev swp1 down" + busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled + log_test $? 0 "swp1 administratively down" + + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 3 + log_test $? 0 "No forwarding out of swp1" + tc_check_packets $sw1 "dev vx0 egress" 101 2 + log_test $? 0 "Forwarding out of vx0" + + run_cmd "ip -n $sw1 link set dev swp1 up" + busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding + log_test $? 0 "swp1 administratively up" + + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 4 + log_test $? 0 "Forwarding out of swp1" + tc_check_packets $sw1 "dev vx0 egress" 101 2 + log_test $? 0 "No forwarding out of vx0" + # Remove vx0 as the backup port of swp1 and check that packets are no # longer forwarded out of vx0 when swp1 does not have a carrier. run_cmd "bridge -n $sw1 link set dev swp1 nobackup_port" @@ -322,9 +345,9 @@ backup_port() log_test $? 1 "vx0 not configured as backup port of swp1" run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets $sw1 "dev swp1 egress" 101 4 + tc_check_packets $sw1 "dev swp1 egress" 101 5 log_test $? 0 "Forwarding out of swp1" - tc_check_packets $sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 2 log_test $? 0 "No forwarding out of vx0" run_cmd "ip -n $sw1 link set dev swp1 carrier off" @@ -332,9 +355,9 @@ backup_port() log_test $? 0 "swp1 carrier off" run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets $sw1 "dev swp1 egress" 101 4 + tc_check_packets $sw1 "dev swp1 egress" 101 5 log_test $? 0 "No forwarding out of swp1" - tc_check_packets $sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 2 log_test $? 0 "No forwarding out of vx0" } diff --git a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh index 02b986c9c247..9067197c9055 100755 --- a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh +++ b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh @@ -51,7 +51,9 @@ ret=0 # All tests in this script. Can be overridden with -t option. TESTS=" neigh_suppress_arp + neigh_suppress_uc_arp neigh_suppress_ns + neigh_suppress_uc_ns neigh_vlan_suppress_arp neigh_vlan_suppress_ns " @@ -388,6 +390,52 @@ neigh_suppress_arp() neigh_suppress_arp_common $vid $sip $tip } +neigh_suppress_uc_arp_common() +{ + local vid=$1; shift + local sip=$1; shift + local tip=$1; shift + local tmac + + echo + echo "Unicast ARP, per-port ARP suppression - VLAN $vid" + echo "-----------------------------------------------" + + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" + log_test $? 0 "\"neigh_suppress\" is on" + + tmac=$(ip -n $h2 -j -p link show eth0.$vid | jq -r '.[]["address"]') + run_cmd "bridge -n $sw1 fdb replace $tmac dev vx0 master static vlan $vid" + run_cmd "ip -n $sw1 neigh replace $tip lladdr $tmac nud permanent dev br0.$vid" + + run_cmd "tc -n $h1 qdisc replace dev eth0.$vid clsact" + run_cmd "tc -n $h1 filter replace dev eth0.$vid ingress pref 1 handle 101 proto arp flower arp_sip $tip arp_op reply action pass" + + run_cmd "tc -n $h2 qdisc replace dev eth0.$vid clsact" + run_cmd "tc -n $h2 filter replace dev eth0.$vid egress pref 1 handle 101 proto arp flower arp_tip $sip arp_op reply action pass" + + run_cmd "ip netns exec $h1 mausezahn eth0.$vid -c 1 -a own -b $tmac -t arp 'request sip=$sip, tip=$tip, tmac=$tmac' -q" + tc_check_packets $h1 "dev eth0.$vid ingress" 101 1 + log_test $? 0 "Unicast ARP, suppression on, h1 filter" + tc_check_packets $h2 "dev eth0.$vid egress" 101 1 + log_test $? 0 "Unicast ARP, suppression on, h2 filter" +} + +neigh_suppress_uc_arp() +{ + local vid=10 + local sip=192.0.2.1 + local tip=192.0.2.2 + + neigh_suppress_uc_arp_common $vid $sip $tip + + vid=20 + sip=192.0.2.17 + tip=192.0.2.18 + neigh_suppress_uc_arp_common $vid $sip $tip +} + neigh_suppress_ns_common() { local vid=$1; shift @@ -494,6 +542,78 @@ neigh_suppress_ns() neigh_suppress_ns_common $vid $saddr $daddr $maddr } +icmpv6_header_get() +{ + local csum=$1; shift + local tip=$1; shift + local type + local p + + # Type 135 (Neighbor Solicitation), hex format + type="87" + p=$(: + )"$type:"$( : ICMPv6.type + )"00:"$( : ICMPv6.code + )"$csum:"$( : ICMPv6.checksum + )"00:00:00:00:"$( : Reserved + )"$tip:"$( : Target Address + ) + echo $p +} + +neigh_suppress_uc_ns_common() +{ + local vid=$1; shift + local sip=$1; shift + local dip=$1; shift + local full_dip=$1; shift + local csum=$1; shift + local tmac + + echo + echo "Unicast NS, per-port NS suppression - VLAN $vid" + echo "---------------------------------------------" + + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" + log_test $? 0 "\"neigh_suppress\" is on" + + tmac=$(ip -n $h2 -j -p link show eth0.$vid | jq -r '.[]["address"]') + run_cmd "bridge -n $sw1 fdb replace $tmac dev vx0 master static vlan $vid" + run_cmd "ip -n $sw1 -6 neigh replace $dip lladdr $tmac nud permanent dev br0.$vid" + + run_cmd "tc -n $h1 qdisc replace dev eth0.$vid clsact" + run_cmd "tc -n $h1 filter replace dev eth0.$vid ingress pref 1 handle 101 proto ipv6 flower ip_proto icmpv6 src_ip $dip type 136 code 0 action pass" + + run_cmd "tc -n $h2 qdisc replace dev eth0.$vid clsact" + run_cmd "tc -n $h2 filter replace dev eth0.$vid egress pref 1 handle 101 proto ipv6 flower ip_proto icmpv6 dst_ip $sip type 136 code 0 action pass" + + run_cmd "ip netns exec $h1 mausezahn -6 eth0.$vid -c 1 -a own -b $tmac -A $sip -B $dip -t ip hop=255,next=58,payload=$(icmpv6_header_get $csum $full_dip) -q" + tc_check_packets $h1 "dev eth0.$vid ingress" 101 1 + log_test $? 0 "Unicast NS, suppression on, h1 filter" + tc_check_packets $h2 "dev eth0.$vid egress" 101 1 + log_test $? 0 "Unicast NS, suppression on, h2 filter" +} + +neigh_suppress_uc_ns() +{ + local vid=10 + local saddr=2001:db8:1::1 + local daddr=2001:db8:1::2 + local full_daddr=20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:02 + local csum="ef:79" + + neigh_suppress_uc_ns_common $vid $saddr $daddr $full_daddr $csum + + vid=20 + saddr=2001:db8:2::1 + daddr=2001:db8:2::2 + full_daddr=20:01:0d:b8:00:02:00:00:00:00:00:00:00:00:00:02 + csum="ef:76" + + neigh_suppress_uc_ns_common $vid $saddr $daddr $full_daddr $csum +} + neigh_vlan_suppress_arp() { local vid1=10 @@ -825,6 +945,11 @@ if [ ! -x "$(command -v jq)" ]; then exit $ksft_skip fi +if [ ! -x "$(command -v mausezahn)" ]; then + echo "SKIP: Could not run test without mausezahn tool" + exit $ksft_skip +fi + bridge link help 2>&1 | grep -q "neigh_vlan_suppress" if [ $? -ne 0 ]; then echo "SKIP: iproute2 bridge too old, missing per-VLAN neighbor suppression support" diff --git a/tools/testing/selftests/net/test_neigh.sh b/tools/testing/selftests/net/test_neigh.sh new file mode 100755 index 000000000000..7c594bf6ead0 --- /dev/null +++ b/tools/testing/selftests/net/test_neigh.sh @@ -0,0 +1,366 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh +TESTS=" + extern_valid_ipv4 + extern_valid_ipv6 +" +VERBOSE=0 + +################################################################################ +# Utilities + +run_cmd() +{ + local cmd="$1" + local out + local stderr="2>/dev/null" + + if [ "$VERBOSE" = "1" ]; then + echo "COMMAND: $cmd" + stderr= + fi + + out=$(eval "$cmd" "$stderr") + rc=$? + if [ "$VERBOSE" -eq 1 ] && [ -n "$out" ]; then + echo " $out" + fi + + return $rc +} + +################################################################################ +# Setup + +setup() +{ + set -e + + setup_ns ns1 ns2 + + ip -n "$ns1" link add veth0 type veth peer name veth1 netns "$ns2" + ip -n "$ns1" link set dev veth0 up + ip -n "$ns2" link set dev veth1 up + + ip -n "$ns1" address add 192.0.2.1/24 dev veth0 + ip -n "$ns1" address add 2001:db8:1::1/64 dev veth0 nodad + ip -n "$ns2" address add 192.0.2.2/24 dev veth1 + ip -n "$ns2" address add 2001:db8:1::2/64 dev veth1 nodad + + ip netns exec "$ns1" sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1 + ip netns exec "$ns2" sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1 + + sleep 5 + + set +e +} + +exit_cleanup_all() +{ + cleanup_all_ns + exit "${EXIT_STATUS}" +} + +################################################################################ +# Tests + +extern_valid_common() +{ + local af_str=$1; shift + local ip_addr=$1; shift + local tbl_name=$1; shift + local subnet=$1; shift + local mac + + mac=$(ip -n "$ns2" -j link show dev veth1 | jq -r '.[]["address"]') + + RET=0 + + # Check that simple addition works. + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\"" + check_err $? "No \"extern_valid\" flag after addition" + + log_test "$af_str \"extern_valid\" flag: Add entry" + + RET=0 + + # Check that an entry cannot be added with "extern_valid" flag and an + # invalid state. + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "ip -n $ns1 neigh add $ip_addr nud none dev veth0 extern_valid" + check_fail $? "Managed to add an entry with \"extern_valid\" flag and an invalid state" + + log_test "$af_str \"extern_valid\" flag: Add with an invalid state" + + RET=0 + + # Check that entry cannot be added with both "extern_valid" flag and + # "use" / "managed" flag. + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid use" + check_fail $? "Managed to add an entry with \"extern_valid\" flag and \"use\" flag" + + log_test "$af_str \"extern_valid\" flag: Add with \"use\" flag" + + RET=0 + + # Check that "extern_valid" flag can be toggled using replace. + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0" + run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\"" + check_err $? "Did not manage to set \"extern_valid\" flag with replace" + run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\"" + check_fail $? "Did not manage to clear \"extern_valid\" flag with replace" + + log_test "$af_str \"extern_valid\" flag: Replace entry" + + RET=0 + + # Check that an existing "extern_valid" entry can be marked as + # "managed". + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid managed" + check_err $? "Did not manage to add \"managed\" flag to an existing \"extern_valid\" entry" + + log_test "$af_str \"extern_valid\" flag: Replace entry with \"managed\" flag" + + RET=0 + + # Check that entry cannot be replaced with "extern_valid" flag and an + # invalid state. + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 neigh replace $ip_addr nud none dev veth0 extern_valid" + check_fail $? "Managed to replace an entry with \"extern_valid\" flag and an invalid state" + + log_test "$af_str \"extern_valid\" flag: Replace with an invalid state" + + RET=0 + + # Check that an "extern_valid" entry is flushed when the interface is + # put administratively down. + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 link set dev veth0 down" + run_cmd "ip -n $ns1 link set dev veth0 up" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0" + check_fail $? "\"extern_valid\" entry not flushed upon interface down" + + log_test "$af_str \"extern_valid\" flag: Interface down" + + RET=0 + + # Check that an "extern_valid" entry is not flushed when the interface + # loses its carrier. + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns2 link set dev veth1 down" + run_cmd "ip -n $ns2 link set dev veth1 up" + run_cmd "sleep 2" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0" + check_err $? "\"extern_valid\" entry flushed upon carrier down" + + log_test "$af_str \"extern_valid\" flag: Carrier down" + + RET=0 + + # Check that when entry transitions to "reachable" state it maintains + # the "extern_valid" flag. Wait "delay_probe" seconds for ARP request / + # NS to be sent. + local delay_probe + + delay_probe=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["delay_probe"]') + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid use" + run_cmd "sleep $((delay_probe / 1000 + 2))" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"REACHABLE\"" + check_err $? "Entry did not transition to \"reachable\" state" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\"" + check_err $? "Entry did not maintain \"extern_valid\" flag after transition to \"reachable\" state" + + log_test "$af_str \"extern_valid\" flag: Transition to \"reachable\" state" + + RET=0 + + # Drop all packets, trigger resolution and check that entry goes back + # to "stale" state instead of "failed". + local mcast_reprobes + local retrans_time + local ucast_probes + local app_probes + local probes + local delay + + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "tc -n $ns2 qdisc add dev veth1 clsact" + run_cmd "tc -n $ns2 filter add dev veth1 ingress proto all matchall action drop" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid use" + retrans_time=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["retrans"]') + ucast_probes=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["ucast_probes"]') + app_probes=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["app_probes"]') + mcast_reprobes=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["mcast_reprobes"]') + delay=$((delay_probe + (ucast_probes + app_probes + mcast_reprobes) * retrans_time)) + run_cmd "sleep $((delay / 1000 + 2))" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"STALE\"" + check_err $? "Entry did not return to \"stale\" state" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\"" + check_err $? "Entry did not maintain \"extern_valid\" flag after returning to \"stale\" state" + probes=$(ip -n "$ns1" -j -s neigh get "$ip_addr" dev veth0 | jq '.[]["probes"]') + if [[ $probes -eq 0 ]]; then + check_err 1 "No probes were sent" + fi + + log_test "$af_str \"extern_valid\" flag: Transition back to \"stale\" state" + + run_cmd "tc -n $ns2 qdisc del dev veth1 clsact" + + RET=0 + + # Forced garbage collection runs whenever the number of entries is + # larger than "thresh3" and deletes stale entries that have not been + # updated in the last 5 seconds. + # + # Check that an "extern_valid" entry survives a forced garbage + # collection. Add an entry, wait 5 seconds and add more entries than + # "thresh3" so that forced garbage collection will run. + # + # Note that the garbage collection thresholds are global resources and + # that changes in the initial namespace affect all the namespaces. + local forced_gc_runs_t0 + local forced_gc_runs_t1 + local orig_thresh1 + local orig_thresh2 + local orig_thresh3 + + run_cmd "ip -n $ns1 neigh flush dev veth0" + orig_thresh1=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh1")) | .["thresh1"]') + orig_thresh2=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh2")) | .["thresh2"]') + orig_thresh3=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh3")) | .["thresh3"]') + run_cmd "ip ntable change name $tbl_name thresh3 10 thresh2 9 thresh1 8" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 neigh add ${subnet}3 lladdr $mac nud stale dev veth0" + run_cmd "sleep 5" + forced_gc_runs_t0=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("forced_gc_runs")) | .["forced_gc_runs"]') + for i in {1..20}; do + run_cmd "ip -n $ns1 neigh add ${subnet}$((i + 4)) nud none dev veth0" + done + forced_gc_runs_t1=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("forced_gc_runs")) | .["forced_gc_runs"]') + if [[ $forced_gc_runs_t1 -eq $forced_gc_runs_t0 ]]; then + check_err 1 "Forced garbage collection did not run" + fi + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\"" + check_err $? "Entry with \"extern_valid\" flag did not survive forced garbage collection" + run_cmd "ip -n $ns1 neigh get ${subnet}3 dev veth0" + check_fail $? "Entry without \"extern_valid\" flag survived forced garbage collection" + + log_test "$af_str \"extern_valid\" flag: Forced garbage collection" + + run_cmd "ip ntable change name $tbl_name thresh3 $orig_thresh3 thresh2 $orig_thresh2 thresh1 $orig_thresh1" + + RET=0 + + # Periodic garbage collection runs every "base_reachable"/2 seconds and + # if the number of entries is larger than "thresh1", then it deletes + # stale entries that have not been used in the last "gc_stale" seconds. + # + # Check that an "extern_valid" entry survives a periodic garbage + # collection. Add an "extern_valid" entry, add more than "thresh1" + # regular entries, wait "base_reachable" (longer than "gc_stale") + # seconds and check that the "extern_valid" entry was not deleted. + # + # Note that the garbage collection thresholds and "base_reachable" are + # global resources and that changes in the initial namespace affect all + # the namespaces. + local periodic_gc_runs_t0 + local periodic_gc_runs_t1 + local orig_base_reachable + local orig_gc_stale + + run_cmd "ip -n $ns1 neigh flush dev veth0" + orig_thresh1=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh1")) | .["thresh1"]') + orig_base_reachable=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh1")) | .["base_reachable"]') + run_cmd "ip ntable change name $tbl_name thresh1 10 base_reachable 10000" + orig_gc_stale=$(ip -n "$ns1" -j ntable show name "$tbl_name" dev veth0 | jq '.[]["gc_stale"]') + run_cmd "ip -n $ns1 ntable change name $tbl_name dev veth0 gc_stale 1000" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 neigh add ${subnet}3 lladdr $mac nud stale dev veth0" + # Wait orig_base_reachable/2 for the new interval to take effect. + run_cmd "sleep $(((orig_base_reachable / 1000) / 2 + 2))" + for i in {1..20}; do + run_cmd "ip -n $ns1 neigh add ${subnet}$((i + 4)) nud none dev veth0" + done + periodic_gc_runs_t0=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("periodic_gc_runs")) | .["periodic_gc_runs"]') + run_cmd "sleep 10" + periodic_gc_runs_t1=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("periodic_gc_runs")) | .["periodic_gc_runs"]') + [[ $periodic_gc_runs_t1 -ne $periodic_gc_runs_t0 ]] + check_err $? "Periodic garbage collection did not run" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\"" + check_err $? "Entry with \"extern_valid\" flag did not survive periodic garbage collection" + run_cmd "ip -n $ns1 neigh get ${subnet}3 dev veth0" + check_fail $? "Entry without \"extern_valid\" flag survived periodic garbage collection" + + log_test "$af_str \"extern_valid\" flag: Periodic garbage collection" + + run_cmd "ip -n $ns1 ntable change name $tbl_name dev veth0 gc_stale $orig_gc_stale" + run_cmd "ip ntable change name $tbl_name thresh1 $orig_thresh1 base_reachable $orig_base_reachable" +} + +extern_valid_ipv4() +{ + extern_valid_common "IPv4" 192.0.2.2 "arp_cache" 192.0.2. +} + +extern_valid_ipv6() +{ + extern_valid_common "IPv6" 2001:db8:1::2 "ndisc_cache" 2001:db8:1:: +} + +################################################################################ +# Usage + +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS + + -t <test> Test(s) to run (default: all) + (options: $TESTS) + -p Pause on fail + -v Verbose mode (show commands and output) +EOF +} + +################################################################################ +# Main + +while getopts ":t:pvh" opt; do + case $opt in + t) TESTS=$OPTARG;; + p) PAUSE_ON_FAIL=yes;; + v) VERBOSE=$((VERBOSE + 1));; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +require_command jq + +if ! ip neigh help 2>&1 | grep -q "extern_valid"; then + echo "SKIP: iproute2 ip too old, missing \"extern_valid\" support" + exit "$ksft_skip" +fi + +trap exit_cleanup_all EXIT + +for t in $TESTS +do + setup; $t; cleanup_all_ns; +done diff --git a/tools/testing/selftests/net/test_so_rcv.sh b/tools/testing/selftests/net/test_so_rcv.sh new file mode 100755 index 000000000000..d8aa4362879d --- /dev/null +++ b/tools/testing/selftests/net/test_so_rcv.sh @@ -0,0 +1,73 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +HOSTS=("127.0.0.1" "::1") +PORT=1234 +TOTAL_TESTS=0 +FAILED_TESTS=0 + +declare -A TESTS=( + ["SO_RCVPRIORITY"]="-P 2" + ["SO_RCVMARK"]="-M 3" +) + +check_result() { + ((TOTAL_TESTS++)) + if [ "$1" -ne 0 ]; then + ((FAILED_TESTS++)) + fi +} + +cleanup() +{ + cleanup_ns $NS +} + +trap cleanup EXIT + +setup_ns NS + +for HOST in "${HOSTS[@]}"; do + PROTOCOL="IPv4" + if [[ "$HOST" == "::1" ]]; then + PROTOCOL="IPv6" + fi + for test_name in "${!TESTS[@]}"; do + echo "Running $test_name test, $PROTOCOL" + arg=${TESTS[$test_name]} + + ip netns exec $NS ./so_rcv_listener $arg $HOST $PORT & + LISTENER_PID=$! + + sleep 0.5 + + if ! ip netns exec $NS ./cmsg_sender $arg $HOST $PORT; then + echo "Sender failed for $test_name, $PROTOCOL" + kill "$LISTENER_PID" 2>/dev/null + wait "$LISTENER_PID" + check_result 1 + continue + fi + + wait "$LISTENER_PID" + LISTENER_EXIT_CODE=$? + + if [ "$LISTENER_EXIT_CODE" -eq 0 ]; then + echo "Rcv test OK for $test_name, $PROTOCOL" + check_result 0 + else + echo "Rcv test FAILED for $test_name, $PROTOCOL" + check_result 1 + fi + done +done + +if [ "$FAILED_TESTS" -ne 0 ]; then + echo "FAIL - $FAILED_TESTS/$TOTAL_TESTS tests failed" + exit ${KSFT_FAIL} +else + echo "OK - All $TOTAL_TESTS tests passed" + exit ${KSFT_PASS} +fi diff --git a/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh b/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh index 2d442cdab11e..8b414d0edada 100755 --- a/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh +++ b/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh @@ -1,29 +1,114 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -# Check FDB default-remote handling across "ip link set". +ALL_TESTS=" + test_set_remote + test_change_mc_remote +" +source lib.sh check_remotes() { local what=$1; shift local N=$(bridge fdb sh dev vx | grep 00:00:00:00:00:00 | wc -l) - echo -ne "expected two remotes after $what\t" - if [[ $N != 2 ]]; then - echo "[FAIL]" - EXIT_STATUS=1 + ((N == 2)) + check_err $? "expected 2 remotes after $what, got $N" +} + +# Check FDB default-remote handling across "ip link set". +test_set_remote() +{ + RET=0 + + adf_ip_link_add vx up type vxlan id 2000 dstport 4789 + bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.20 self permanent + bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.30 self permanent + check_remotes "fdb append" + + ip link set dev vx type vxlan remote 192.0.2.30 + check_remotes "link set" + + log_test 'FDB default-remote handling across "ip link set"' +} + +fmt_remote() +{ + local addr=$1; shift + + if [[ $addr == 224.* ]]; then + echo "group $addr" else - echo "[ OK ]" + echo "remote $addr" fi } -ip link add name vx up type vxlan id 2000 dstport 4789 -bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.20 self permanent -bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.30 self permanent -check_remotes "fdb append" +change_remote() +{ + local remote=$1; shift + + ip link set dev vx type vxlan $(fmt_remote $remote) dev v1 +} + +check_membership() +{ + local check_vec=("$@") + + local memberships + memberships=$( + netstat -n --groups | + sed -n '/^v1\b/p' | + grep -o '[^ ]*$' + ) + check_err $? "Couldn't obtain group memberships" + + local item + for item in "${check_vec[@]}"; do + eval "local $item" + echo "$memberships" | grep -q "\b$group\b" + check_err_fail $fail $? "$group is_ex reported in IGMP query response" + done +} + +test_change_mc_remote() +{ + check_command netstat || return + + adf_ip_link_add v1 up type veth peer name v2 + adf_ip_link_set_up v2 + + RET=0 + + adf_ip_link_add vx up type vxlan dstport 4789 \ + local 192.0.2.1 $(fmt_remote 224.1.1.1) dev v1 vni 1000 + + check_membership "group=224.1.1.1 fail=0" \ + "group=224.1.1.2 fail=1" \ + "group=224.1.1.3 fail=1" + + log_test "MC group report after VXLAN creation" + + RET=0 + + change_remote 224.1.1.2 + check_membership "group=224.1.1.1 fail=1" \ + "group=224.1.1.2 fail=0" \ + "group=224.1.1.3 fail=1" + + log_test "MC group report after changing VXLAN remote MC->MC" + + RET=0 + + change_remote 192.0.2.2 + check_membership "group=224.1.1.1 fail=1" \ + "group=224.1.1.2 fail=1" \ + "group=224.1.1.3 fail=1" + + log_test "MC group report after changing VXLAN remote MC->UC" +} + +trap defer_scopes_cleanup EXIT -ip link set dev vx type vxlan remote 192.0.2.30 -check_remotes "link set" +tests_run -ip link del dev vx exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/test_vxlan_nh.sh b/tools/testing/selftests/net/test_vxlan_nh.sh new file mode 100755 index 000000000000..20f3369f776b --- /dev/null +++ b/tools/testing/selftests/net/test_vxlan_nh.sh @@ -0,0 +1,223 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh +TESTS=" + basic_tx_ipv4 + basic_tx_ipv6 + learning + proxy_ipv4 + proxy_ipv6 +" +VERBOSE=0 + +################################################################################ +# Utilities + +run_cmd() +{ + local cmd="$1" + local out + local stderr="2>/dev/null" + + if [ "$VERBOSE" = "1" ]; then + echo "COMMAND: $cmd" + stderr= + fi + + out=$(eval "$cmd" "$stderr") + rc=$? + if [ "$VERBOSE" -eq 1 ] && [ -n "$out" ]; then + echo " $out" + fi + + return $rc +} + +################################################################################ +# Cleanup + +exit_cleanup_all() +{ + cleanup_all_ns + exit "${EXIT_STATUS}" +} + +################################################################################ +# Tests + +nh_stats_get() +{ + ip -n "$ns1" -s -j nexthop show id 10 | jq ".[][\"group_stats\"][][\"packets\"]" +} + +tc_stats_get() +{ + tc_rule_handle_stats_get "dev dummy1 egress" 101 ".packets" "-n $ns1" +} + +basic_tx_common() +{ + local af_str=$1; shift + local proto=$1; shift + local local_addr=$1; shift + local plen=$1; shift + local remote_addr=$1; shift + + RET=0 + + # Test basic Tx functionality. Check that stats are incremented on + # both the FDB nexthop group and the egress device. + + run_cmd "ip -n $ns1 link add name dummy1 up type dummy" + run_cmd "ip -n $ns1 route add $remote_addr/$plen dev dummy1" + run_cmd "tc -n $ns1 qdisc add dev dummy1 clsact" + run_cmd "tc -n $ns1 filter add dev dummy1 egress proto $proto pref 1 handle 101 flower ip_proto udp dst_ip $remote_addr dst_port 4789 action pass" + + run_cmd "ip -n $ns1 address add $local_addr/$plen dev lo" + + run_cmd "ip -n $ns1 nexthop add id 1 via $remote_addr fdb" + run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb" + + run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local $local_addr dstport 4789" + run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static nhid 10" + + run_cmd "ip netns exec $ns1 mausezahn vx0 -a own -b 00:11:22:33:44:55 -c 1 -q" + + busywait "$BUSYWAIT_TIMEOUT" until_counter_is "== 1" nh_stats_get > /dev/null + check_err $? "FDB nexthop group stats did not increase" + + busywait "$BUSYWAIT_TIMEOUT" until_counter_is "== 1" tc_stats_get > /dev/null + check_err $? "tc filter stats did not increase" + + log_test "VXLAN FDB nexthop: $af_str basic Tx" +} + +basic_tx_ipv4() +{ + basic_tx_common "IPv4" ipv4 192.0.2.1 32 192.0.2.2 +} + +basic_tx_ipv6() +{ + basic_tx_common "IPv6" ipv6 2001:db8:1::1 128 2001:db8:1::2 +} + +learning() +{ + RET=0 + + # When learning is enabled on the VXLAN device, an incoming packet + # might try to refresh an FDB entry that points to an FDB nexthop group + # instead of an ordinary remote destination. Check that the kernel does + # not crash in this situation. + + run_cmd "ip -n $ns1 address add 192.0.2.1/32 dev lo" + run_cmd "ip -n $ns1 address add 192.0.2.2/32 dev lo" + + run_cmd "ip -n $ns1 nexthop add id 1 via 192.0.2.3 fdb" + run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb" + + run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local 192.0.2.1 dstport 12345 localbypass" + run_cmd "ip -n $ns1 link add name vx1 up type vxlan id 10020 local 192.0.2.2 dstport 54321 learning" + + run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static dst 192.0.2.2 port 54321 vni 10020" + run_cmd "bridge -n $ns1 fdb add 00:aa:bb:cc:dd:ee dev vx1 self static nhid 10" + + run_cmd "ip netns exec $ns1 mausezahn vx0 -a 00:aa:bb:cc:dd:ee -b 00:11:22:33:44:55 -c 1 -q" + + log_test "VXLAN FDB nexthop: learning" +} + +proxy_common() +{ + local af_str=$1; shift + local local_addr=$1; shift + local plen=$1; shift + local remote_addr=$1; shift + local neigh_addr=$1; shift + local ping_cmd=$1; shift + + RET=0 + + # When the "proxy" option is enabled on the VXLAN device, the device + # will suppress ARP requests and IPv6 Neighbor Solicitation messages if + # it is able to reply on behalf of the remote host. That is, if a + # matching and valid neighbor entry is configured on the VXLAN device + # whose MAC address is not behind the "any" remote (0.0.0.0 / ::). The + # FDB entry for the neighbor's MAC address might point to an FDB + # nexthop group instead of an ordinary remote destination. Check that + # the kernel does not crash in this situation. + + run_cmd "ip -n $ns1 address add $local_addr/$plen dev lo" + + run_cmd "ip -n $ns1 nexthop add id 1 via $remote_addr fdb" + run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb" + + run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local $local_addr dstport 4789 proxy" + + run_cmd "ip -n $ns1 neigh add $neigh_addr lladdr 00:11:22:33:44:55 nud perm dev vx0" + + run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static nhid 10" + + run_cmd "ip netns exec $ns1 $ping_cmd" + + log_test "VXLAN FDB nexthop: $af_str proxy" +} + +proxy_ipv4() +{ + proxy_common "IPv4" 192.0.2.1 32 192.0.2.2 192.0.2.3 \ + "arping -b -c 1 -s 192.0.2.1 -I vx0 192.0.2.3" +} + +proxy_ipv6() +{ + proxy_common "IPv6" 2001:db8:1::1 128 2001:db8:1::2 2001:db8:1::3 \ + "ndisc6 -r 1 -s 2001:db8:1::1 -w 1 2001:db8:1::3 vx0" +} + +################################################################################ +# Usage + +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS + + -t <test> Test(s) to run (default: all) + (options: $TESTS) + -p Pause on fail + -v Verbose mode (show commands and output) +EOF +} + +################################################################################ +# Main + +while getopts ":t:pvh" opt; do + case $opt in + t) TESTS=$OPTARG;; + p) PAUSE_ON_FAIL=yes;; + v) VERBOSE=$((VERBOSE + 1));; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +require_command mausezahn +require_command arping +require_command ndisc6 +require_command jq + +if ! ip nexthop help 2>&1 | grep -q "stats"; then + echo "SKIP: iproute2 ip too old, missing nexthop stats support" + exit "$ksft_skip" +fi + +trap exit_cleanup_all EXIT + +for t in $TESTS +do + setup_ns ns1; $t; cleanup_all_ns; +done diff --git a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh index 6127a78ee988..8deacc565afa 100755 --- a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh +++ b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh @@ -146,18 +146,17 @@ run_cmd() } check_hv_connectivity() { - ip netns exec $hv_1 ping -c 1 -W 1 $1 &>/dev/null - sleep 1 - ip netns exec $hv_1 ping -c 1 -W 1 $2 &>/dev/null + slowwait 5 ip netns exec $hv_1 ping -c 1 -W 1 $1 &>/dev/null + slowwait 5 ip netns exec $hv_1 ping -c 1 -W 1 $2 &>/dev/null return $? } check_vm_connectivity() { - run_cmd "ip netns exec $vm_11 ping -c 1 -W 1 10.0.10.12" + slowwait 5 run_cmd "ip netns exec $vm_11 ping -c 1 -W 1 10.0.10.12" log_test $? 0 "VM connectivity over $1 (ipv4 default rdst)" - run_cmd "ip netns exec $vm_21 ping -c 1 -W 1 10.0.10.22" + slowwait 5 run_cmd "ip netns exec $vm_21 ping -c 1 -W 1 10.0.10.22" log_test $? 0 "VM connectivity over $1 (ipv6 default rdst)" } diff --git a/tools/testing/selftests/net/tfo.c b/tools/testing/selftests/net/tfo.c new file mode 100644 index 000000000000..3b1ee2d3d417 --- /dev/null +++ b/tools/testing/selftests/net/tfo.c @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <error.h> +#include <fcntl.h> +#include <limits.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <arpa/inet.h> +#include <sys/socket.h> +#include <netinet/tcp.h> +#include <errno.h> + +static int cfg_server; +static int cfg_client; +static int cfg_port = 8000; +static struct sockaddr_in6 cfg_addr; +static char *cfg_outfile; + +static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6) +{ + int ret; + + sin6->sin6_family = AF_INET6; + sin6->sin6_port = htons(port); + + ret = inet_pton(sin6->sin6_family, str, &sin6->sin6_addr); + if (ret != 1) { + /* fallback to plain IPv4 */ + ret = inet_pton(AF_INET, str, &sin6->sin6_addr.s6_addr32[3]); + if (ret != 1) + return -1; + + /* add ::ffff prefix */ + sin6->sin6_addr.s6_addr32[0] = 0; + sin6->sin6_addr.s6_addr32[1] = 0; + sin6->sin6_addr.s6_addr16[4] = 0; + sin6->sin6_addr.s6_addr16[5] = 0xffff; + } + + return 0; +} + +static void run_server(void) +{ + unsigned long qlen = 32; + int fd, opt, connfd; + socklen_t len; + char buf[64]; + FILE *outfile; + + outfile = fopen(cfg_outfile, "w"); + if (!outfile) + error(1, errno, "fopen() outfile"); + + fd = socket(AF_INET6, SOCK_STREAM, 0); + if (fd == -1) + error(1, errno, "socket()"); + + opt = 1; + if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0) + error(1, errno, "setsockopt(SO_REUSEADDR)"); + + if (setsockopt(fd, SOL_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) < 0) + error(1, errno, "setsockopt(TCP_FASTOPEN)"); + + if (bind(fd, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr)) < 0) + error(1, errno, "bind()"); + + if (listen(fd, 5) < 0) + error(1, errno, "listen()"); + + len = sizeof(cfg_addr); + connfd = accept(fd, (struct sockaddr *)&cfg_addr, &len); + if (connfd < 0) + error(1, errno, "accept()"); + + len = sizeof(opt); + if (getsockopt(connfd, SOL_SOCKET, SO_INCOMING_NAPI_ID, &opt, &len) < 0) + error(1, errno, "getsockopt(SO_INCOMING_NAPI_ID)"); + + if (read(connfd, buf, 64) < 0) + error(1, errno, "read()"); + + if (fprintf(outfile, "%d\n", opt) < 0) + error(1, errno, "fprintf()"); + + fclose(outfile); + close(connfd); + close(fd); +} + +static void run_client(void) +{ + int fd, ret; + char *msg = "Hello, world!"; + + fd = socket(AF_INET6, SOCK_STREAM, 0); + if (fd == -1) + error(1, errno, "socket()"); + + ret = sendto(fd, msg, strlen(msg), MSG_FASTOPEN, + (struct sockaddr *)&cfg_addr, sizeof(cfg_addr)); + if (ret < 0) + error(1, errno, "sendto()"); + + close(fd); +} + +static void usage(const char *filepath) +{ + error(1, 0, "Usage: %s (-s|-c) -h<server_ip> -p<port> -o<outfile> ", filepath); +} + +static void parse_opts(int argc, char **argv) +{ + struct sockaddr_in6 *addr6 = (void *) &cfg_addr; + char *addr = NULL; + int ret; + int c; + + if (argc <= 1) + usage(argv[0]); + + while ((c = getopt(argc, argv, "sch:p:o:")) != -1) { + switch (c) { + case 's': + if (cfg_client) + error(1, 0, "Pass one of -s or -c"); + cfg_server = 1; + break; + case 'c': + if (cfg_server) + error(1, 0, "Pass one of -s or -c"); + cfg_client = 1; + break; + case 'h': + addr = optarg; + break; + case 'p': + cfg_port = strtoul(optarg, NULL, 0); + break; + case 'o': + cfg_outfile = strdup(optarg); + if (!cfg_outfile) + error(1, 0, "outfile invalid"); + break; + } + } + + if (cfg_server && addr) + error(1, 0, "Server cannot have -h specified"); + + memset(addr6, 0, sizeof(*addr6)); + addr6->sin6_family = AF_INET6; + addr6->sin6_port = htons(cfg_port); + addr6->sin6_addr = in6addr_any; + if (addr) { + ret = parse_address(addr, cfg_port, addr6); + if (ret) + error(1, 0, "Client address parse error: %s", addr); + } +} + +int main(int argc, char **argv) +{ + parse_opts(argc, argv); + + if (cfg_server) + run_server(); + else if (cfg_client) + run_client(); + + return 0; +} diff --git a/tools/testing/selftests/net/tfo_passive.sh b/tools/testing/selftests/net/tfo_passive.sh new file mode 100755 index 000000000000..f116f888b794 --- /dev/null +++ b/tools/testing/selftests/net/tfo_passive.sh @@ -0,0 +1,123 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +source lib.sh + +NSIM_SV_ID=$((256 + RANDOM % 256)) +NSIM_SV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_SV_ID +NSIM_CL_ID=$((512 + RANDOM % 256)) +NSIM_CL_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_CL_ID + +NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device +NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device +NSIM_DEV_SYS_LINK=/sys/bus/netdevsim/link_device +NSIM_DEV_SYS_UNLINK=/sys/bus/netdevsim/unlink_device + +SERVER_IP=192.168.1.1 +CLIENT_IP=192.168.1.2 +SERVER_PORT=48675 + +setup_ns() +{ + set -e + ip netns add nssv + ip netns add nscl + + NSIM_SV_NAME=$(find $NSIM_SV_SYS/net -maxdepth 1 -type d ! \ + -path $NSIM_SV_SYS/net -exec basename {} \;) + NSIM_CL_NAME=$(find $NSIM_CL_SYS/net -maxdepth 1 -type d ! \ + -path $NSIM_CL_SYS/net -exec basename {} \;) + + ip link set $NSIM_SV_NAME netns nssv + ip link set $NSIM_CL_NAME netns nscl + + ip netns exec nssv ip addr add "${SERVER_IP}/24" dev $NSIM_SV_NAME + ip netns exec nscl ip addr add "${CLIENT_IP}/24" dev $NSIM_CL_NAME + + ip netns exec nssv ip link set dev $NSIM_SV_NAME up + ip netns exec nscl ip link set dev $NSIM_CL_NAME up + + # Enable passive TFO + ip netns exec nssv sysctl -w net.ipv4.tcp_fastopen=519 > /dev/null + + set +e +} + +cleanup_ns() +{ + ip netns del nscl + ip netns del nssv +} + +### +### Code start +### + +modprobe netdevsim + +# linking + +echo $NSIM_SV_ID > $NSIM_DEV_SYS_NEW +echo $NSIM_CL_ID > $NSIM_DEV_SYS_NEW +udevadm settle + +setup_ns + +NSIM_SV_FD=$((256 + RANDOM % 256)) +exec {NSIM_SV_FD}</var/run/netns/nssv +NSIM_SV_IFIDX=$(ip netns exec nssv cat /sys/class/net/$NSIM_SV_NAME/ifindex) + +NSIM_CL_FD=$((256 + RANDOM % 256)) +exec {NSIM_CL_FD}</var/run/netns/nscl +NSIM_CL_IFIDX=$(ip netns exec nscl cat /sys/class/net/$NSIM_CL_NAME/ifindex) + +echo "$NSIM_SV_FD:$NSIM_SV_IFIDX $NSIM_CL_FD:$NSIM_CL_IFIDX" > \ + $NSIM_DEV_SYS_LINK + +if [ $? -ne 0 ]; then + echo "linking netdevsim1 with netdevsim2 should succeed" + cleanup_ns + exit 1 +fi + +out_file=$(mktemp) + +timeout -k 1s 30s ip netns exec nssv ./tfo \ + -s \ + -p ${SERVER_PORT} \ + -o ${out_file}& +server_pid="$!" + +wait_local_port_listen nssv ${SERVER_PORT} tcp + +ip netns exec nscl ./tfo -c -h ${SERVER_IP} -p ${SERVER_PORT} +client_exit_status="$?" + +wait "$server_pid" +server_exit_status="$?" + +res=$(cat $out_file) +rm $out_file + +if [ "$res" = "0" ]; then + echo "got invalid NAPI ID from passive TFO socket" + cleanup_ns + exit 1 +fi + +if [ "$client_exit_status" -ne 0 ] || [ "$server_exit_status" -ne 0 ]; then + # Note: timeout(1) exits with 124 if it timed out + echo "client exited with ${client_exit_status}" + echo "server exited with ${server_exit_status}" + cleanup_ns + exit 1 +fi + +echo "$NSIM_SV_FD:$NSIM_SV_IFIDX" > $NSIM_DEV_SYS_UNLINK + +echo $NSIM_CL_ID > $NSIM_DEV_SYS_DEL + +cleanup_ns + +modprobe -r netdevsim + +exit 0 diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 9a85f93c33d8..9e2ccea13d70 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -21,7 +21,7 @@ #include <sys/socket.h> #include <sys/stat.h> -#include "../kselftest_harness.h" +#include "kselftest_harness.h" #define TLS_PAYLOAD_MAX_LEN 16384 #define SOL_TLS 282 @@ -181,13 +181,12 @@ static int tls_send_cmsg(int fd, unsigned char record_type, return sendmsg(fd, &msg, flags); } -static int tls_recv_cmsg(struct __test_metadata *_metadata, - int fd, unsigned char record_type, - void *data, size_t len, int flags) +static int __tls_recv_cmsg(struct __test_metadata *_metadata, + int fd, unsigned char *ctype, + void *data, size_t len, int flags) { char cbuf[CMSG_SPACE(sizeof(char))]; struct cmsghdr *cmsg; - unsigned char ctype; struct msghdr msg; struct iovec vec; int n; @@ -206,7 +205,20 @@ static int tls_recv_cmsg(struct __test_metadata *_metadata, EXPECT_NE(cmsg, NULL); EXPECT_EQ(cmsg->cmsg_level, SOL_TLS); EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE); - ctype = *((unsigned char *)CMSG_DATA(cmsg)); + if (ctype) + *ctype = *((unsigned char *)CMSG_DATA(cmsg)); + + return n; +} + +static int tls_recv_cmsg(struct __test_metadata *_metadata, + int fd, unsigned char record_type, + void *data, size_t len, int flags) +{ + unsigned char ctype; + int n; + + n = __tls_recv_cmsg(_metadata, fd, &ctype, data, len, flags); EXPECT_EQ(ctype, record_type); return n; @@ -427,6 +439,8 @@ TEST_F(tls, sendfile) EXPECT_GE(filefd, 0); fstat(filefd, &st); EXPECT_GE(sendfile(self->fd, filefd, 0, st.st_size), 0); + + close(filefd); } TEST_F(tls, send_then_sendfile) @@ -448,6 +462,9 @@ TEST_F(tls, send_then_sendfile) EXPECT_GE(sendfile(self->fd, filefd, 0, st.st_size), 0); EXPECT_EQ(recv(self->cfd, buf, st.st_size, MSG_WAITALL), st.st_size); + + free(buf); + close(filefd); } static void chunked_sendfile(struct __test_metadata *_metadata, @@ -547,6 +564,40 @@ TEST_F(tls, msg_more) EXPECT_EQ(memcmp(buf, test_str, send_len), 0); } +TEST_F(tls, cmsg_msg_more) +{ + char *test_str = "test_read"; + char record_type = 100; + int send_len = 10; + + /* we don't allow MSG_MORE with non-DATA records */ + EXPECT_EQ(tls_send_cmsg(self->fd, record_type, test_str, send_len, + MSG_MORE), -1); + EXPECT_EQ(errno, EINVAL); +} + +TEST_F(tls, msg_more_then_cmsg) +{ + char *test_str = "test_read"; + char record_type = 100; + int send_len = 10; + char buf[10 * 2]; + int ret; + + EXPECT_EQ(send(self->fd, test_str, send_len, MSG_MORE), send_len); + EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_DONTWAIT), -1); + + ret = tls_send_cmsg(self->fd, record_type, test_str, send_len, 0); + EXPECT_EQ(ret, send_len); + + /* initial DATA record didn't get merged with the non-DATA record */ + EXPECT_EQ(recv(self->cfd, buf, send_len * 2, 0), send_len); + + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type, + buf, sizeof(buf), MSG_WAITALL), + send_len); +} + TEST_F(tls, msg_more_unsent) { char const *test_str = "test_read"; @@ -895,6 +946,37 @@ TEST_F(tls, peek_and_splice) EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); } +#define MAX_FRAGS 48 +TEST_F(tls, splice_short) +{ + struct iovec sendchar_iov; + char read_buf[0x10000]; + char sendbuf[0x100]; + char sendchar = 'S'; + int pipefds[2]; + int i; + + sendchar_iov.iov_base = &sendchar; + sendchar_iov.iov_len = 1; + + memset(sendbuf, 's', sizeof(sendbuf)); + + ASSERT_GE(pipe2(pipefds, O_NONBLOCK), 0); + ASSERT_GE(fcntl(pipefds[0], F_SETPIPE_SZ, (MAX_FRAGS + 1) * 0x1000), 0); + + for (i = 0; i < MAX_FRAGS; i++) + ASSERT_GE(vmsplice(pipefds[1], &sendchar_iov, 1, 0), 0); + + ASSERT_EQ(write(pipefds[1], sendbuf, sizeof(sendbuf)), sizeof(sendbuf)); + + EXPECT_EQ(splice(pipefds[0], NULL, self->fd, NULL, MAX_FRAGS + 0x1000, 0), + MAX_FRAGS + sizeof(sendbuf)); + EXPECT_EQ(recv(self->cfd, read_buf, sizeof(read_buf), 0), MAX_FRAGS + sizeof(sendbuf)); + EXPECT_EQ(recv(self->cfd, read_buf, sizeof(read_buf), MSG_DONTWAIT), -1); + EXPECT_EQ(errno, EAGAIN); +} +#undef MAX_FRAGS + TEST_F(tls, recvmsg_single) { char const *test_str = "test_recvmsg_single"; @@ -1753,6 +1835,42 @@ TEST_F(tls_basic, rekey_tx) EXPECT_EQ(memcmp(buf, test_str, send_len), 0); } +TEST_F(tls_basic, disconnect) +{ + char const *test_str = "test_message"; + int send_len = strlen(test_str) + 1; + struct tls_crypto_info_keys key; + struct sockaddr_in addr; + char buf[20]; + int ret; + + if (self->notls) + return; + + tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128, + &key, 0); + + ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &key, key.len); + ASSERT_EQ(ret, 0); + + /* Pre-queue the data so that setsockopt parses it but doesn't + * dequeue it from the TCP socket. recvmsg would dequeue. + */ + EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); + + ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &key, key.len); + ASSERT_EQ(ret, 0); + + addr.sin_family = AF_UNSPEC; + addr.sin_addr.s_addr = htonl(INADDR_ANY); + addr.sin_port = 0; + ret = connect(self->cfd, &addr, sizeof(addr)); + EXPECT_EQ(ret, -1); + EXPECT_EQ(errno, EOPNOTSUPP); + + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); +} + TEST_F(tls, rekey) { char const *test_str_1 = "test_message_before_rekey"; @@ -2128,6 +2246,284 @@ TEST_F(tls, rekey_poll_delay) } } +struct raw_rec { + unsigned int plain_len; + unsigned char plain_data[100]; + unsigned int cipher_len; + unsigned char cipher_data[128]; +}; + +/* TLS 1.2, AES_CCM, data, seqno:0, plaintext: 'Hello world' */ +static const struct raw_rec id0_data_l11 = { + .plain_len = 11, + .plain_data = { + 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, + 0x72, 0x6c, 0x64, + }, + .cipher_len = 40, + .cipher_data = { + 0x17, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x26, 0xa2, 0x33, + 0xde, 0x8d, 0x94, 0xf0, 0x29, 0x6c, 0xb1, 0xaf, + 0x6a, 0x75, 0xb2, 0x93, 0xad, 0x45, 0xd5, 0xfd, + 0x03, 0x51, 0x57, 0x8f, 0xf9, 0xcc, 0x3b, 0x42, + }, +}; + +/* TLS 1.2, AES_CCM, ctrl, seqno:0, plaintext: '' */ +static const struct raw_rec id0_ctrl_l0 = { + .plain_len = 0, + .plain_data = { + }, + .cipher_len = 29, + .cipher_data = { + 0x16, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x38, 0x7b, + 0xa6, 0x1c, 0xdd, 0xa7, 0x19, 0x33, 0xab, 0xae, + 0x88, 0xe1, 0xd2, 0x08, 0x4f, + }, +}; + +/* TLS 1.2, AES_CCM, data, seqno:0, plaintext: '' */ +static const struct raw_rec id0_data_l0 = { + .plain_len = 0, + .plain_data = { + }, + .cipher_len = 29, + .cipher_data = { + 0x17, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xc5, 0x37, 0x90, + 0x70, 0x45, 0x89, 0xfb, 0x5c, 0xc7, 0x89, 0x03, + 0x68, 0x80, 0xd3, 0xd8, 0xcc, + }, +}; + +/* TLS 1.2, AES_CCM, data, seqno:1, plaintext: 'Hello world' */ +static const struct raw_rec id1_data_l11 = { + .plain_len = 11, + .plain_data = { + 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, + 0x72, 0x6c, 0x64, + }, + .cipher_len = 40, + .cipher_data = { + 0x17, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x3a, 0x1a, 0x9c, + 0xd0, 0xa8, 0x9a, 0xd6, 0x69, 0xd6, 0x1a, 0xe3, + 0xb5, 0x1f, 0x0d, 0x2c, 0xe2, 0x97, 0x46, 0xff, + 0x2b, 0xcc, 0x5a, 0xc4, 0xa3, 0xb9, 0xef, 0xba, + }, +}; + +/* TLS 1.2, AES_CCM, ctrl, seqno:1, plaintext: '' */ +static const struct raw_rec id1_ctrl_l0 = { + .plain_len = 0, + .plain_data = { + }, + .cipher_len = 29, + .cipher_data = { + 0x16, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x3e, 0xf0, 0xfe, + 0xee, 0xd9, 0xe2, 0x5d, 0xc7, 0x11, 0x4c, 0xe6, + 0xb4, 0x7e, 0xef, 0x40, 0x2b, + }, +}; + +/* TLS 1.2, AES_CCM, data, seqno:1, plaintext: '' */ +static const struct raw_rec id1_data_l0 = { + .plain_len = 0, + .plain_data = { + }, + .cipher_len = 29, + .cipher_data = { + 0x17, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0xce, 0xfc, 0x86, + 0xc8, 0xf0, 0x55, 0xf9, 0x47, 0x3f, 0x74, 0xdc, + 0xc9, 0xbf, 0xfe, 0x5b, 0xb1, + }, +}; + +/* TLS 1.2, AES_CCM, ctrl, seqno:2, plaintext: 'Hello world' */ +static const struct raw_rec id2_ctrl_l11 = { + .plain_len = 11, + .plain_data = { + 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, + 0x72, 0x6c, 0x64, + }, + .cipher_len = 40, + .cipher_data = { + 0x16, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0xe5, 0x3d, 0x19, + 0x3d, 0xca, 0xb8, 0x16, 0xb6, 0xff, 0x79, 0x87, + 0x2a, 0x04, 0x11, 0x3d, 0xf8, 0x64, 0x5f, 0x36, + 0x8b, 0xa8, 0xee, 0x4c, 0x6d, 0x62, 0xa5, 0x00, + }, +}; + +/* TLS 1.2, AES_CCM, data, seqno:2, plaintext: 'Hello world' */ +static const struct raw_rec id2_data_l11 = { + .plain_len = 11, + .plain_data = { + 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, + 0x72, 0x6c, 0x64, + }, + .cipher_len = 40, + .cipher_data = { + 0x17, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0xe5, 0x3d, 0x19, + 0x3d, 0xca, 0xb8, 0x16, 0xb6, 0xff, 0x79, 0x87, + 0x8e, 0xa1, 0xd0, 0xcd, 0x33, 0xb5, 0x86, 0x2b, + 0x17, 0xf1, 0x52, 0x2a, 0x55, 0x62, 0x65, 0x11, + }, +}; + +/* TLS 1.2, AES_CCM, ctrl, seqno:2, plaintext: '' */ +static const struct raw_rec id2_ctrl_l0 = { + .plain_len = 0, + .plain_data = { + }, + .cipher_len = 29, + .cipher_data = { + 0x16, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0xdc, 0x5c, 0x0e, + 0x41, 0xdd, 0xba, 0xd3, 0xcc, 0xcf, 0x6d, 0xd9, + 0x06, 0xdb, 0x79, 0xe5, 0x5d, + }, +}; + +/* TLS 1.2, AES_CCM, data, seqno:2, plaintext: '' */ +static const struct raw_rec id2_data_l0 = { + .plain_len = 0, + .plain_data = { + }, + .cipher_len = 29, + .cipher_data = { + 0x17, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0xc3, 0xca, 0x26, + 0x22, 0xe4, 0x25, 0xfb, 0x5f, 0x6d, 0xbf, 0x83, + 0x30, 0x48, 0x69, 0x1a, 0x47, + }, +}; + +FIXTURE(zero_len) +{ + int fd, cfd; + bool notls; +}; + +FIXTURE_VARIANT(zero_len) +{ + const struct raw_rec *recs[4]; + ssize_t recv_ret[4]; +}; + +FIXTURE_VARIANT_ADD(zero_len, data_data_data) +{ + .recs = { &id0_data_l11, &id1_data_l11, &id2_data_l11, }, + .recv_ret = { 33, -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, data_0ctrl_data) +{ + .recs = { &id0_data_l11, &id1_ctrl_l0, &id2_data_l11, }, + .recv_ret = { 11, 0, 11, -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, 0data_0data_0data) +{ + .recs = { &id0_data_l0, &id1_data_l0, &id2_data_l0, }, + .recv_ret = { -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, 0data_0data_ctrl) +{ + .recs = { &id0_data_l0, &id1_data_l0, &id2_ctrl_l11, }, + .recv_ret = { 0, 11, -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, 0data_0data_0ctrl) +{ + .recs = { &id0_data_l0, &id1_data_l0, &id2_ctrl_l0, }, + .recv_ret = { 0, 0, -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, 0ctrl_0ctrl_0ctrl) +{ + .recs = { &id0_ctrl_l0, &id1_ctrl_l0, &id2_ctrl_l0, }, + .recv_ret = { 0, 0, 0, -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, 0data_0data_data) +{ + .recs = { &id0_data_l0, &id1_data_l0, &id2_data_l11, }, + .recv_ret = { 11, -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, data_0data_0data) +{ + .recs = { &id0_data_l11, &id1_data_l0, &id2_data_l0, }, + .recv_ret = { 11, -EAGAIN, }, +}; + +FIXTURE_SETUP(zero_len) +{ + struct tls_crypto_info_keys tls12; + int ret; + + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_CCM_128, + &tls12, 0); + + ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls); + if (self->notls) + return; + + /* Don't install keys on fd, we'll send raw records */ + ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len); + ASSERT_EQ(ret, 0); +} + +FIXTURE_TEARDOWN(zero_len) +{ + close(self->fd); + close(self->cfd); +} + +TEST_F(zero_len, test) +{ + const struct raw_rec *const *rec; + unsigned char buf[128]; + int rec_off; + int i; + + for (i = 0; i < 4 && variant->recs[i]; i++) + EXPECT_EQ(send(self->fd, variant->recs[i]->cipher_data, + variant->recs[i]->cipher_len, 0), + variant->recs[i]->cipher_len); + + rec = &variant->recs[0]; + rec_off = 0; + for (i = 0; i < 4; i++) { + int j, ret; + + ret = variant->recv_ret[i] >= 0 ? variant->recv_ret[i] : -1; + EXPECT_EQ(__tls_recv_cmsg(_metadata, self->cfd, NULL, + buf, sizeof(buf), MSG_DONTWAIT), ret); + if (ret == -1) + EXPECT_EQ(errno, -variant->recv_ret[i]); + if (variant->recv_ret[i] == -EAGAIN) + break; + + for (j = 0; j < ret; j++) { + while (rec_off == (*rec)->plain_len) { + rec++; + rec_off = 0; + } + EXPECT_EQ(buf[j], (*rec)->plain_data[rec_off]); + rec_off++; + } + } +}; + FIXTURE(tls_err) { int fd, cfd; @@ -2390,10 +2786,10 @@ TEST_F(tls_err, epoll_partial_rec) TEST_F(tls_err, poll_partial_rec_async) { struct pollfd pfd = { }; + char token = '\0'; ssize_t rec_len; char rec[256]; char buf[128]; - char token; int p[2]; int ret; @@ -2444,6 +2840,163 @@ TEST_F(tls_err, poll_partial_rec_async) } } +/* Use OOB+large send to trigger copy mode due to memory pressure. + * OOB causes a short read. + */ +TEST_F(tls_err, oob_pressure) +{ + char buf[1<<16]; + int i; + + memrnd(buf, sizeof(buf)); + + EXPECT_EQ(send(self->fd2, buf, 5, MSG_OOB), 5); + EXPECT_EQ(send(self->fd2, buf, sizeof(buf), 0), sizeof(buf)); + for (i = 0; i < 64; i++) + EXPECT_EQ(send(self->fd2, buf, 5, MSG_OOB), 5); +} + +/* + * Parse a stream of TLS records and ensure that each record respects + * the specified @max_payload_len. + */ +static size_t parse_tls_records(struct __test_metadata *_metadata, + const __u8 *rx_buf, int rx_len, int overhead, + __u16 max_payload_len) +{ + const __u8 *rec = rx_buf; + size_t total_plaintext_rx = 0; + const __u8 rec_header_len = 5; + + while (rec < rx_buf + rx_len) { + __u16 record_payload_len; + __u16 plaintext_len; + + /* Sanity check that it's a TLS header for application data */ + ASSERT_EQ(rec[0], 23); + ASSERT_EQ(rec[1], 0x3); + ASSERT_EQ(rec[2], 0x3); + + memcpy(&record_payload_len, rec + 3, 2); + record_payload_len = ntohs(record_payload_len); + ASSERT_GE(record_payload_len, overhead); + + plaintext_len = record_payload_len - overhead; + total_plaintext_rx += plaintext_len; + + /* Plaintext must not exceed the specified limit */ + ASSERT_LE(plaintext_len, max_payload_len); + rec += rec_header_len + record_payload_len; + } + + return total_plaintext_rx; +} + +TEST(tls_12_tx_max_payload_len) +{ + struct tls_crypto_info_keys tls12; + int cfd, ret, fd, overhead; + size_t total_plaintext_rx = 0; + __u8 tx[1024], rx[2000]; + __u16 limit = 128; + __u16 opt = 0; + unsigned int optlen = sizeof(opt); + bool notls; + + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_CCM_128, + &tls12, 0); + + ulp_sock_pair(_metadata, &fd, &cfd, ¬ls); + + if (notls) + exit(KSFT_SKIP); + + /* Don't install keys on fd, we'll parse raw records */ + ret = setsockopt(cfd, SOL_TLS, TLS_TX, &tls12, tls12.len); + ASSERT_EQ(ret, 0); + + ret = setsockopt(cfd, SOL_TLS, TLS_TX_MAX_PAYLOAD_LEN, &limit, + sizeof(limit)); + ASSERT_EQ(ret, 0); + + ret = getsockopt(cfd, SOL_TLS, TLS_TX_MAX_PAYLOAD_LEN, &opt, &optlen); + EXPECT_EQ(ret, 0); + EXPECT_EQ(limit, opt); + EXPECT_EQ(optlen, sizeof(limit)); + + memset(tx, 0, sizeof(tx)); + ASSERT_EQ(send(cfd, tx, sizeof(tx), 0), sizeof(tx)); + close(cfd); + + ret = recv(fd, rx, sizeof(rx), 0); + + /* + * 16B tag + 8B IV -- record header (5B) is not counted but we'll + * need it to walk the record stream + */ + overhead = 16 + 8; + total_plaintext_rx = parse_tls_records(_metadata, rx, ret, overhead, + limit); + + ASSERT_EQ(total_plaintext_rx, sizeof(tx)); + close(fd); +} + +TEST(tls_12_tx_max_payload_len_open_rec) +{ + struct tls_crypto_info_keys tls12; + int cfd, ret, fd, overhead; + size_t total_plaintext_rx = 0; + __u8 tx[1024], rx[2000]; + __u16 tx_partial = 256; + __u16 og_limit = 512, limit = 128; + bool notls; + + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_CCM_128, + &tls12, 0); + + ulp_sock_pair(_metadata, &fd, &cfd, ¬ls); + + if (notls) + exit(KSFT_SKIP); + + /* Don't install keys on fd, we'll parse raw records */ + ret = setsockopt(cfd, SOL_TLS, TLS_TX, &tls12, tls12.len); + ASSERT_EQ(ret, 0); + + ret = setsockopt(cfd, SOL_TLS, TLS_TX_MAX_PAYLOAD_LEN, &og_limit, + sizeof(og_limit)); + ASSERT_EQ(ret, 0); + + memset(tx, 0, sizeof(tx)); + ASSERT_EQ(send(cfd, tx, tx_partial, MSG_MORE), tx_partial); + + /* + * Changing the payload limit with a pending open record should + * not be allowed. + */ + ret = setsockopt(cfd, SOL_TLS, TLS_TX_MAX_PAYLOAD_LEN, &limit, + sizeof(limit)); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, EBUSY); + + ASSERT_EQ(send(cfd, tx + tx_partial, sizeof(tx) - tx_partial, MSG_EOR), + sizeof(tx) - tx_partial); + close(cfd); + + ret = recv(fd, rx, sizeof(rx), 0); + + /* + * 16B tag + 8B IV -- record header (5B) is not counted but we'll + * need it to walk the record stream + */ + overhead = 16 + 8; + total_plaintext_rx = parse_tls_records(_metadata, rx, ret, overhead, + og_limit); + ASSERT_EQ(total_plaintext_rx, sizeof(tx)); + close(fd); +} + TEST(non_established) { struct tls12_crypto_info_aes_gcm_256 tls12; struct sockaddr_in addr; @@ -2672,6 +3225,75 @@ TEST(prequeue) { close(cfd); } +TEST(data_steal) { + struct tls_crypto_info_keys tls; + char buf[20000], buf2[20000]; + struct sockaddr_in addr; + int sfd, cfd, ret, fd; + int pid, status; + socklen_t len; + + len = sizeof(addr); + memrnd(buf, sizeof(buf)); + + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_256, &tls, 0); + + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = htonl(INADDR_ANY); + addr.sin_port = 0; + + fd = socket(AF_INET, SOCK_STREAM, 0); + sfd = socket(AF_INET, SOCK_STREAM, 0); + + ASSERT_EQ(bind(sfd, &addr, sizeof(addr)), 0); + ASSERT_EQ(listen(sfd, 10), 0); + ASSERT_EQ(getsockname(sfd, &addr, &len), 0); + ASSERT_EQ(connect(fd, &addr, sizeof(addr)), 0); + ASSERT_GE(cfd = accept(sfd, &addr, &len), 0); + close(sfd); + + ret = setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")); + if (ret) { + ASSERT_EQ(errno, ENOENT); + SKIP(return, "no TLS support"); + } + ASSERT_EQ(setsockopt(cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")), 0); + + /* Spawn a child and get it into the read wait path of the underlying + * TCP socket (before kernel .recvmsg is replaced with the TLS one). + */ + pid = fork(); + ASSERT_GE(pid, 0); + if (!pid) { + EXPECT_EQ(recv(cfd, buf, sizeof(buf) / 2 + 1, MSG_WAITALL), + sizeof(buf) / 2 + 1); + exit(!__test_passed(_metadata)); + } + + /* Send a sync byte and poll until it's consumed to ensure + * the child is in recv() before we proceed to install TLS. + */ + ASSERT_EQ(send(fd, buf, 1, 0), 1); + do { + usleep(500); + } while (recv(cfd, buf, 1, MSG_PEEK | MSG_DONTWAIT) == 1); + EXPECT_EQ(errno, EAGAIN); + + ASSERT_EQ(setsockopt(fd, SOL_TLS, TLS_TX, &tls, tls.len), 0); + ASSERT_EQ(setsockopt(cfd, SOL_TLS, TLS_RX, &tls, tls.len), 0); + + EXPECT_EQ(send(fd, buf, sizeof(buf), 0), sizeof(buf)); + EXPECT_EQ(wait(&status), pid); + EXPECT_EQ(status, 0); + EXPECT_EQ(recv(cfd, buf2, sizeof(buf2), MSG_DONTWAIT), -1); + /* Don't check errno, the error will be different depending + * on what random bytes TLS interpreted as the record length. + */ + + close(fd); + close(cfd); +} + static void __attribute__((constructor)) fips_check(void) { int res; FILE *f; diff --git a/tools/testing/selftests/net/toeplitz.c b/tools/testing/selftests/net/toeplitz.c deleted file mode 100644 index 9ba03164d73a..000000000000 --- a/tools/testing/selftests/net/toeplitz.c +++ /dev/null @@ -1,589 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Toeplitz test - * - * 1. Read packets and their rx_hash using PF_PACKET/TPACKET_V3 - * 2. Compute the rx_hash in software based on the packet contents - * 3. Compare the two - * - * Optionally, either '-C $rx_irq_cpu_list' or '-r $rps_bitmap' may be given. - * - * If '-C $rx_irq_cpu_list' is given, also - * - * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU - * 5. Compute the rxqueue that RSS would select based on this rx_hash - * 6. Using the $rx_irq_cpu_list map, identify the arriving cpu based on rxq irq - * 7. Compare the cpus from 4 and 6 - * - * Else if '-r $rps_bitmap' is given, also - * - * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU - * 5. Compute the cpu that RPS should select based on rx_hash and $rps_bitmap - * 6. Compare the cpus from 4 and 5 - */ - -#define _GNU_SOURCE - -#include <arpa/inet.h> -#include <errno.h> -#include <error.h> -#include <fcntl.h> -#include <getopt.h> -#include <linux/filter.h> -#include <linux/if_ether.h> -#include <linux/if_packet.h> -#include <net/if.h> -#include <netdb.h> -#include <netinet/ip.h> -#include <netinet/ip6.h> -#include <netinet/tcp.h> -#include <netinet/udp.h> -#include <poll.h> -#include <stdbool.h> -#include <stddef.h> -#include <stdint.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/mman.h> -#include <sys/socket.h> -#include <sys/stat.h> -#include <sys/sysinfo.h> -#include <sys/time.h> -#include <sys/types.h> -#include <unistd.h> - -#include "../kselftest.h" - -#define TOEPLITZ_KEY_MIN_LEN 40 -#define TOEPLITZ_KEY_MAX_LEN 60 - -#define TOEPLITZ_STR_LEN(K) (((K) * 3) - 1) /* hex encoded: AA:BB:CC:...:ZZ */ -#define TOEPLITZ_STR_MIN_LEN TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MIN_LEN) -#define TOEPLITZ_STR_MAX_LEN TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MAX_LEN) - -#define FOUR_TUPLE_MAX_LEN ((sizeof(struct in6_addr) * 2) + (sizeof(uint16_t) * 2)) - -#define RSS_MAX_CPUS (1 << 16) /* real constraint is PACKET_FANOUT_MAX */ - -#define RPS_MAX_CPUS 16UL /* must be a power of 2 */ - -/* configuration options (cmdline arguments) */ -static uint16_t cfg_dport = 8000; -static int cfg_family = AF_INET6; -static char *cfg_ifname = "eth0"; -static int cfg_num_queues; -static int cfg_num_rps_cpus; -static bool cfg_sink; -static int cfg_type = SOCK_STREAM; -static int cfg_timeout_msec = 1000; -static bool cfg_verbose; - -/* global vars */ -static int num_cpus; -static int ring_block_nr; -static int ring_block_sz; - -/* stats */ -static int frames_received; -static int frames_nohash; -static int frames_error; - -#define log_verbose(args...) do { if (cfg_verbose) fprintf(stderr, args); } while (0) - -/* tpacket ring */ -struct ring_state { - int fd; - char *mmap; - int idx; - int cpu; -}; - -static unsigned int rx_irq_cpus[RSS_MAX_CPUS]; /* map from rxq to cpu */ -static int rps_silo_to_cpu[RPS_MAX_CPUS]; -static unsigned char toeplitz_key[TOEPLITZ_KEY_MAX_LEN]; -static struct ring_state rings[RSS_MAX_CPUS]; - -static inline uint32_t toeplitz(const unsigned char *four_tuple, - const unsigned char *key) -{ - int i, bit, ret = 0; - uint32_t key32; - - key32 = ntohl(*((uint32_t *)key)); - key += 4; - - for (i = 0; i < FOUR_TUPLE_MAX_LEN; i++) { - for (bit = 7; bit >= 0; bit--) { - if (four_tuple[i] & (1 << bit)) - ret ^= key32; - - key32 <<= 1; - key32 |= !!(key[0] & (1 << bit)); - } - key++; - } - - return ret; -} - -/* Compare computed cpu with arrival cpu from packet_fanout_cpu */ -static void verify_rss(uint32_t rx_hash, int cpu) -{ - int queue = rx_hash % cfg_num_queues; - - log_verbose(" rxq %d (cpu %d)", queue, rx_irq_cpus[queue]); - if (rx_irq_cpus[queue] != cpu) { - log_verbose(". error: rss cpu mismatch (%d)", cpu); - frames_error++; - } -} - -static void verify_rps(uint64_t rx_hash, int cpu) -{ - int silo = (rx_hash * cfg_num_rps_cpus) >> 32; - - log_verbose(" silo %d (cpu %d)", silo, rps_silo_to_cpu[silo]); - if (rps_silo_to_cpu[silo] != cpu) { - log_verbose(". error: rps cpu mismatch (%d)", cpu); - frames_error++; - } -} - -static void log_rxhash(int cpu, uint32_t rx_hash, - const char *addrs, int addr_len) -{ - char saddr[INET6_ADDRSTRLEN], daddr[INET6_ADDRSTRLEN]; - uint16_t *ports; - - if (!inet_ntop(cfg_family, addrs, saddr, sizeof(saddr)) || - !inet_ntop(cfg_family, addrs + addr_len, daddr, sizeof(daddr))) - error(1, 0, "address parse error"); - - ports = (void *)addrs + (addr_len * 2); - log_verbose("cpu %d: rx_hash 0x%08x [saddr %s daddr %s sport %02hu dport %02hu]", - cpu, rx_hash, saddr, daddr, - ntohs(ports[0]), ntohs(ports[1])); -} - -/* Compare computed rxhash with rxhash received from tpacket_v3 */ -static void verify_rxhash(const char *pkt, uint32_t rx_hash, int cpu) -{ - unsigned char four_tuple[FOUR_TUPLE_MAX_LEN] = {0}; - uint32_t rx_hash_sw; - const char *addrs; - int addr_len; - - if (cfg_family == AF_INET) { - addr_len = sizeof(struct in_addr); - addrs = pkt + offsetof(struct iphdr, saddr); - } else { - addr_len = sizeof(struct in6_addr); - addrs = pkt + offsetof(struct ip6_hdr, ip6_src); - } - - memcpy(four_tuple, addrs, (addr_len * 2) + (sizeof(uint16_t) * 2)); - rx_hash_sw = toeplitz(four_tuple, toeplitz_key); - - if (cfg_verbose) - log_rxhash(cpu, rx_hash, addrs, addr_len); - - if (rx_hash != rx_hash_sw) { - log_verbose(" != expected 0x%x\n", rx_hash_sw); - frames_error++; - return; - } - - log_verbose(" OK"); - if (cfg_num_queues) - verify_rss(rx_hash, cpu); - else if (cfg_num_rps_cpus) - verify_rps(rx_hash, cpu); - log_verbose("\n"); -} - -static char *recv_frame(const struct ring_state *ring, char *frame) -{ - struct tpacket3_hdr *hdr = (void *)frame; - - if (hdr->hv1.tp_rxhash) - verify_rxhash(frame + hdr->tp_net, hdr->hv1.tp_rxhash, - ring->cpu); - else - frames_nohash++; - - return frame + hdr->tp_next_offset; -} - -/* A single TPACKET_V3 block can hold multiple frames */ -static bool recv_block(struct ring_state *ring) -{ - struct tpacket_block_desc *block; - char *frame; - int i; - - block = (void *)(ring->mmap + ring->idx * ring_block_sz); - if (!(block->hdr.bh1.block_status & TP_STATUS_USER)) - return false; - - frame = (char *)block; - frame += block->hdr.bh1.offset_to_first_pkt; - - for (i = 0; i < block->hdr.bh1.num_pkts; i++) { - frame = recv_frame(ring, frame); - frames_received++; - } - - block->hdr.bh1.block_status = TP_STATUS_KERNEL; - ring->idx = (ring->idx + 1) % ring_block_nr; - - return true; -} - -/* simple test: sleep once unconditionally and then process all rings */ -static void process_rings(void) -{ - int i; - - usleep(1000 * cfg_timeout_msec); - - for (i = 0; i < num_cpus; i++) - do {} while (recv_block(&rings[i])); - - fprintf(stderr, "count: pass=%u nohash=%u fail=%u\n", - frames_received - frames_nohash - frames_error, - frames_nohash, frames_error); -} - -static char *setup_ring(int fd) -{ - struct tpacket_req3 req3 = {0}; - void *ring; - - req3.tp_retire_blk_tov = cfg_timeout_msec / 8; - req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH; - - req3.tp_frame_size = 2048; - req3.tp_frame_nr = 1 << 10; - req3.tp_block_nr = 16; - - req3.tp_block_size = req3.tp_frame_size * req3.tp_frame_nr; - req3.tp_block_size /= req3.tp_block_nr; - - if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req3, sizeof(req3))) - error(1, errno, "setsockopt PACKET_RX_RING"); - - ring_block_sz = req3.tp_block_size; - ring_block_nr = req3.tp_block_nr; - - ring = mmap(0, req3.tp_block_size * req3.tp_block_nr, - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_LOCKED | MAP_POPULATE, fd, 0); - if (ring == MAP_FAILED) - error(1, 0, "mmap failed"); - - return ring; -} - -static void __set_filter(int fd, int off_proto, uint8_t proto, int off_dport) -{ - struct sock_filter filter[] = { - BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4), - BPF_STMT(BPF_LD + BPF_B + BPF_ABS, off_proto), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, proto, 0, 2), - BPF_STMT(BPF_LD + BPF_H + BPF_ABS, off_dport), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_dport, 1, 0), - BPF_STMT(BPF_RET + BPF_K, 0), - BPF_STMT(BPF_RET + BPF_K, 0xFFFF), - }; - struct sock_fprog prog = {}; - - prog.filter = filter; - prog.len = ARRAY_SIZE(filter); - if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog))) - error(1, errno, "setsockopt filter"); -} - -/* filter on transport protocol and destination port */ -static void set_filter(int fd) -{ - const int off_dport = offsetof(struct tcphdr, dest); /* same for udp */ - uint8_t proto; - - proto = cfg_type == SOCK_STREAM ? IPPROTO_TCP : IPPROTO_UDP; - if (cfg_family == AF_INET) - __set_filter(fd, offsetof(struct iphdr, protocol), proto, - sizeof(struct iphdr) + off_dport); - else - __set_filter(fd, offsetof(struct ip6_hdr, ip6_nxt), proto, - sizeof(struct ip6_hdr) + off_dport); -} - -/* drop everything: used temporarily during setup */ -static void set_filter_null(int fd) -{ - struct sock_filter filter[] = { - BPF_STMT(BPF_RET + BPF_K, 0), - }; - struct sock_fprog prog = {}; - - prog.filter = filter; - prog.len = ARRAY_SIZE(filter); - if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog))) - error(1, errno, "setsockopt filter"); -} - -static int create_ring(char **ring) -{ - struct fanout_args args = { - .id = 1, - .type_flags = PACKET_FANOUT_CPU, - .max_num_members = RSS_MAX_CPUS - }; - struct sockaddr_ll ll = { 0 }; - int fd, val; - - fd = socket(PF_PACKET, SOCK_DGRAM, 0); - if (fd == -1) - error(1, errno, "socket creation failed"); - - val = TPACKET_V3; - if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, &val, sizeof(val))) - error(1, errno, "setsockopt PACKET_VERSION"); - *ring = setup_ring(fd); - - /* block packets until all rings are added to the fanout group: - * else packets can arrive during setup and get misclassified - */ - set_filter_null(fd); - - ll.sll_family = AF_PACKET; - ll.sll_ifindex = if_nametoindex(cfg_ifname); - ll.sll_protocol = cfg_family == AF_INET ? htons(ETH_P_IP) : - htons(ETH_P_IPV6); - if (bind(fd, (void *)&ll, sizeof(ll))) - error(1, errno, "bind"); - - /* must come after bind: verifies all programs in group match */ - if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &args, sizeof(args))) { - /* on failure, retry using old API if that is sufficient: - * it has a hard limit of 256 sockets, so only try if - * (a) only testing rxhash, not RSS or (b) <= 256 cpus. - * in this API, the third argument is left implicit. - */ - if (cfg_num_queues || num_cpus > 256 || - setsockopt(fd, SOL_PACKET, PACKET_FANOUT, - &args, sizeof(uint32_t))) - error(1, errno, "setsockopt PACKET_FANOUT cpu"); - } - - return fd; -} - -/* setup inet(6) socket to blackhole the test traffic, if arg '-s' */ -static int setup_sink(void) -{ - int fd, val; - - fd = socket(cfg_family, cfg_type, 0); - if (fd == -1) - error(1, errno, "socket %d.%d", cfg_family, cfg_type); - - val = 1 << 20; - if (setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &val, sizeof(val))) - error(1, errno, "setsockopt rcvbuf"); - - return fd; -} - -static void setup_rings(void) -{ - int i; - - for (i = 0; i < num_cpus; i++) { - rings[i].cpu = i; - rings[i].fd = create_ring(&rings[i].mmap); - } - - /* accept packets once all rings in the fanout group are up */ - for (i = 0; i < num_cpus; i++) - set_filter(rings[i].fd); -} - -static void cleanup_rings(void) -{ - int i; - - for (i = 0; i < num_cpus; i++) { - if (munmap(rings[i].mmap, ring_block_nr * ring_block_sz)) - error(1, errno, "munmap"); - if (close(rings[i].fd)) - error(1, errno, "close"); - } -} - -static void parse_cpulist(const char *arg) -{ - do { - rx_irq_cpus[cfg_num_queues++] = strtol(arg, NULL, 10); - - arg = strchr(arg, ','); - if (!arg) - break; - arg++; // skip ',' - } while (1); -} - -static void show_cpulist(void) -{ - int i; - - for (i = 0; i < cfg_num_queues; i++) - fprintf(stderr, "rxq %d: cpu %d\n", i, rx_irq_cpus[i]); -} - -static void show_silos(void) -{ - int i; - - for (i = 0; i < cfg_num_rps_cpus; i++) - fprintf(stderr, "silo %d: cpu %d\n", i, rps_silo_to_cpu[i]); -} - -static void parse_toeplitz_key(const char *str, int slen, unsigned char *key) -{ - int i, ret, off; - - if (slen < TOEPLITZ_STR_MIN_LEN || - slen > TOEPLITZ_STR_MAX_LEN + 1) - error(1, 0, "invalid toeplitz key"); - - for (i = 0, off = 0; off < slen; i++, off += 3) { - ret = sscanf(str + off, "%hhx", &key[i]); - if (ret != 1) - error(1, 0, "key parse error at %d off %d len %d", - i, off, slen); - } -} - -static void parse_rps_bitmap(const char *arg) -{ - unsigned long bitmap; - int i; - - bitmap = strtoul(arg, NULL, 0); - - if (bitmap & ~(RPS_MAX_CPUS - 1)) - error(1, 0, "rps bitmap 0x%lx out of bounds 0..%lu", - bitmap, RPS_MAX_CPUS - 1); - - for (i = 0; i < RPS_MAX_CPUS; i++) - if (bitmap & 1UL << i) - rps_silo_to_cpu[cfg_num_rps_cpus++] = i; -} - -static void parse_opts(int argc, char **argv) -{ - static struct option long_options[] = { - {"dport", required_argument, 0, 'd'}, - {"cpus", required_argument, 0, 'C'}, - {"key", required_argument, 0, 'k'}, - {"iface", required_argument, 0, 'i'}, - {"ipv4", no_argument, 0, '4'}, - {"ipv6", no_argument, 0, '6'}, - {"sink", no_argument, 0, 's'}, - {"tcp", no_argument, 0, 't'}, - {"timeout", required_argument, 0, 'T'}, - {"udp", no_argument, 0, 'u'}, - {"verbose", no_argument, 0, 'v'}, - {"rps", required_argument, 0, 'r'}, - {0, 0, 0, 0} - }; - bool have_toeplitz = false; - int index, c; - - while ((c = getopt_long(argc, argv, "46C:d:i:k:r:stT:uv", long_options, &index)) != -1) { - switch (c) { - case '4': - cfg_family = AF_INET; - break; - case '6': - cfg_family = AF_INET6; - break; - case 'C': - parse_cpulist(optarg); - break; - case 'd': - cfg_dport = strtol(optarg, NULL, 0); - break; - case 'i': - cfg_ifname = optarg; - break; - case 'k': - parse_toeplitz_key(optarg, strlen(optarg), - toeplitz_key); - have_toeplitz = true; - break; - case 'r': - parse_rps_bitmap(optarg); - break; - case 's': - cfg_sink = true; - break; - case 't': - cfg_type = SOCK_STREAM; - break; - case 'T': - cfg_timeout_msec = strtol(optarg, NULL, 0); - break; - case 'u': - cfg_type = SOCK_DGRAM; - break; - case 'v': - cfg_verbose = true; - break; - - default: - error(1, 0, "unknown option %c", optopt); - break; - } - } - - if (!have_toeplitz) - error(1, 0, "Must supply rss key ('-k')"); - - num_cpus = get_nprocs(); - if (num_cpus > RSS_MAX_CPUS) - error(1, 0, "increase RSS_MAX_CPUS"); - - if (cfg_num_queues && cfg_num_rps_cpus) - error(1, 0, - "Can't supply both RSS cpus ('-C') and RPS map ('-r')"); - if (cfg_verbose) { - show_cpulist(); - show_silos(); - } -} - -int main(int argc, char **argv) -{ - const int min_tests = 10; - int fd_sink = -1; - - parse_opts(argc, argv); - - if (cfg_sink) - fd_sink = setup_sink(); - - setup_rings(); - process_rings(); - cleanup_rings(); - - if (cfg_sink && close(fd_sink)) - error(1, errno, "close sink"); - - if (frames_received - frames_nohash < min_tests) - error(1, 0, "too few frames for verification"); - - return frames_error; -} diff --git a/tools/testing/selftests/net/toeplitz.sh b/tools/testing/selftests/net/toeplitz.sh deleted file mode 100755 index 8ff172f7bb1b..000000000000 --- a/tools/testing/selftests/net/toeplitz.sh +++ /dev/null @@ -1,199 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# -# extended toeplitz test: test rxhash plus, optionally, either (1) rss mapping -# from rxhash to rx queue ('-rss') or (2) rps mapping from rxhash to cpu -# ('-rps <rps_map>') -# -# irq-pattern-prefix can be derived from /sys/kernel/irq/*/action, -# which is a driver-specific encoding. -# -# invoke as ./toeplitz.sh (-i <iface>) -u|-t -4|-6 \ -# [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)] - -source setup_loopback.sh -readonly SERVER_IP4="192.168.1.200/24" -readonly SERVER_IP6="fda8::1/64" -readonly SERVER_MAC="aa:00:00:00:00:02" - -readonly CLIENT_IP4="192.168.1.100/24" -readonly CLIENT_IP6="fda8::2/64" -readonly CLIENT_MAC="aa:00:00:00:00:01" - -PORT=8000 -KEY="$(</proc/sys/net/core/netdev_rss_key)" -TEST_RSS=false -RPS_MAP="" -PROTO_FLAG="" -IP_FLAG="" -DEV="eth0" - -# Return the number of rxqs among which RSS is configured to spread packets. -# This is determined by reading the RSS indirection table using ethtool. -get_rss_cfg_num_rxqs() { - echo $(ethtool -x "${DEV}" | - grep -E [[:space:]]+[0-9]+:[[:space:]]+ | - cut -d: -f2- | - awk '{$1=$1};1' | - tr ' ' '\n' | - sort -u | - wc -l) -} - -# Return a list of the receive irq handler cpus. -# The list is ordered by the irqs, so first rxq-0 cpu, then rxq-1 cpu, etc. -# Reads /sys/kernel/irq/ in order, so algorithm depends on -# irq_{rxq-0} < irq_{rxq-1}, etc. -get_rx_irq_cpus() { - CPUS="" - # sort so that irq 2 is read before irq 10 - SORTED_IRQS=$(for i in /sys/kernel/irq/*; do echo $i; done | sort -V) - # Consider only as many queues as RSS actually uses. We assume that - # if RSS_CFG_NUM_RXQS=N, then RSS uses rxqs 0-(N-1). - RSS_CFG_NUM_RXQS=$(get_rss_cfg_num_rxqs) - RXQ_COUNT=0 - - for i in ${SORTED_IRQS} - do - [[ "${RXQ_COUNT}" -lt "${RSS_CFG_NUM_RXQS}" ]] || break - # lookup relevant IRQs by action name - [[ -e "$i/actions" ]] || continue - cat "$i/actions" | grep -q "${IRQ_PATTERN}" || continue - irqname=$(<"$i/actions") - - # does the IRQ get called - irqcount=$(cat "$i/per_cpu_count" | tr -d '0,') - [[ -n "${irqcount}" ]] || continue - - # lookup CPU - irq=$(basename "$i") - cpu=$(cat "/proc/irq/$irq/smp_affinity_list") - - if [[ -z "${CPUS}" ]]; then - CPUS="${cpu}" - else - CPUS="${CPUS},${cpu}" - fi - RXQ_COUNT=$((RXQ_COUNT+1)) - done - - echo "${CPUS}" -} - -get_disable_rfs_cmd() { - echo "echo 0 > /proc/sys/net/core/rps_sock_flow_entries;" -} - -get_set_rps_bitmaps_cmd() { - CMD="" - for i in /sys/class/net/${DEV}/queues/rx-*/rps_cpus - do - CMD="${CMD} echo $1 > ${i};" - done - - echo "${CMD}" -} - -get_disable_rps_cmd() { - echo "$(get_set_rps_bitmaps_cmd 0)" -} - -die() { - echo "$1" - exit 1 -} - -check_nic_rxhash_enabled() { - local -r pattern="receive-hashing:\ on" - - ethtool -k "${DEV}" | grep -q "${pattern}" || die "rxhash must be enabled" -} - -parse_opts() { - local prog=$0 - shift 1 - - while [[ "$1" =~ "-" ]]; do - if [[ "$1" = "-irq_prefix" ]]; then - shift - IRQ_PATTERN="^$1-[0-9]*$" - elif [[ "$1" = "-u" || "$1" = "-t" ]]; then - PROTO_FLAG="$1" - elif [[ "$1" = "-4" ]]; then - IP_FLAG="$1" - SERVER_IP="${SERVER_IP4}" - CLIENT_IP="${CLIENT_IP4}" - elif [[ "$1" = "-6" ]]; then - IP_FLAG="$1" - SERVER_IP="${SERVER_IP6}" - CLIENT_IP="${CLIENT_IP6}" - elif [[ "$1" = "-rss" ]]; then - TEST_RSS=true - elif [[ "$1" = "-rps" ]]; then - shift - RPS_MAP="$1" - elif [[ "$1" = "-i" ]]; then - shift - DEV="$1" - else - die "Usage: ${prog} (-i <iface>) -u|-t -4|-6 \ - [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)]" - fi - shift - done -} - -setup() { - setup_loopback_environment "${DEV}" - - # Set up server_ns namespace and client_ns namespace - setup_macvlan_ns "${DEV}" $server_ns server \ - "${SERVER_MAC}" "${SERVER_IP}" - setup_macvlan_ns "${DEV}" $client_ns client \ - "${CLIENT_MAC}" "${CLIENT_IP}" -} - -cleanup() { - cleanup_macvlan_ns $server_ns server $client_ns client - cleanup_loopback "${DEV}" -} - -parse_opts $0 $@ - -setup -trap cleanup EXIT - -check_nic_rxhash_enabled - -# Actual test starts here -if [[ "${TEST_RSS}" = true ]]; then - # RPS/RFS must be disabled because they move packets between cpus, - # which breaks the PACKET_FANOUT_CPU identification of RSS decisions. - eval "$(get_disable_rfs_cmd) $(get_disable_rps_cmd)" \ - ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \ - -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \ - -C "$(get_rx_irq_cpus)" -s -v & -elif [[ ! -z "${RPS_MAP}" ]]; then - eval "$(get_disable_rfs_cmd) $(get_set_rps_bitmaps_cmd ${RPS_MAP})" \ - ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \ - -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \ - -r "0x${RPS_MAP}" -s -v & -else - ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \ - -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 -s -v & -fi - -server_pid=$! - -ip netns exec $client_ns ./toeplitz_client.sh "${PROTO_FLAG}" \ - "${IP_FLAG}" "${SERVER_IP%%/*}" "${PORT}" & - -client_pid=$! - -wait "${server_pid}" -exit_code=$? -kill -9 "${client_pid}" -if [[ "${exit_code}" -eq 0 ]]; then - echo "Test Succeeded!" -fi -exit "${exit_code}" diff --git a/tools/testing/selftests/net/toeplitz_client.sh b/tools/testing/selftests/net/toeplitz_client.sh deleted file mode 100755 index 2fef34f4aba1..000000000000 --- a/tools/testing/selftests/net/toeplitz_client.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# -# A simple program for generating traffic for the toeplitz test. -# -# This program sends packets periodically for, conservatively, 20 seconds. The -# intent is for the calling program to kill this program once it is no longer -# needed, rather than waiting for the 20 second expiration. - -send_traffic() { - expiration=$((SECONDS+20)) - while [[ "${SECONDS}" -lt "${expiration}" ]] - do - if [[ "${PROTO}" == "-u" ]]; then - echo "msg $i" | nc "${IPVER}" -u -w 0 "${ADDR}" "${PORT}" - else - echo "msg $i" | nc "${IPVER}" -w 0 "${ADDR}" "${PORT}" - fi - sleep 0.001 - done -} - -PROTO=$1 -IPVER=$2 -ADDR=$3 -PORT=$4 - -send_traffic diff --git a/tools/testing/selftests/net/traceroute.sh b/tools/testing/selftests/net/traceroute.sh index 282f14760940..a7c6ab8a0347 100755 --- a/tools/testing/selftests/net/traceroute.sh +++ b/tools/testing/selftests/net/traceroute.sh @@ -10,28 +10,6 @@ PAUSE_ON_FAIL=no ################################################################################ # -log_test() -{ - local rc=$1 - local expected=$2 - local msg="$3" - - if [ ${rc} -eq ${expected} ]; then - printf "TEST: %-60s [ OK ]\n" "${msg}" - nsuccess=$((nsuccess+1)) - else - ret=1 - nfail=$((nfail+1)) - printf "TEST: %-60s [FAIL]\n" "${msg}" - if [ "${PAUSE_ON_FAIL}" = "yes" ]; then - echo - echo "hit enter to continue, 'q' to quit" - read a - [ "$a" = "q" ] && exit 1 - fi - fi -} - run_cmd() { local ns @@ -58,6 +36,35 @@ run_cmd() return $rc } +__check_traceroute_version() +{ + local cmd=$1; shift + local req_ver=$1; shift + local ver + + req_ver=$(echo "$req_ver" | sed 's/\.//g') + ver=$($cmd -V 2>&1 | grep -Eo '[0-9]+.[0-9]+.[0-9]+' | sed 's/\.//g') + if [[ $ver -lt $req_ver ]]; then + return 1 + else + return 0 + fi +} + +check_traceroute6_version() +{ + local req_ver=$1; shift + + __check_traceroute_version traceroute6 "$req_ver" +} + +check_traceroute_version() +{ + local req_ver=$1; shift + + __check_traceroute_version traceroute "$req_ver" +} + ################################################################################ # create namespaces and interconnects @@ -81,6 +88,8 @@ create_ns() ip netns exec ${ns} ip -6 ro add unreachable default metric 8192 ip netns exec ${ns} sysctl -qw net.ipv4.ip_forward=1 + ip netns exec ${ns} sysctl -qw net.ipv4.icmp_ratelimit=0 + ip netns exec ${ns} sysctl -qw net.ipv6.icmp.ratelimit=0 ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1 ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.forwarding=1 ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.forwarding=1 @@ -203,34 +212,275 @@ setup_traceroute6() run_traceroute6() { - if [ ! -x "$(command -v traceroute6)" ]; then - echo "SKIP: Could not run IPV6 test without traceroute6" - return - fi - setup_traceroute6 + RET=0 + # traceroute6 host-2 from host-1 (expects 2000:102::2) run_cmd $h1 "traceroute6 2000:103::4 | grep -q 2000:102::2" - log_test $? 0 "IPV6 traceroute" + check_err $? "traceroute6 did not return 2000:102::2" + log_test "IPv6 traceroute" cleanup_traceroute6 } ################################################################################ +# traceroute6 with VRF test +# +# Verify that in this scenario +# +# ------------------------ N2 +# | | +# ------ ------ N3 ---- +# | R1 | | R2 |------|H2| +# ------ ------ ---- +# | | +# ------------------------ N1 +# | +# ---- +# |H1| +# ---- +# +# Where H1's default route goes through R1 and R1's default route goes through +# R2 over N2, traceroute6 from H1 to H2 reports R2's address on N2 and not N1. +# The interfaces connecting R2 to the different subnets are membmer in a VRF +# and the intention is to check that traceroute6 does not report the VRF's +# address. +# +# Addresses are assigned as follows: +# +# N1: 2000:101::/64 +# N2: 2000:102::/64 +# N3: 2000:103::/64 +# +# R1's host part of address: 1 +# R2's host part of address: 2 +# H1's host part of address: 3 +# H2's host part of address: 4 +# +# For example: +# the IPv6 address of R1's interface on N2 is 2000:102::1/64 + +cleanup_traceroute6_vrf() +{ + cleanup_all_ns +} + +setup_traceroute6_vrf() +{ + # Start clean + cleanup_traceroute6_vrf + + setup_ns h1 h2 r1 r2 + create_ns "$h1" + create_ns "$h2" + create_ns "$r1" + create_ns "$r2" + + ip -n "$r2" link add name vrf100 up type vrf table 100 + ip -n "$r2" addr add 2001:db8:100::1/64 dev vrf100 + + # Setup N3 + connect_ns "$r2" eth3 - 2000:103::2/64 "$h2" eth3 - 2000:103::4/64 + + ip -n "$r2" link set dev eth3 master vrf100 + + ip -n "$h2" route add default via 2000:103::2 + + # Setup N2 + connect_ns "$r1" eth2 - 2000:102::1/64 "$r2" eth2 - 2000:102::2/64 + + ip -n "$r1" route add default via 2000:102::2 + + ip -n "$r2" link set dev eth2 master vrf100 + + # Setup N1. host-1 and router-2 connect to a bridge in router-1. + ip -n "$r1" link add name br100 up type bridge + ip -n "$r1" addr add 2000:101::1/64 dev br100 + + connect_ns "$h1" eth0 - 2000:101::3/64 "$r1" eth0 - - + + ip -n "$h1" route add default via 2000:101::1 + + ip -n "$r1" link set dev eth0 master br100 + + connect_ns "$r2" eth1 - 2000:101::2/64 "$r1" eth1 - - + + ip -n "$r2" link set dev eth1 master vrf100 + + ip -n "$r1" link set dev eth1 master br100 + + # Prime the network + ip netns exec "$h1" ping6 -c5 2000:103::4 >/dev/null 2>&1 +} + +run_traceroute6_vrf() +{ + setup_traceroute6_vrf + + RET=0 + + # traceroute6 host-2 from host-1 (expects 2000:102::2) + run_cmd "$h1" "traceroute6 2000:103::4 | grep 2000:102::2" + check_err $? "traceroute6 did not return 2000:102::2" + log_test "IPv6 traceroute with VRF" + + cleanup_traceroute6_vrf +} + +################################################################################ +# traceroute6 with ICMP extensions test +# +# Verify that in this scenario +# +# ---- ---- ---- +# |H1|--------------------------|R1|--------------------------|H2| +# ---- N1 ---- N2 ---- +# +# ICMP extensions are correctly reported. The loopback interfaces on all the +# nodes are assigned global addresses and the interfaces connecting the nodes +# are assigned IPv6 link-local addresses. + +cleanup_traceroute6_ext() +{ + cleanup_all_ns +} + +setup_traceroute6_ext() +{ + # Start clean + cleanup_traceroute6_ext + + setup_ns h1 r1 h2 + create_ns "$h1" + create_ns "$r1" + create_ns "$h2" + + # Setup N1 + connect_ns "$h1" eth1 - fe80::1/64 "$r1" eth1 - fe80::2/64 + # Setup N2 + connect_ns "$r1" eth2 - fe80::3/64 "$h2" eth2 - fe80::4/64 + + # Setup H1 + ip -n "$h1" address add 2001:db8:1::1/128 dev lo + ip -n "$h1" route add ::/0 nexthop via fe80::2 dev eth1 + + # Setup R1 + ip -n "$r1" address add 2001:db8:1::2/128 dev lo + ip -n "$r1" route add 2001:db8:1::1/128 nexthop via fe80::1 dev eth1 + ip -n "$r1" route add 2001:db8:1::3/128 nexthop via fe80::4 dev eth2 + + # Setup H2 + ip -n "$h2" address add 2001:db8:1::3/128 dev lo + ip -n "$h2" route add ::/0 nexthop via fe80::3 dev eth2 + + # Prime the network + ip netns exec "$h1" ping6 -c5 2001:db8:1::3 >/dev/null 2>&1 +} + +traceroute6_ext_iio_iif_test() +{ + local r1_ifindex h2_ifindex + local pkt_len=$1; shift + + # Test that incoming interface info is not appended by default. + run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep INC" + check_fail $? "Incoming interface info appended by default when should not" + + # Test that the extension is appended when enabled. + run_cmd "$r1" "bash -c \"echo 0x01 > /proc/sys/net/ipv6/icmp/errors_extension_mask\"" + check_err $? "Failed to enable incoming interface info extension on R1" + + run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep INC" + check_err $? "Incoming interface info not appended after enable" + + # Test that the extension is not appended when disabled. + run_cmd "$r1" "bash -c \"echo 0x00 > /proc/sys/net/ipv6/icmp/errors_extension_mask\"" + check_err $? "Failed to disable incoming interface info extension on R1" + + run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep INC" + check_fail $? "Incoming interface info appended after disable" + + # Test that the extension is sent correctly from both R1 and H2. + run_cmd "$r1" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x01" + r1_ifindex=$(ip -n "$r1" -j link show dev eth1 | jq '.[]["ifindex"]') + run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep '<INC:$r1_ifindex,\"eth1\",mtu=1500>'" + check_err $? "Wrong incoming interface info reported from R1" + + run_cmd "$h2" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x01" + h2_ifindex=$(ip -n "$h2" -j link show dev eth2 | jq '.[]["ifindex"]') + run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep '<INC:$h2_ifindex,\"eth2\",mtu=1500>'" + check_err $? "Wrong incoming interface info reported from H2" + + # Add a global address on the incoming interface of R1 and check that + # it is reported. + run_cmd "$r1" "ip address add 2001:db8:100::1/64 dev eth1 nodad" + run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep '<INC:$r1_ifindex,2001:db8:100::1,\"eth1\",mtu=1500>'" + check_err $? "Wrong incoming interface info reported from R1 after address addition" + run_cmd "$r1" "ip address del 2001:db8:100::1/64 dev eth1" + + # Change name and MTU and make sure the result is still correct. + run_cmd "$r1" "ip link set dev eth1 name eth1tag mtu 1501" + run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep '<INC:$r1_ifindex,\"eth1tag\",mtu=1501>'" + check_err $? "Wrong incoming interface info reported from R1 after name and MTU change" + run_cmd "$r1" "ip link set dev eth1tag name eth1 mtu 1500" + + run_cmd "$r1" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x00" + run_cmd "$h2" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x00" +} + +run_traceroute6_ext() +{ + # Need at least version 2.1.5 for RFC 5837 support. + if ! check_traceroute6_version 2.1.5; then + log_test_skip "traceroute6 too old, missing ICMP extensions support" + return + fi + + setup_traceroute6_ext + + RET=0 + + ## General ICMP extensions tests + + # Test that ICMP extensions are disabled by default. + run_cmd "$h1" "sysctl net.ipv6.icmp.errors_extension_mask | grep \"= 0$\"" + check_err $? "ICMP extensions are not disabled by default" + + # Test that unsupported values are rejected. Do not use "sysctl" as + # older versions do not return an error code upon failure. + run_cmd "$h1" "bash -c \"echo 0x80 > /proc/sys/net/ipv6/icmp/errors_extension_mask\"" + check_fail $? "Unsupported sysctl value was not rejected" + + ## Extension-specific tests + + # Incoming interface info test. Test with various packet sizes, + # including the default one. + traceroute6_ext_iio_iif_test + traceroute6_ext_iio_iif_test 127 + traceroute6_ext_iio_iif_test 128 + traceroute6_ext_iio_iif_test 129 + + log_test "IPv6 traceroute with ICMP extensions" + + cleanup_traceroute6_ext +} + +################################################################################ # traceroute test # -# Verify that traceroute from H1 to H2 shows 1.0.1.1 in this scenario +# Verify that traceroute from H1 to H2 shows 1.0.3.1 and 1.0.1.1 when +# traceroute uses 1.0.3.3 and 1.0.1.3 as the source IP, respectively. # -# 1.0.3.1/24 +# 1.0.3.3/24 1.0.3.1/24 # ---- 1.0.1.3/24 1.0.1.1/24 ---- 1.0.2.1/24 1.0.2.4/24 ---- # |H1|--------------------------|R1|--------------------------|H2| # ---- N1 ---- N2 ---- # -# where net.ipv4.icmp_errors_use_inbound_ifaddr is set on R1 and -# 1.0.3.1/24 and 1.0.1.1/24 are respectively R1's primary and secondary -# address on N1. -# +# where net.ipv4.icmp_errors_use_inbound_ifaddr is set on R1 and 1.0.3.1/24 and +# 1.0.1.1/24 are R1's primary addresses on N1. The kernel is expected to prefer +# a source address that is on the same subnet as the destination IP of the ICMP +# error message. cleanup_traceroute() { @@ -250,6 +500,7 @@ setup_traceroute() connect_ns $h1 eth0 1.0.1.3/24 - \ $router eth1 1.0.3.1/24 - + ip -n "$h1" addr add 1.0.3.3/24 dev eth0 ip netns exec $h1 ip route add default via 1.0.1.1 ip netns exec $router ip addr add 1.0.1.1/24 dev eth1 @@ -268,18 +519,232 @@ setup_traceroute() run_traceroute() { - if [ ! -x "$(command -v traceroute)" ]; then - echo "SKIP: Could not run IPV4 test without traceroute" + setup_traceroute + + RET=0 + + # traceroute host-2 from host-1. Expect a source IP that is on the same + # subnet as destination IP of the ICMP error message. + run_cmd "$h1" "traceroute -s 1.0.1.3 1.0.2.4 | grep -q 1.0.1.1" + check_err $? "traceroute did not return 1.0.1.1" + run_cmd "$h1" "traceroute -s 1.0.3.3 1.0.2.4 | grep -q 1.0.3.1" + check_err $? "traceroute did not return 1.0.3.1" + log_test "IPv4 traceroute" + + cleanup_traceroute +} + +################################################################################ +# traceroute with VRF test +# +# Verify that traceroute from H1 to H2 shows 1.0.3.1 and 1.0.1.1 when +# traceroute uses 1.0.3.3 and 1.0.1.3 as the source IP, respectively. The +# intention is to check that the kernel does not choose an IP assigned to the +# VRF device, but rather an address from the VRF port (eth1) that received the +# packet that generates the ICMP error message. +# +# 1.0.4.1/24 (vrf100) +# 1.0.3.3/24 1.0.3.1/24 +# ---- 1.0.1.3/24 1.0.1.1/24 ---- 1.0.2.1/24 1.0.2.4/24 ---- +# |H1|--------------------------|R1|--------------------------|H2| +# ---- N1 ---- N2 ---- + +cleanup_traceroute_vrf() +{ + cleanup_all_ns +} + +setup_traceroute_vrf() +{ + # Start clean + cleanup_traceroute_vrf + + setup_ns h1 h2 router + create_ns "$h1" + create_ns "$h2" + create_ns "$router" + + ip -n "$router" link add name vrf100 up type vrf table 100 + ip -n "$router" addr add 1.0.4.1/24 dev vrf100 + + connect_ns "$h1" eth0 1.0.1.3/24 - \ + "$router" eth1 1.0.1.1/24 - + + ip -n "$h1" addr add 1.0.3.3/24 dev eth0 + ip -n "$h1" route add default via 1.0.1.1 + + ip -n "$router" link set dev eth1 master vrf100 + ip -n "$router" addr add 1.0.3.1/24 dev eth1 + ip netns exec "$router" sysctl -qw \ + net.ipv4.icmp_errors_use_inbound_ifaddr=1 + + connect_ns "$h2" eth0 1.0.2.4/24 - \ + "$router" eth2 1.0.2.1/24 - + + ip -n "$h2" route add default via 1.0.2.1 + + ip -n "$router" link set dev eth2 master vrf100 + + # Prime the network + ip netns exec "$h1" ping -c5 1.0.2.4 >/dev/null 2>&1 +} + +run_traceroute_vrf() +{ + setup_traceroute_vrf + + RET=0 + + # traceroute host-2 from host-1. Expect a source IP that is on the same + # subnet as destination IP of the ICMP error message. + run_cmd "$h1" "traceroute -s 1.0.1.3 1.0.2.4 | grep 1.0.1.1" + check_err $? "traceroute did not return 1.0.1.1" + run_cmd "$h1" "traceroute -s 1.0.3.3 1.0.2.4 | grep 1.0.3.1" + check_err $? "traceroute did not return 1.0.3.1" + log_test "IPv4 traceroute with VRF" + + cleanup_traceroute_vrf +} + +################################################################################ +# traceroute with ICMP extensions test +# +# Verify that in this scenario +# +# ---- ---- ---- +# |H1|--------------------------|R1|--------------------------|H2| +# ---- N1 ---- N2 ---- +# +# ICMP extensions are correctly reported. The loopback interfaces on all the +# nodes are assigned global addresses and the interfaces connecting the nodes +# are assigned IPv6 link-local addresses. + +cleanup_traceroute_ext() +{ + cleanup_all_ns +} + +setup_traceroute_ext() +{ + # Start clean + cleanup_traceroute_ext + + setup_ns h1 r1 h2 + create_ns "$h1" + create_ns "$r1" + create_ns "$h2" + + # Setup N1 + connect_ns "$h1" eth1 - fe80::1/64 "$r1" eth1 - fe80::2/64 + # Setup N2 + connect_ns "$r1" eth2 - fe80::3/64 "$h2" eth2 - fe80::4/64 + + # Setup H1 + ip -n "$h1" address add 192.0.2.1/32 dev lo + ip -n "$h1" route add 0.0.0.0/0 nexthop via inet6 fe80::2 dev eth1 + + # Setup R1 + ip -n "$r1" address add 192.0.2.2/32 dev lo + ip -n "$r1" route add 192.0.2.1/32 nexthop via inet6 fe80::1 dev eth1 + ip -n "$r1" route add 192.0.2.3/32 nexthop via inet6 fe80::4 dev eth2 + + # Setup H2 + ip -n "$h2" address add 192.0.2.3/32 dev lo + ip -n "$h2" route add 0.0.0.0/0 nexthop via inet6 fe80::3 dev eth2 + + # Prime the network + ip netns exec "$h1" ping -c5 192.0.2.3 >/dev/null 2>&1 +} + +traceroute_ext_iio_iif_test() +{ + local r1_ifindex h2_ifindex + local pkt_len=$1; shift + + # Test that incoming interface info is not appended by default. + run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep INC" + check_fail $? "Incoming interface info appended by default when should not" + + # Test that the extension is appended when enabled. + run_cmd "$r1" "bash -c \"echo 0x01 > /proc/sys/net/ipv4/icmp_errors_extension_mask\"" + check_err $? "Failed to enable incoming interface info extension on R1" + + run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep INC" + check_err $? "Incoming interface info not appended after enable" + + # Test that the extension is not appended when disabled. + run_cmd "$r1" "bash -c \"echo 0x00 > /proc/sys/net/ipv4/icmp_errors_extension_mask\"" + check_err $? "Failed to disable incoming interface info extension on R1" + + run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep INC" + check_fail $? "Incoming interface info appended after disable" + + # Test that the extension is sent correctly from both R1 and H2. + run_cmd "$r1" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x01" + r1_ifindex=$(ip -n "$r1" -j link show dev eth1 | jq '.[]["ifindex"]') + run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep '<INC:$r1_ifindex,\"eth1\",mtu=1500>'" + check_err $? "Wrong incoming interface info reported from R1" + + run_cmd "$h2" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x01" + h2_ifindex=$(ip -n "$h2" -j link show dev eth2 | jq '.[]["ifindex"]') + run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep '<INC:$h2_ifindex,\"eth2\",mtu=1500>'" + check_err $? "Wrong incoming interface info reported from H2" + + # Add a global address on the incoming interface of R1 and check that + # it is reported. + run_cmd "$r1" "ip address add 198.51.100.1/24 dev eth1" + run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep '<INC:$r1_ifindex,198.51.100.1,\"eth1\",mtu=1500>'" + check_err $? "Wrong incoming interface info reported from R1 after address addition" + run_cmd "$r1" "ip address del 198.51.100.1/24 dev eth1" + + # Change name and MTU and make sure the result is still correct. + # Re-add the route towards H1 since it was deleted when we removed the + # last IPv4 address from eth1 on R1. + run_cmd "$r1" "ip route add 192.0.2.1/32 nexthop via inet6 fe80::1 dev eth1" + run_cmd "$r1" "ip link set dev eth1 name eth1tag mtu 1501" + run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep '<INC:$r1_ifindex,\"eth1tag\",mtu=1501>'" + check_err $? "Wrong incoming interface info reported from R1 after name and MTU change" + run_cmd "$r1" "ip link set dev eth1tag name eth1 mtu 1500" + + run_cmd "$r1" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x00" + run_cmd "$h2" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x00" +} + +run_traceroute_ext() +{ + # Need at least version 2.1.5 for RFC 5837 support. + if ! check_traceroute_version 2.1.5; then + log_test_skip "traceroute too old, missing ICMP extensions support" return fi - setup_traceroute + setup_traceroute_ext - # traceroute host-2 from host-1 (expects 1.0.1.1). Takes a while. - run_cmd $h1 "traceroute 1.0.2.4 | grep -q 1.0.1.1" - log_test $? 0 "IPV4 traceroute" + RET=0 - cleanup_traceroute + ## General ICMP extensions tests + + # Test that ICMP extensions are disabled by default. + run_cmd "$h1" "sysctl net.ipv4.icmp_errors_extension_mask | grep \"= 0$\"" + check_err $? "ICMP extensions are not disabled by default" + + # Test that unsupported values are rejected. Do not use "sysctl" as + # older versions do not return an error code upon failure. + run_cmd "$h1" "bash -c \"echo 0x80 > /proc/sys/net/ipv4/icmp_errors_extension_mask\"" + check_fail $? "Unsupported sysctl value was not rejected" + + ## Extension-specific tests + + # Incoming interface info test. Test with various packet sizes, + # including the default one. + traceroute_ext_iio_iif_test + traceroute_ext_iio_iif_test 127 + traceroute_ext_iio_iif_test 128 + traceroute_ext_iio_iif_test 129 + + log_test "IPv4 traceroute with ICMP extensions" + + cleanup_traceroute_ext } ################################################################################ @@ -288,15 +753,16 @@ run_traceroute() run_tests() { run_traceroute6 + run_traceroute6_vrf + run_traceroute6_ext run_traceroute + run_traceroute_vrf + run_traceroute_ext } ################################################################################ # main -declare -i nfail=0 -declare -i nsuccess=0 - while getopts :pv o do case $o in @@ -306,7 +772,10 @@ do esac done +require_command traceroute6 +require_command traceroute +require_command jq + run_tests -printf "\nTests passed: %3d\n" ${nsuccess} -printf "Tests failed: %3d\n" ${nfail} +exit "${EXIT_STATUS}" diff --git a/tools/testing/selftests/net/tun.c b/tools/testing/selftests/net/tun.c index fa83918b62d1..cf106a49b55e 100644 --- a/tools/testing/selftests/net/tun.c +++ b/tools/testing/selftests/net/tun.c @@ -8,14 +8,119 @@ #include <stdlib.h> #include <string.h> #include <unistd.h> -#include <linux/if.h> #include <linux/if_tun.h> -#include <linux/netlink.h> -#include <linux/rtnetlink.h> #include <sys/ioctl.h> #include <sys/socket.h> -#include "../kselftest_harness.h" +#include "kselftest_harness.h" +#include "tuntap_helpers.h" + +static const char param_dev_geneve_name[] = "geneve1"; +static unsigned char param_hwaddr_outer_dst[] = { 0x00, 0xfe, 0x98, + 0x14, 0x22, 0x42 }; +static unsigned char param_hwaddr_outer_src[] = { 0x00, 0xfe, 0x98, + 0x94, 0xd2, 0x43 }; +static unsigned char param_hwaddr_inner_dst[] = { 0x00, 0xfe, 0x98, + 0x94, 0x22, 0xcc }; +static unsigned char param_hwaddr_inner_src[] = { 0x00, 0xfe, 0x98, + 0x94, 0xd2, 0xdd }; + +static struct in_addr param_ipaddr4_outer_dst = { + __constant_htonl(0xac100001), +}; + +static struct in_addr param_ipaddr4_outer_src = { + __constant_htonl(0xac100002), +}; + +static struct in_addr param_ipaddr4_inner_dst = { + __constant_htonl(0xac100101), +}; + +static struct in_addr param_ipaddr4_inner_src = { + __constant_htonl(0xac100102), +}; + +static struct in6_addr param_ipaddr6_outer_dst = { + { { 0x20, 0x02, 0x0d, 0xb8, 0x01, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 } }, +}; + +static struct in6_addr param_ipaddr6_outer_src = { + { { 0x20, 0x02, 0x0d, 0xb8, 0x01, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 } }, +}; + +static struct in6_addr param_ipaddr6_inner_dst = { + { { 0x20, 0x02, 0x0d, 0xb8, 0x02, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 } }, +}; + +static struct in6_addr param_ipaddr6_inner_src = { + { { 0x20, 0x02, 0x0d, 0xb8, 0x02, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 } }, +}; + +#ifndef BIT +#define BIT(nr) (1UL << (nr)) +#endif + +#define VN_ID 1 +#define VN_PORT 4789 +#define UDP_SRC_PORT 22 +#define UDP_DST_PORT 48878 +#define IPPREFIX_LEN 24 +#define IP6PREFIX_LEN 64 +#define TIMEOUT_SEC 10 +#define TIMEOUT_USEC 100000 +#define MAX_RETRIES 20 + +#define UDP_TUNNEL_GENEVE_4IN4 0x01 +#define UDP_TUNNEL_GENEVE_6IN4 0x02 +#define UDP_TUNNEL_GENEVE_4IN6 0x04 +#define UDP_TUNNEL_GENEVE_6IN6 0x08 + +#define UDP_TUNNEL_MAX_SEGMENTS BIT(7) + +#define UDP_TUNNEL_OUTER_IPV4 (UDP_TUNNEL_GENEVE_4IN4 | UDP_TUNNEL_GENEVE_6IN4) +#define UDP_TUNNEL_INNER_IPV4 (UDP_TUNNEL_GENEVE_4IN4 | UDP_TUNNEL_GENEVE_4IN6) + +#define UDP_TUNNEL_GENEVE_4IN4_HDRLEN \ + (ETH_HLEN + 2 * sizeof(struct iphdr) + GENEVE_HLEN + \ + 2 * sizeof(struct udphdr)) +#define UDP_TUNNEL_GENEVE_6IN6_HDRLEN \ + (ETH_HLEN + 2 * sizeof(struct ipv6hdr) + GENEVE_HLEN + \ + 2 * sizeof(struct udphdr)) +#define UDP_TUNNEL_GENEVE_4IN6_HDRLEN \ + (ETH_HLEN + sizeof(struct iphdr) + sizeof(struct ipv6hdr) + \ + GENEVE_HLEN + 2 * sizeof(struct udphdr)) +#define UDP_TUNNEL_GENEVE_6IN4_HDRLEN \ + (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct iphdr) + \ + GENEVE_HLEN + 2 * sizeof(struct udphdr)) + +#define UDP_TUNNEL_HDRLEN(type) \ + ((type) == UDP_TUNNEL_GENEVE_4IN4 ? UDP_TUNNEL_GENEVE_4IN4_HDRLEN : \ + (type) == UDP_TUNNEL_GENEVE_6IN6 ? UDP_TUNNEL_GENEVE_6IN6_HDRLEN : \ + (type) == UDP_TUNNEL_GENEVE_4IN6 ? UDP_TUNNEL_GENEVE_4IN6_HDRLEN : \ + (type) == UDP_TUNNEL_GENEVE_6IN4 ? UDP_TUNNEL_GENEVE_6IN4_HDRLEN : \ + 0) + +#define UDP_TUNNEL_MSS(type) (ETH_DATA_LEN - UDP_TUNNEL_HDRLEN(type)) +#define UDP_TUNNEL_MAX(type, is_tap) \ + (ETH_MAX_MTU - UDP_TUNNEL_HDRLEN(type) - ((is_tap) ? ETH_HLEN : 0)) + +#define TUN_VNET_TNL_SIZE sizeof(struct virtio_net_hdr_v1_hash_tunnel) +#define MAX_VNET_TUNNEL_PACKET_SZ \ + (TUN_VNET_TNL_SIZE + ETH_HLEN + UDP_TUNNEL_GENEVE_6IN6_HDRLEN + \ + ETH_MAX_MTU) + +struct geneve_setup_config { + int family; + union { + struct in_addr r4; + struct in6_addr r6; + } remote; + __be32 vnid; + __be16 vnport; + unsigned char hwaddr[6]; + uint8_t csum; +}; static int tun_attach(int fd, char *dev) { @@ -25,7 +130,7 @@ static int tun_attach(int fd, char *dev) strcpy(ifr.ifr_name, dev); ifr.ifr_flags = IFF_ATTACH_QUEUE; - return ioctl(fd, TUNSETQUEUE, (void *) &ifr); + return ioctl(fd, TUNSETQUEUE, (void *)&ifr); } static int tun_detach(int fd, char *dev) @@ -36,7 +141,7 @@ static int tun_detach(int fd, char *dev) strcpy(ifr.ifr_name, dev); ifr.ifr_flags = IFF_DETACH_QUEUE; - return ioctl(fd, TUNSETQUEUE, (void *) &ifr); + return ioctl(fd, TUNSETQUEUE, (void *)&ifr); } static int tun_alloc(char *dev) @@ -54,7 +159,7 @@ static int tun_alloc(char *dev) strcpy(ifr.ifr_name, dev); ifr.ifr_flags = IFF_TAP | IFF_NAPI | IFF_MULTI_QUEUE; - err = ioctl(fd, TUNSETIFF, (void *) &ifr); + err = ioctl(fd, TUNSETIFF, (void *)&ifr); if (err < 0) { fprintf(stderr, "can't TUNSETIFF: %s\n", strerror(errno)); close(fd); @@ -66,42 +171,315 @@ static int tun_alloc(char *dev) static int tun_delete(char *dev) { - struct { - struct nlmsghdr nh; - struct ifinfomsg ifm; - unsigned char data[64]; - } req; - struct rtattr *rta; - int ret, rtnl; + return ip_link_del(dev); +} + +static int tun_open(char *dev, const int flags, const int hdrlen, + const int features, const unsigned char *mac_addr) +{ + struct ifreq ifr = { 0 }; + int fd, sk = -1; + + fd = open("/dev/net/tun", O_RDWR); + if (fd < 0) { + perror("open"); + return -1; + } + + ifr.ifr_flags = flags; + if (ioctl(fd, TUNSETIFF, (void *)&ifr) < 0) { + perror("ioctl(TUNSETIFF)"); + goto err; + } + strcpy(dev, ifr.ifr_name); + + if (hdrlen > 0) { + if (ioctl(fd, TUNSETVNETHDRSZ, &hdrlen) < 0) { + perror("ioctl(TUNSETVNETHDRSZ)"); + goto err; + } + } + + if (features) { + if (ioctl(fd, TUNSETOFFLOAD, features) < 0) { + perror("ioctl(TUNSETOFFLOAD)"); + goto err; + } + } + + sk = socket(PF_INET, SOCK_DGRAM, 0); + if (sk < 0) { + perror("socket"); + goto err; + } + + if (ioctl(sk, SIOCGIFFLAGS, &ifr) < 0) { + perror("ioctl(SIOCGIFFLAGS)"); + goto err; + } + + ifr.ifr_flags |= (IFF_UP | IFF_RUNNING); + if (ioctl(sk, SIOCSIFFLAGS, &ifr) < 0) { + perror("ioctl(SIOCSIFFLAGS)"); + goto err; + } + + if (mac_addr && flags & IFF_TAP) { + ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER; + memcpy(ifr.ifr_hwaddr.sa_data, mac_addr, ETH_ALEN); + + if (ioctl(sk, SIOCSIFHWADDR, &ifr) < 0) { + perror("ioctl(SIOCSIFHWADDR)"); + goto err; + } + } + +out: + if (sk >= 0) + close(sk); + return fd; + +err: + close(fd); + fd = -1; + goto out; +} + +static size_t sockaddr_len(int family) +{ + return (family == AF_INET) ? sizeof(struct sockaddr_in) : + sizeof(struct sockaddr_in6); +} + +static int geneve_fill_newlink(struct rt_link_newlink_req *req, void *data) +{ + struct geneve_setup_config *cfg = data; + +#define SET_GENEVE_REMOTE rt_link_newlink_req_set_linkinfo_data_geneve_remote +#define SET_GENEVE_REMOTE6 rt_link_newlink_req_set_linkinfo_data_geneve_remote6 + + rt_link_newlink_req_set_address(req, cfg->hwaddr, ETH_ALEN); + rt_link_newlink_req_set_linkinfo_data_geneve_id(req, cfg->vnid); + rt_link_newlink_req_set_linkinfo_data_geneve_port(req, cfg->vnport); + rt_link_newlink_req_set_linkinfo_data_geneve_udp_csum(req, cfg->csum); + + if (cfg->family == AF_INET) + SET_GENEVE_REMOTE(req, cfg->remote.r4.s_addr); + else + SET_GENEVE_REMOTE6(req, &cfg->remote.r6, + sizeof(cfg->remote.r6)); + + return 0; +} + +static int geneve_create(const char *dev, int family, void *remote, + void *hwaddr) +{ + struct geneve_setup_config geneve; + + memset(&geneve, 0, sizeof(geneve)); + geneve.vnid = VN_ID; + geneve.vnport = htons(VN_PORT); + geneve.csum = 1; + geneve.family = family; + if (family == AF_INET) + memcpy(&geneve.remote.r4, remote, sizeof(struct in_addr)); + else + memcpy(&geneve.remote.r6, remote, sizeof(struct in6_addr)); + memcpy(geneve.hwaddr, hwaddr, ETH_ALEN); + + return ip_link_add(dev, "geneve", geneve_fill_newlink, (void *)&geneve); +} + +static int set_pmtu_discover(int fd, bool is_ipv4) +{ + int level, name, val; + + if (is_ipv4) { + level = SOL_IP; + name = IP_MTU_DISCOVER; + val = IP_PMTUDISC_DO; + } else { + level = SOL_IPV6; + name = IPV6_MTU_DISCOVER; + val = IPV6_PMTUDISC_DO; + } + + return setsockopt(fd, level, name, &val, sizeof(val)); +} + +static int udp_socket_open(struct sockaddr_storage *ssa, bool do_frag, + bool do_connect, struct sockaddr_storage *dsa) +{ + struct timeval to = { .tv_sec = TIMEOUT_SEC }; + int fd, family = ssa->ss_family; + int salen = sockaddr_len(family); + + fd = socket(family, SOCK_DGRAM, 0); + if (fd < 0) + return -1; + + if (bind(fd, (struct sockaddr *)ssa, salen) < 0) { + perror("bind"); + goto err; + } + + if (do_connect && connect(fd, (struct sockaddr *)dsa, salen) < 0) { + perror("connect"); + goto err; + } + + if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &to, sizeof(to)) < 0) { + perror("setsockopt(SO_RCVTIMEO)"); + goto err; + } + + if (!do_frag && set_pmtu_discover(fd, family == AF_INET) < 0) { + perror("set_pmtu_discover"); + goto err; + } + return fd; + +err: + close(fd); + return -1; +} + +static void parse_route_rsp(struct rt_route_getroute_rsp *rsp, void *rtm_type) +{ + *(uint8_t *)rtm_type = rsp->_hdr.rtm_type; +} + +static int ip_route_check(const char *intf, int family, void *addr) +{ + uint8_t rtm_type, table = RT_TABLE_LOCAL; + int retries = MAX_RETRIES; - rtnl = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE); - if (rtnl < 0) { - fprintf(stderr, "can't open rtnl: %s\n", strerror(errno)); - return 1; + while (retries-- > 0) { + if (ip_route_get(intf, family, table, addr, parse_route_rsp, + &rtm_type) == 0 && + rtm_type == RTN_LOCAL) + break; + + usleep(TIMEOUT_USEC); } - memset(&req, 0, sizeof(req)); - req.nh.nlmsg_len = NLMSG_ALIGN(NLMSG_LENGTH(sizeof(req.ifm))); - req.nh.nlmsg_flags = NLM_F_REQUEST; - req.nh.nlmsg_type = RTM_DELLINK; + if (retries < 0) + return -1; + + return 0; +} + +static int send_gso_udp_msg(int socket, struct sockaddr_storage *addr, + uint8_t *send_buf, int send_len, int gso_size) +{ + char control[CMSG_SPACE(sizeof(uint16_t))] = { 0 }; + int alen = sockaddr_len(addr->ss_family); + struct msghdr msg = { 0 }; + struct iovec iov = { 0 }; + int ret; + + iov.iov_base = send_buf; + iov.iov_len = send_len; + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_name = addr; + msg.msg_namelen = alen; - req.ifm.ifi_family = AF_UNSPEC; + if (gso_size > 0) { + struct cmsghdr *cmsg; - rta = (struct rtattr *)(((char *)&req) + NLMSG_ALIGN(req.nh.nlmsg_len)); - rta->rta_type = IFLA_IFNAME; - rta->rta_len = RTA_LENGTH(IFNAMSIZ); - req.nh.nlmsg_len += rta->rta_len; - memcpy(RTA_DATA(rta), dev, IFNAMSIZ); + msg.msg_control = control; + msg.msg_controllen = sizeof(control); - ret = send(rtnl, &req, req.nh.nlmsg_len, 0); + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_UDP; + cmsg->cmsg_type = UDP_SEGMENT; + cmsg->cmsg_len = CMSG_LEN(sizeof(uint16_t)); + *(uint16_t *)CMSG_DATA(cmsg) = gso_size; + } + + ret = sendmsg(socket, &msg, 0); if (ret < 0) - fprintf(stderr, "can't send: %s\n", strerror(errno)); - ret = (unsigned int)ret != req.nh.nlmsg_len; + perror("sendmsg"); - close(rtnl); return ret; } +static int validate_hdrlen(uint8_t **cur, int *len, int x) +{ + if (*len < x) + return -1; + *cur += x; + *len -= x; + return 0; +} + +static int parse_udp_tunnel_vnet_packet(uint8_t *buf, int len, int tunnel_type, + bool is_tap) +{ + struct ipv6hdr *iph6; + struct udphdr *udph; + struct iphdr *iph4; + uint8_t *cur = buf; + + if (validate_hdrlen(&cur, &len, TUN_VNET_TNL_SIZE)) + return -1; + + if (is_tap) { + if (validate_hdrlen(&cur, &len, ETH_HLEN)) + return -1; + } + + if (tunnel_type & UDP_TUNNEL_OUTER_IPV4) { + iph4 = (struct iphdr *)cur; + if (validate_hdrlen(&cur, &len, sizeof(struct iphdr))) + return -1; + if (iph4->version != 4 || iph4->protocol != IPPROTO_UDP) + return -1; + } else { + iph6 = (struct ipv6hdr *)cur; + if (validate_hdrlen(&cur, &len, sizeof(struct ipv6hdr))) + return -1; + if (iph6->version != 6 || iph6->nexthdr != IPPROTO_UDP) + return -1; + } + + udph = (struct udphdr *)cur; + if (validate_hdrlen(&cur, &len, sizeof(struct udphdr))) + return -1; + if (ntohs(udph->dest) != VN_PORT) + return -1; + + if (validate_hdrlen(&cur, &len, GENEVE_HLEN)) + return -1; + if (validate_hdrlen(&cur, &len, ETH_HLEN)) + return -1; + + if (tunnel_type & UDP_TUNNEL_INNER_IPV4) { + iph4 = (struct iphdr *)cur; + if (validate_hdrlen(&cur, &len, sizeof(struct iphdr))) + return -1; + if (iph4->version != 4 || iph4->protocol != IPPROTO_UDP) + return -1; + } else { + iph6 = (struct ipv6hdr *)cur; + if (validate_hdrlen(&cur, &len, sizeof(struct ipv6hdr))) + return -1; + if (iph6->version != 6 || iph6->nexthdr != IPPROTO_UDP) + return -1; + } + + udph = (struct udphdr *)cur; + if (validate_hdrlen(&cur, &len, sizeof(struct udphdr))) + return -1; + if (ntohs(udph->dest) != UDP_DST_PORT) + return -1; + + return len; +} + FIXTURE(tun) { char ifname[IFNAMSIZ]; @@ -127,31 +505,36 @@ FIXTURE_TEARDOWN(tun) close(self->fd2); } -TEST_F(tun, delete_detach_close) { +TEST_F(tun, delete_detach_close) +{ EXPECT_EQ(tun_delete(self->ifname), 0); EXPECT_EQ(tun_detach(self->fd, self->ifname), -1); EXPECT_EQ(errno, 22); } -TEST_F(tun, detach_delete_close) { +TEST_F(tun, detach_delete_close) +{ EXPECT_EQ(tun_detach(self->fd, self->ifname), 0); EXPECT_EQ(tun_delete(self->ifname), 0); } -TEST_F(tun, detach_close_delete) { +TEST_F(tun, detach_close_delete) +{ EXPECT_EQ(tun_detach(self->fd, self->ifname), 0); close(self->fd); self->fd = -1; EXPECT_EQ(tun_delete(self->ifname), 0); } -TEST_F(tun, reattach_delete_close) { +TEST_F(tun, reattach_delete_close) +{ EXPECT_EQ(tun_detach(self->fd, self->ifname), 0); EXPECT_EQ(tun_attach(self->fd, self->ifname), 0); EXPECT_EQ(tun_delete(self->ifname), 0); } -TEST_F(tun, reattach_close_delete) { +TEST_F(tun, reattach_close_delete) +{ EXPECT_EQ(tun_detach(self->fd, self->ifname), 0); EXPECT_EQ(tun_attach(self->fd, self->ifname), 0); close(self->fd); @@ -159,4 +542,447 @@ TEST_F(tun, reattach_close_delete) { EXPECT_EQ(tun_delete(self->ifname), 0); } +FIXTURE(tun_vnet_udptnl) +{ + char ifname[IFNAMSIZ]; + int fd, sock; +}; + +FIXTURE_VARIANT(tun_vnet_udptnl) +{ + int tunnel_type; + int gso_size; + int data_size; + int r_num_mss; + bool is_tap, no_gso; +}; + +/* clang-format off */ +#define TUN_VNET_UDPTNL_VARIANT_ADD(type, desc) \ + FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_nogsosz_1byte) { \ + /* no GSO: send a single byte */ \ + .tunnel_type = type, \ + .data_size = 1, \ + .r_num_mss = 1, \ + .is_tap = true, \ + .no_gso = true, \ + }; \ + FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_nogsosz_1mss) { \ + /* no GSO: send a single MSS, fall back to no GSO */ \ + .tunnel_type = type, \ + .data_size = UDP_TUNNEL_MSS(type), \ + .r_num_mss = 1, \ + .is_tap = true, \ + .no_gso = true, \ + }; \ + FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_nogsosz_gtmss) { \ + /* no GSO: send a single MSS + 1B: fail */ \ + .tunnel_type = type, \ + .data_size = UDP_TUNNEL_MSS(type) + 1, \ + .r_num_mss = 1, \ + .is_tap = true, \ + .no_gso = true, \ + }; \ + FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_1byte) { \ + /* GSO: send 1 byte, gso 1 byte, fall back to no GSO */ \ + .tunnel_type = type, \ + .gso_size = 1, \ + .data_size = 1, \ + .r_num_mss = 1, \ + .is_tap = true, \ + .no_gso = true, \ + }; \ + FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_1mss) { \ + /* send a single MSS: fall back to no GSO */ \ + .tunnel_type = type, \ + .gso_size = UDP_TUNNEL_MSS(type), \ + .data_size = UDP_TUNNEL_MSS(type), \ + .r_num_mss = 1, \ + .is_tap = true, \ + .no_gso = true, \ + }; \ + FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_ltgso) { \ + /* data <= MSS < gso: will fall back to no GSO */ \ + .tunnel_type = type, \ + .gso_size = UDP_TUNNEL_MSS(type) + 1, \ + .data_size = UDP_TUNNEL_MSS(type), \ + .r_num_mss = 1, \ + .is_tap = true, \ + .no_gso = true, \ + }; \ + FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_gtgso) { \ + /* GSO: a single MSS + 1B */ \ + .tunnel_type = type, \ + .gso_size = UDP_TUNNEL_MSS(type), \ + .data_size = UDP_TUNNEL_MSS(type) + 1, \ + .r_num_mss = 2, \ + .is_tap = true, \ + }; \ + FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_2mss) { \ + /* no GSO: send exactly 2 MSS */ \ + .tunnel_type = type, \ + .gso_size = UDP_TUNNEL_MSS(type), \ + .data_size = UDP_TUNNEL_MSS(type) * 2, \ + .r_num_mss = 2, \ + .is_tap = true, \ + }; \ + FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_maxbytes) { \ + /* GSO: send max bytes */ \ + .tunnel_type = type, \ + .gso_size = UDP_TUNNEL_MSS(type), \ + .data_size = UDP_TUNNEL_MAX(type, true), \ + .r_num_mss = UDP_TUNNEL_MAX(type, true) / \ + UDP_TUNNEL_MSS(type) + 1, \ + .is_tap = true, \ + }; \ + FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_over_maxbytes) { \ + /* GSO: send oversize max bytes: fail */ \ + .tunnel_type = type, \ + .gso_size = UDP_TUNNEL_MSS(type), \ + .data_size = ETH_MAX_MTU, \ + .r_num_mss = ETH_MAX_MTU / UDP_TUNNEL_MSS(type) + 1, \ + .is_tap = true, \ + }; \ + FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_maxsegs) { \ + /* GSO: send max number of min sized segments */ \ + .tunnel_type = type, \ + .gso_size = 1, \ + .data_size = UDP_TUNNEL_MAX_SEGMENTS, \ + .r_num_mss = UDP_TUNNEL_MAX_SEGMENTS, \ + .is_tap = true, \ + }; \ + FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_5byte) { \ + /* GSO: send 5 bytes, gso 2 bytes */ \ + .tunnel_type = type, \ + .gso_size = 2, \ + .data_size = 5, \ + .r_num_mss = 3, \ + .is_tap = true, \ + } /* clang-format on */ + +TUN_VNET_UDPTNL_VARIANT_ADD(UDP_TUNNEL_GENEVE_4IN4, 4in4); +TUN_VNET_UDPTNL_VARIANT_ADD(UDP_TUNNEL_GENEVE_6IN4, 6in4); +TUN_VNET_UDPTNL_VARIANT_ADD(UDP_TUNNEL_GENEVE_4IN6, 4in6); +TUN_VNET_UDPTNL_VARIANT_ADD(UDP_TUNNEL_GENEVE_6IN6, 6in6); + +static void assign_ifaddr_vars(int family, int is_outer, void **srcip, + void **dstip, void **srcmac, void **dstmac) +{ + if (is_outer) { + if (family == AF_INET) { + *srcip = (void *)¶m_ipaddr4_outer_src; + *dstip = (void *)¶m_ipaddr4_outer_dst; + } else { + *srcip = (void *)¶m_ipaddr6_outer_src; + *dstip = (void *)¶m_ipaddr6_outer_dst; + } + *srcmac = param_hwaddr_outer_src; + *dstmac = param_hwaddr_outer_dst; + } else { + if (family == AF_INET) { + *srcip = (void *)¶m_ipaddr4_inner_src; + *dstip = (void *)¶m_ipaddr4_inner_dst; + } else { + *srcip = (void *)¶m_ipaddr6_inner_src; + *dstip = (void *)¶m_ipaddr6_inner_dst; + } + *srcmac = param_hwaddr_inner_src; + *dstmac = param_hwaddr_inner_dst; + } +} + +static void assign_sockaddr_vars(int family, int is_outer, + struct sockaddr_storage *src, + struct sockaddr_storage *dst) +{ + src->ss_family = family; + dst->ss_family = family; + + if (family == AF_INET) { + struct sockaddr_in *s4 = (struct sockaddr_in *)src; + struct sockaddr_in *d4 = (struct sockaddr_in *)dst; + + s4->sin_addr = is_outer ? param_ipaddr4_outer_src : + param_ipaddr4_inner_src; + d4->sin_addr = is_outer ? param_ipaddr4_outer_dst : + param_ipaddr4_inner_dst; + if (!is_outer) { + s4->sin_port = htons(UDP_SRC_PORT); + d4->sin_port = htons(UDP_DST_PORT); + } + } else { + struct sockaddr_in6 *s6 = (struct sockaddr_in6 *)src; + struct sockaddr_in6 *d6 = (struct sockaddr_in6 *)dst; + + s6->sin6_addr = is_outer ? param_ipaddr6_outer_src : + param_ipaddr6_inner_src; + d6->sin6_addr = is_outer ? param_ipaddr6_outer_dst : + param_ipaddr6_inner_dst; + if (!is_outer) { + s6->sin6_port = htons(UDP_SRC_PORT); + d6->sin6_port = htons(UDP_DST_PORT); + } + } +} + +FIXTURE_SETUP(tun_vnet_udptnl) +{ + int ret, family, prefix, flags, features; + int tunnel_type = variant->tunnel_type; + struct sockaddr_storage ssa, dsa; + void *sip, *dip, *smac, *dmac; + + flags = (variant->is_tap ? IFF_TAP : IFF_TUN) | IFF_VNET_HDR | + IFF_MULTI_QUEUE | IFF_NO_PI; + features = TUN_F_CSUM | TUN_F_UDP_TUNNEL_GSO | + TUN_F_UDP_TUNNEL_GSO_CSUM | TUN_F_USO4 | TUN_F_USO6; + self->fd = tun_open(self->ifname, flags, TUN_VNET_TNL_SIZE, features, + param_hwaddr_outer_src); + ASSERT_GE(self->fd, 0); + + family = (tunnel_type & UDP_TUNNEL_OUTER_IPV4) ? AF_INET : AF_INET6; + prefix = (family == AF_INET) ? IPPREFIX_LEN : IP6PREFIX_LEN; + assign_ifaddr_vars(family, 1, &sip, &dip, &smac, &dmac); + + ret = ip_addr_add(self->ifname, family, sip, prefix); + ASSERT_EQ(ret, 0); + ret = ip_neigh_add(self->ifname, family, dip, dmac); + ASSERT_EQ(ret, 0); + ret = ip_route_check(self->ifname, family, sip); + ASSERT_EQ(ret, 0); + + ret = geneve_create(param_dev_geneve_name, family, dip, + param_hwaddr_inner_src); + ASSERT_EQ(ret, 0); + + family = (tunnel_type & UDP_TUNNEL_INNER_IPV4) ? AF_INET : AF_INET6; + prefix = (family == AF_INET) ? IPPREFIX_LEN : IP6PREFIX_LEN; + assign_ifaddr_vars(family, 0, &sip, &dip, &smac, &dmac); + + ret = ip_addr_add(param_dev_geneve_name, family, sip, prefix); + ASSERT_EQ(ret, 0); + ret = ip_neigh_add(param_dev_geneve_name, family, dip, dmac); + ASSERT_EQ(ret, 0); + ret = ip_route_check(param_dev_geneve_name, family, sip); + ASSERT_EQ(ret, 0); + + assign_sockaddr_vars(family, 0, &ssa, &dsa); + self->sock = udp_socket_open(&ssa, false, true, &dsa); + ASSERT_GE(self->sock, 0); +} + +FIXTURE_TEARDOWN(tun_vnet_udptnl) +{ + int ret; + + if (self->sock != -1) + close(self->sock); + + ret = ip_link_del(param_dev_geneve_name); + EXPECT_EQ(ret, 0); + + ret = tun_delete(self->ifname); + EXPECT_EQ(ret, 0); +} + +static int build_gso_packet_into_tun(const FIXTURE_VARIANT(tun_vnet_udptnl) * + variant, + uint8_t *buf) +{ + int pktlen, hlen, proto, inner_family, outer_family; + int tunnel_type = variant->tunnel_type; + int payload_len = variant->data_size; + int gso_size = variant->gso_size; + uint8_t *outer_udph, *cur = buf; + void *sip, *dip, *smac, *dmac; + bool is_tap = variant->is_tap; + + hlen = (is_tap ? ETH_HLEN : 0) + UDP_TUNNEL_HDRLEN(tunnel_type); + inner_family = (tunnel_type & UDP_TUNNEL_INNER_IPV4) ? AF_INET : + AF_INET6; + outer_family = (tunnel_type & UDP_TUNNEL_OUTER_IPV4) ? AF_INET : + AF_INET6; + + cur += build_virtio_net_hdr_v1_hash_tunnel(cur, is_tap, hlen, gso_size, + outer_family, inner_family); + + pktlen = hlen + payload_len; + assign_ifaddr_vars(outer_family, 1, &sip, &dip, &smac, &dmac); + + if (is_tap) { + proto = outer_family == AF_INET ? ETH_P_IP : ETH_P_IPV6; + pktlen -= ETH_HLEN; + cur += build_eth(cur, proto, dmac, smac); + } + + if (outer_family == AF_INET) { + pktlen = pktlen - sizeof(struct iphdr); + cur += build_ipv4_header(cur, IPPROTO_UDP, pktlen, dip, sip); + } else { + pktlen = pktlen - sizeof(struct ipv6hdr); + cur += build_ipv6_header(cur, IPPROTO_UDP, 0, pktlen, dip, sip); + } + + outer_udph = cur; + assign_ifaddr_vars(inner_family, 0, &sip, &dip, &smac, &dmac); + + pktlen -= sizeof(struct udphdr); + proto = inner_family == AF_INET ? ETH_P_IP : ETH_P_IPV6; + cur += build_udp_header(cur, UDP_SRC_PORT, VN_PORT, pktlen); + cur += build_geneve_header(cur, VN_ID); + cur += build_eth(cur, proto, dmac, smac); + + pktlen = sizeof(struct udphdr) + payload_len; + if (inner_family == AF_INET) + cur += build_ipv4_header(cur, IPPROTO_UDP, pktlen, dip, sip); + else + cur += build_ipv6_header(cur, IPPROTO_UDP, 0, pktlen, dip, sip); + + cur += build_udp_packet(cur, UDP_DST_PORT, UDP_SRC_PORT, payload_len, + inner_family, false); + + build_udp_packet_csum(outer_udph, outer_family, false); + + return cur - buf; +} + +static int +receive_gso_packet_from_tunnel(FIXTURE_DATA(tun_vnet_udptnl) * self, + const FIXTURE_VARIANT(tun_vnet_udptnl) * variant, + int *r_num_mss) +{ + uint8_t packet_buf[MAX_VNET_TUNNEL_PACKET_SZ]; + int len, total_len = 0, socket = self->sock; + int payload_len = variant->data_size; + + while (total_len < payload_len) { + len = recv(socket, packet_buf, sizeof(packet_buf), 0); + if (len <= 0) { + if (len < 0 && errno != EAGAIN && errno != EWOULDBLOCK) + perror("recv"); + break; + } + + (*r_num_mss)++; + total_len += len; + } + + return total_len; +} + +static int send_gso_packet_into_tunnel(FIXTURE_DATA(tun_vnet_udptnl) * self, + const FIXTURE_VARIANT(tun_vnet_udptnl) * + variant) +{ + int family = (variant->tunnel_type & UDP_TUNNEL_INNER_IPV4) ? AF_INET : + AF_INET6; + uint8_t buf[MAX_VNET_TUNNEL_PACKET_SZ] = { 0 }; + int payload_len = variant->data_size; + int gso_size = variant->gso_size; + struct sockaddr_storage ssa, dsa; + + assign_sockaddr_vars(family, 0, &ssa, &dsa); + return send_gso_udp_msg(self->sock, &dsa, buf, payload_len, gso_size); +} + +static int +receive_gso_packet_from_tun(FIXTURE_DATA(tun_vnet_udptnl) * self, + const FIXTURE_VARIANT(tun_vnet_udptnl) * variant, + struct virtio_net_hdr_v1_hash_tunnel *vnet_hdr) +{ + struct timeval timeout = { .tv_sec = TIMEOUT_SEC }; + uint8_t buf[MAX_VNET_TUNNEL_PACKET_SZ]; + int tunnel_type = variant->tunnel_type; + int payload_len = variant->data_size; + bool is_tap = variant->is_tap; + int ret, len, total_len = 0; + int tun_fd = self->fd; + fd_set fdset; + + while (total_len < payload_len) { + FD_ZERO(&fdset); + FD_SET(tun_fd, &fdset); + + ret = select(tun_fd + 1, &fdset, NULL, NULL, &timeout); + if (ret <= 0) { + perror("select"); + break; + } + if (!FD_ISSET(tun_fd, &fdset)) + continue; + + len = read(tun_fd, buf, sizeof(buf)); + if (len <= 0) { + if (len < 0 && errno != EAGAIN && errno != EWOULDBLOCK) + perror("read"); + break; + } + + len = parse_udp_tunnel_vnet_packet(buf, len, tunnel_type, + is_tap); + if (len < 0) + continue; + + if (total_len == 0) + memcpy(vnet_hdr, buf, TUN_VNET_TNL_SIZE); + + total_len += len; + } + + return total_len; +} + +TEST_F(tun_vnet_udptnl, send_gso_packet) +{ + uint8_t pkt[MAX_VNET_TUNNEL_PACKET_SZ]; + int r_num_mss = 0; + int ret, off; + + memset(pkt, 0, sizeof(pkt)); + off = build_gso_packet_into_tun(variant, pkt); + ret = write(self->fd, pkt, off); + ASSERT_EQ(ret, off); + + ret = receive_gso_packet_from_tunnel(self, variant, &r_num_mss); + EXPECT_EQ(ret, variant->data_size); + EXPECT_EQ(r_num_mss, variant->r_num_mss); +} + +TEST_F(tun_vnet_udptnl, recv_gso_packet) +{ + struct virtio_net_hdr_v1_hash_tunnel vnet_hdr = { 0 }; + struct virtio_net_hdr_v1 *vh = &vnet_hdr.hash_hdr.hdr; + int ret, gso_type = VIRTIO_NET_HDR_GSO_UDP_L4; + + ret = send_gso_packet_into_tunnel(self, variant); + EXPECT_EQ(ret, variant->data_size); + + memset(&vnet_hdr, 0, sizeof(vnet_hdr)); + ret = receive_gso_packet_from_tun(self, variant, &vnet_hdr); + EXPECT_EQ(ret, variant->data_size); + + if (!variant->no_gso) { + EXPECT_EQ(vh->gso_size, variant->gso_size); + gso_type |= (variant->tunnel_type & UDP_TUNNEL_OUTER_IPV4) ? + (VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV4) : + (VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6); + EXPECT_EQ(vh->gso_type, gso_type); + } +} + +XFAIL_ADD(tun_vnet_udptnl, 4in4_nogsosz_gtmss, recv_gso_packet); +XFAIL_ADD(tun_vnet_udptnl, 6in4_nogsosz_gtmss, recv_gso_packet); +XFAIL_ADD(tun_vnet_udptnl, 4in6_nogsosz_gtmss, recv_gso_packet); +XFAIL_ADD(tun_vnet_udptnl, 6in6_nogsosz_gtmss, recv_gso_packet); + +XFAIL_ADD(tun_vnet_udptnl, 4in4_over_maxbytes, send_gso_packet); +XFAIL_ADD(tun_vnet_udptnl, 6in4_over_maxbytes, send_gso_packet); +XFAIL_ADD(tun_vnet_udptnl, 4in6_over_maxbytes, send_gso_packet); +XFAIL_ADD(tun_vnet_udptnl, 6in6_over_maxbytes, send_gso_packet); + +XFAIL_ADD(tun_vnet_udptnl, 4in4_over_maxbytes, recv_gso_packet); +XFAIL_ADD(tun_vnet_udptnl, 6in4_over_maxbytes, recv_gso_packet); +XFAIL_ADD(tun_vnet_udptnl, 4in6_over_maxbytes, recv_gso_packet); +XFAIL_ADD(tun_vnet_udptnl, 6in6_over_maxbytes, recv_gso_packet); + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/tuntap_helpers.h b/tools/testing/selftests/net/tuntap_helpers.h new file mode 100644 index 000000000000..d6c0437136ec --- /dev/null +++ b/tools/testing/selftests/net/tuntap_helpers.h @@ -0,0 +1,390 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _TUNTAP_HELPERS_H +#define _TUNTAP_HELPERS_H + +#include <errno.h> +#include <linux/if_packet.h> +#include <linux/ipv6.h> +#include <linux/virtio_net.h> +#include <netinet/in.h> +#include <netinet/if_ether.h> +#include <netinet/udp.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <ynl.h> + +#include "rt-route-user.h" +#include "rt-addr-user.h" +#include "rt-neigh-user.h" +#include "rt-link-user.h" + +#define GENEVE_HLEN 8 +#define PKT_DATA 0xCB +#define TUNTAP_DEFAULT_TTL 8 +#define TUNTAP_DEFAULT_IPID 1337 + +unsigned int if_nametoindex(const char *ifname); + +static inline int ip_addr_len(int family) +{ + return (family == AF_INET) ? sizeof(struct in_addr) : + sizeof(struct in6_addr); +} + +static inline void fill_ifaddr_msg(struct ifaddrmsg *ifam, int family, + int prefix, int flags, const char *dev) +{ + ifam->ifa_family = family; + ifam->ifa_prefixlen = prefix; + ifam->ifa_index = if_nametoindex(dev); + ifam->ifa_flags = flags; + ifam->ifa_scope = RT_SCOPE_UNIVERSE; +} + +static inline int ip_addr_add(const char *dev, int family, void *addr, + uint8_t prefix) +{ + int nl_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL; + int ifa_flags = IFA_F_PERMANENT | IFA_F_NODAD; + int ret = -1, ipalen = ip_addr_len(family); + struct rt_addr_newaddr_req *req; + struct ynl_sock *ys; + + ys = ynl_sock_create(&ynl_rt_addr_family, NULL); + if (!ys) + return -1; + + req = rt_addr_newaddr_req_alloc(); + if (!req) + goto err_req_alloc; + + fill_ifaddr_msg(&req->_hdr, family, prefix, ifa_flags, dev); + rt_addr_newaddr_req_set_nlflags(req, nl_flags); + rt_addr_newaddr_req_set_local(req, addr, ipalen); + + ret = rt_addr_newaddr(ys, req); + rt_addr_newaddr_req_free(req); +err_req_alloc: + ynl_sock_destroy(ys); + return ret; +} + +static inline void fill_neigh_req_header(struct ndmsg *ndm, int family, + int state, const char *dev) +{ + ndm->ndm_family = family; + ndm->ndm_ifindex = if_nametoindex(dev); + ndm->ndm_state = state; + ndm->ndm_flags = 0; + ndm->ndm_type = RTN_UNICAST; +} + +static inline int ip_neigh_add(const char *dev, int family, void *addr, + unsigned char *lladdr) +{ + int nl_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL; + int ret = -1, ipalen = ip_addr_len(family); + struct rt_neigh_newneigh_req *req; + struct ynl_sock *ys; + + ys = ynl_sock_create(&ynl_rt_neigh_family, NULL); + if (!ys) + return -1; + + req = rt_neigh_newneigh_req_alloc(); + if (!req) + goto err_req_alloc; + + fill_neigh_req_header(&req->_hdr, family, NUD_PERMANENT, dev); + rt_neigh_newneigh_req_set_nlflags(req, nl_flags); + rt_neigh_newneigh_req_set_dst(req, addr, ipalen); + rt_neigh_newneigh_req_set_lladdr(req, lladdr, ETH_ALEN); + rt_neigh_newneigh_req_set_ifindex(req, if_nametoindex(dev)); + + ret = rt_neigh_newneigh(ys, req); + rt_neigh_newneigh_req_free(req); +err_req_alloc: + ynl_sock_destroy(ys); + return ret; +} + +static inline void fill_route_req_header(struct rtmsg *rtm, int family, + int table) +{ + rtm->rtm_family = family; + rtm->rtm_table = table; +} + +static inline int +ip_route_get(const char *dev, int family, int table, void *dst, + void (*parse_rsp)(struct rt_route_getroute_rsp *rsp, void *out), + void *out) +{ + int ret = -1, ipalen = ip_addr_len(family); + struct rt_route_getroute_req *req; + struct rt_route_getroute_rsp *rsp; + struct ynl_sock *ys; + + ys = ynl_sock_create(&ynl_rt_route_family, NULL); + if (!ys) + return -1; + + req = rt_route_getroute_req_alloc(); + if (!req) + goto err_req_alloc; + + fill_route_req_header(&req->_hdr, family, table); + rt_route_getroute_req_set_nlflags(req, NLM_F_REQUEST); + rt_route_getroute_req_set_dst(req, dst, ipalen); + rt_route_getroute_req_set_oif(req, if_nametoindex(dev)); + + rsp = rt_route_getroute(ys, req); + if (!rsp) + goto err_rsp_get; + + ret = 0; + if (parse_rsp) + parse_rsp(rsp, out); + + rt_route_getroute_rsp_free(rsp); +err_rsp_get: + rt_route_getroute_req_free(req); +err_req_alloc: + ynl_sock_destroy(ys); + return ret; +} + +static inline int +ip_link_add(const char *dev, char *link_type, + int (*fill_link_attr)(struct rt_link_newlink_req *req, void *data), + void *data) +{ + int nl_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL; + struct rt_link_newlink_req *req; + struct ynl_sock *ys; + int ret = -1; + + ys = ynl_sock_create(&ynl_rt_link_family, NULL); + if (!ys) + return -1; + + req = rt_link_newlink_req_alloc(); + if (!req) + goto err_req_alloc; + + req->_hdr.ifi_flags = IFF_UP; + rt_link_newlink_req_set_nlflags(req, nl_flags); + rt_link_newlink_req_set_ifname(req, dev); + rt_link_newlink_req_set_linkinfo_kind(req, link_type); + + if (fill_link_attr && fill_link_attr(req, data) < 0) + goto err_attr_fill; + + ret = rt_link_newlink(ys, req); +err_attr_fill: + rt_link_newlink_req_free(req); +err_req_alloc: + ynl_sock_destroy(ys); + return ret; +} + +static inline int ip_link_del(const char *dev) +{ + struct rt_link_dellink_req *req; + struct ynl_sock *ys; + int ret = -1; + + ys = ynl_sock_create(&ynl_rt_link_family, NULL); + if (!ys) + return -1; + + req = rt_link_dellink_req_alloc(); + if (!req) + goto err_req_alloc; + + rt_link_dellink_req_set_nlflags(req, NLM_F_REQUEST); + rt_link_dellink_req_set_ifname(req, dev); + + ret = rt_link_dellink(ys, req); + rt_link_dellink_req_free(req); +err_req_alloc: + ynl_sock_destroy(ys); + return ret; +} + +static inline size_t build_eth(uint8_t *buf, uint16_t proto, unsigned char *src, + unsigned char *dest) +{ + struct ethhdr *eth = (struct ethhdr *)buf; + + eth->h_proto = htons(proto); + memcpy(eth->h_source, src, ETH_ALEN); + memcpy(eth->h_dest, dest, ETH_ALEN); + + return ETH_HLEN; +} + +static inline uint32_t add_csum(const uint8_t *buf, int len) +{ + uint16_t *sbuf = (uint16_t *)buf; + uint32_t sum = 0; + + while (len > 1) { + sum += *sbuf++; + len -= 2; + } + + if (len) + sum += *(uint8_t *)sbuf; + + return sum; +} + +static inline uint16_t finish_ip_csum(uint32_t sum) +{ + while (sum >> 16) + sum = (sum & 0xffff) + (sum >> 16); + return ~((uint16_t)sum); +} + +static inline uint16_t build_ip_csum(const uint8_t *buf, int len, uint32_t sum) +{ + sum += add_csum(buf, len); + return finish_ip_csum(sum); +} + +static inline int build_ipv4_header(uint8_t *buf, uint8_t proto, + int payload_len, struct in_addr *src, + struct in_addr *dst) +{ + struct iphdr *iph = (struct iphdr *)buf; + + iph->ihl = 5; + iph->version = 4; + iph->ttl = TUNTAP_DEFAULT_TTL; + iph->tot_len = htons(sizeof(*iph) + payload_len); + iph->id = htons(TUNTAP_DEFAULT_IPID); + iph->protocol = proto; + iph->saddr = src->s_addr; + iph->daddr = dst->s_addr; + iph->check = build_ip_csum(buf, iph->ihl << 2, 0); + + return iph->ihl << 2; +} + +static inline void ipv6_set_dsfield(struct ipv6hdr *ip6h, uint8_t dsfield) +{ + uint16_t val, *ptr = (uint16_t *)ip6h; + + val = ntohs(*ptr); + val &= 0xF00F; + val |= ((uint16_t)dsfield) << 4; + *ptr = htons(val); +} + +static inline int build_ipv6_header(uint8_t *buf, uint8_t proto, + uint8_t dsfield, int payload_len, + struct in6_addr *src, struct in6_addr *dst) +{ + struct ipv6hdr *ip6h = (struct ipv6hdr *)buf; + + ip6h->version = 6; + ip6h->payload_len = htons(payload_len); + ip6h->nexthdr = proto; + ip6h->hop_limit = TUNTAP_DEFAULT_TTL; + ipv6_set_dsfield(ip6h, dsfield); + memcpy(&ip6h->saddr, src, sizeof(ip6h->saddr)); + memcpy(&ip6h->daddr, dst, sizeof(ip6h->daddr)); + + return sizeof(struct ipv6hdr); +} + +static inline int build_geneve_header(uint8_t *buf, uint32_t vni) +{ + uint16_t protocol = htons(ETH_P_TEB); + uint32_t geneve_vni = htonl((vni << 8) & 0xffffff00); + + memcpy(buf + 2, &protocol, 2); + memcpy(buf + 4, &geneve_vni, 4); + return GENEVE_HLEN; +} + +static inline int build_udp_header(uint8_t *buf, uint16_t sport, uint16_t dport, + int payload_len) +{ + struct udphdr *udph = (struct udphdr *)buf; + + udph->source = htons(sport); + udph->dest = htons(dport); + udph->len = htons(sizeof(*udph) + payload_len); + return sizeof(*udph); +} + +static inline void build_udp_packet_csum(uint8_t *buf, int family, + bool csum_off) +{ + struct udphdr *udph = (struct udphdr *)buf; + size_t ipalen = ip_addr_len(family); + uint32_t sum; + + /* No extension IPv4 and IPv6 headers addresses are the last fields */ + sum = add_csum(buf - 2 * ipalen, 2 * ipalen); + sum += htons(IPPROTO_UDP) + udph->len; + + if (!csum_off) + sum += add_csum(buf, udph->len); + + udph->check = finish_ip_csum(sum); +} + +static inline int build_udp_packet(uint8_t *buf, uint16_t sport, uint16_t dport, + int payload_len, int family, bool csum_off) +{ + struct udphdr *udph = (struct udphdr *)buf; + + build_udp_header(buf, sport, dport, payload_len); + memset(buf + sizeof(*udph), PKT_DATA, payload_len); + build_udp_packet_csum(buf, family, csum_off); + + return sizeof(*udph) + payload_len; +} + +static inline int build_virtio_net_hdr_v1_hash_tunnel(uint8_t *buf, bool is_tap, + int hdr_len, int gso_size, + int outer_family, + int inner_family) +{ + struct virtio_net_hdr_v1_hash_tunnel *vh_tunnel = (void *)buf; + struct virtio_net_hdr_v1 *vh = &vh_tunnel->hash_hdr.hdr; + int outer_iphlen, inner_iphlen, eth_hlen, gso_type; + + eth_hlen = is_tap ? ETH_HLEN : 0; + outer_iphlen = (outer_family == AF_INET) ? sizeof(struct iphdr) : + sizeof(struct ipv6hdr); + inner_iphlen = (inner_family == AF_INET) ? sizeof(struct iphdr) : + sizeof(struct ipv6hdr); + + vh_tunnel->outer_th_offset = eth_hlen + outer_iphlen; + vh_tunnel->inner_nh_offset = vh_tunnel->outer_th_offset + ETH_HLEN + + GENEVE_HLEN + sizeof(struct udphdr); + + vh->csum_start = vh_tunnel->inner_nh_offset + inner_iphlen; + vh->csum_offset = __builtin_offsetof(struct udphdr, check); + vh->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + vh->hdr_len = hdr_len; + vh->gso_size = gso_size; + + if (gso_size) { + gso_type = outer_family == AF_INET ? + VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV4 : + VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6; + vh->gso_type = VIRTIO_NET_HDR_GSO_UDP_L4 | gso_type; + } + + return sizeof(struct virtio_net_hdr_v1_hash_tunnel); +} + +#endif /* _TUNTAP_HELPERS_H */ diff --git a/tools/testing/selftests/net/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c index dae91eb97d69..170be192f5c7 100644 --- a/tools/testing/selftests/net/txtimestamp.c +++ b/tools/testing/selftests/net/txtimestamp.c @@ -206,18 +206,16 @@ static void __print_timestamp(const char *name, struct timespec *cur, fprintf(stderr, "\n"); } -static void print_timestamp_usr(void) +static void record_timestamp_usr(void) { if (clock_gettime(CLOCK_REALTIME, &ts_usr)) error(1, errno, "clock_gettime"); - - __print_timestamp(" USR", &ts_usr, 0, 0); } static void print_timestamp(struct scm_timestamping *tss, int tstype, int tskey, int payload_len) { - const char *tsname; + const char *tsname = NULL; validate_key(tskey, tstype); @@ -599,8 +597,6 @@ static void do_test(int family, unsigned int report_opt) fill_header_udp(buf + off, family == PF_INET); } - print_timestamp_usr(); - iov.iov_base = buf; iov.iov_len = total_len; @@ -655,10 +651,14 @@ static void do_test(int family, unsigned int report_opt) } + record_timestamp_usr(); + val = sendmsg(fd, &msg, 0); if (val != total_len) error(1, errno, "send"); + __print_timestamp(" USR", &ts_usr, 0, 0); + /* wait for all errors to be queued, else ACKs arrive OOO */ if (cfg_sleep_usec) usleep(cfg_sleep_usec); diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index d5ffd8c9172e..b17e032a6d75 100755 --- a/tools/testing/selftests/net/udpgro.sh +++ b/tools/testing/selftests/net/udpgro.sh @@ -3,7 +3,7 @@ # # Run a series of udpgro functional tests. -source net_helper.sh +source lib.sh readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" @@ -48,7 +48,7 @@ run_one() { cfg_veth - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} & + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 100 ${rx_args} & local PID1=$! wait_local_port_listen ${PEER_NS} 8000 udp @@ -95,7 +95,7 @@ run_one_nat() { # will land on the 'plain' one ip netns exec "${PEER_NS}" ./udpgso_bench_rx -G ${family} -b ${addr1} -n 0 & local PID1=$! - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${family} -b ${addr2%/*} ${rx_args} & + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 100 ${family} -b ${addr2%/*} ${rx_args} & local PID2=$! wait_local_port_listen "${PEER_NS}" 8000 udp @@ -117,9 +117,9 @@ run_one_2sock() { cfg_veth - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} -p 12345 & + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 100 ${rx_args} -p 12345 & local PID1=$! - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 10 ${rx_args} & + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 100 ${rx_args} & local PID2=$! wait_local_port_listen "${PEER_NS}" 12345 udp diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh index c51ea90a1395..54fa4821bc5e 100755 --- a/tools/testing/selftests/net/udpgro_bench.sh +++ b/tools/testing/selftests/net/udpgro_bench.sh @@ -3,11 +3,11 @@ # # Run a series of udpgro benchmarks -source net_helper.sh +source lib.sh readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" -BPF_FILE="xdp_dummy.bpf.o" +BPF_FILE="lib/xdp_dummy.bpf.o" cleanup() { local -r jobs="$(jobs -p)" diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh index 17404f49cdb6..9a2cfec1153e 100755 --- a/tools/testing/selftests/net/udpgro_frglist.sh +++ b/tools/testing/selftests/net/udpgro_frglist.sh @@ -3,11 +3,11 @@ # # Run a series of udpgro benchmarks -source net_helper.sh +source lib.sh readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" -BPF_FILE="xdp_dummy.bpf.o" +BPF_FILE="lib/xdp_dummy.bpf.o" cleanup() { local -r jobs="$(jobs -p)" diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index 550d8eb3e224..9b722c1e4b0f 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -1,9 +1,9 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -source net_helper.sh +source lib.sh -BPF_FILE="xdp_dummy.bpf.o" +BPF_FILE="lib/xdp_dummy.bpf.o" readonly BASE="ns-$(mktemp -u XXXXXX)" readonly SRC=2 readonly DST=1 @@ -162,6 +162,39 @@ run_test() { echo " ok" } +run_test_csum() { + local -r msg="$1" + local -r dst="$2" + local csum_error_filter=UdpInCsumErrors + local csum_errors + + printf "%-40s" "$msg" + + is_ipv6 "$dst" && csum_error_filter=Udp6InCsumErrors + + ip netns exec "$NS_DST" iperf3 -s -1 >/dev/null & + wait_local_port_listen "$NS_DST" 5201 tcp + local spid="$!" + ip netns exec "$NS_SRC" iperf3 -c "$dst" -t 2 >/dev/null + local retc="$?" + wait "$spid" + local rets="$?" + if [ "$rets" -ne 0 ] || [ "$retc" -ne 0 ]; then + echo " fail client exit code $retc, server $rets" + ret=1 + return + fi + + csum_errors=$(ip netns exec "$NS_DST" nstat -as "$csum_error_filter" | + grep "$csum_error_filter" | awk '{print $2}') + if [ -n "$csum_errors" ] && [ "$csum_errors" -gt 0 ]; then + echo " fail - csum error on receive $csum_errors, expected 0" + ret=1 + return + fi + echo " ok" +} + run_bench() { local -r msg=$1 local -r dst=$2 @@ -260,6 +293,37 @@ for family in 4 6; do ip netns exec $NS_SRC $PING -q -c 1 $OL_NET$DST_NAT >/dev/null run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 10 10 $OL_NET$DST cleanup + + # force segmentation and re-aggregation + create_vxlan_pair + ip netns exec "$NS_DST" ethtool -K veth"$DST" generic-receive-offload on + ip netns exec "$NS_SRC" ethtool -K veth"$SRC" tso off + ip -n "$NS_SRC" link set dev veth"$SRC" mtu 1430 + + # forward to a 2nd veth pair + ip -n "$NS_DST" link add br0 type bridge + ip -n "$NS_DST" link set dev veth"$DST" master br0 + + # segment the aggregated TSO packet, without csum offload + ip -n "$NS_DST" link add veth_segment type veth peer veth_rx + for FEATURE in tso tx-udp-segmentation tx-checksumming; do + ip netns exec "$NS_DST" ethtool -K veth_segment "$FEATURE" off + done + ip -n "$NS_DST" link set dev veth_segment master br0 up + ip -n "$NS_DST" link set dev br0 up + ip -n "$NS_DST" link set dev veth_rx up + + # move the lower layer IP in the last added veth + for ADDR in "$BM_NET_V4$DST/24" "$BM_NET_V6$DST/64"; do + # the dad argument will let iproute emit a unharmful warning + # with ipv4 addresses + ip -n "$NS_DST" addr del dev veth"$DST" "$ADDR" + ip -n "$NS_DST" addr add dev veth_rx "$ADDR" \ + nodad 2>/dev/null + done + + run_test_csum "GSO after GRO" "$OL_NET$DST" + cleanup done exit $ret diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c index 477392715a9a..86d80cce55b4 100644 --- a/tools/testing/selftests/net/udpgso_bench_tx.c +++ b/tools/testing/selftests/net/udpgso_bench_tx.c @@ -25,7 +25,7 @@ #include <sys/types.h> #include <unistd.h> -#include "../kselftest.h" +#include "kselftest.h" #ifndef ETH_MAX_MTU #define ETH_MAX_MTU 0xFFFFU diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh index 6bb7dfaa30b6..9709dd067c72 100755 --- a/tools/testing/selftests/net/veth.sh +++ b/tools/testing/selftests/net/veth.sh @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 -BPF_FILE="xdp_dummy.bpf.o" +BPF_FILE="lib/xdp_dummy.bpf.o" readonly STATS="$(mktemp -p /tmp ns-XXXXXX)" readonly BASE=`basename $STATS` readonly SRC=2 diff --git a/tools/testing/selftests/net/vlan_bridge_binding.sh b/tools/testing/selftests/net/vlan_bridge_binding.sh index e7cb8c678bde..e8c02c64e03a 100755 --- a/tools/testing/selftests/net/vlan_bridge_binding.sh +++ b/tools/testing/selftests/net/vlan_bridge_binding.sh @@ -18,29 +18,29 @@ setup_prepare() { local port - ip_link_add br up type bridge vlan_filtering 1 + adf_ip_link_add br up type bridge vlan_filtering 1 for port in d1 d2 d3; do - ip_link_add $port type veth peer name r$port - ip_link_set_up $port - ip_link_set_up r$port - ip_link_set_master $port br + adf_ip_link_add $port type veth peer name r$port + adf_ip_link_set_up $port + adf_ip_link_set_up r$port + adf_ip_link_set_master $port br done - bridge_vlan_add vid 11 dev br self - bridge_vlan_add vid 11 dev d1 master + adf_bridge_vlan_add vid 11 dev br self + adf_bridge_vlan_add vid 11 dev d1 master - bridge_vlan_add vid 12 dev br self - bridge_vlan_add vid 12 dev d2 master + adf_bridge_vlan_add vid 12 dev br self + adf_bridge_vlan_add vid 12 dev d2 master - bridge_vlan_add vid 13 dev br self - bridge_vlan_add vid 13 dev d1 master - bridge_vlan_add vid 13 dev d2 master + adf_bridge_vlan_add vid 13 dev br self + adf_bridge_vlan_add vid 13 dev d1 master + adf_bridge_vlan_add vid 13 dev d2 master - bridge_vlan_add vid 14 dev br self - bridge_vlan_add vid 14 dev d1 master - bridge_vlan_add vid 14 dev d2 master - bridge_vlan_add vid 14 dev d3 master + adf_bridge_vlan_add vid 14 dev br self + adf_bridge_vlan_add vid 14 dev d1 master + adf_bridge_vlan_add vid 14 dev d2 master + adf_bridge_vlan_add vid 14 dev d3 master } operstate_is() @@ -74,7 +74,7 @@ add_one_vlan() local link=$1; shift local id=$1; shift - ip_link_add $link.$id link $link type vlan id $id "$@" + adf_ip_link_add $link.$id link $link type vlan id $id "$@" } add_vlans() @@ -98,7 +98,7 @@ down_netdevs() local dev for dev in "$@"; do - ip_link_set_down $dev + adf_ip_link_set_down $dev done } @@ -207,13 +207,13 @@ test_binding_toggle_off() do_test_binding_off : "on->off" } -dfr_set_binding_on() +adf_set_binding_on() { set_vlans type vlan bridge_binding on defer set_vlans type vlan bridge_binding off } -dfr_set_binding_off() +adf_set_binding_off() { set_vlans type vlan bridge_binding off defer set_vlans type vlan bridge_binding on @@ -223,14 +223,14 @@ test_binding_toggle_on_when_lower_down() { add_vlans bridge_binding off set_vlans up - do_test_binding_on dfr_set_binding_on "off->on when lower down" + do_test_binding_on adf_set_binding_on "off->on when lower down" } test_binding_toggle_off_when_lower_down() { add_vlans bridge_binding on set_vlans up - do_test_binding_off dfr_set_binding_off "on->off when lower down" + do_test_binding_off adf_set_binding_off "on->off when lower down" } test_binding_toggle_on_when_upper_down() @@ -249,6 +249,8 @@ test_binding_toggle_off_when_upper_down() do_test_binding_off : "on->off when upper down" } +require_command jq + trap defer_scopes_cleanup EXIT setup_prepare tests_run diff --git a/tools/testing/selftests/net/vlan_hw_filter.sh b/tools/testing/selftests/net/vlan_hw_filter.sh index 7bc804ffaf7c..e195d5cab6f7 100755 --- a/tools/testing/selftests/net/vlan_hw_filter.sh +++ b/tools/testing/selftests/net/vlan_hw_filter.sh @@ -3,27 +3,101 @@ readonly NETNS="ns-$(mktemp -u XXXXXX)" +ALL_TESTS=" + test_vlan_filter_check + test_vlan0_del_crash_01 + test_vlan0_del_crash_02 + test_vlan0_del_crash_03 + test_vid0_memleak +" + ret=0 +setup() { + ip netns add ${NETNS} +} + cleanup() { - ip netns del $NETNS + ip netns del $NETNS 2>/dev/null } trap cleanup EXIT fail() { - echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2 - ret=1 + echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2 + ret=1 +} + +tests_run() +{ + local current_test + for current_test in ${TESTS:-$ALL_TESTS}; do + $current_test + done +} + +test_vlan_filter_check() { + setup + ip netns exec ${NETNS} ip link add bond0 type bond mode 0 + ip netns exec ${NETNS} ip link add bond_slave_1 type veth peer veth2 + ip netns exec ${NETNS} ip link set bond_slave_1 master bond0 + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off + ip netns exec ${NETNS} ip link add link bond_slave_1 name bond_slave_1.0 type vlan id 0 + ip netns exec ${NETNS} ip link add link bond0 name bond0.0 type vlan id 0 + ip netns exec ${NETNS} ip link set bond_slave_1 nomaster + ip netns exec ${NETNS} ip link del veth2 || fail "Please check vlan HW filter function" + cleanup } -ip netns add ${NETNS} -ip netns exec ${NETNS} ip link add bond0 type bond mode 0 -ip netns exec ${NETNS} ip link add bond_slave_1 type veth peer veth2 -ip netns exec ${NETNS} ip link set bond_slave_1 master bond0 -ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off -ip netns exec ${NETNS} ip link add link bond_slave_1 name bond_slave_1.0 type vlan id 0 -ip netns exec ${NETNS} ip link add link bond0 name bond0.0 type vlan id 0 -ip netns exec ${NETNS} ip link set bond_slave_1 nomaster -ip netns exec ${NETNS} ip link del veth2 || fail "Please check vlan HW filter function" +#enable vlan_filter feature of real_dev with vlan0 during running time +test_vlan0_del_crash_01() { + setup + ip netns exec ${NETNS} ip link add bond0 type bond mode 0 + ip netns exec ${NETNS} ip link add link bond0 name vlan0 type vlan id 0 protocol 802.1q + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off + ip netns exec ${NETNS} ip link set dev bond0 up + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter on + ip netns exec ${NETNS} ip link set dev bond0 down + ip netns exec ${NETNS} ip link set dev bond0 up + ip netns exec ${NETNS} ip link del vlan0 || fail "Please check vlan HW filter function" + cleanup +} + +#enable vlan_filter feature and add vlan0 for real_dev during running time +test_vlan0_del_crash_02() { + setup + ip netns exec ${NETNS} ip link add bond0 type bond mode 0 + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off + ip netns exec ${NETNS} ip link set dev bond0 up + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter on + ip netns exec ${NETNS} ip link add link bond0 name vlan0 type vlan id 0 protocol 802.1q + ip netns exec ${NETNS} ip link set dev bond0 down + ip netns exec ${NETNS} ip link set dev bond0 up + ip netns exec ${NETNS} ip link del vlan0 || fail "Please check vlan HW filter function" + cleanup +} + +#enable vlan_filter feature of real_dev during running time +#test kernel_bug of vlan unregister +test_vlan0_del_crash_03() { + setup + ip netns exec ${NETNS} ip link add bond0 type bond mode 0 + ip netns exec ${NETNS} ip link add link bond0 name vlan0 type vlan id 0 protocol 802.1q + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off + ip netns exec ${NETNS} ip link set dev bond0 up + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter on + ip netns exec ${NETNS} ip link set dev bond0 down + ip netns exec ${NETNS} ip link del vlan0 || fail "Please check vlan HW filter function" + cleanup +} + +test_vid0_memleak() { + setup + ip netns exec ${NETNS} ip link add bond0 up type bond mode 0 + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off + ip netns exec ${NETNS} ip link del dev bond0 || fail "Please check vlan HW filter function" + cleanup +} +tests_run exit $ret diff --git a/tools/testing/selftests/net/vrf_route_leaking.sh b/tools/testing/selftests/net/vrf_route_leaking.sh index e9c2f71da207..ce34cb2e6e0b 100755 --- a/tools/testing/selftests/net/vrf_route_leaking.sh +++ b/tools/testing/selftests/net/vrf_route_leaking.sh @@ -275,7 +275,7 @@ setup_sym() # Wait for ip config to settle - sleep 2 + slowwait 5 ip netns exec $h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1 } setup_asym() @@ -370,7 +370,7 @@ setup_asym() ip -netns $r2 -6 addr add dev eth1 ${R2_N2_IP6}/64 nodad # Wait for ip config to settle - sleep 2 + slowwait 5 ip netns exec $h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1 } check_connectivity() diff --git a/tools/testing/selftests/net/xdp_dummy.bpf.c b/tools/testing/selftests/net/xdp_dummy.bpf.c deleted file mode 100644 index d988b2e0cee8..000000000000 --- a/tools/testing/selftests/net/xdp_dummy.bpf.c +++ /dev/null @@ -1,13 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#define KBUILD_MODNAME "xdp_dummy" -#include <linux/bpf.h> -#include <bpf/bpf_helpers.h> - -SEC("xdp") -int xdp_dummy_prog(struct xdp_md *ctx) -{ - return XDP_PASS; -} - -char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/net/xfrm_state.sh b/tools/testing/selftests/net/xfrm_state.sh new file mode 100755 index 000000000000..f6c54a6496d7 --- /dev/null +++ b/tools/testing/selftests/net/xfrm_state.sh @@ -0,0 +1,613 @@ +#!/bin/bash -e +# SPDX-License-Identifier: GPL-2.0 +# +# xfrm/IPsec tests. +# Currently implemented: +# - ICMP error source address verification (IETF RFC 4301 section 6) +# - ICMP MTU exceeded handling over IPsec tunnels. +# +# Addresses and topology: +# IPv4 prefix 10.1.c.d IPv6 prefix fc00:c::d/64 where c is the segment number +# and d is the interface identifier. +# IPv6 uses the same c:d as IPv4, and start with IPv6 prefix instead ipv4 prefix +# +# Network topology default: ns_set_v4 or ns_set_v6 +# 1.1 1.2 2.1 2.2 3.1 3.2 4.1 4.2 5.1 5.2 6.1 6.2 +# eth0 eth1 eth0 eth1 eth0 eth1 eth0 eth1 eth0 eth1 eth0 eth1 +# a -------- r1 -------- s1 -------- r2 -------- s2 -------- r3 -------- b +# a, b = Alice and Bob hosts without IPsec. +# r1, r2, r3 routers, without IPsec +# s1, s2, IPsec gateways/routers that setup tunnel(s). + +# Network topology x: IPsec gateway that generates ICMP response - ns_set_v4x or ns_set_v6x +# 1.1 1.2 2.1 2.2 3.1 3.2 4.1 4.2 5.1 5.2 +# eth0 eth1 eth0 eth1 eth0 eth1 eth0 eth1 eth0 eth1 +# a -------- r1 -------- s1 -------- r2 -------- s2 -------- b + +. lib.sh + +EXIT_ON_TEST_FAIL=no +PAUSE=no +VERBOSE=${VERBOSE:-0} +DEBUG=0 + +# Name Description +tests=" + unreachable_ipv4 IPv4 unreachable from router r3 + unreachable_ipv6 IPv6 unreachable from router r3 + unreachable_gw_ipv4 IPv4 unreachable from IPsec gateway s2 + unreachable_gw_ipv6 IPv6 unreachable from IPsec gateway s2 + mtu_ipv4_s2 IPv4 MTU exceeded from IPsec gateway s2 + mtu_ipv6_s2 IPv6 MTU exceeded from IPsec gateway s2 + mtu_ipv4_r2 IPv4 MTU exceeded from ESP router r2 + mtu_ipv6_r2 IPv6 MTU exceeded from ESP router r2 + mtu_ipv4_r3 IPv4 MTU exceeded from router r3 + mtu_ipv6_r3 IPv6 MTU exceeded from router r3" + +prefix4="10.1" +prefix6="fc00" + +run_cmd_err() { + cmd="$*" + + if [ "$VERBOSE" -gt 0 ]; then + printf " COMMAND: %s\n" "$cmd" + fi + + out="$($cmd 2>&1)" && rc=0 || rc=$? + if [ "$VERBOSE" -gt 1 ] && [ -n "$out" ]; then + echo " $out" + echo + fi + return 0 +} + +run_cmd() { + run_cmd_err "$@" || exit 1 +} + +run_test() { + # If errexit is set, unset it for sub-shell and restore after test + errexit=0 + if [[ $- =~ "e" ]]; then + errexit=1 + set +e + fi + + ( + unset IFS + + # shellcheck disable=SC2030 # fail is read by trap/cleanup within this subshell + fail="yes" + + # Since cleanup() relies on variables modified by this sub shell, + # it has to run in this context. + trap 'log_test_error $?; cleanup' EXIT INT TERM + + if [ "$VERBOSE" -gt 0 ]; then + printf "\n#############################################################\n\n" + fi + + ret=0 + case "${name}" in + # can't use eval and test names shell check will complain about unused code + unreachable_ipv4) test_unreachable_ipv4 ;; + unreachable_ipv6) test_unreachable_ipv6 ;; + unreachable_gw_ipv4) test_unreachable_gw_ipv4 ;; + unreachable_gw_ipv6) test_unreachable_gw_ipv6 ;; + mtu_ipv4_s2) test_mtu_ipv4_s2 ;; + mtu_ipv6_s2) test_mtu_ipv6_s2 ;; + mtu_ipv4_r2) test_mtu_ipv4_r2 ;; + mtu_ipv6_r2) test_mtu_ipv6_r2 ;; + mtu_ipv4_r3) test_mtu_ipv4_r3 ;; + mtu_ipv6_r3) test_mtu_ipv6_r3 ;; + esac + ret=$? + + if [ $ret -eq 0 ]; then + fail="no" + + if [ "$VERBOSE" -gt 1 ]; then + show_icmp_filter + fi + + printf "TEST: %-60s [ PASS ]\n" "${desc}" + elif [ $ret -eq "$ksft_skip" ]; then + fail="no" + printf "TEST: %-60s [SKIP]\n" "${desc}" + fi + + return $ret + ) + ret=$? + + [ $errexit -eq 1 ] && set -e + + case $ret in + 0) + all_skipped=false + [ "$exitcode" -eq "$ksft_skip" ] && exitcode=0 + ;; + "$ksft_skip") + [ $all_skipped = true ] && exitcode=$ksft_skip + ;; + *) + all_skipped=false + exitcode=1 + ;; + esac + + return 0 # don't trigger errexit (-e); actual status in exitcode +} + +setup_namespaces() { + local namespaces="" + + NS_A="" + NS_B="" + NS_R1="" + NS_R2="" + NS_R3="" + NS_S1="" + NS_S2="" + + for ns in ${ns_set}; do + namespaces="$namespaces NS_${ns^^}" + done + + # shellcheck disable=SC2086 # setup_ns expects unquoted list + setup_ns $namespaces + + ns_active= #ordered list of namespaces for this test. + + [ -n "${NS_A}" ] && ns_a=(ip netns exec "${NS_A}") && ns_active="${ns_active} $NS_A" + [ -n "${NS_R1}" ] && ns_active="${ns_active} $NS_R1" + [ -n "${NS_S1}" ] && ns_s1=(ip netns exec "${NS_S1}") && ns_active="${ns_active} $NS_S1" + [ -n "${NS_R2}" ] && ns_r2=(ip netns exec "${NS_R2}") && ns_active="${ns_active} $NS_R2" + [ -n "${NS_S2}" ] && ns_s2=(ip netns exec "${NS_S2}") && ns_active="${ns_active} $NS_S2" + [ -n "${NS_R3}" ] && ns_r3=(ip netns exec "${NS_R3}") && ns_active="${ns_active} $NS_R3" + [ -n "${NS_B}" ] && ns_active="${ns_active} $NS_B" +} + +addr_add() { + local -a ns_cmd=(ip netns exec "$1") + local addr="$2" + local dev="$3" + + run_cmd "${ns_cmd[@]}" ip addr add "${addr}" dev "${dev}" + run_cmd "${ns_cmd[@]}" ip link set up "${dev}" +} + +veth_add() { + local ns=$2 + local pns=$1 + local -a ns_cmd=(ip netns exec "${pns}") + local ln="eth0" + local rn="eth1" + + run_cmd "${ns_cmd[@]}" ip link add "${ln}" type veth peer name "${rn}" netns "${ns}" +} + +show_icmp_filter() { + run_cmd "${ns_r2[@]}" nft list ruleset + echo "$out" +} + +setup_icmp_filter() { + run_cmd "${ns_r2[@]}" nft add table inet filter + run_cmd "${ns_r2[@]}" nft add chain inet filter FORWARD \ + '{ type filter hook forward priority filter; policy drop ; }' + run_cmd "${ns_r2[@]}" nft add rule inet filter FORWARD counter ip protocol esp \ + counter log accept + run_cmd "${ns_r2[@]}" nft add rule inet filter FORWARD counter ip protocol \ + icmp counter log drop + + if [ "$VERBOSE" -gt 0 ]; then + run_cmd "${ns_r2[@]}" nft list ruleset + echo "$out" + fi +} + +setup_icmpv6_filter() { + run_cmd "${ns_r2[@]}" nft add table inet filter + run_cmd "${ns_r2[@]}" nft add chain inet filter FORWARD \ + '{ type filter hook forward priority filter; policy drop ; }' + run_cmd "${ns_r2[@]}" nft add rule inet filter FORWARD ip6 nexthdr \ + ipv6-icmp icmpv6 type echo-request counter log drop + run_cmd "${ns_r2[@]}" nft add rule inet filter FORWARD ip6 nexthdr esp \ + counter log accept + run_cmd "${ns_r2[@]}" nft add rule inet filter FORWARD ip6 nexthdr \ + ipv6-icmp icmpv6 type \ + '{nd-neighbor-solicit,nd-neighbor-advert,nd-router-solicit,nd-router-advert}' \ + counter log drop + if [ "$VERBOSE" -gt 0 ]; then + run_cmd "${ns_r2[@]}" nft list ruleset + echo "$out" + fi +} + +set_xfrm_params() { + s1_src=${src} + s1_dst=${dst} + s1_src_net=${src_net} + s1_dst_net=${dst_net} +} + +setup_ns_set_v4() { + ns_set="a r1 s1 r2 s2 r3 b" # Network topology default + imax=$(echo "$ns_set" | wc -w) # number of namespaces in this topology + + src="10.1.3.1" + dst="10.1.4.2" + src_net="10.1.1.0/24" + dst_net="10.1.6.0/24" + + prefix=${prefix4} + prefix_len=24 + s="." + S="." + + set_xfrm_params +} + +setup_ns_set_v4x() { + ns_set="a r1 s1 r2 s2 b" # Network topology: x + imax=$(echo "$ns_set" | wc -w) # number of namespaces in this topology + prefix=${prefix4} + s="." + S="." + src="10.1.3.1" + dst="10.1.4.2" + src_net="10.1.1.0/24" + dst_net="10.1.5.0/24" + prefix_len=24 + + set_xfrm_params +} + +setup_ns_set_v6() { + ns_set="a r1 s1 r2 s2 r3 b" # Network topology default + imax=$(echo "$ns_set" | wc -w) # number of namespaces in this topology + prefix=${prefix6} + s=":" + S="::" + src="fc00:3::1" + dst="fc00:4::2" + src_net="fc00:1::0/64" + dst_net="fc00:6::0/64" + prefix_len=64 + + set_xfrm_params +} + +setup_ns_set_v6x() { + ns_set="a r1 s1 r2 s2 b" # Network topology: x + imax=$(echo "$ns_set" | wc -w) + prefix=${prefix6} + s=":" + S="::" + src="fc00:3::1" + dst="fc00:4::2" + src_net="fc00:1::0/64" + dst_net="fc00:5::0/64" + prefix_len=64 + + set_xfrm_params +} + +setup_network() { + # Create veths and add addresses + local -a ns_cmd + i=1 + p="" + for ns in ${ns_active}; do + ns_cmd=(ip netns exec "${ns}") + + if [ "${i}" -ne 1 ]; then + # Create veth between previous and current namespace + veth_add "${p}" "${ns}" + # Add addresses: previous gets .1 on eth0, current gets .2 on eth1 + addr_add "${p}" "${prefix}${s}$((i-1))${S}1/${prefix_len}" eth0 + addr_add "${ns}" "${prefix}${s}$((i-1))${S}2/${prefix_len}" eth1 + fi + + # Enable forwarding + run_cmd "${ns_cmd[@]}" sysctl -q net/ipv4/ip_forward=1 + run_cmd "${ns_cmd[@]}" sysctl -q net/ipv6/conf/all/forwarding=1 + run_cmd "${ns_cmd[@]}" sysctl -q net/ipv6/conf/default/accept_dad=0 + + p=${ns} + i=$((i + 1)) + done + + # Add routes (needs all addresses to exist first) + i=1 + for ns in ${ns_active}; do + ns_cmd=(ip netns exec "${ns}") + + # Forward routes to networks beyond this node + if [ "${i}" -ne "${imax}" ]; then + nhf="${prefix}${s}${i}${S}2" # nexthop forward + for j in $(seq $((i + 1)) "${imax}"); do + run_cmd "${ns_cmd[@]}" ip route replace \ + "${prefix}${s}${j}${S}0/${prefix_len}" via "${nhf}" + done + fi + + # Reverse routes to networks before this node + if [ "${i}" -gt 1 ]; then + nhr="${prefix}${s}$((i-1))${S}1" # nexthop reverse + for j in $(seq 1 $((i - 2))); do + run_cmd "${ns_cmd[@]}" ip route replace \ + "${prefix}${s}${j}${S}0/${prefix_len}" via "${nhr}" + done + fi + + i=$((i + 1)) + done +} + +setup_xfrm_mode() { + local MODE=${1:-tunnel} + if [ "${MODE}" != "tunnel" ] && [ "${MODE}" != "beet" ]; then + echo "xfrm mode ${MODE} not supported" + log_test_error + return 1 + fi + + run_cmd "${ns_s1[@]}" ip xfrm policy add src "${s1_src_net}" dst "${s1_dst_net}" dir out \ + tmpl src "${s1_src}" dst "${s1_dst}" proto esp reqid 1 mode "${MODE}" + + # no "input" policies. we are only doing forwarding so far + + run_cmd "${ns_s1[@]}" ip xfrm policy add src "${s1_dst_net}" dst "${s1_src_net}" dir fwd \ + flag icmp tmpl src "${s1_dst}" dst "${s1_src}" proto esp reqid 2 mode "${MODE}" + + run_cmd "${ns_s1[@]}" ip xfrm state add src "${s1_src}" dst "${s1_dst}" proto esp spi 1 \ + reqid 1 mode "${MODE}" aead 'rfc4106(gcm(aes))' \ + 0x1111111111111111111111111111111111111111 96 \ + sel src "${s1_src_net}" dst "${s1_dst_net}" dir out + + run_cmd "${ns_s1[@]}" ip xfrm state add src "${s1_dst}" dst "${s1_src}" proto esp spi 2 \ + reqid 2 flag icmp replay-window 8 mode "${MODE}" aead 'rfc4106(gcm(aes))' \ + 0x2222222222222222222222222222222222222222 96 \ + sel src "${s1_dst_net}" dst "${s1_src_net}" dir in + + run_cmd "${ns_s2[@]}" ip xfrm policy add src "${s1_dst_net}" dst "${s1_src_net}" dir out \ + flag icmp tmpl src "${s1_dst}" dst "${s1_src}" proto esp reqid 2 mode "${MODE}" + + run_cmd "${ns_s2[@]}" ip xfrm policy add src "${s1_src_net}" dst "${s1_dst_net}" dir fwd \ + tmpl src "${s1_src}" dst "${s1_dst}" proto esp reqid 1 mode "${MODE}" + + run_cmd "${ns_s2[@]}" ip xfrm state add src "${s1_dst}" dst "${s1_src}" proto esp spi 2 \ + reqid 2 mode "${MODE}" aead 'rfc4106(gcm(aes))' \ + 0x2222222222222222222222222222222222222222 96 \ + sel src "${s1_dst_net}" dst "${s1_src_net}" dir out + + run_cmd "${ns_s2[@]}" ip xfrm state add src "${s1_src}" dst "${s1_dst}" proto esp spi 1 \ + reqid 1 flag icmp replay-window 8 mode "${MODE}" aead 'rfc4106(gcm(aes))' \ + 0x1111111111111111111111111111111111111111 96 \ + sel src "${s1_src_net}" dst "${s1_dst_net}" dir in +} + +setup_xfrm() { + setup_xfrm_mode tunnel +} + +setup() { + [ "$(id -u)" -ne 0 ] && echo " need to run as root" && return "$ksft_skip" + + for arg; do + case "${arg}" in + ns_set_v4) setup_ns_set_v4 ;; + ns_set_v4x) setup_ns_set_v4x ;; + ns_set_v6) setup_ns_set_v6 ;; + ns_set_v6x) setup_ns_set_v6x ;; + namespaces) setup_namespaces ;; + network) setup_network ;; + xfrm) setup_xfrm ;; + icmp_filter) setup_icmp_filter ;; + icmpv6_filter) setup_icmpv6_filter ;; + *) echo " ${arg} not supported"; return 1 ;; + esac || return 1 + done +} + +# shellcheck disable=SC2317 # called via trap +pause() { + echo + echo "Pausing. Hit enter to continue" + read -r _ +} + +# shellcheck disable=SC2317 # called via trap +log_test_error() { + # shellcheck disable=SC2031 # fail is set in subshell, read via trap + if [ "${fail}" = "yes" ] && [ -n "${desc}" ]; then + if [ "$VERBOSE" -gt 0 ]; then + show_icmp_filter + fi + printf "TEST: %-60s [ FAIL ] %s\n" "${desc}" "${name}" + [ -n "${cmd}" ] && printf '%s\n\n' "${cmd}" + [ -n "${out}" ] && printf '%s\n\n' "${out}" + fi +} + +# shellcheck disable=SC2317 # called via trap +cleanup() { + # shellcheck disable=SC2031 # fail is set in subshell, read via trap + [[ "$PAUSE" = "always" || ( "$PAUSE" = "fail" && "$fail" = "yes" ) ]] && pause + cleanup_all_ns + # shellcheck disable=SC2031 # fail is set in subshell, read via trap + [ "${EXIT_ON_TEST_FAIL}" = "yes" ] && [ "${fail}" = "yes" ] && exit 1 +} + +test_unreachable_ipv6() { + setup ns_set_v6 namespaces network xfrm icmpv6_filter || return "$ksft_skip" + run_cmd "${ns_a[@]}" ping -W 5 -w 4 -c 1 fc00:6::2 + run_cmd_err "${ns_a[@]}" ping -W 5 -w 4 -c 1 fc00:6::3 + rc=0 + echo -e "$out" | grep -q -E 'From fc00:5::2 icmp_seq.* Destination' || rc=1 + return "${rc}" +} + +test_unreachable_gw_ipv6() { + setup ns_set_v6x namespaces network xfrm icmpv6_filter || return "$ksft_skip" + run_cmd "${ns_a[@]}" ping -W 5 -w 4 -c 1 fc00:5::2 + run_cmd_err "${ns_a[@]}" ping -W 5 -w 4 -c 1 fc00:5::3 + rc=0 + echo -e "$out" | grep -q -E 'From fc00:4::2 icmp_seq.* Destination' || rc=1 + return "${rc}" +} + +test_unreachable_ipv4() { + setup ns_set_v4 namespaces network icmp_filter xfrm || return "$ksft_skip" + run_cmd "${ns_a[@]}" ping -W 5 -w 4 -c 1 10.1.6.2 + run_cmd_err "${ns_a[@]}" ping -W 5 -w 4 -c 1 10.1.6.3 + rc=0 + echo -e "$out" | grep -q -E 'From 10.1.5.2 icmp_seq.* Destination' || rc=1 + return "${rc}" +} + +test_unreachable_gw_ipv4() { + setup ns_set_v4x namespaces network icmp_filter xfrm || return "$ksft_skip" + run_cmd "${ns_a[@]}" ping -W 5 -w 4 -c 1 10.1.5.2 + run_cmd_err "${ns_a[@]}" ping -W 5 -w 4 -c 1 10.1.5.3 + rc=0 + echo -e "$out" | grep -q -E 'From 10.1.4.2 icmp_seq.* Destination' || rc=1 + return "${rc}" +} + +test_mtu_ipv4_r2() { + setup ns_set_v4 namespaces network icmp_filter xfrm || return "$ksft_skip" + run_cmd "${ns_a[@]}" ping -W 5 -w 4 -c 1 10.1.6.2 + run_cmd "${ns_r2[@]}" ip route replace 10.1.3.0/24 dev eth1 src 10.1.3.2 mtu 1300 + run_cmd "${ns_r2[@]}" ip route replace 10.1.4.0/24 dev eth0 src 10.1.4.1 mtu 1300 + # shellcheck disable=SC1010 # -M do: do = dont-fragment, not shell keyword + run_cmd "${ns_a[@]}" ping -M do -s 1300 -W 5 -w 4 -c 1 10.1.6.2 || true + rc=0 + echo -e "$out" | grep -q -E "From 10.1.2.2 icmp_seq=.* Frag needed and DF set" || rc=1 + return "${rc}" +} + +test_mtu_ipv6_r2() { + setup ns_set_v6 namespaces network xfrm icmpv6_filter || return "$ksft_skip" + run_cmd "${ns_a[@]}" ping -W 5 -w 4 -c 1 fc00:6::2 + run_cmd "${ns_r2[@]}" ip -6 route replace fc00:3::/64 \ + dev eth1 metric 256 src fc00:3::2 mtu 1300 + run_cmd "${ns_r2[@]}" ip -6 route replace fc00:4::/64 \ + dev eth0 metric 256 src fc00:4::1 mtu 1300 + # shellcheck disable=SC1010 # -M do: do = dont-fragment, not shell keyword + run_cmd "${ns_a[@]}" ping -M do -s 1300 -W 5 -w 4 -c 1 fc00:6::2 || true + rc=0 + echo -e "$out" | grep -q -E "From fc00:2::2 icmp_seq=.* Packet too big: mtu=1230" || rc=1 + return "${rc}" +} + +test_mtu_ipv4_r3() { + setup ns_set_v4 namespaces network icmp_filter xfrm || return "$ksft_skip" + run_cmd "${ns_a[@]}" ping -W 5 -w 4 -c 1 10.1.6.2 + run_cmd "${ns_r3[@]}" ip route replace 10.1.6.0/24 dev eth0 mtu 1300 + # shellcheck disable=SC1010 # -M do: do = dont-fragment, not shell keyword + run_cmd "${ns_a[@]}" ping -M do -s 1350 -W 5 -w 4 -c 1 10.1.6.2 || true + rc=0 + echo -e "$out" | grep -q -E "From 10.1.5.2 .* Frag needed and DF set \(mtu = 1300\)" || rc=1 + return "${rc}" +} + +test_mtu_ipv4_s2() { + setup ns_set_v4x namespaces network icmp_filter xfrm || return "$ksft_skip" + run_cmd "${ns_a[@]}" ping -W 5 -w 4 -c 1 10.1.5.2 + run_cmd "${ns_s2[@]}" ip route replace 10.1.5.0/24 dev eth0 src 10.1.5.1 mtu 1300 + # shellcheck disable=SC1010 # -M do: do = dont-fragment, not shell keyword + run_cmd "${ns_a[@]}" ping -M do -s 1350 -W 5 -w 4 -c 1 10.1.5.2 || true + rc=0 + echo -e "$out" | grep -q -E "From 10.1.4.2.*Frag needed and DF set \(mtu = 1300\)" || rc=1 + return "${rc}" +} + +test_mtu_ipv6_s2() { + setup ns_set_v6x namespaces network xfrm icmpv6_filter || return "$ksft_skip" + run_cmd "${ns_a[@]}" ping -W 5 -w 4 -c 1 fc00:5::2 + run_cmd "${ns_s2[@]}" ip -6 route replace fc00:5::/64 dev eth0 metric 256 mtu 1300 + # shellcheck disable=SC1010 # -M do: do = dont-fragment, not shell keyword + run_cmd "${ns_a[@]}" ping -M do -s 1350 -W 5 -w 4 -c 1 fc00:5::2 || true + rc=0 + echo -e "$out" | grep -q -E "From fc00:4::2.*Packet too big: mtu=1300" || rc=1 + return "${rc}" +} + +test_mtu_ipv6_r3() { + setup ns_set_v6 namespaces network xfrm icmpv6_filter || return "$ksft_skip" + run_cmd "${ns_a[@]}" ping -W 5 -w 4 -c 1 fc00:6::2 + run_cmd "${ns_r3[@]}" ip -6 route replace fc00:6::/64 dev eth1 metric 256 mtu 1300 + # shellcheck disable=SC1010 # -M do: do = dont-fragment, not shell keyword + run_cmd "${ns_a[@]}" ping -M do -s 1300 -W 5 -w 4 -c 1 fc00:6::2 || true + rc=0 + echo -e "$out" | grep -q -E "From fc00:5::2 icmp_seq=.* Packet too big: mtu=1300" || rc=1 + return "${rc}" +} + +################################################################################ +# +usage() { + echo + echo "$0 [OPTIONS] [TEST]..." + echo "If no TEST argument is given, all tests will be run." + echo + echo -e "\t-p Pause on fail. Namespaces are kept for diagnostics" + echo -e "\t-P Pause after the test. Namespaces are kept for diagnostics" + echo -e "\t-v Verbose output. Show commands; -vv Show output and nft rules also" + echo "Available tests${tests}" + exit 1 +} + +################################################################################ +# +exitcode=0 +all_skipped=true +out= +cmd= + +while getopts :epPv o; do + case $o in + e) EXIT_ON_TEST_FAIL=yes ;; + P) PAUSE=always ;; + p) PAUSE=fail ;; + v) VERBOSE=$((VERBOSE + 1)) ;; + *) usage ;; + esac +done +shift $((OPTIND - 1)) + +IFS=$'\t\n' + +for arg; do + # Check first that all requested tests are available before running any + command -v "test_${arg}" >/dev/null || { + echo "=== Test ${arg} not found" + usage + } +done + +name="" +desc="" +fail="no" + +for t in ${tests}; do + [ "${name}" = "" ] && name="${t}" && continue + [ "${desc}" = "" ] && desc="${t}" + + run_this=1 + for arg; do + [ "${arg}" = "${name}" ] && run_this=1 && break + run_this=0 + done + if [ $run_this -eq 1 ]; then + run_test + fi + name="" + desc="" +done + +exit ${exitcode} diff --git a/tools/testing/selftests/net/ynl.mk b/tools/testing/selftests/net/ynl.mk index 12e7cae251be..793a2fc33d9f 100644 --- a/tools/testing/selftests/net/ynl.mk +++ b/tools/testing/selftests/net/ynl.mk @@ -5,10 +5,11 @@ # Inputs: # # YNL_GENS: families we need in the selftests -# YNL_PROGS: TEST_PROGS which need YNL (TODO, none exist, yet) +# YNL_GEN_PROGS: TEST_GEN_PROGS which need YNL # YNL_GEN_FILES: TEST_GEN_FILES which need YNL -YNL_OUTPUTS := $(patsubst %,$(OUTPUT)/%,$(YNL_GEN_FILES)) +YNL_OUTPUTS := $(patsubst %,$(OUTPUT)/%,$(YNL_GEN_FILES)) \ + $(patsubst %,$(OUTPUT)/%,$(YNL_GEN_PROGS)) YNL_SPECS := \ $(patsubst %,$(top_srcdir)/Documentation/netlink/specs/%.yaml,$(YNL_GENS)) @@ -27,7 +28,8 @@ $(OUTPUT)/.libynl-$(YNL_GENS_HASH).sig: $(OUTPUT)/libynl.a: $(YNL_SPECS) $(OUTPUT)/.libynl-$(YNL_GENS_HASH).sig $(Q)rm -f $(top_srcdir)/tools/net/ynl/libynl.a - $(Q)$(MAKE) -C $(top_srcdir)/tools/net/ynl GENS="$(YNL_GENS)" libynl.a + $(Q)$(MAKE) -C $(top_srcdir)/tools/net/ynl \ + GENS="$(YNL_GENS)" RSTS="" libynl.a $(Q)cp $(top_srcdir)/tools/net/ynl/libynl.a $(OUTPUT)/libynl.a EXTRA_CLEAN += \ |
