diff options
Diffstat (limited to 'tools/testing/selftests/drivers')
110 files changed, 14731 insertions, 1330 deletions
diff --git a/tools/testing/selftests/drivers/dma-buf/udmabuf.c b/tools/testing/selftests/drivers/dma-buf/udmabuf.c index 6062723a172e..d78aec662586 100644 --- a/tools/testing/selftests/drivers/dma-buf/udmabuf.c +++ b/tools/testing/selftests/drivers/dma-buf/udmabuf.c @@ -16,7 +16,7 @@ #include <sys/mman.h> #include <linux/memfd.h> #include <linux/udmabuf.h> -#include "../../kselftest.h" +#include "kselftest.h" #define TEST_PREFIX "drivers/dma-buf/udmabuf" #define NUM_PAGES 4 @@ -138,7 +138,7 @@ int main(int argc, char *argv[]) void *addr1, *addr2; ksft_print_header(); - ksft_set_plan(6); + ksft_set_plan(7); devfd = open("/dev/udmabuf", O_RDWR); if (devfd < 0) { @@ -250,6 +250,24 @@ int main(int argc, char *argv[]) close(buf); close(memfd); + + /* same test as above but we pin first before writing to memfd */ + page_size = getpagesize() * 512; /* 2 MB */ + size = MEMFD_SIZE * page_size; + memfd = create_memfd_with_seals(size, true); + buf = create_udmabuf_list(devfd, memfd, size); + addr2 = mmap_fd(buf, NUM_PAGES * NUM_ENTRIES * getpagesize()); + addr1 = mmap_fd(memfd, size); + write_to_memfd(addr1, size, 'a'); + write_to_memfd(addr1, size, 'b'); + ret = compare_chunks(addr1, addr2, size); + if (ret < 0) + ksft_test_result_fail("%s: [FAIL,test-7]\n", TEST_PREFIX); + else + ksft_test_result_pass("%s: [PASS,test-7]\n", TEST_PREFIX); + + close(buf); + close(memfd); close(devfd); ksft_print_msg("%s: ok\n", TEST_PREFIX); diff --git a/tools/testing/selftests/drivers/net/.gitignore b/tools/testing/selftests/drivers/net/.gitignore new file mode 100644 index 000000000000..585ecb4d5dc4 --- /dev/null +++ b/tools/testing/selftests/drivers/net/.gitignore @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +napi_id_helper +psp_responder diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile index 137470bdee0c..b72080c6d06b 100644 --- a/tools/testing/selftests/drivers/net/Makefile +++ b/tools/testing/selftests/drivers/net/Makefile @@ -1,18 +1,37 @@ # SPDX-License-Identifier: GPL-2.0 +CFLAGS += $(KHDR_INCLUDES) TEST_INCLUDES := $(wildcard lib/py/*.py) \ $(wildcard lib/sh/*.sh) \ - ../../net/net_helper.sh \ ../../net/lib.sh \ +TEST_GEN_FILES := \ + napi_id_helper \ +# end of TEST_GEN_FILES + TEST_PROGS := \ - netcons_basic.sh \ - netcons_overflow.sh \ + gro.py \ + hds.py \ + macsec.py \ + napi_id.py \ + napi_threaded.py \ + netpoll_basic.py \ ping.py \ + psp.py \ queues.py \ - stats.py \ + ring_reconfig.py \ shaper.py \ - hds.py \ + stats.py \ + xdp.py \ # end of TEST_PROGS +# YNL files, must be before "include ..lib.mk" +YNL_GEN_FILES := psp_responder +TEST_GEN_FILES += $(YNL_GEN_FILES) + include ../../lib.mk + +# YNL build +YNL_GENS := psp + +include ../../net/ynl.mk diff --git a/tools/testing/selftests/drivers/net/README.rst b/tools/testing/selftests/drivers/net/README.rst index 3b6a29e6564b..c8588436c224 100644 --- a/tools/testing/selftests/drivers/net/README.rst +++ b/tools/testing/selftests/drivers/net/README.rst @@ -26,6 +26,10 @@ The netdevice against which tests will be run must exist, be running Refer to list of :ref:`Variables` later in this file to set up running the tests against a real device. +The current support for bash tests restricts the use of the same interface name +on the local system and the remote one and will bail if this case is +encountered. + Both modes required ~~~~~~~~~~~~~~~~~~~ @@ -47,6 +51,10 @@ or:: # Variable set in a file NETIF=eth0 +Please note that the config parser is very simple, if there are +any non-alphanumeric characters in the value it needs to be in +double quotes. + Local test (which don't require endpoint for sending / receiving traffic) need only the ``NETIF`` variable. Remaining variables define the endpoint and communication method. @@ -62,6 +70,44 @@ LOCAL_V4, LOCAL_V6, REMOTE_V4, REMOTE_V6 Local and remote endpoint IP addresses. +LOCAL_PREFIX_V6 +~~~~~~~~~~~~~~~ + +Local IP prefix/subnet which can be used to allocate extra IP addresses (for +network name spaces behind macvlan, veth, netkit devices). DUT must be +reachable using these addresses from the endpoint. + +LOCAL_PREFIX_V6 must NOT match LOCAL_V6. + +Example: + NETIF = "eth0" + LOCAL_V6 = "2001:db8:1::1" + REMOTE_V6 = "2001:db8:1::2" + LOCAL_PREFIX_V6 = "2001:db8:2::0/64" + + +-----------------------------+ +------------------------------+ + dst | INIT NS | | TEST NS | + 2001: | +---------------+ | | | + db8:2::2| | NETIF | | bpf | | + +---|>| 2001:db8:1::1 | |redirect| +-------------------------+ | + | | | |-----------|--------|>| Netkit | | + | | +---------------+ | _peer | | nk_guest | | + | | +-------------+ Netkit pair | | | fe80::2/64 | | + | | | Netkit |.............|........|>| 2001:db8:2::2/64 | | + | | | nk_host | | | +-------------------------+ | + | | | fe80::1/64 | | | | + | | +-------------+ | | route: | + | | | | default | + | | route: | | via fe80::1 dev nk_guest | + | | 2001:db8:2::2/128 | +------------------------------+ + | | via fe80::2 dev nk_host | + | +-----------------------------+ + | + | +---------------+ + | | REMOTE | + +---| 2001:db8:1::2 | + +---------------+ + REMOTE_TYPE ~~~~~~~~~~~ @@ -134,3 +180,118 @@ Run the test:: ok 2 ping.test_v6 # SKIP Test requires IPv6 connectivity ok 3 ping.test_tcp # Totals: pass:2 fail:0 xfail:0 xpass:0 skip:1 error:0 + +Dependencies +~~~~~~~~~~~~ + +The tests have a handful of dependencies. For Fedora / CentOS:: + + dnf -y install netsniff-ng python-yaml socat iperf3 + +Guidance for test authors +========================= + +This section mostly applies to Python tests but some of the guidance +may be more broadly applicable. + +Kernel config +~~~~~~~~~~~~~ + +Each test directory has a ``config`` file listing which kernel +configuration options the tests depend on. This file must be kept +up to date, the CIs build minimal kernels for each test group. + +Adding checks inside the tests to validate that the necessary kernel +configs are enabled is discouraged. The test author may include such +checks, but standalone patches to make tests compatible e.g. with +distro kernel configs are unlikely to be accepted. + +Avoid libraries and frameworks +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Test files should be relatively self contained. The libraries should +only include very core or non-trivial code. +It may be tempting to "factor out" the common code, but fight that urge. +Library code increases the barrier of entry, and complexity in general. + +Avoid mixing test code and boilerplate +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In Python, try to avoid adding code in the ``main()`` function which +instantiates ``NetDrvEnv()`` and calls ``ksft_run()``. It's okay to +set up global resources (e.g. open an RtNetlink socket used by multiple +tests), but any complex logic, test-specific environment configuration +and validation should be done in the tests (even if it means it has to +be repeated). + +Local host is the DUT +~~~~~~~~~~~~~~~~~~~~~ + +Dual-host tests (tests with an endpoint) should be written from the DUT +perspective. IOW the local machine should be the one tested, remote is +just for traffic generation. + +Avoid modifying remote +~~~~~~~~~~~~~~~~~~~~~~ + +Avoid making configuration changes to the remote system as much as possible. +Remote system may be used concurrently by multiple DUTs. + +defer() +~~~~~~~ + +The env must be clean after test exits. Register a ``defer()`` for any +action that needs an "undo" as soon as possible. If you need to run +the cancel action as part of the test - ``defer()`` returns an object +you can ``.exec()``-ute. + +ksft_pr() +~~~~~~~~~ + +Use ``ksft_pr()`` instead of ``print()`` to avoid breaking TAP format. + +ksft_disruptive +~~~~~~~~~~~~~~~ + +By default the tests are expected to be able to run on +single-interface systems. All tests which may disconnect ``NETIF`` +must be annotated with ``@ksft_disruptive``. + +ksft_variants +~~~~~~~~~~~~~ + +Use the ``@ksft_variants`` decorator to run a test with multiple sets +of inputs as separate test cases. This avoids duplicating test functions +that only differ in parameters. + +Parameters can be a single value, a tuple, or a ``KsftNamedVariant`` +(which gives an explicit name to the sub-case). The argument to the +decorator can be a list or a generator. + +Example:: + + @ksft_variants([ + KsftNamedVariant("main", False), + KsftNamedVariant("ctx", True), + ]) + def resize_periodic(cfg, create_context): + # test body receives (cfg, create_context) where create_context + # is False for the "main" variant and True for "ctx" + pass + +or:: + + def _gro_variants(): + for mode in ["sw", "hw"]: + for protocol in ["tcp4", "tcp6"]: + yield (mode, protocol) + + @ksft_variants(_gro_variants()) + def test(cfg, mode, protocol): + pass + +Running tests CI-style +====================== + +See https://github.com/linux-netdev/nipa/wiki for instructions on how +to easily run the tests using ``virtme-ng``. diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile index 2b10854e4b1e..9af5f84edd37 100644 --- a/tools/testing/selftests/drivers/net/bonding/Makefile +++ b/tools/testing/selftests/drivers/net/bonding/Makefile @@ -4,21 +4,30 @@ TEST_PROGS := \ bond-arp-interval-causes-panic.sh \ bond-break-lacpdu-tx.sh \ + bond-eth-type-change.sh \ bond-lladdr-target.sh \ + bond_ipsec_offload.sh \ + bond_lacp_prio.sh \ + bond_macvlan_ipvlan.sh \ + bond_options.sh \ + bond_passive_lacp.sh \ + bond_stacked_header_parse.sh \ dev_addr_lists.sh \ mode-1-recovery-updelay.sh \ mode-2-recovery-updelay.sh \ - bond_options.sh \ - bond-eth-type-change.sh \ - bond_macvlan_ipvlan.sh + netcons_over_bonding.sh \ +# end of TEST_PROGS TEST_FILES := \ - lag_lib.sh \ bond_topo_2d1c.sh \ - bond_topo_3d1c.sh + bond_topo_3d1c.sh \ + lag_lib.sh \ +# end of TEST_FILES TEST_INCLUDES := \ + ../../../net/lib.sh \ + ../lib/sh/lib_netcons.sh \ ../../../net/forwarding/lib.sh \ - ../../../net/lib.sh +# end of TEST_INCLUDES include ../../../lib.mk diff --git a/tools/testing/selftests/drivers/net/bonding/bond_ipsec_offload.sh b/tools/testing/selftests/drivers/net/bonding/bond_ipsec_offload.sh new file mode 100755 index 000000000000..f09e100232c7 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/bond_ipsec_offload.sh @@ -0,0 +1,156 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# IPsec over bonding offload test: +# +# +----------------+ +# | bond0 | +# | | | +# | eth0 eth1 | +# +---+-------+----+ +# +# We use netdevsim instead of physical interfaces +#------------------------------------------------------------------- +# Example commands +# ip x s add proto esp src 192.0.2.1 dst 192.0.2.2 \ +# spi 0x07 mode transport reqid 0x07 replay-window 32 \ +# aead 'rfc4106(gcm(aes))' 1234567890123456dcba 128 \ +# sel src 192.0.2.1/24 dst 192.0.2.2/24 +# offload dev bond0 dir out +# ip x p add dir out src 192.0.2.1/24 dst 192.0.2.2/24 \ +# tmpl proto esp src 192.0.2.1 dst 192.0.2.2 \ +# spi 0x07 mode transport reqid 0x07 +# +#------------------------------------------------------------------- + +lib_dir=$(dirname "$0") +# shellcheck disable=SC1091 +source "$lib_dir"/../../../net/lib.sh +srcip=192.0.2.1 +dstip=192.0.2.2 +ipsec0=/sys/kernel/debug/netdevsim/netdevsim0/ports/0/ipsec +ipsec1=/sys/kernel/debug/netdevsim/netdevsim0/ports/1/ipsec +active_slave="" + +# shellcheck disable=SC2317 +active_slave_changed() +{ + local old_active_slave=$1 + local new_active_slave + + # shellcheck disable=SC2154 + new_active_slave=$(ip -n "${ns}" -d -j link show bond0 | \ + jq -r ".[].linkinfo.info_data.active_slave") + [ "$new_active_slave" != "$old_active_slave" ] && [ "$new_active_slave" != "null" ] +} + +test_offload() +{ + # use ping to exercise the Tx path + ip netns exec "$ns" ping -I bond0 -c 3 -W 1 -i 0 "$dstip" >/dev/null + + active_slave=$(ip -n "${ns}" -d -j link show bond0 | \ + jq -r ".[].linkinfo.info_data.active_slave") + + if [ "$active_slave" = "$nic0" ]; then + sysfs=$ipsec0 + elif [ "$active_slave" = "$nic1" ]; then + sysfs=$ipsec1 + else + check_err 1 "bond_ipsec_offload invalid active_slave $active_slave" + fi + + # The tx/rx order in sysfs may changed after failover + grep -q "SA count=2 tx=3" "$sysfs" && grep -q "tx ipaddr=$dstip" "$sysfs" + check_err $? "incorrect tx count with link ${active_slave}" + + log_test bond_ipsec_offload "active_slave ${active_slave}" +} + +setup_env() +{ + if ! mount | grep -q debugfs; then + mount -t debugfs none /sys/kernel/debug/ &> /dev/null + defer umount /sys/kernel/debug/ + + fi + + # setup netdevsim since dummy/veth dev doesn't have offload support + if [ ! -w /sys/bus/netdevsim/new_device ] ; then + if ! modprobe -q netdevsim; then + echo "SKIP: can't load netdevsim for ipsec offload" + # shellcheck disable=SC2154 + exit "$ksft_skip" + fi + defer modprobe -r netdevsim + fi + + setup_ns ns + defer cleanup_ns "$ns" +} + +setup_bond() +{ + ip -n "$ns" link add bond0 type bond mode active-backup miimon 100 + ip -n "$ns" addr add "$srcip/24" dev bond0 + ip -n "$ns" link set bond0 up + + echo "0 2" | ip netns exec "$ns" tee /sys/bus/netdevsim/new_device >/dev/null + nic0=$(ip netns exec "$ns" ls /sys/bus/netdevsim/devices/netdevsim0/net | head -n 1) + nic1=$(ip netns exec "$ns" ls /sys/bus/netdevsim/devices/netdevsim0/net | tail -n 1) + ip -n "$ns" link set "$nic0" master bond0 + ip -n "$ns" link set "$nic1" master bond0 + + # we didn't create a peer, make sure we can Tx by adding a permanent + # neighbour this need to be added after enslave + ip -n "$ns" neigh add "$dstip" dev bond0 lladdr 00:11:22:33:44:55 + + # create offloaded SAs, both in and out + ip -n "$ns" x p add dir out src "$srcip/24" dst "$dstip/24" \ + tmpl proto esp src "$srcip" dst "$dstip" spi 9 \ + mode transport reqid 42 + + ip -n "$ns" x p add dir in src "$dstip/24" dst "$srcip/24" \ + tmpl proto esp src "$dstip" dst "$srcip" spi 9 \ + mode transport reqid 42 + + ip -n "$ns" x s add proto esp src "$srcip" dst "$dstip" spi 9 \ + mode transport reqid 42 aead "rfc4106(gcm(aes))" \ + 0x3132333435363738393031323334353664636261 128 \ + sel src "$srcip/24" dst "$dstip/24" \ + offload dev bond0 dir out + + ip -n "$ns" x s add proto esp src "$dstip" dst "$srcip" spi 9 \ + mode transport reqid 42 aead "rfc4106(gcm(aes))" \ + 0x3132333435363738393031323334353664636261 128 \ + sel src "$dstip/24" dst "$srcip/24" \ + offload dev bond0 dir in + + # does offload show up in ip output + lines=$(ip -n "$ns" x s list | grep -c "crypto offload parameters: dev bond0 dir") + if [ "$lines" -ne 2 ] ; then + check_err 1 "bond_ipsec_offload SA offload missing from list output" + fi +} + +trap defer_scopes_cleanup EXIT +setup_env +setup_bond + +# start Offload testing +test_offload + +# do failover and re-test +ip -n "$ns" link set "$active_slave" down +slowwait 5 active_slave_changed "$active_slave" +test_offload + +# make sure offload get removed from driver +ip -n "$ns" x s flush +ip -n "$ns" x p flush +line0=$(grep -c "SA count=0" "$ipsec0") +line1=$(grep -c "SA count=0" "$ipsec1") +[ "$line0" -ne 1 ] || [ "$line1" -ne 1 ] +check_fail $? "bond_ipsec_offload SA not removed from driver" + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/drivers/net/bonding/bond_lacp_prio.sh b/tools/testing/selftests/drivers/net/bonding/bond_lacp_prio.sh new file mode 100755 index 000000000000..a483d505c6a8 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/bond_lacp_prio.sh @@ -0,0 +1,108 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Testing if bond lacp per port priority works +# +# Switch (s_ns) Backup Switch (b_ns) +# +-------------------------+ +-------------------------+ +# | bond0 | | bond0 | +# | + | | + | +# | eth0 | eth1 | | eth0 | eth1 | +# | +---+---+ | | +---+---+ | +# | | | | | | | | +# +-------------------------+ +-------------------------+ +# | | | | +# +-----------------------------------------------------+ +# | | | | | | +# | +-------+---------+---------+-------+ | +# | eth0 eth1 | eth2 eth3 | +# | + | +# | bond0 | +# +-----------------------------------------------------+ +# Client (c_ns) + +lib_dir=$(dirname "$0") +# shellcheck disable=SC1091 +source "$lib_dir"/../../../net/lib.sh + +setup_links() +{ + # shellcheck disable=SC2154 + ip -n "${c_ns}" link add eth0 type veth peer name eth0 netns "${s_ns}" + ip -n "${c_ns}" link add eth1 type veth peer name eth1 netns "${s_ns}" + # shellcheck disable=SC2154 + ip -n "${c_ns}" link add eth2 type veth peer name eth0 netns "${b_ns}" + ip -n "${c_ns}" link add eth3 type veth peer name eth1 netns "${b_ns}" + + ip -n "${c_ns}" link add bond0 type bond mode 802.3ad miimon 100 \ + lacp_rate fast ad_select actor_port_prio + ip -n "${s_ns}" link add bond0 type bond mode 802.3ad miimon 100 \ + lacp_rate fast + ip -n "${b_ns}" link add bond0 type bond mode 802.3ad miimon 100 \ + lacp_rate fast + + ip -n "${c_ns}" link set eth0 master bond0 + ip -n "${c_ns}" link set eth1 master bond0 + ip -n "${c_ns}" link set eth2 master bond0 + ip -n "${c_ns}" link set eth3 master bond0 + ip -n "${s_ns}" link set eth0 master bond0 + ip -n "${s_ns}" link set eth1 master bond0 + ip -n "${b_ns}" link set eth0 master bond0 + ip -n "${b_ns}" link set eth1 master bond0 + + ip -n "${c_ns}" link set bond0 up + ip -n "${s_ns}" link set bond0 up + ip -n "${b_ns}" link set bond0 up +} + +test_port_prio_setting() +{ + RET=0 + ip -n "${c_ns}" link set eth0 type bond_slave actor_port_prio 1000 + prio=$(cmd_jq "ip -n ${c_ns} -d -j link show eth0" \ + ".[].linkinfo.info_slave_data.actor_port_prio") + [ "$prio" -ne 1000 ] && RET=1 + ip -n "${c_ns}" link set eth2 type bond_slave actor_port_prio 10 + prio=$(cmd_jq "ip -n ${c_ns} -d -j link show eth2" \ + ".[].linkinfo.info_slave_data.actor_port_prio") + [ "$prio" -ne 10 ] && RET=1 +} + +test_agg_reselect() +{ + local bond_agg_id slave_agg_id + local expect_slave="$1" + RET=0 + + # Trigger link state change to reselect the aggregator + ip -n "${c_ns}" link set eth1 down + sleep 0.5 + ip -n "${c_ns}" link set eth1 up + sleep 0.5 + + bond_agg_id=$(cmd_jq "ip -n ${c_ns} -d -j link show bond0" \ + ".[].linkinfo.info_data.ad_info.aggregator") + slave_agg_id=$(cmd_jq "ip -n ${c_ns} -d -j link show $expect_slave" \ + ".[].linkinfo.info_slave_data.ad_aggregator_id") + # shellcheck disable=SC2034 + [ "${bond_agg_id}" -ne "${slave_agg_id}" ] && \ + RET=1 +} + +trap cleanup_all_ns EXIT +setup_ns c_ns s_ns b_ns +setup_links + +test_port_prio_setting +log_test "bond 802.3ad" "actor_port_prio setting" + +test_agg_reselect eth0 +log_test "bond 802.3ad" "actor_port_prio select" + +# Change the actor port prio and re-test +ip -n "${c_ns}" link set eth0 type bond_slave actor_port_prio 10 +ip -n "${c_ns}" link set eth2 type bond_slave actor_port_prio 1000 +test_agg_reselect eth2 +log_test "bond 802.3ad" "actor_port_prio switch" + +exit "${EXIT_STATUS}" diff --git a/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh b/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh index c4711272fe45..559f300f965a 100755 --- a/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh @@ -30,6 +30,7 @@ check_connection() local message=${3} RET=0 + sleep 0.25 ip netns exec ${ns} ping ${target} -c 4 -i 0.1 &>/dev/null check_err $? "ping failed" log_test "${bond_mode}/${xvlan_type}_${xvlan_mode}: ${message}" diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh index edc56e2cc606..187b478d0ddf 100755 --- a/tools/testing/selftests/drivers/net/bonding/bond_options.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh @@ -7,12 +7,14 @@ ALL_TESTS=" prio arp_validate num_grat_arp + fail_over_mac + vlan_over_bond " lib_dir=$(dirname "$0") source ${lib_dir}/bond_topo_3d1c.sh -c_maddr="33:33:00:00:00:10" -g_maddr="33:33:00:00:02:54" +c_maddr="33:33:ff:00:00:10" +g_maddr="33:33:ff:00:02:54" skip_prio() { @@ -352,8 +354,8 @@ garp_test() exp_num=$(echo "${param}" | cut -f6 -d ' ') active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave") - slowwait_for_counter $((exp_num + 5)) $exp_num \ - tc_rule_handle_stats_get "dev s${active_slave#eth} ingress" 101 ".packets" "-n ${g_ns}" + slowwait_for_counter $((exp_num + 5)) $exp_num tc_rule_handle_stats_get \ + "dev s${active_slave#eth} ingress" 101 ".packets" "-n ${g_ns}" &> /dev/null # check result real_num=$(tc_rule_handle_stats_get "dev s${active_slave#eth} ingress" 101 ".packets" "-n ${g_ns}") @@ -376,6 +378,197 @@ num_grat_arp() done } +check_all_mac_same() +{ + RET=0 + # all slaves should have same mac address (with the first port's mac) + local bond_mac=$(ip -n "$s_ns" -j link show bond0 | jq -r '.[]["address"]') + local eth0_mac=$(ip -n "$s_ns" -j link show eth0 | jq -r '.[]["address"]') + local eth1_mac=$(ip -n "$s_ns" -j link show eth1 | jq -r '.[]["address"]') + local eth2_mac=$(ip -n "$s_ns" -j link show eth2 | jq -r '.[]["address"]') + if [ "$bond_mac" != "${mac[0]}" ] || [ "$eth0_mac" != "$bond_mac" ] || \ + [ "$eth1_mac" != "$bond_mac" ] || [ "$eth2_mac" != "$bond_mac" ]; then + RET=1 + fi +} + +check_bond_mac_same_with_first() +{ + RET=0 + # bond mac address should be same with the first added slave + local bond_mac=$(ip -n "$s_ns" -j link show bond0 | jq -r '.[]["address"]') + if [ "$bond_mac" != "${mac[0]}" ]; then + RET=1 + fi +} + +check_bond_mac_same_with_active() +{ + RET=0 + # bond mac address should be same with active slave + local bond_mac=$(ip -n "$s_ns" -j link show bond0 | jq -r '.[]["address"]') + local active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave") + local active_slave_mac=$(ip -n "$s_ns" -j link show "$active_slave" | jq -r '.[]["address"]') + if [ "$bond_mac" != "$active_slave_mac" ]; then + RET=1 + fi +} + +check_backup_slave_mac_not_change() +{ + RET=0 + # backup slave's mac address is not changed + if ip -n "$s_ns" -d -j link show type bond_slave | jq -e '.[] + | select(.linkinfo.info_slave_data.state=="BACKUP") + | select(.address != .linkinfo.info_slave_data.perm_hwaddr)' &> /dev/null; then + RET=1 + fi +} + +check_backup_slave_mac_inherit() +{ + local backup_mac + RET=0 + + # backup slaves should use mac[1] or mac[2] + local backup_macs=$(ip -n "$s_ns" -d -j link show type bond_slave | \ + jq -r '.[] | select(.linkinfo.info_slave_data.state=="BACKUP") | .address') + for backup_mac in $backup_macs; do + if [ "$backup_mac" != "${mac[1]}" ] && [ "$backup_mac" != "${mac[2]}" ]; then + RET=1 + fi + done +} + +check_first_slave_random_mac() +{ + RET=0 + # remove the first added slave and added it back + ip -n "$s_ns" link set eth0 nomaster + ip -n "$s_ns" link set eth0 master bond0 + + # the first slave should use random mac address + eth0_mac=$(ip -n "$s_ns" -j link show eth0 | jq -r '.[]["address"]') + [ "$eth0_mac" = "${mac[0]}" ] && RET=1 + log_test "bond fail_over_mac follow" "random first slave mac" + + # remove the first slave, the permanent MAC address should be restored back + ip -n "$s_ns" link set eth0 nomaster + eth0_mac=$(ip -n "$s_ns" -j link show eth0 | jq -r '.[]["address"]') + [ "$eth0_mac" != "${mac[0]}" ] && RET=1 +} + +do_active_backup_failover() +{ + local active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave") + ip -n ${s_ns} link set ${active_slave} down + slowwait 2 active_slave_changed $active_slave + ip -n ${s_ns} link set ${active_slave} up +} + +fail_over_mac() +{ + # Bring down the first interface on the switch to force the bond to + # select another active interface instead of the first one that joined. + ip -n "$g_ns" link set s0 down + + # fail_over_mac none + bond_reset "mode active-backup miimon 100 fail_over_mac 0" + check_all_mac_same + log_test "fail_over_mac 0" "all slaves have same mac" + do_active_backup_failover + check_all_mac_same + log_test "fail_over_mac 0" "failover: all slaves have same mac" + + # fail_over_mac active + bond_reset "mode active-backup miimon 100 fail_over_mac 1" + check_bond_mac_same_with_active + log_test "fail_over_mac 1" "bond mac is same with active slave mac" + check_backup_slave_mac_not_change + log_test "fail_over_mac 1" "backup slave mac is not changed" + do_active_backup_failover + check_bond_mac_same_with_active + log_test "fail_over_mac 1" "failover: bond mac is same with active slave mac" + check_backup_slave_mac_not_change + log_test "fail_over_mac 1" "failover: backup slave mac is not changed" + + # fail_over_mac follow + bond_reset "mode active-backup miimon 100 fail_over_mac 2" + check_bond_mac_same_with_first + log_test "fail_over_mac 2" "bond mac is same with first slave mac" + check_bond_mac_same_with_active + log_test "fail_over_mac 2" "bond mac is same with active slave mac" + check_backup_slave_mac_inherit + log_test "fail_over_mac 2" "backup slave mac inherit" + do_active_backup_failover + check_bond_mac_same_with_first + log_test "fail_over_mac 2" "failover: bond mac is same with first slave mac" + check_bond_mac_same_with_active + log_test "fail_over_mac 2" "failover: bond mac is same with active slave mac" + check_backup_slave_mac_inherit + log_test "fail_over_mac 2" "failover: backup slave mac inherit" + check_first_slave_random_mac + log_test "fail_over_mac 2" "first slave mac random" +} + +vlan_over_bond_arp() +{ + local mode="$1" + RET=0 + + bond_reset "mode $mode arp_interval 100 arp_ip_target 192.0.3.10" + ip -n "${s_ns}" link add bond0.3 link bond0 type vlan id 3 + ip -n "${s_ns}" link set bond0.3 up + ip -n "${s_ns}" addr add 192.0.3.1/24 dev bond0.3 + ip -n "${s_ns}" addr add 2001:db8::3:1/64 dev bond0.3 + + slowwait_for_counter 5 5 tc_rule_handle_stats_get \ + "dev eth0.3 ingress" 101 ".packets" "-n ${c_ns}" &> /dev/null || RET=1 + log_test "vlan over bond arp" "$mode" +} + +vlan_over_bond_ns() +{ + local mode="$1" + RET=0 + + if skip_ns; then + log_test_skip "vlan_over_bond ns" "$mode" + return 0 + fi + + bond_reset "mode $mode arp_interval 100 ns_ip6_target 2001:db8::3:10" + ip -n "${s_ns}" link add bond0.3 link bond0 type vlan id 3 + ip -n "${s_ns}" link set bond0.3 up + ip -n "${s_ns}" addr add 192.0.3.1/24 dev bond0.3 + ip -n "${s_ns}" addr add 2001:db8::3:1/64 dev bond0.3 + + slowwait_for_counter 5 5 tc_rule_handle_stats_get \ + "dev eth0.3 ingress" 102 ".packets" "-n ${c_ns}" &> /dev/null || RET=1 + log_test "vlan over bond ns" "$mode" +} + +vlan_over_bond() +{ + # add vlan 3 for client + ip -n "${c_ns}" link add eth0.3 link eth0 type vlan id 3 + ip -n "${c_ns}" link set eth0.3 up + ip -n "${c_ns}" addr add 192.0.3.10/24 dev eth0.3 + ip -n "${c_ns}" addr add 2001:db8::3:10/64 dev eth0.3 + + # Add tc rule to check the vlan pkts + tc -n "${c_ns}" qdisc add dev eth0.3 clsact + tc -n "${c_ns}" filter add dev eth0.3 ingress protocol arp \ + handle 101 flower skip_hw arp_op request \ + arp_sip 192.0.3.1 arp_tip 192.0.3.10 action pass + tc -n "${c_ns}" filter add dev eth0.3 ingress protocol ipv6 \ + handle 102 flower skip_hw ip_proto icmpv6 \ + type 135 src_ip 2001:db8::3:1 action pass + + vlan_over_bond_arp "active-backup" + vlan_over_bond_ns "active-backup" +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/drivers/net/bonding/bond_passive_lacp.sh b/tools/testing/selftests/drivers/net/bonding/bond_passive_lacp.sh new file mode 100755 index 000000000000..9c3b089813df --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/bond_passive_lacp.sh @@ -0,0 +1,105 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test if a bond interface works with lacp_active=off. + +# shellcheck disable=SC2034 +REQUIRE_MZ=no +NUM_NETIFS=0 +lib_dir=$(dirname "$0") +# shellcheck disable=SC1091 +source "$lib_dir"/../../../net/forwarding/lib.sh + +# shellcheck disable=SC2317 +check_port_state() +{ + local netns=$1 + local port=$2 + local state=$3 + + ip -n "${netns}" -d -j link show "$port" | \ + jq -e ".[].linkinfo.info_slave_data.ad_actor_oper_port_state_str | index(\"${state}\") != null" > /dev/null +} + +check_pkt_count() +{ + RET=0 + local ns="$1" + local iface="$2" + + # wait 65s, one per 30s + slowwait_for_counter 65 2 tc_rule_handle_stats_get \ + "dev ${iface} egress" 101 ".packets" "-n ${ns}" &> /dev/null +} + +setup() { + setup_ns c_ns s_ns + + # shellcheck disable=SC2154 + ip -n "${c_ns}" link add eth0 type veth peer name eth0 netns "${s_ns}" + ip -n "${c_ns}" link add eth1 type veth peer name eth1 netns "${s_ns}" + + # Add tc filter to count the pkts + tc -n "${c_ns}" qdisc add dev eth0 clsact + tc -n "${c_ns}" filter add dev eth0 egress handle 101 protocol 0x8809 matchall action pass + tc -n "${s_ns}" qdisc add dev eth1 clsact + tc -n "${s_ns}" filter add dev eth1 egress handle 101 protocol 0x8809 matchall action pass + + ip -n "${s_ns}" link add bond0 type bond mode 802.3ad lacp_active on lacp_rate fast + ip -n "${s_ns}" link set eth0 master bond0 + ip -n "${s_ns}" link set eth1 master bond0 + + ip -n "${c_ns}" link add bond0 type bond mode 802.3ad lacp_active off lacp_rate fast + ip -n "${c_ns}" link set eth0 master bond0 + ip -n "${c_ns}" link set eth1 master bond0 + +} + +trap cleanup_all_ns EXIT +setup + +# The bond will send 2 lacpdu pkts during init time, let's wait at least 2s +# after interface up +ip -n "${c_ns}" link set bond0 up +sleep 2 + +# 1. The passive side shouldn't send LACPDU. +check_pkt_count "${c_ns}" "eth0" && RET=1 +log_test "802.3ad lacp_active off" "init port" + +ip -n "${s_ns}" link set bond0 up +# 2. The passive side should not have the 'active' flag. +RET=0 +slowwait 2 check_port_state "${c_ns}" "eth0" "active" && RET=1 +log_test "802.3ad lacp_active off" "port state active" + +# 3. The active side should have the 'active' flag. +RET=0 +slowwait 2 check_port_state "${s_ns}" "eth0" "active" || RET=1 +log_test "802.3ad lacp_active on" "port state active" + +# 4. Make sure the connection is not expired. +RET=0 +slowwait 5 check_port_state "${s_ns}" "eth0" "distributing" +slowwait 10 check_port_state "${s_ns}" "eth0" "expired" && RET=1 +log_test "bond 802.3ad lacp_active off" "port connection" + +# After testing, disconnect one port on each side to check the state. +ip -n "${s_ns}" link set eth0 nomaster +ip -n "${s_ns}" link set eth0 up +ip -n "${c_ns}" link set eth1 nomaster +ip -n "${c_ns}" link set eth1 up +# Due to Periodic Machine and Rx Machine state change, the bond will still +# send lacpdu pkts in a few seconds. sleep at lease 5s to make sure +# negotiation finished +sleep 5 + +# 5. The active side should keep sending LACPDU. +check_pkt_count "${s_ns}" "eth1" || RET=1 +log_test "bond 802.3ad lacp_active on" "port pkt after disconnect" + +# 6. The passive side shouldn't send LACPDU anymore. +check_pkt_count "${c_ns}" "eth0" && RET=1 +log_test "bond 802.3ad lacp_active off" "port pkt after disconnect" + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/drivers/net/bonding/bond_stacked_header_parse.sh b/tools/testing/selftests/drivers/net/bonding/bond_stacked_header_parse.sh new file mode 100755 index 000000000000..36bcdef711b0 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/bond_stacked_header_parse.sh @@ -0,0 +1,72 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test that bond_header_parse() does not infinitely recurse with stacked bonds. +# +# When a non-Ethernet device (e.g. GRE) is enslaved to a bond that is itself +# enslaved to another bond (bond1 -> bond0 -> gre), receiving a packet via +# AF_PACKET SOCK_DGRAM triggers dev_parse_header() -> bond_header_parse(). +# Since parse() used skb->dev (always the topmost bond) instead of a passed-in +# dev pointer, it would recurse back into itself indefinitely. + +# shellcheck disable=SC2034 +ALL_TESTS=" + bond_test_stacked_header_parse +" +REQUIRE_MZ=no +NUM_NETIFS=0 +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/forwarding/lib.sh + +# shellcheck disable=SC2329 +bond_test_stacked_header_parse() +{ + local devdummy="test-dummy0" + local devgre="test-gre0" + local devbond0="test-bond0" + local devbond1="test-bond1" + + # shellcheck disable=SC2034 + RET=0 + + # Setup: dummy -> gre -> bond0 -> bond1 + ip link add name "$devdummy" type dummy + ip addr add 10.0.0.1/24 dev "$devdummy" + ip link set "$devdummy" up + + ip link add name "$devgre" type gre local 10.0.0.1 + + ip link add name "$devbond0" type bond mode active-backup + ip link add name "$devbond1" type bond mode active-backup + + ip link set "$devgre" master "$devbond0" + ip link set "$devbond0" master "$devbond1" + + ip link set "$devgre" up + ip link set "$devbond0" up + ip link set "$devbond1" up + + # tcpdump on a non-Ethernet bond uses AF_PACKET SOCK_DGRAM (cooked + # capture), which triggers dev_parse_header() -> bond_header_parse() + # on receive. With the bug, this recurses infinitely. + timeout 5 tcpdump -c 1 -i "$devbond1" >/dev/null 2>&1 & + local tcpdump_pid=$! + sleep 1 + + # Send a GRE packet to 10.0.0.1 so it arrives via gre -> bond0 -> bond1 + python3 -c "from scapy.all import *; send(IP(src='10.0.0.2', dst='10.0.0.1')/GRE()/IP()/UDP(), verbose=0)" + check_err $? "failed to send GRE packet (scapy installed?)" + + wait "$tcpdump_pid" 2>/dev/null + + ip link del "$devbond1" 2>/dev/null + ip link del "$devbond0" 2>/dev/null + ip link del "$devgre" 2>/dev/null + ip link del "$devdummy" 2>/dev/null + + log_test "Stacked bond header_parse does not recurse" +} + +tests_run + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh b/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh index 195ef83cfbf1..167aa4a4a12a 100644 --- a/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh @@ -39,6 +39,8 @@ g_ip4="192.0.2.254" s_ip6="2001:db8::1" c_ip6="2001:db8::10" g_ip6="2001:db8::254" +mac[0]="00:0a:0b:0c:0d:01" +mac[1]="00:0a:0b:0c:0d:02" gateway_create() { @@ -62,6 +64,7 @@ server_create() for i in $(seq 0 1); do ip -n ${s_ns} link add eth${i} type veth peer name s${i} netns ${g_ns} + ip -n "${s_ns}" link set "eth${i}" addr "${mac[$i]}" ip -n ${g_ns} link set s${i} up ip -n ${g_ns} link set s${i} master br0 diff --git a/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh b/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh index 3a1333d9a85b..23a2932301cc 100644 --- a/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh @@ -26,6 +26,7 @@ # +-------------------------------------+ source bond_topo_2d1c.sh +mac[2]="00:0a:0b:0c:0d:03" setup_prepare() { @@ -36,6 +37,7 @@ setup_prepare() # Add the extra device as we use 3 down links for bond0 local i=2 ip -n ${s_ns} link add eth${i} type veth peer name s${i} netns ${g_ns} + ip -n "${s_ns}" link set "eth${i}" addr "${mac[$i]}" ip -n ${g_ns} link set s${i} up ip -n ${g_ns} link set s${i} master br0 ip -n ${s_ns} link set eth${i} master bond0 diff --git a/tools/testing/selftests/drivers/net/bonding/config b/tools/testing/selftests/drivers/net/bonding/config index dad4e5fda4db..b62c70715293 100644 --- a/tools/testing/selftests/drivers/net/bonding/config +++ b/tools/testing/selftests/drivers/net/bonding/config @@ -1,11 +1,22 @@ CONFIG_BONDING=y CONFIG_BRIDGE=y +CONFIG_CONFIGFS_FS=y CONFIG_DUMMY=y +CONFIG_INET_ESP=y +CONFIG_INET_ESP_OFFLOAD=y CONFIG_IPV6=y -CONFIG_MACVLAN=y CONFIG_IPVLAN=y +CONFIG_MACVLAN=y CONFIG_NET_ACT_GACT=y CONFIG_NET_CLS_FLOWER=y +CONFIG_NET_CLS_MATCHALL=m +CONFIG_NETCONSOLE=m +CONFIG_NETCONSOLE_DYNAMIC=y +CONFIG_NETCONSOLE_EXTENDED_LOG=y +CONFIG_NETDEVSIM=m +CONFIG_NET_IPGRE=y CONFIG_NET_SCH_INGRESS=y CONFIG_NLMON=y CONFIG_VETH=y +CONFIG_VLAN_8021Q=m +CONFIG_XFRM_USER=m diff --git a/tools/testing/selftests/drivers/net/bonding/netcons_over_bonding.sh b/tools/testing/selftests/drivers/net/bonding/netcons_over_bonding.sh new file mode 100755 index 000000000000..477cc9379500 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/netcons_over_bonding.sh @@ -0,0 +1,361 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-2.0 +# +# This selftest exercises trying to have multiple netpoll users at the same +# time. +# +# This selftest has multiple smalls test inside, and the goal is to +# get interfaces with bonding and netconsole in different orders in order +# to catch any possible issue. +# +# The main test composes of four interfaces being created using netdevsim; two +# of them are bonded to serve as the netconsole's transmit interface. The +# remaining two interfaces are similarly bonded and assigned to a separate +# network namespace, which acts as the receive interface, where socat monitors +# for incoming messages. +# +# A netconsole message is then sent to ensure it is properly received across +# this configuration. +# +# Later, run a few other tests, to make sure that bonding and netconsole +# cannot coexist. +# +# The test's objective is to exercise netpoll usage when managed simultaneously +# by multiple subsystems (netconsole and bonding). +# +# Author: Breno Leitao <leitao@debian.org> + +set -euo pipefail + +SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +source "${SCRIPTDIR}"/../lib/sh/lib_netcons.sh + +modprobe netdevsim 2> /dev/null || true +modprobe netconsole 2> /dev/null || true +modprobe bonding 2> /dev/null || true +modprobe veth 2> /dev/null || true + +# The content of kmsg will be save to the following file +OUTPUT_FILE="/tmp/${TARGET}" + +# Check for basic system dependency and exit if not found +check_for_dependencies +# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5) +echo "6 5" > /proc/sys/kernel/printk +# Remove the namespace, interfaces and netconsole target on exit +trap cleanup_bond EXIT + +FORMAT="extended" +IP_VERSION="ipv4" +VETH0="veth"$(( RANDOM % 256)) +VETH1="veth"$((256 + RANDOM % 256)) +TXNS="" +RXNS="" + +# Create "bond_tx_XX" and "bond_rx_XX" interfaces, and set DSTIF and SRCIF with +# the bonding interfaces +function setup_bonding_ifaces() { + local RAND=$(( RANDOM % 100 )) + BOND_TX_MAIN_IF="bond_tx_$RAND" + BOND_RX_MAIN_IF="bond_rx_$RAND" + + # Setup TX + if ! ip -n "${TXNS}" link add "${BOND_TX_MAIN_IF}" type bond mode balance-rr + then + echo "Failed to create bond TX interface. Is CONFIG_BONDING set?" >&2 + # only clean nsim ifaces and namespace. Nothing else has been + # initialized + cleanup_bond_nsim + trap - EXIT + exit "${ksft_skip}" + fi + + # create_netdevsim() got the interface up, but it needs to be down + # before being enslaved. + ip -n "${TXNS}" \ + link set "${BOND_TX1_SLAVE_IF}" down + ip -n "${TXNS}" \ + link set "${BOND_TX2_SLAVE_IF}" down + ip -n "${TXNS}" \ + link set "${BOND_TX1_SLAVE_IF}" master "${BOND_TX_MAIN_IF}" + ip -n "${TXNS}" \ + link set "${BOND_TX2_SLAVE_IF}" master "${BOND_TX_MAIN_IF}" + ip -n "${TXNS}" \ + link set "${BOND_TX_MAIN_IF}" up + + # Setup RX + ip -n "${RXNS}" \ + link add "${BOND_RX_MAIN_IF}" type bond mode balance-rr + ip -n "${RXNS}" \ + link set "${BOND_RX1_SLAVE_IF}" down + ip -n "${RXNS}" \ + link set "${BOND_RX2_SLAVE_IF}" down + ip -n "${RXNS}" \ + link set "${BOND_RX1_SLAVE_IF}" master "${BOND_RX_MAIN_IF}" + ip -n "${RXNS}" \ + link set "${BOND_RX2_SLAVE_IF}" master "${BOND_RX_MAIN_IF}" + ip -n "${RXNS}" \ + link set "${BOND_RX_MAIN_IF}" up + + export DSTIF="${BOND_RX_MAIN_IF}" + export SRCIF="${BOND_TX_MAIN_IF}" +} + +# Create 4 netdevsim interfaces. Two of them will be bound to TX bonding iface +# and the other two will be bond to the RX interface (on the other namespace) +function create_ifaces_bond() { + BOND_TX1_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_TX_1}" "${TXNS}") + BOND_TX2_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_TX_2}" "${TXNS}") + BOND_RX1_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_RX_1}" "${RXNS}") + BOND_RX2_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_RX_2}" "${RXNS}") +} + +# netdevsim link BOND_TX to BOND_RX interfaces +function link_ifaces_bond() { + local BOND_TX1_SLAVE_IFIDX + local BOND_TX2_SLAVE_IFIDX + local BOND_RX1_SLAVE_IFIDX + local BOND_RX2_SLAVE_IFIDX + local TXNS_FD + local RXNS_FD + + BOND_TX1_SLAVE_IFIDX=$(ip netns exec "${TXNS}" \ + cat /sys/class/net/"$BOND_TX1_SLAVE_IF"/ifindex) + BOND_TX2_SLAVE_IFIDX=$(ip netns exec "${TXNS}" \ + cat /sys/class/net/"$BOND_TX2_SLAVE_IF"/ifindex) + BOND_RX1_SLAVE_IFIDX=$(ip netns exec "${RXNS}" \ + cat /sys/class/net/"$BOND_RX1_SLAVE_IF"/ifindex) + BOND_RX2_SLAVE_IFIDX=$(ip netns exec "${RXNS}" \ + cat /sys/class/net/"$BOND_RX2_SLAVE_IF"/ifindex) + + exec {TXNS_FD}</var/run/netns/"${TXNS}" + exec {RXNS_FD}</var/run/netns/"${RXNS}" + + # Linking TX ifaces to the RX ones (on the other namespace) + echo "${TXNS_FD}:$BOND_TX1_SLAVE_IFIDX $RXNS_FD:$BOND_RX1_SLAVE_IFIDX" \ + > "$NSIM_DEV_SYS_LINK" + echo "${TXNS_FD}:$BOND_TX2_SLAVE_IFIDX $RXNS_FD:$BOND_RX2_SLAVE_IFIDX" \ + > "$NSIM_DEV_SYS_LINK" + + exec {TXNS_FD}<&- + exec {RXNS_FD}<&- +} + +function create_all_ifaces() { + # setup_ns function is coming from lib.sh + setup_ns TXNS RXNS + export NAMESPACE="${RXNS}" + + # Create two interfaces for RX and two for TX + create_ifaces_bond + # Link netlink ifaces + link_ifaces_bond +} + +# configure DSTIF and SRCIF IPs +function configure_ifaces_ips() { + local IP_VERSION=${1:-"ipv4"} + select_ipv4_or_ipv6 "${IP_VERSION}" + + ip -n "${RXNS}" addr add "${DSTIP}"/24 dev "${DSTIF}" + ip -n "${RXNS}" link set "${DSTIF}" up + + ip -n "${TXNS}" addr add "${SRCIP}"/24 dev "${SRCIF}" + ip -n "${TXNS}" link set "${SRCIF}" up +} + +function test_enable_netpoll_on_enslaved_iface() { + echo 0 > "${NETCONS_PATH}"/enabled + + # At this stage, BOND_TX1_SLAVE_IF is enslaved to BOND_TX_MAIN_IF, and + # linked to BOND_RX1_SLAVE_IF inside the namespace. + echo "${BOND_TX1_SLAVE_IF}" > "${NETCONS_PATH}"/dev_name + + # This should fail with the following message in dmesg: + # netpoll: netconsole: ethX is a slave device, aborting + set +e + enable_netcons_ns 2> /dev/null + set -e + + if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 1 ]] + then + echo "test failed: Bonding and netpoll cannot co-exists." >&2 + exit "${ksft_fail}" + fi +} + +function test_delete_bond_and_reenable_target() { + ip -n "${TXNS}" \ + link delete "${BOND_TX_MAIN_IF}" type bond + + # BOND_TX1_SLAVE_IF is not attached to a bond interface anymore + # netpoll can be plugged in there + echo "${BOND_TX1_SLAVE_IF}" > "${NETCONS_PATH}"/dev_name + + # this should work, since the interface is not enslaved + enable_netcons_ns + + if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 0 ]] + then + echo "test failed: Unable to start netpoll on an unbond iface." >&2 + exit "${ksft_fail}" + fi +} + +# Send a netconsole message to the netconsole target +function test_send_netcons_msg_through_bond_iface() { + # Listen for netconsole port inside the namespace and + # destination interface + listen_port_and_save_to "${OUTPUT_FILE}" "${IP_VERSION}" & + # Wait for socat to start and listen to the port. + wait_for_port "${RXNS}" "${PORT}" "${IP_VERSION}" + # Send the message + echo "${MSG}: ${TARGET}" > /dev/kmsg + # Wait until socat saves the file to disk + busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" + # Make sure the message was received in the dst part + # and exit + validate_result "${OUTPUT_FILE}" "${FORMAT}" + # kill socat in case it is still running + pkill_socat +} + +# BOND_TX1_SLAVE_IF has netconsole enabled on it, bind it to BOND_TX_MAIN_IF. +# Given BOND_TX_MAIN_IF was deleted, recreate it first +function test_enslave_netcons_enabled_iface { + # netconsole got disabled while the interface was down + if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 0 ]] + then + echo "test failed: netconsole expected to be enabled against BOND_TX1_SLAVE_IF" >&2 + exit "${ksft_fail}" + fi + + # recreate the bonding iface. it got deleted by previous + # test (test_delete_bond_and_reenable_target) + ip -n "${TXNS}" \ + link add "${BOND_TX_MAIN_IF}" type bond mode balance-rr + + # sub-interface need to be down before attaching to bonding + # This will also disable netconsole. + ip -n "${TXNS}" \ + link set "${BOND_TX1_SLAVE_IF}" down + ip -n "${TXNS}" \ + link set "${BOND_TX1_SLAVE_IF}" master "${BOND_TX_MAIN_IF}" + ip -n "${TXNS}" \ + link set "${BOND_TX_MAIN_IF}" up + + # netconsole got disabled while the interface was down + if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 1 ]] + then + echo "test failed: Device is part of a bond iface, cannot have netcons enabled" >&2 + exit "${ksft_fail}" + fi +} + +# Get netconsole enabled on a bonding interface and attach a second +# sub-interface. +function test_enslave_iface_to_bond { + # BOND_TX_MAIN_IF has only BOND_TX1_SLAVE_IF right now + echo "${BOND_TX_MAIN_IF}" > "${NETCONS_PATH}"/dev_name + enable_netcons_ns + + # netcons is attached to bond0 and BOND_TX1_SLAVE_IF is + # part of BOND_TX_MAIN_IF. Attach BOND_TX2_SLAVE_IF to BOND_TX_MAIN_IF. + ip -n "${TXNS}" \ + link set "${BOND_TX2_SLAVE_IF}" master "${BOND_TX_MAIN_IF}" + if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 0 ]] + then + echo "test failed: Netconsole should be enabled on bonding interface. Failed" >&2 + exit "${ksft_fail}" + fi +} + +function test_enslave_iff_disabled_netpoll_iface { + local ret + + # Create two interfaces. veth interfaces it known to have + # IFF_DISABLE_NETPOLL set + if ! ip link add "${VETH0}" type veth peer name "${VETH1}" + then + echo "Failed to create veth TX interface. Is CONFIG_VETH set?" >&2 + exit "${ksft_skip}" + fi + set +e + # This will print RTNETLINK answers: Device or resource busy + ip link set "${VETH0}" master "${BOND_TX_MAIN_IF}" 2> /dev/null + ret=$? + set -e + if [[ $ret -eq 0 ]] + then + echo "test failed: veth interface could not be enslaved" + exit "${ksft_fail}" + fi +} + +# Given that netconsole picks the current net namespace, we need to enable it +# from inside the TXNS namespace +function enable_netcons_ns() { + ip netns exec "${TXNS}" sh -c \ + "mount -t configfs configfs /sys/kernel/config && echo 1 > $NETCONS_PATH/enabled" +} + +#################### +# Tests start here # +#################### + +# Create regular interfaces using netdevsim and link them +create_all_ifaces + +# Setup the bonding interfaces +# BOND_RX_MAIN_IF has BOND_RX{1,2}_SLAVE_IF +# BOND_TX_MAIN_IF has BOND_TX{1,2}_SLAVE_IF +setup_bonding_ifaces + +# Configure the ips as BOND_RX1_SLAVE_IF and BOND_TX1_SLAVE_IF +configure_ifaces_ips "${IP_VERSION}" + +_create_dynamic_target "${FORMAT}" "${NETCONS_PATH}" +enable_netcons_ns +set_user_data + +# Test #1 : Create an bonding interface and attach netpoll into +# the bonding interface. Netconsole/netpoll should work on +# the bonding interface. +test_send_netcons_msg_through_bond_iface +echo "test #1: netpoll on bonding interface worked. Test passed" >&2 + +# Test #2: Attach netpoll to an enslaved interface +# Try to attach netpoll to an enslaved sub-interface (while still being part of +# a bonding interface), which shouldn't be allowed +test_enable_netpoll_on_enslaved_iface +echo "test #2: netpoll correctly rejected enslaved interface (expected behavior). Test passed." >&2 + +# Test #3: Unplug the sub-interface from bond and enable netconsole +# Detach the interface from a bonding interface and attach netpoll again +test_delete_bond_and_reenable_target +echo "test #3: Able to attach to an unbound interface. Test passed." >&2 + +# Test #4: Enslave a sub-interface that had netconsole enabled +# Try to enslave an interface that has netconsole/netpoll enabled. +# Previous test has netconsole enabled in BOND_TX1_SLAVE_IF, try to enslave it +test_enslave_netcons_enabled_iface +echo "test #4: Enslaving an interface with netpoll attached. Test passed." >&2 + +# Test #5: Enslave a sub-interface to a bonding interface +# Enslave an interface to a bond interface that has netpoll attached +# At this stage, BOND_TX_MAIN_IF is created and BOND_TX1_SLAVE_IF is part of +# it. Netconsole is currently disabled +test_enslave_iface_to_bond +echo "test #5: Enslaving an interface to bond+netpoll. Test passed." >&2 + +# Test #6: Enslave a IFF_DISABLE_NETPOLL sub-interface to a bonding interface +# At this stage, BOND_TX_MAIN_IF has both sub interface and netconsole is +# enabled. This test will try to enslave an a veth (IFF_DISABLE_NETPOLL) interface +# and it should fail, with netpoll: veth0 doesn't support polling +test_enslave_iff_disabled_netpoll_iface +echo "test #6: Enslaving IFF_DISABLE_NETPOLL ifaces to bond iface is not supported. Test passed." >&2 + +cleanup_bond +trap - EXIT +exit "${EXIT_STATUS}" diff --git a/tools/testing/selftests/drivers/net/config b/tools/testing/selftests/drivers/net/config index a2d8af60876d..fd16994366f4 100644 --- a/tools/testing/selftests/drivers/net/config +++ b/tools/testing/selftests/drivers/net/config @@ -1,6 +1,12 @@ -CONFIG_IPV6=y -CONFIG_NETDEVSIM=m CONFIG_CONFIGFS_FS=y +CONFIG_DEBUG_INFO_BTF=y +CONFIG_DEBUG_INFO_BTF_MODULES=n +CONFIG_INET_PSP=y +CONFIG_IPV6=y +CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y CONFIG_NETCONSOLE_EXTENDED_LOG=y +CONFIG_NETDEVSIM=m +CONFIG_VLAN_8021Q=m +CONFIG_XDP_SOCKETS=y diff --git a/tools/testing/selftests/drivers/net/dsa/Makefile b/tools/testing/selftests/drivers/net/dsa/Makefile index cd6817fe5be6..7994bd0e5c44 100644 --- a/tools/testing/selftests/drivers/net/dsa/Makefile +++ b/tools/testing/selftests/drivers/net/dsa/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0+ OR MIT -TEST_PROGS = bridge_locked_port.sh \ +TEST_PROGS := \ + bridge_locked_port.sh \ bridge_mdb.sh \ bridge_mld.sh \ bridge_vlan_aware.sh \ @@ -9,11 +10,13 @@ TEST_PROGS = bridge_locked_port.sh \ local_termination.sh \ no_forwarding.sh \ tc_actions.sh \ - test_bridge_fdb_stress.sh + test_bridge_fdb_stress.sh \ +# end of TEST_PROGS TEST_FILES := \ + forwarding.config \ run_net_forwarding_test.sh \ - forwarding.config +# end of TEST_FILES TEST_INCLUDES := \ ../../../net/forwarding/bridge_locked_port.sh \ @@ -27,6 +30,7 @@ TEST_INCLUDES := \ ../../../net/forwarding/no_forwarding.sh \ ../../../net/forwarding/tc_actions.sh \ ../../../net/forwarding/tc_common.sh \ - ../../../net/lib.sh + ../../../net/lib.sh \ +# end of TEST_INCLUDES include ../../../lib.mk diff --git a/tools/testing/selftests/drivers/net/dsa/tc_taprio.sh b/tools/testing/selftests/drivers/net/dsa/tc_taprio.sh new file mode 120000 index 000000000000..d16a65e7595d --- /dev/null +++ b/tools/testing/selftests/drivers/net/dsa/tc_taprio.sh @@ -0,0 +1 @@ +run_net_forwarding_test.sh
\ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/gro.py b/tools/testing/selftests/drivers/net/gro.py new file mode 100755 index 000000000000..221f27e57147 --- /dev/null +++ b/tools/testing/selftests/drivers/net/gro.py @@ -0,0 +1,446 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +GRO (Generic Receive Offload) conformance tests. + +Validates that GRO coalescing works correctly by running the gro +binary in different configurations and checking for correct packet +coalescing behavior. + +Test cases: + - data_same: Same size data packets coalesce + - data_lrg_sml: Large packet followed by smaller one coalesces + - data_lrg_1byte: Large packet followed by 1B one coalesces (Ethernet padding) + - data_sml_lrg: Small packet followed by larger one doesn't coalesce + - ack: Pure ACK packets do not coalesce + - flags_psh: Packets with PSH flag don't coalesce + - flags_syn: Packets with SYN flag don't coalesce + - flags_rst: Packets with RST flag don't coalesce + - flags_urg: Packets with URG flag don't coalesce + - flags_cwr: Packets with CWR flag don't coalesce + - tcp_csum: Packets with incorrect checksum don't coalesce + - tcp_seq: Packets with non-consecutive seqno don't coalesce + - tcp_ts: Packets with different timestamp options don't coalesce + - tcp_opt: Packets with different TCP options don't coalesce + - ip_ecn: Packets with different ECN don't coalesce + - ip_tos: Packets with different TOS don't coalesce + - ip_ttl: (IPv4) Packets with different TTL don't coalesce + - ip_opt: (IPv4) Packets with IP options don't coalesce + - ip_frag4: (IPv4) IPv4 fragments don't coalesce + - ip_id_df*: (IPv4) IP ID field coalescing tests + - ip_frag6: (IPv6) IPv6 fragments don't coalesce + - ip_v6ext_same: (IPv6) IPv6 ext header with same payload coalesces + - ip_v6ext_diff: (IPv6) IPv6 ext header with different payload doesn't coalesce + - large_max: Packets exceeding GRO_MAX_SIZE don't coalesce + - large_rem: Large packet remainder handling +""" + +import glob +import os +import re +from lib.py import ksft_run, ksft_exit, ksft_pr +from lib.py import NetDrvEpEnv, KsftXfailEx +from lib.py import NetdevFamily, EthtoolFamily +from lib.py import bkg, cmd, defer, ethtool, ip +from lib.py import ksft_variants, KsftNamedVariant + + +# gro.c uses hardcoded DPORT=8000 +GRO_DPORT = 8000 + + +def _resolve_dmac(cfg, ipver): + """ + Find the destination MAC address remote host should use to send packets + towards the local host. It may be a router / gateway address. + """ + + attr = "dmac" + ipver + # Cache the response across test cases + if hasattr(cfg, attr): + return getattr(cfg, attr) + + route = ip(f"-{ipver} route get {cfg.addr_v[ipver]}", + json=True, host=cfg.remote)[0] + gw = route.get("gateway") + # Local L2 segment, address directly + if not gw: + setattr(cfg, attr, cfg.dev['address']) + return getattr(cfg, attr) + + # ping to make sure neighbor is resolved, + # bind to an interface, for v6 the GW is likely link local + cmd(f"ping -c1 -W0 -I{cfg.remote_ifname} {gw}", host=cfg.remote) + + neigh = ip(f"neigh get {gw} dev {cfg.remote_ifname}", + json=True, host=cfg.remote)[0] + setattr(cfg, attr, neigh['lladdr']) + return getattr(cfg, attr) + + +def _write_defer_restore(cfg, path, val, defer_undo=False): + with open(path, "r", encoding="utf-8") as fp: + orig_val = fp.read().strip() + if str(val) == orig_val: + return + with open(path, "w", encoding="utf-8") as fp: + fp.write(val) + if defer_undo: + defer(_write_defer_restore, cfg, path, orig_val) + + +def _set_mtu_restore(dev, mtu, host): + if dev['mtu'] < mtu: + ip(f"link set dev {dev['ifname']} mtu {mtu}", host=host) + defer(ip, f"link set dev {dev['ifname']} mtu {dev['mtu']}", host=host) + + +def _set_ethtool_feat(dev, current, feats, host=None): + s2n = {True: "on", False: "off"} + + new = ["-K", dev] + old = ["-K", dev] + no_change = True + for name, state in feats.items(): + new += [name, s2n[state]] + old += [name, s2n[current[name]["active"]]] + + if current[name]["active"] != state: + no_change = False + if current[name]["fixed"]: + raise KsftXfailEx(f"Device does not support {name}") + if no_change: + return + + eth_cmd = ethtool(" ".join(new), host=host) + defer(ethtool, " ".join(old), host=host) + + # If ethtool printed something kernel must have modified some features + if eth_cmd.stdout: + ksft_pr(eth_cmd) + + +def _get_queue_stats(cfg, queue_id): + """Get stats for a specific Rx queue.""" + cfg.wait_hw_stats_settle() + data = cfg.netnl.qstats_get({"ifindex": cfg.ifindex, "scope": ["queue"]}, + dump=True) + for q in data: + if q.get('queue-type') == 'rx' and q.get('queue-id') == queue_id: + return q + return {} + + +def _setup_isolated_queue(cfg): + """Set up an isolated queue for testing using ntuple filter. + + Remove queue 1 from the default RSS context and steer test traffic to it. + """ + test_queue = 1 + + qcnt = len(glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*")) + if qcnt < 2: + raise KsftXfailEx(f"Need at least 2 queues, have {qcnt}") + + # Remove queue 1 from default RSS context by setting its weight to 0 + weights = ["1"] * qcnt + weights[test_queue] = "0" + ethtool(f"-X {cfg.ifname} weight " + " ".join(weights)) + defer(ethtool, f"-X {cfg.ifname} default") + + # Set up ntuple filter to steer our test traffic to the isolated queue + flow = f"flow-type tcp{cfg.addr_ipver} " + flow += f"dst-ip {cfg.addr} dst-port {GRO_DPORT} action {test_queue}" + output = ethtool(f"-N {cfg.ifname} {flow}").stdout + ntuple_id = int(output.split()[-1]) + defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}") + + return test_queue + + +def _setup_queue_count(cfg, num_queues): + """Configure the NIC to use a specific number of queues.""" + channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}}) + ch_max = channels.get('combined-max', 0) + qcnt = channels['combined-count'] + + if ch_max < num_queues: + raise KsftXfailEx(f"Need at least {num_queues} queues, max={ch_max}") + + defer(ethtool, f"-L {cfg.ifname} combined {qcnt}") + ethtool(f"-L {cfg.ifname} combined {num_queues}") + + +def _run_gro_bin(cfg, test_name, protocol=None, num_flows=None, + order_check=False, verbose=False, fail=False): + """Run gro binary with given test and return the process result.""" + if not hasattr(cfg, "bin_remote"): + cfg.bin_local = cfg.net_lib_dir / "gro" + cfg.bin_remote = cfg.remote.deploy(cfg.bin_local) + + if protocol is None: + ipver = cfg.addr_ipver + protocol = f"ipv{ipver}" + else: + ipver = "6" if protocol[-1] == "6" else "4" + + dmac = _resolve_dmac(cfg, ipver) + + base_args = [ + f"--{protocol}", + f"--dmac {dmac}", + f"--smac {cfg.remote_dev['address']}", + f"--daddr {cfg.addr_v[ipver]}", + f"--saddr {cfg.remote_addr_v[ipver]}", + f"--test {test_name}", + ] + if num_flows: + base_args.append(f"--num-flows {num_flows}") + if order_check: + base_args.append("--order-check") + if verbose: + base_args.append("--verbose") + + args = " ".join(base_args) + + rx_cmd = f"{cfg.bin_local} {args} --rx --iface {cfg.ifname}" + tx_cmd = f"{cfg.bin_remote} {args} --iface {cfg.remote_ifname}" + + with bkg(rx_cmd, ksft_ready=True, exit_wait=True, fail=fail) as rx_proc: + cmd(tx_cmd, host=cfg.remote) + + return rx_proc + + +def _setup(cfg, mode, test_name): + """ Setup hardware loopback mode for GRO testing. """ + + if not hasattr(cfg, "bin_remote"): + cfg.bin_local = cfg.net_lib_dir / "gro" + cfg.bin_remote = cfg.remote.deploy(cfg.bin_local) + + if not hasattr(cfg, "feat"): + cfg.feat = ethtool(f"-k {cfg.ifname}", json=True)[0] + cfg.remote_feat = ethtool(f"-k {cfg.remote_ifname}", + host=cfg.remote, json=True)[0] + + # "large_*" tests need at least 4k MTU + if test_name.startswith("large_"): + _set_mtu_restore(cfg.dev, 4096, None) + _set_mtu_restore(cfg.remote_dev, 4096, cfg.remote) + + if mode == "sw": + flush_path = f"/sys/class/net/{cfg.ifname}/gro_flush_timeout" + irq_path = f"/sys/class/net/{cfg.ifname}/napi_defer_hard_irqs" + + _write_defer_restore(cfg, flush_path, "200000", defer_undo=True) + _write_defer_restore(cfg, irq_path, "10", defer_undo=True) + + _set_ethtool_feat(cfg.ifname, cfg.feat, + {"generic-receive-offload": True, + "rx-gro-hw": False, + "large-receive-offload": False}) + elif mode == "hw": + _set_ethtool_feat(cfg.ifname, cfg.feat, + {"generic-receive-offload": False, + "rx-gro-hw": True, + "large-receive-offload": False}) + + # Some NICs treat HW GRO as a GRO sub-feature so disabling GRO + # will also clear HW GRO. Use a hack of installing XDP generic + # to skip SW GRO, even when enabled. + feat = ethtool(f"-k {cfg.ifname}", json=True)[0] + if not feat["rx-gro-hw"]["active"]: + ksft_pr("Driver clears HW GRO and SW GRO is cleared, using generic XDP workaround") + prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" + ip(f"link set dev {cfg.ifname} xdpgeneric obj {prog} sec xdp") + defer(ip, f"link set dev {cfg.ifname} xdpgeneric off") + + # Attaching XDP may change features, fetch the latest state + feat = ethtool(f"-k {cfg.ifname}", json=True)[0] + + _set_ethtool_feat(cfg.ifname, feat, + {"generic-receive-offload": True, + "rx-gro-hw": True, + "large-receive-offload": False}) + elif mode == "lro": + # netdevsim advertises LRO for feature inheritance testing with + # bonding/team tests but it doesn't actually perform the offload + cfg.require_nsim(nsim_test=False) + + _set_ethtool_feat(cfg.ifname, cfg.feat, + {"generic-receive-offload": False, + "rx-gro-hw": False, + "large-receive-offload": True}) + + try: + # Disable TSO for local tests + cfg.require_nsim() # will raise KsftXfailEx if not running on nsim + + _set_ethtool_feat(cfg.remote_ifname, cfg.remote_feat, + {"tcp-segmentation-offload": False}, + host=cfg.remote) + except KsftXfailEx: + pass + + +def _gro_variants(): + """Generator that yields all combinations of protocol and test types.""" + + # Tests that work for all protocols + common_tests = [ + "data_same", "data_lrg_sml", "data_sml_lrg", "data_lrg_1byte", + "data_burst", + "ack", + "flags_psh", "flags_syn", "flags_rst", "flags_urg", "flags_cwr", + "tcp_csum", "tcp_seq", "tcp_ts", "tcp_opt", + "ip_ecn", "ip_tos", + "large_max", "large_rem", + ] + + # Tests specific to IPv4 + ipv4_tests = [ + "ip_csum", + "ip_ttl", "ip_opt", "ip_frag4", + "ip_id_df1_inc", "ip_id_df1_fixed", + "ip_id_df0_inc", "ip_id_df0_fixed", + "ip_id_df1_inc_fixed", "ip_id_df1_fixed_inc", + ] + + # Tests specific to IPv6 + ipv6_tests = [ + "ip_frag6", "ip_v6ext_same", "ip_v6ext_diff", + ] + + for mode in ["sw", "hw", "lro"]: + for protocol in ["ipv4", "ipv6", "ipip", "ip6ip6"]: + for test_name in common_tests: + yield mode, protocol, test_name + + if protocol in ["ipv4", "ipip"]: + for test_name in ipv4_tests: + yield mode, protocol, test_name + elif protocol == "ipv6": + for test_name in ipv6_tests: + yield mode, protocol, test_name + + +@ksft_variants(_gro_variants()) +def test(cfg, mode, protocol, test_name): + """Run a single GRO test with retries.""" + + ipver = "6" if protocol[-1] == "6" else "4" + cfg.require_ipver(ipver) + + _setup(cfg, mode, test_name) + + # Each test is run 6 times to deflake, because given the receive timing, + # not all packets that should coalesce will be considered in the same flow + # on every try. + max_retries = 6 + for attempt in range(max_retries): + fail_now = attempt >= max_retries - 1 + rx_proc = _run_gro_bin(cfg, test_name, protocol=protocol, + verbose=True, fail=fail_now) + + if rx_proc.ret == 0: + return + + ksft_pr(rx_proc) + + if test_name.startswith("large_") and os.environ.get("KSFT_MACHINE_SLOW"): + ksft_pr(f"Ignoring {protocol}/{test_name} failure due to slow environment") + return + + ksft_pr(f"Attempt {attempt + 1}/{max_retries} failed, retrying...") + + +def _capacity_variants(): + """Generate variants for capacity test: mode x queue setup.""" + setups = [ + ("isolated", _setup_isolated_queue), + ("1q", lambda cfg: _setup_queue_count(cfg, 1)), + ("8q", lambda cfg: _setup_queue_count(cfg, 8)), + ] + for mode in ["sw", "hw", "lro"]: + for name, func in setups: + yield KsftNamedVariant(f"{mode}_{name}", mode, func) + + +@ksft_variants(_capacity_variants()) +def test_gro_capacity(cfg, mode, setup_func): + """ + Probe GRO capacity. + + Start with 8 flows and increase by 2x on each successful run. + Retry up to 3 times on failure. + + Variants combine mode (sw, hw, lro) with queue setup: + - isolated: Use a single queue isolated from RSS + - 1q: Configure NIC to use 1 queue + - 8q: Configure NIC to use 8 queues + """ + max_retries = 3 + + _setup(cfg, mode, "capacity") + queue_id = setup_func(cfg) + + num_flows = 8 + while True: + success = False + for attempt in range(max_retries): + if queue_id is not None: + stats_before = _get_queue_stats(cfg, queue_id) + + rx_proc = _run_gro_bin(cfg, "capacity", num_flows=num_flows) + output = rx_proc.stdout + + if queue_id is not None: + stats_after = _get_queue_stats(cfg, queue_id) + qstat_pkts = (stats_after.get('rx-packets', 0) - + stats_before.get('rx-packets', 0)) + gro_pkts = (stats_after.get('rx-hw-gro-packets', 0) - + stats_before.get('rx-hw-gro-packets', 0)) + qstat_str = f" qstat={qstat_pkts} hw-gro={gro_pkts}" + else: + qstat_str = "" + + # Parse and print STATS line + match = re.search( + r'STATS: received=(\d+) wire=(\d+) coalesced=(\d+)', output) + if match: + received = int(match.group(1)) + wire = int(match.group(2)) + coalesced = int(match.group(3)) + status = "PASS" if received == num_flows else "MISS" + ksft_pr(f"flows={num_flows} attempt={attempt + 1} " + f"received={received} wire={wire} " + f"coalesced={coalesced}{qstat_str} [{status}]") + if received == num_flows: + success = True + break + else: + ksft_pr(rx_proc) + ksft_pr(f"flows={num_flows} attempt={attempt + 1}" + f"{qstat_str} [FAIL - can't parse stats]") + + if not success: + ksft_pr(f"Stopped at {num_flows} flows") + break + + num_flows *= 2 + + +def main() -> None: + """ Ksft boiler plate main """ + + with NetDrvEpEnv(__file__) as cfg: + cfg.ethnl = EthtoolFamily() + cfg.netnl = NetdevFamily() + ksft_run(cases=[test, test_gro_capacity], args=(cfg,)) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hds.py b/tools/testing/selftests/drivers/net/hds.py index 873f5219e41d..c4fe049e9baa 100755 --- a/tools/testing/selftests/drivers/net/hds.py +++ b/tools/testing/selftests/drivers/net/hds.py @@ -3,6 +3,8 @@ import errno import os +import random +from typing import Union from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_raises, KsftSkipEx from lib.py import CmdExitFailure, EthtoolFamily, NlError from lib.py import NetDrvEnv @@ -20,8 +22,7 @@ def _get_hds_mode(cfg, netnl) -> str: def _xdp_onoff(cfg): - test_dir = os.path.dirname(os.path.realpath(__file__)) - prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" + prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" ip("link set dev %s xdp obj %s sec xdp" % (cfg.ifname, prog)) ip("link set dev %s xdp off" % cfg.ifname) @@ -59,7 +60,39 @@ def get_hds_thresh(cfg, netnl) -> None: if 'hds-thresh' not in rings: raise KsftSkipEx('hds-thresh not supported by device') + +def _hds_reset(cfg, netnl, rings) -> None: + cur = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + + arg = {'header': {'dev-index': cfg.ifindex}} + if cur.get('tcp-data-split') != rings.get('tcp-data-split'): + # Try to reset to "unknown" first, we don't know if the setting + # was the default or user chose it. Default seems more likely. + arg['tcp-data-split'] = "unknown" + netnl.rings_set(arg) + cur = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + if cur['tcp-data-split'] == rings['tcp-data-split']: + del arg['tcp-data-split'] + else: + # Try the explicit setting + arg['tcp-data-split'] = rings['tcp-data-split'] + if cur.get('hds-thresh') != rings.get('hds-thresh'): + arg['hds-thresh'] = rings['hds-thresh'] + if len(arg) > 1: + netnl.rings_set(arg) + + +def _defer_reset_hds(cfg, netnl) -> Union[dict, None]: + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + if 'hds-thresh' in rings or 'tcp-data-split' in rings: + defer(_hds_reset, cfg, netnl, rings) + except NlError as e: + pass + + def set_hds_enable(cfg, netnl) -> None: + _defer_reset_hds(cfg, netnl) try: netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'tcp-data-split': 'enabled'}) except NlError as e: @@ -77,6 +110,7 @@ def set_hds_enable(cfg, netnl) -> None: ksft_eq('enabled', rings['tcp-data-split']) def set_hds_disable(cfg, netnl) -> None: + _defer_reset_hds(cfg, netnl) try: netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'tcp-data-split': 'disabled'}) except NlError as e: @@ -94,6 +128,7 @@ def set_hds_disable(cfg, netnl) -> None: ksft_eq('disabled', rings['tcp-data-split']) def set_hds_thresh_zero(cfg, netnl) -> None: + _defer_reset_hds(cfg, netnl) try: netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': 0}) except NlError as e: @@ -110,7 +145,39 @@ def set_hds_thresh_zero(cfg, netnl) -> None: ksft_eq(0, rings['hds-thresh']) +def set_hds_thresh_random(cfg, netnl) -> None: + _defer_reset_hds(cfg, netnl) + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + if 'hds-thresh' not in rings: + raise KsftSkipEx('hds-thresh not supported by device') + if 'hds-thresh-max' not in rings: + raise KsftSkipEx('hds-thresh-max not defined by device') + + if rings['hds-thresh-max'] < 2: + raise KsftSkipEx('hds-thresh-max is too small') + elif rings['hds-thresh-max'] == 2: + hds_thresh = 1 + else: + while True: + hds_thresh = random.randint(1, rings['hds-thresh-max'] - 1) + if hds_thresh != rings['hds-thresh']: + break + + try: + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': hds_thresh}) + except NlError as e: + if e.error == errno.EINVAL: + raise KsftSkipEx("hds-thresh-set not supported by the device") + elif e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("ring-set not supported by the device") + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + ksft_eq(hds_thresh, rings['hds-thresh']) + def set_hds_thresh_max(cfg, netnl) -> None: + _defer_reset_hds(cfg, netnl) try: rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) except NlError as e: @@ -128,6 +195,7 @@ def set_hds_thresh_max(cfg, netnl) -> None: ksft_eq(rings['hds-thresh'], rings['hds-thresh-max']) def set_hds_thresh_gt(cfg, netnl) -> None: + _defer_reset_hds(cfg, netnl) try: rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) except NlError as e: @@ -149,6 +217,7 @@ def set_xdp(cfg, netnl) -> None: """ mode = _get_hds_mode(cfg, netnl) if mode == 'enabled': + _defer_reset_hds(cfg, netnl) netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'tcp-data-split': 'unknown'}) @@ -244,6 +313,7 @@ def main() -> None: get_hds_thresh, set_hds_disable, set_hds_enable, + set_hds_thresh_random, set_hds_thresh_zero, set_hds_thresh_max, set_hds_thresh_gt, diff --git a/tools/testing/selftests/drivers/net/hw/.gitignore b/tools/testing/selftests/drivers/net/hw/.gitignore index e9fe6ede681a..46540468a775 100644 --- a/tools/testing/selftests/drivers/net/hw/.gitignore +++ b/tools/testing/selftests/drivers/net/hw/.gitignore @@ -1 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only +iou-zcrx ncdevmem +toeplitz diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile index 21ba64ce1e34..85ca4d1ecf9e 100644 --- a/tools/testing/selftests/drivers/net/hw/Makefile +++ b/tools/testing/selftests/drivers/net/hw/Makefile @@ -1,20 +1,53 @@ # SPDX-License-Identifier: GPL-2.0+ OR MIT +# Check if io_uring supports zero-copy receive +HAS_IOURING_ZCRX := $(shell \ + echo -e '#include <liburing.h>\n' \ + 'void *func = (void *)io_uring_register_ifq;\n' \ + 'int main() {return 0;}' | \ + $(CC) -luring -x c - -o /dev/null 2>&1 && echo y) + +ifeq ($(HAS_IOURING_ZCRX),y) +COND_GEN_FILES += iou-zcrx +else +$(warning excluding iouring tests, liburing not installed or too old) +endif + +TEST_GEN_FILES := \ + $(COND_GEN_FILES) \ +# end of TEST_GEN_FILES + TEST_PROGS = \ csum.py \ devlink_port_split.py \ + devlink_rate_tc_bw.py \ devmem.py \ ethtool.sh \ ethtool_extended_state.sh \ ethtool_mm.sh \ ethtool_rmon.sh \ + ethtool_std_stats.sh \ + gro_hw.py \ hw_stats_l3.sh \ hw_stats_l3_gre.sh \ + iou-zcrx.py \ + irq.py \ loopback.sh \ - nic_link_layer.py \ - nic_performance.py \ + nic_timestamp.py \ + nk_netns.py \ + nk_qlease.py \ + ntuple.py \ pp_alloc_fail.py \ + rss_api.py \ rss_ctx.py \ + rss_drv.py \ + rss_flow_label.py \ + rss_input_xfrm.py \ + toeplitz.py \ + tso.py \ + uso.py \ + xdp_metadata.py \ + xsk_reconfig.py \ # TEST_FILES := \ @@ -24,17 +57,31 @@ TEST_FILES := \ TEST_INCLUDES := \ $(wildcard lib/py/*.py ../lib/py/*.py) \ ../../../net/lib.sh \ - ../../../net/forwarding/lib.sh \ ../../../net/forwarding/ipip_lib.sh \ + ../../../net/forwarding/lib.sh \ ../../../net/forwarding/tc_common.sh \ # # YNL files, must be before "include ..lib.mk" -YNL_GEN_FILES := ncdevmem +YNL_GEN_FILES := \ + ncdevmem \ + toeplitz \ +# end of YNL_GEN_FILES TEST_GEN_FILES += $(YNL_GEN_FILES) +TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c)) include ../../../lib.mk # YNL build -YNL_GENS := ethtool netdev +YNL_GENS := \ + ethtool \ + netdev \ +# end of YNL_GENS + include ../../../net/ynl.mk + +include ../../../net/bpf.mk + +ifeq ($(HAS_IOURING_ZCRX),y) +$(OUTPUT)/iou-zcrx: LDLIBS += -luring +endif diff --git a/tools/testing/selftests/drivers/net/hw/config b/tools/testing/selftests/drivers/net/hw/config new file mode 100644 index 000000000000..dd50cb8a7911 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/config @@ -0,0 +1,16 @@ +CONFIG_BPF_SYSCALL=y +CONFIG_FAIL_FUNCTION=y +CONFIG_FAULT_INJECTION=y +CONFIG_FAULT_INJECTION_DEBUG_FS=y +CONFIG_FUNCTION_ERROR_INJECTION=y +CONFIG_IO_URING=y +CONFIG_IPV6=y +CONFIG_IPV6_GRE=y +CONFIG_NET_CLS_ACT=y +CONFIG_NET_CLS_BPF=y +CONFIG_NET_IPGRE=y +CONFIG_NET_IPGRE_DEMUX=y +CONFIG_NETKIT=y +CONFIG_NET_SCH_INGRESS=y +CONFIG_UDMABUF=y +CONFIG_VXLAN=y diff --git a/tools/testing/selftests/drivers/net/hw/csum.py b/tools/testing/selftests/drivers/net/hw/csum.py index cb40497faee4..3e3a89a34afe 100755 --- a/tools/testing/selftests/drivers/net/hw/csum.py +++ b/tools/testing/selftests/drivers/net/hw/csum.py @@ -9,41 +9,35 @@ from lib.py import ksft_run, ksft_exit, KsftSkipEx from lib.py import EthtoolFamily, NetDrvEpEnv from lib.py import bkg, cmd, wait_port_listen -def test_receive(cfg, ipv4=False, extra_args=None): +def test_receive(cfg, ipver="6", extra_args=None): """Test local nic checksum receive. Remote host sends crafted packets.""" if not cfg.have_rx_csum: raise KsftSkipEx(f"Test requires rx checksum offload on {cfg.ifname}") - if ipv4: - ip_args = f"-4 -S {cfg.remote_v4} -D {cfg.v4}" - else: - ip_args = f"-6 -S {cfg.remote_v6} -D {cfg.v6}" + ip_args = f"-{ipver} -S {cfg.remote_addr_v[ipver]} -D {cfg.addr_v[ipver]}" rx_cmd = f"{cfg.bin_local} -i {cfg.ifname} -n 100 {ip_args} -r 1 -R {extra_args}" - tx_cmd = f"{cfg.bin_remote} -i {cfg.ifname} -n 100 {ip_args} -r 1 -T {extra_args}" + tx_cmd = f"{cfg.bin_remote} -i {cfg.remote_ifname} -n 100 {ip_args} -r 1 -T {extra_args}" with bkg(rx_cmd, exit_wait=True): wait_port_listen(34000, proto="udp") cmd(tx_cmd, host=cfg.remote) -def test_transmit(cfg, ipv4=False, extra_args=None): +def test_transmit(cfg, ipver="6", extra_args=None): """Test local nic checksum transmit. Remote host verifies packets.""" if (not cfg.have_tx_csum_generic and - not (cfg.have_tx_csum_ipv4 and ipv4) and - not (cfg.have_tx_csum_ipv6 and not ipv4)): + not (cfg.have_tx_csum_ipv4 and ipver == "4") and + not (cfg.have_tx_csum_ipv6 and ipver == "6")): raise KsftSkipEx(f"Test requires tx checksum offload on {cfg.ifname}") - if ipv4: - ip_args = f"-4 -S {cfg.v4} -D {cfg.remote_v4}" - else: - ip_args = f"-6 -S {cfg.v6} -D {cfg.remote_v6}" + ip_args = f"-{ipver} -S {cfg.addr_v[ipver]} -D {cfg.remote_addr_v[ipver]}" # Cannot randomize input when calculating zero checksum if extra_args != "-U -Z": extra_args += " -r 1" - rx_cmd = f"{cfg.bin_remote} -i {cfg.ifname} -L 1 -n 100 {ip_args} -R {extra_args}" + rx_cmd = f"{cfg.bin_remote} -i {cfg.remote_ifname} -L 1 -n 100 {ip_args} -R {extra_args}" tx_cmd = f"{cfg.bin_local} -i {cfg.ifname} -L 1 -n 100 {ip_args} -T {extra_args}" with bkg(rx_cmd, host=cfg.remote, exit_wait=True): @@ -51,26 +45,20 @@ def test_transmit(cfg, ipv4=False, extra_args=None): cmd(tx_cmd) -def test_builder(name, cfg, ipv4=False, tx=False, extra_args=""): +def test_builder(name, cfg, ipver="6", tx=False, extra_args=""): """Construct specific tests from the common template. Most tests follow the same basic pattern, differing only in Direction of the test and optional flags passed to csum.""" def f(cfg): - if ipv4: - cfg.require_v4() - else: - cfg.require_v6() + cfg.require_ipver(ipver) if tx: - test_transmit(cfg, ipv4, extra_args) + test_transmit(cfg, ipver, extra_args) else: - test_receive(cfg, ipv4, extra_args) + test_receive(cfg, ipver, extra_args) - if ipv4: - f.__name__ = "ipv4_" + name - else: - f.__name__ = "ipv6_" + name + f.__name__ = f"ipv{ipver}_" + name return f @@ -100,19 +88,19 @@ def main() -> None: with NetDrvEpEnv(__file__, nsim_test=False) as cfg: check_nic_features(cfg) - cfg.bin_local = path.abspath(path.dirname(__file__) + "/../../../net/lib/csum") + cfg.bin_local = cfg.net_lib_dir / "csum" cfg.bin_remote = cfg.remote.deploy(cfg.bin_local) cases = [] - for ipv4 in [True, False]: - cases.append(test_builder("rx_tcp", cfg, ipv4, False, "-t")) - cases.append(test_builder("rx_tcp_invalid", cfg, ipv4, False, "-t -E")) + for ipver in ["4", "6"]: + cases.append(test_builder("rx_tcp", cfg, ipver, False, "-t")) + cases.append(test_builder("rx_tcp_invalid", cfg, ipver, False, "-t -E")) - cases.append(test_builder("rx_udp", cfg, ipv4, False, "")) - cases.append(test_builder("rx_udp_invalid", cfg, ipv4, False, "-E")) + cases.append(test_builder("rx_udp", cfg, ipver, False, "")) + cases.append(test_builder("rx_udp_invalid", cfg, ipver, False, "-E")) - cases.append(test_builder("tx_udp_csum_offload", cfg, ipv4, True, "-U")) - cases.append(test_builder("tx_udp_zero_checksum", cfg, ipv4, True, "-U -Z")) + cases.append(test_builder("tx_udp_csum_offload", cfg, ipver, True, "-U")) + cases.append(test_builder("tx_udp_zero_checksum", cfg, ipver, True, "-U -Z")) ksft_run(cases=cases, args=(cfg, )) ksft_exit() diff --git a/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py b/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py new file mode 100755 index 000000000000..4e4faa9275bb --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py @@ -0,0 +1,439 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Devlink Rate TC Bandwidth Test Suite +=================================== + +This test suite verifies the functionality of devlink-rate traffic class (TC) +bandwidth distribution in a virtualized environment. The tests validate that +bandwidth can be properly allocated between different traffic classes and +that TC mapping works as expected. + +Test Environment: +---------------- +- Creates 1 VF +- Establishes a bridge connecting the VF representor and the uplink representor +- Sets up 2 VLAN interfaces on the VF with different VLAN IDs (101, 102) +- Configures different traffic classes (TC3 and TC4) for each VLAN + +Test Cases: +---------- +1. test_no_tc_mapping_bandwidth: + - Verifies that without TC mapping, bandwidth is NOT distributed according to + the configured 20/80 split between TC3 and TC4 + - This test should fail if bandwidth matches the 20/80 split without TC + mapping + - Expected: Bandwidth should NOT be distributed as 20/80 + +2. test_tc_mapping_bandwidth: + - Configures TC mapping using mqprio qdisc + - Verifies that with TC mapping, bandwidth IS distributed according to the + configured 20/80 split between TC3 and TC4 + - Expected: Bandwidth should be distributed as 20/80 + +Bandwidth Distribution: +---------------------- +- TC3 (VLAN 101): Configured for 20% of total bandwidth +- TC4 (VLAN 102): Configured for 80% of total bandwidth +- Total bandwidth: 1Gbps +- Tolerance: +-12% + +Hardware-Specific Behavior (mlx5): +-------------------------- +mlx5 hardware enforces traffic class separation by ensuring that each transmit +queue (SQ) is associated with a single TC. If a packet is sent on a queue that +doesn't match the expected TC (based on DSCP or VLAN priority and hypervisor-set +mapping), the hardware moves the queue to the correct TC scheduler to preserve +traffic isolation. + +This behavior means that even without explicit TC-to-queue mapping, bandwidth +enforcement may still appear to work—because the hardware dynamically adjusts +the scheduling context. However, this can lead to performance issues in high +rates and HOL blocking if traffic from different TCs is mixed on the same queue. +""" + +import json +import os +import subprocess +import threading +import time + +from lib.py import ksft_pr, ksft_run, ksft_exit +from lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx +from lib.py import NetDrvEpEnv, DevlinkFamily +from lib.py import NlError +from lib.py import cmd, defer, ethtool, ip +from lib.py import Iperf3Runner + + +class BandwidthValidator: + """ + Validates total bandwidth and individual shares with tolerance + relative to the overall total. + """ + + def __init__(self, shares): + self.tolerance_percent = 12 + self.expected_total = sum(shares.values()) + self.bounds = {} + + for name, exp in shares.items(): + self.bounds[name] = (self.min_expected(exp), self.max_expected(exp)) + + def min_expected(self, value): + """Calculates the minimum acceptable value based on tolerance.""" + return value - (self.expected_total * self.tolerance_percent / 100) + + def max_expected(self, value): + """Calculates the maximum acceptable value based on tolerance.""" + return value + (self.expected_total * self.tolerance_percent / 100) + + def bound(self, values): + """ + Return True if all given values fall within tolerance. + """ + for name, value in values.items(): + low, high = self.bounds[name] + if not low <= value <= high: + return False + return True + + +def setup_vf(cfg, set_tc_mapping=True): + """ + Sets up a VF on the given network interface. + + Enables SR-IOV and switchdev mode, brings the VF interface up, + and optionally configures TC mapping using mqprio. + """ + try: + cmd(f"devlink dev eswitch set pci/{cfg.pci} mode switchdev") + defer(cmd, f"devlink dev eswitch set pci/{cfg.pci} mode legacy") + except Exception as exc: + raise KsftSkipEx(f"Failed to enable switchdev mode on {cfg.pci}") from exc + try: + cmd(f"echo 1 > /sys/class/net/{cfg.ifname}/device/sriov_numvfs", shell=True) + defer(cmd, f"echo 0 > /sys/class/net/{cfg.ifname}/device/sriov_numvfs", shell=True) + except Exception as exc: + raise KsftSkipEx(f"Failed to enable SR-IOV on {cfg.ifname}") from exc + + time.sleep(2) + vf_ifc = (os.listdir( + f"/sys/class/net/{cfg.ifname}/device/virtfn0/net") or [None])[0] + if vf_ifc: + ip(f"link set dev {vf_ifc} up") + else: + raise KsftSkipEx("VF interface not found") + if set_tc_mapping: + cmd(f"tc qdisc add dev {vf_ifc} root handle 5 mqprio mode dcb hw 1 num_tc 8") + + return vf_ifc + + +def setup_vlans_on_vf(vf_ifc): + """ + Sets up two VLAN interfaces on the given VF, each mapped to a different TC. + """ + vlan_configs = [ + {"vlan_id": 101, "tc": 3, "ip": "198.51.100.1"}, + {"vlan_id": 102, "tc": 4, "ip": "198.51.100.9"}, + ] + + for config in vlan_configs: + vlan_dev = f"{vf_ifc}.{config['vlan_id']}" + ip(f"link add link {vf_ifc} name {vlan_dev} type vlan id {config['vlan_id']}") + ip(f"addr add {config['ip']}/29 dev {vlan_dev}") + ip(f"link set dev {vlan_dev} up") + ip(f"link set dev {vlan_dev} type vlan egress-qos-map 0:{config['tc']}") + ksft_pr(f"Created VLAN {vlan_dev} on {vf_ifc} with tc {config['tc']} and IP {config['ip']}") + + +def get_vf_info(cfg): + """ + Finds the VF representor interface and devlink port index + for the given PCI device used in the test environment. + """ + cfg.vf_representor = None + cfg.vf_port_index = None + out = subprocess.check_output(["devlink", "-j", "port", "show"], encoding="utf-8") + ports = json.loads(out)["port"] + + for port_name, props in ports.items(): + netdev = props.get("netdev") + + if (port_name.startswith(f"pci/{cfg.pci}/") and + props.get("vfnum") == 0): + cfg.vf_representor = netdev + cfg.vf_port_index = int(port_name.split("/")[-1]) + break + + +def setup_bridge(cfg): + """ + Creates and configures a Linux bridge, with both the uplink + and VF representor interfaces attached to it. + """ + bridge_name = f"br_{os.getpid()}" + ip(f"link add name {bridge_name} type bridge") + defer(cmd, f"ip link del name {bridge_name} type bridge") + + ip(f"link set dev {cfg.ifname} master {bridge_name}") + + rep_name = cfg.vf_representor + if rep_name: + ip(f"link set dev {rep_name} master {bridge_name}") + ip(f"link set dev {rep_name} up") + ksft_pr(f"Set representor {rep_name} up and added to bridge") + else: + raise KsftSkipEx("Could not find representor for the VF") + + ip(f"link set dev {bridge_name} up") + + +def setup_devlink_rate(cfg): + """ + Configures devlink rate tx_max and traffic class bandwidth for the VF. + """ + port_index = cfg.vf_port_index + if port_index is None: + raise KsftSkipEx("Could not find VF port index") + try: + cfg.devnl.rate_set({ + "bus-name": "pci", + "dev-name": cfg.pci, + "port-index": port_index, + "rate-tx-max": 125000000, + "rate-tc-bws": [ + {"index": 0, "bw": 0}, + {"index": 1, "bw": 0}, + {"index": 2, "bw": 0}, + {"index": 3, "bw": 20}, + {"index": 4, "bw": 80}, + {"index": 5, "bw": 0}, + {"index": 6, "bw": 0}, + {"index": 7, "bw": 0}, + ] + }) + except NlError as exc: + if exc.error == 95: # EOPNOTSUPP + raise KsftSkipEx("devlink rate configuration is not supported on the VF") from exc + raise KsftFailEx(f"rate_set failed on VF port {port_index}") from exc + + +def setup_remote_vlans(cfg): + """ + Sets up VLAN interfaces on the remote side. + """ + remote_dev = cfg.remote_ifname + vlan_ids = [101, 102] + remote_ips = ["198.51.100.2", "198.51.100.10"] + + for vlan_id, ip_addr in zip(vlan_ids, remote_ips): + vlan_dev = f"{remote_dev}.{vlan_id}" + cmd(f"ip link add link {remote_dev} name {vlan_dev} " + f"type vlan id {vlan_id}", host=cfg.remote) + cmd(f"ip addr add {ip_addr}/29 dev {vlan_dev}", host=cfg.remote) + cmd(f"ip link set dev {vlan_dev} up", host=cfg.remote) + defer(cmd, f"ip link del {vlan_dev}", host=cfg.remote) + + +def setup_test_environment(cfg, set_tc_mapping=True): + """ + Sets up the complete test environment including VF creation, VLANs, + bridge configuration and devlink rate setup. + """ + vf_ifc = setup_vf(cfg, set_tc_mapping) + ksft_pr(f"Created VF interface: {vf_ifc}") + + setup_vlans_on_vf(vf_ifc) + + get_vf_info(cfg) + setup_bridge(cfg) + + setup_devlink_rate(cfg) + setup_remote_vlans(cfg) + + +def measure_bandwidth(cfg, server_ip, client_ip, barrier): + """ + Synchronizes with peers and runs an iperf3-based bandwidth measurement + between the given endpoints. Returns average Gbps. + """ + runner = Iperf3Runner(cfg, server_ip=server_ip, client_ip=client_ip) + try: + barrier.wait(timeout=10) + except Exception as exc: + raise KsftFailEx("iperf3 barrier wait timed") from exc + + try: + bw_gbps = runner.measure_bandwidth(reverse=True) + except Exception as exc: + raise KsftFailEx("iperf3 bandwidth measurement failed") from exc + + return bw_gbps + + +def run_bandwidth_test(cfg): + """ + Runs parallel bandwidth measurements for each VLAN/TC pair and collects results. + """ + def _run_measure_bandwidth_thread(local_ip, remote_ip, results, barrier, tc_ix): + results[tc_ix] = measure_bandwidth(cfg, local_ip, remote_ip, barrier) + + vf_vlan_data = [ + # (local_ip, remote_ip, TC) + ("198.51.100.1", "198.51.100.2", 3), + ("198.51.100.9", "198.51.100.10", 4), + ] + + results = {} + threads = [] + start_barrier = threading.Barrier(len(vf_vlan_data)) + + for local_ip, remote_ip, tc_ix in vf_vlan_data: + thread = threading.Thread( + target=_run_measure_bandwidth_thread, + args=(local_ip, remote_ip, results, start_barrier, tc_ix) + ) + thread.start() + threads.append(thread) + + for thread in threads: + thread.join() + + for tc_ix, tc_bw in results.items(): + if tc_bw is None: + raise KsftFailEx("iperf3 failed; cannot evaluate bandwidth") + + return results + + +def calculate_bandwidth_percentages(results): + """ + Calculates the percentage of total bandwidth received by TC3 and TC4. + """ + if 3 not in results or 4 not in results: + raise KsftFailEx(f"Missing expected TC results in {results}") + + tc3_bw = results[3] + tc4_bw = results[4] + total_bw = tc3_bw + tc4_bw + tc3_percentage = (tc3_bw / total_bw) * 100 + tc4_percentage = (tc4_bw / total_bw) * 100 + + return { + 'tc3_bw': tc3_bw, + 'tc4_bw': tc4_bw, + 'tc3_percentage': tc3_percentage, + 'tc4_percentage': tc4_percentage, + 'total_bw': total_bw + } + + +def print_bandwidth_results(bw_data, test_name): + """ + Prints bandwidth measurements and TC usage summary for a given test. + """ + ksft_pr(f"Bandwidth check results {test_name}:") + ksft_pr(f"TC 3: {bw_data['tc3_bw']:.2f} Gbits/sec") + ksft_pr(f"TC 4: {bw_data['tc4_bw']:.2f} Gbits/sec") + ksft_pr(f"Total bandwidth: {bw_data['total_bw']:.2f} Gbits/sec") + ksft_pr(f"TC 3 percentage: {bw_data['tc3_percentage']:.1f}%") + ksft_pr(f"TC 4 percentage: {bw_data['tc4_percentage']:.1f}%") + + +def verify_total_bandwidth(bw_data, validator): + """ + Ensures the total measured bandwidth falls within the acceptable tolerance. + """ + total = bw_data['total_bw'] + + if validator.bound({"total": total}): + return + + low, high = validator.bounds["total"] + + if total < low: + raise KsftSkipEx( + f"Total bandwidth {total:.2f} Gbps < minimum " + f"{low:.2f} Gbps; " + f"parent tx_max ({validator.expected_total:.1f} G) " + f"not reached, cannot validate share" + ) + + raise KsftFailEx( + f"Total bandwidth {total:.2f} Gbps exceeds allowed ceiling " + f"{high:.2f} Gbps " + f"(VF tx_max set to {validator.expected_total:.1f} G)" + ) + + +def run_bandwidth_distribution_test(cfg, set_tc_mapping): + """ + Runs parallel bandwidth measurements for both TCs and collects results. + """ + setup_test_environment(cfg, set_tc_mapping) + bandwidths = run_bandwidth_test(cfg) + bw_data = calculate_bandwidth_percentages(bandwidths) + test_name = "with TC mapping" if set_tc_mapping else "without TC mapping" + print_bandwidth_results(bw_data, test_name) + + verify_total_bandwidth(bw_data, cfg.traffic_bw_validator) + + return cfg.tc_bw_validator.bound({"tc3": bw_data['tc3_percentage'], + "tc4": bw_data['tc4_percentage']}) + + +def test_no_tc_mapping_bandwidth(cfg): + """ + Verifies that bandwidth is not split 20/80 without traffic class mapping. + """ + pass_bw_msg = "Bandwidth is NOT distributed as 20/80 without TC mapping" + fail_bw_msg = "Bandwidth matched 20/80 split without TC mapping" + is_mlx5 = "driver: mlx5" in ethtool(f"-i {cfg.ifname}").stdout + + if run_bandwidth_distribution_test(cfg, set_tc_mapping=False): + if is_mlx5: + raise KsftXfailEx(fail_bw_msg) + raise KsftFailEx(fail_bw_msg) + if is_mlx5: + raise KsftFailEx("mlx5 behavior changed:" + pass_bw_msg) + ksft_pr(pass_bw_msg) + + +def test_tc_mapping_bandwidth(cfg): + """ + Verifies that bandwidth is correctly split 20/80 between TC3 and TC4 + when traffic class mapping is set. + """ + if run_bandwidth_distribution_test(cfg, set_tc_mapping=True): + ksft_pr("Bandwidth is distributed as 20/80 with TC mapping") + else: + raise KsftFailEx("Bandwidth did not match 20/80 split with TC mapping") + + +def main() -> None: + """ + Main entry point for running the test cases. + """ + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + cfg.devnl = DevlinkFamily() + + cfg.pci = os.path.basename( + os.path.realpath(f"/sys/class/net/{cfg.ifname}/device") + ) + if not cfg.pci: + raise KsftSkipEx("Could not get PCI address of the interface") + + cfg.traffic_bw_validator = BandwidthValidator({"total": 1}) + cfg.tc_bw_validator = BandwidthValidator({"tc3": 20, "tc4": 80}) + + cases = [test_no_tc_mapping_bandwidth, test_tc_mapping_bandwidth] + + ksft_run(cases=cases, args=(cfg,)) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/devmem.py b/tools/testing/selftests/drivers/net/hw/devmem.py index 1223f0f5c10c..ee863e90d1e0 100755 --- a/tools/testing/selftests/drivers/net/hw/devmem.py +++ b/tools/testing/selftests/drivers/net/hw/devmem.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 +from os import path from lib.py import ksft_run, ksft_exit from lib.py import ksft_eq, KsftSkipEx from lib.py import NetDrvEpEnv @@ -10,8 +11,7 @@ from lib.py import ksft_disruptive def require_devmem(cfg): if not hasattr(cfg, "_devmem_probed"): - port = rand_port() - probe_command = f"./ncdevmem -f {cfg.ifname}" + probe_command = f"{cfg.bin_local} -f {cfg.ifname}" cfg._devmem_supported = cmd(probe_command, fail=False, shell=True).ret == 0 cfg._devmem_probed = True @@ -21,22 +21,71 @@ def require_devmem(cfg): @ksft_disruptive def check_rx(cfg) -> None: - cfg.require_v6() require_devmem(cfg) port = rand_port() - listen_cmd = f"./ncdevmem -l -f {cfg.ifname} -s {cfg.v6} -p {port}" + socat = f"socat -u - TCP{cfg.addr_ipver}:{cfg.baddr}:{port},bind={cfg.remote_baddr}:{port}" + listen_cmd = f"{cfg.bin_local} -l -f {cfg.ifname} -s {cfg.addr} -p {port} -c {cfg.remote_addr} -v 7" - with bkg(listen_cmd) as socat: + with bkg(listen_cmd, exit_wait=True) as ncdevmem: wait_port_listen(port) - cmd(f"echo -e \"hello\\nworld\"| socat -u - TCP6:[{cfg.v6}]:{port}", host=cfg.remote, shell=True) + cmd(f"yes $(echo -e \x01\x02\x03\x04\x05\x06) | \ + head -c 1K | {socat}", host=cfg.remote, shell=True) + + ksft_eq(ncdevmem.ret, 0) + + +@ksft_disruptive +def check_tx(cfg) -> None: + require_devmem(cfg) + + port = rand_port() + listen_cmd = f"socat -U - TCP{cfg.addr_ipver}-LISTEN:{port}" + + with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as socat: + wait_port_listen(port, host=cfg.remote) + cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_local} -f {cfg.ifname} -s {cfg.remote_addr} -p {port}", shell=True) + + ksft_eq(socat.stdout.strip(), "hello\nworld") + + +@ksft_disruptive +def check_tx_chunks(cfg) -> None: + require_devmem(cfg) + + port = rand_port() + listen_cmd = f"socat -U - TCP{cfg.addr_ipver}-LISTEN:{port}" + + with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as socat: + wait_port_listen(port, host=cfg.remote) + cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_local} -f {cfg.ifname} -s {cfg.remote_addr} -p {port} -z 3", shell=True) ksft_eq(socat.stdout.strip(), "hello\nworld") +def check_rx_hds(cfg) -> None: + """Test HDS splitting across payload sizes.""" + require_devmem(cfg) + + for size in [1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192]: + port = rand_port() + listen_cmd = f"{cfg.bin_local} -L -l -f {cfg.ifname} -s {cfg.addr} -p {port}" + + with bkg(listen_cmd, exit_wait=True) as ncdevmem: + wait_port_listen(port) + cmd(f"dd if=/dev/zero bs={size} count=1 2>/dev/null | " + + f"socat -b {size} -u - TCP{cfg.addr_ipver}:{cfg.baddr}:{port},nodelay", + host=cfg.remote, shell=True) + + ksft_eq(ncdevmem.ret, 0, f"HDS failed for payload size {size}") + + def main() -> None: with NetDrvEpEnv(__file__) as cfg: - ksft_run([check_rx], + cfg.bin_local = path.abspath(path.dirname(__file__) + "/ncdevmem") + cfg.bin_remote = cfg.remote.deploy(cfg.bin_local) + + ksft_run([check_rx, check_tx, check_tx_chunks, check_rx_hds], args=(cfg, )) ksft_exit() diff --git a/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh b/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh index 8f60c1685ad4..2ec19edddfaa 100755 --- a/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh +++ b/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh @@ -1,17 +1,23 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +#shellcheck disable=SC2034 # SC does not see the global variables +#shellcheck disable=SC2317,SC2329 # unused functions ALL_TESTS=" rmon_rx_histogram rmon_tx_histogram " +: "${DRIVER_TEST_CONFORMANT:=yes}" NUM_NETIFS=2 lib_dir=$(dirname "$0") source "$lib_dir"/../../../net/forwarding/lib.sh +source "$lib_dir"/../../../kselftest/ktap_helpers.sh +UINT32_MAX=$((2**32 - 1)) ETH_FCS_LEN=4 ETH_HLEN=$((6+6+2)) +TEST_NAME=$(basename "$0" .sh) declare -A netif_mtu @@ -19,11 +25,14 @@ ensure_mtu() { local iface=$1; shift local len=$1; shift - local current=$(ip -j link show dev $iface | jq -r '.[0].mtu') local required=$((len - ETH_HLEN - ETH_FCS_LEN)) + local current - if [ $current -lt $required ]; then - ip link set dev $iface mtu $required || return 1 + current=$(run_on "$iface" \ + ip -j link show dev "$iface" | jq -r '.[0].mtu') + if [ "$current" -lt "$required" ]; then + run_on "$iface" ip link set dev "$iface" mtu "$required" \ + || return 1 fi } @@ -46,23 +55,24 @@ bucket_test() len=$((len - ETH_FCS_LEN)) len=$((len > 0 ? len : 0)) - before=$(ethtool --json -S $iface --groups rmon | \ + before=$(run_on "$iface" ethtool --json -S "$iface" --groups rmon | \ jq -r ".[0].rmon[\"${set}-pktsNtoM\"][$bucket].val") # Send 10k one way and 20k in the other, to detect counters # mapped to the wrong direction - $MZ $neigh -q -c $num_rx -p $len -a own -b bcast -d 10us - $MZ $iface -q -c $num_tx -p $len -a own -b bcast -d 10us + run_on "$neigh" \ + "$MZ" "$neigh" -q -c "$num_rx" -p "$len" -a own -b bcast -d 10us + run_on "$iface" \ + "$MZ" "$iface" -q -c "$num_tx" -p "$len" -a own -b bcast -d 10us - after=$(ethtool --json -S $iface --groups rmon | \ + after=$(run_on "$iface" ethtool --json -S "$iface" --groups rmon | \ jq -r ".[0].rmon[\"${set}-pktsNtoM\"][$bucket].val") delta=$((after - before)) - expected=$([ $set = rx ] && echo $num_rx || echo $num_tx) + expected=$([ "$set" = rx ] && echo "$num_rx" || echo "$num_tx") - # Allow some extra tolerance for other packets sent by the stack - [ $delta -ge $expected ] && [ $delta -le $((expected + 100)) ] + [ "$delta" -ge "$expected" ] && [ "$delta" -le "$UINT32_MAX" ] } rmon_histogram() @@ -73,43 +83,40 @@ rmon_histogram() local nbuckets=0 local step= - RET=0 - while read -r -a bucket; do - step="$set-pkts${bucket[0]}to${bucket[1]} on $iface" + step="$set-pkts${bucket[0]}to${bucket[1]}" - for if in $iface $neigh; do - if ! ensure_mtu $if ${bucket[0]}; then - log_test_xfail "$if does not support the required MTU for $step" + for if in "$iface" "$neigh"; do + if ! ensure_mtu "$if" "${bucket[0]}"; then + ktap_print_msg "$if does not support the required MTU for $step" + ktap_test_xfail "$TEST_NAME.$step" return fi done - if ! bucket_test $iface $neigh $set $nbuckets ${bucket[0]}; then - check_err 1 "$step failed" + if ! bucket_test "$iface" "$neigh" "$set" "$nbuckets" "${bucket[0]}"; then + ktap_test_fail "$TEST_NAME.$step" return 1 fi - log_test "$step" + ktap_test_pass "$TEST_NAME.$step" nbuckets=$((nbuckets + 1)) - done < <(ethtool --json -S $iface --groups rmon | \ + done < <(run_on "$iface" ethtool --json -S "$iface" --groups rmon | \ jq -r ".[0].rmon[\"${set}-pktsNtoM\"][]|[.low, .high]|@tsv" 2>/dev/null) - if [ $nbuckets -eq 0 ]; then - log_test_xfail "$iface does not support $set histogram counters" + if [ "$nbuckets" -eq 0 ]; then + ktap_print_msg "$iface does not support $set histogram counters" return fi } rmon_rx_histogram() { - rmon_histogram $h1 $h2 rx - rmon_histogram $h2 $h1 rx + rmon_histogram "$h1" "$h2" rx } rmon_tx_histogram() { - rmon_histogram $h1 $h2 tx - rmon_histogram $h2 $h1 tx + rmon_histogram "$h1" "$h2" tx } setup_prepare() @@ -117,9 +124,9 @@ setup_prepare() h1=${NETIFS[p1]} h2=${NETIFS[p2]} - for iface in $h1 $h2; do - netif_mtu[$iface]=$(ip -j link show dev $iface | jq -r '.[0].mtu') - ip link set dev $iface up + for iface in "$h1" "$h2"; do + netif_mtu["$iface"]=$(run_on "$iface" \ + ip -j link show dev "$iface" | jq -r '.[0].mtu') done } @@ -127,19 +134,26 @@ cleanup() { pre_cleanup - for iface in $h2 $h1; do - ip link set dev $iface \ - mtu ${netif_mtu[$iface]} \ - down + # Do not bring down the interfaces, just configure the initial MTU + for iface in "$h2" "$h1"; do + run_on "$iface" ip link set dev "$iface" \ + mtu "${netif_mtu[$iface]}" done } check_ethtool_counter_group_support trap cleanup EXIT +bucket_count=$(ethtool --json -S "${NETIFS[p1]}" --groups rmon | \ + jq -r '.[0].rmon | + "\((."rx-pktsNtoM" | length) + + (."tx-pktsNtoM" | length))"') +ktap_print_header +ktap_set_plan "$bucket_count" + setup_prepare setup_wait tests_run -exit $EXIT_STATUS +ktap_finished diff --git a/tools/testing/selftests/drivers/net/hw/ethtool_std_stats.sh b/tools/testing/selftests/drivers/net/hw/ethtool_std_stats.sh new file mode 100755 index 000000000000..c085d2a4c989 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/ethtool_std_stats.sh @@ -0,0 +1,206 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +#shellcheck disable=SC2034 # SC does not see the global variables +#shellcheck disable=SC2317,SC2329 # unused functions + +ALL_TESTS=" + test_eth_ctrl_stats + test_eth_mac_stats + test_pause_stats +" +: "${DRIVER_TEST_CONFORMANT:=yes}" +STABLE_MAC_ADDRS=yes +NUM_NETIFS=2 +lib_dir=$(dirname "$0") +# shellcheck source=./../../../net/forwarding/lib.sh +source "$lib_dir"/../../../net/forwarding/lib.sh +# shellcheck source=./../../../kselftest/ktap_helpers.sh +source "$lib_dir"/../../../kselftest/ktap_helpers.sh + +UINT32_MAX=$((2**32 - 1)) +SUBTESTS=0 +TEST_NAME=$(basename "$0" .sh) + +traffic_test() +{ + local iface=$1; shift + local neigh=$1; shift + local num_tx=$1; shift + local pkt_format="$1"; shift + local -a counters=("$@") + local int grp cnt target exact_check + local before after delta + local num_rx=$((num_tx * 2)) + local xfail_message + local src="aggregate" + local i + + for i in "${!counters[@]}"; do + read -r int grp cnt target exact_check xfail_message \ + <<< "${counters[$i]}" + + before[i]=$(ethtool_std_stats_get "$int" "$grp" "$cnt" "$src") + done + + # shellcheck disable=SC2086 # needs split options + run_on "$iface" "$MZ" "$iface" -q -c "$num_tx" $pkt_format + + # shellcheck disable=SC2086 # needs split options + run_on "$neigh" "$MZ" "$neigh" -q -c "$num_rx" $pkt_format + + for i in "${!counters[@]}"; do + read -r int grp cnt target exact_check xfail_message \ + <<< "${counters[$i]}" + + after[i]=$(ethtool_std_stats_get "$int" "$grp" "$cnt" "$src") + if [[ "${after[$i]}" == "null" ]]; then + ktap_test_skip "$TEST_NAME.$grp-$cnt" + continue; + fi + + delta=$((after[i] - before[i])) + + if [ "$exact_check" -ne 0 ]; then + [ "$delta" -eq "$target" ] + else + [ "$delta" -ge "$target" ] && \ + [ "$delta" -le "$UINT32_MAX" ] + fi + err="$?" + + if [[ $err != 0 ]] && [[ -n $xfail_message ]]; then + ktap_print_msg "$xfail_message" + ktap_test_xfail "$TEST_NAME.$grp-$cnt" + continue; + fi + + if [[ $err != 0 ]]; then + ktap_print_msg "$grp-$cnt is not valid on $int (expected $target, got $delta)" + ktap_test_fail "$TEST_NAME.$grp-$cnt" + else + ktap_test_pass "$TEST_NAME.$grp-$cnt" + fi + done +} + +test_eth_ctrl_stats() +{ + local pkt_format="-a own -b bcast 88:08 -p 64" + local num_pkts=1000 + local -a counters + + counters=("$h1 eth-ctrl MACControlFramesTransmitted $num_pkts 0") + traffic_test "$h1" "$h2" "$num_pkts" "$pkt_format" \ + "${counters[@]}" + + counters=("$h1 eth-ctrl MACControlFramesReceived $num_pkts 0") + traffic_test "$h2" "$h1" "$num_pkts" "$pkt_format" \ + "${counters[@]}" +} +SUBTESTS=$((SUBTESTS + 2)) + +test_eth_mac_stats() +{ + local pkt_size=100 + local pkt_size_fcs=$((pkt_size + 4)) + local bcast_pkt_format="-a own -b bcast -p $pkt_size" + local mcast_pkt_format="-a own -b 01:00:5E:00:00:01 -p $pkt_size" + local num_pkts=2000 + local octets=$((pkt_size_fcs * num_pkts)) + local -a counters error_cnt collision_cnt + + # Error counters should be exactly zero + counters=("$h1 eth-mac FrameCheckSequenceErrors 0 1" + "$h1 eth-mac AlignmentErrors 0 1" + "$h1 eth-mac FramesLostDueToIntMACXmitError 0 1" + "$h1 eth-mac CarrierSenseErrors 0 1" + "$h1 eth-mac FramesLostDueToIntMACRcvError 0 1" + "$h1 eth-mac InRangeLengthErrors 0 1" + "$h1 eth-mac OutOfRangeLengthField 0 1" + "$h1 eth-mac FrameTooLongErrors 0 1" + "$h1 eth-mac FramesAbortedDueToXSColls 0 1") + traffic_test "$h1" "$h2" "$num_pkts" "$bcast_pkt_format" \ + "${counters[@]}" + + # Collision related counters should also be zero + counters=("$h1 eth-mac SingleCollisionFrames 0 1" + "$h1 eth-mac MultipleCollisionFrames 0 1" + "$h1 eth-mac FramesWithDeferredXmissions 0 1" + "$h1 eth-mac LateCollisions 0 1" + "$h1 eth-mac FramesWithExcessiveDeferral 0 1") + traffic_test "$h1" "$h2" "$num_pkts" "$bcast_pkt_format" \ + "${counters[@]}" + + counters=("$h1 eth-mac BroadcastFramesXmittedOK $num_pkts 0" + "$h1 eth-mac OctetsTransmittedOK $octets 0") + traffic_test "$h1" "$h2" "$num_pkts" "$bcast_pkt_format" \ + "${counters[@]}" + + counters=("$h1 eth-mac BroadcastFramesReceivedOK $num_pkts 0" + "$h1 eth-mac OctetsReceivedOK $octets 0") + traffic_test "$h2" "$h1" "$num_pkts" "$bcast_pkt_format" \ + "${counters[@]}" + + counters=("$h1 eth-mac FramesTransmittedOK $num_pkts 0" + "$h1 eth-mac MulticastFramesXmittedOK $num_pkts 0") + traffic_test "$h1" "$h2" "$num_pkts" "$mcast_pkt_format" \ + "${counters[@]}" + + counters=("$h1 eth-mac FramesReceivedOK $num_pkts 0" + "$h1 eth-mac MulticastFramesReceivedOK $num_pkts 0") + traffic_test "$h2" "$h1" "$num_pkts" "$mcast_pkt_format" \ + "${counters[@]}" +} +SUBTESTS=$((SUBTESTS + 22)) + +test_pause_stats() +{ + local pkt_format="-a own -b 01:80:c2:00:00:01 88:08:00:01:00:01" + local xfail_message="software sent pause frames not detected" + local num_pkts=2000 + local -a counters + local int + local i + + # Check that there is pause frame support + for ((i = 1; i <= NUM_NETIFS; ++i)); do + int="${NETIFS[p$i]}" + if ! run_on "$int" ethtool -I --json -a "$int" > /dev/null 2>&1; then + ktap_test_skip "$TEST_NAME.tx_pause_frames" + ktap_test_skip "$TEST_NAME.rx_pause_frames" + return + fi + done + + counters=("$h1 pause tx_pause_frames $num_pkts 0 $xfail_message") + traffic_test "$h1" "$h2" "$num_pkts" "$pkt_format" \ + "${counters[@]}" + + counters=("$h1 pause rx_pause_frames $num_pkts 0") + traffic_test "$h2" "$h1" "$num_pkts" "$pkt_format" \ + "${counters[@]}" +} +SUBTESTS=$((SUBTESTS + 2)) + +setup_prepare() +{ + local iface + + h1=${NETIFS[p1]} + h2=${NETIFS[p2]} + + h2_mac=$(mac_get "$h2") +} + +ktap_print_header +ktap_set_plan $SUBTESTS + +check_ethtool_counter_group_support +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +ktap_finished diff --git a/tools/testing/selftests/drivers/net/hw/gro_hw.py b/tools/testing/selftests/drivers/net/hw/gro_hw.py new file mode 100755 index 000000000000..10e08b22ee0e --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/gro_hw.py @@ -0,0 +1,294 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +HW GRO tests focusing on device machinery like stats, rather than protocol +processing. +""" + +import glob +import re + +from lib.py import ksft_run, ksft_exit, ksft_pr +from lib.py import ksft_eq, ksft_ge, ksft_variants +from lib.py import NetDrvEpEnv, NetdevFamily +from lib.py import KsftSkipEx +from lib.py import bkg, cmd, defer, ethtool, ip + + +# gro.c uses hardcoded DPORT=8000 +GRO_DPORT = 8000 + + +def _get_queue_stats(cfg, queue_id): + """Get stats for a specific Rx queue.""" + cfg.wait_hw_stats_settle() + data = cfg.netnl.qstats_get({"ifindex": cfg.ifindex, "scope": ["queue"]}, + dump=True) + for q in data: + if q.get('queue-type') == 'rx' and q.get('queue-id') == queue_id: + return q + return {} + + +def _resolve_dmac(cfg, ipver): + """Find the destination MAC address for sending packets.""" + attr = "dmac" + ipver + if hasattr(cfg, attr): + return getattr(cfg, attr) + + route = ip(f"-{ipver} route get {cfg.addr_v[ipver]}", + json=True, host=cfg.remote)[0] + gw = route.get("gateway") + if not gw: + setattr(cfg, attr, cfg.dev['address']) + return getattr(cfg, attr) + + cmd(f"ping -c1 -W0 -I{cfg.remote_ifname} {gw}", host=cfg.remote) + neigh = ip(f"neigh get {gw} dev {cfg.remote_ifname}", + json=True, host=cfg.remote)[0] + setattr(cfg, attr, neigh['lladdr']) + return getattr(cfg, attr) + + +def _setup_isolated_queue(cfg): + """Set up an isolated queue for testing using ntuple filter. + + Remove queue 1 from the default RSS context and steer test traffic to it. + """ + test_queue = 1 + + qcnt = len(glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*")) + if qcnt < 2: + raise KsftSkipEx(f"Need at least 2 queues, have {qcnt}") + + # Remove queue 1 from default RSS context by setting its weight to 0 + weights = ["1"] * qcnt + weights[test_queue] = "0" + ethtool(f"-X {cfg.ifname} weight " + " ".join(weights)) + defer(ethtool, f"-X {cfg.ifname} default") + + # Set up ntuple filter to steer our test traffic to the isolated queue + flow = f"flow-type tcp{cfg.addr_ipver} " + flow += f"dst-ip {cfg.addr} dst-port {GRO_DPORT} action {test_queue}" + output = ethtool(f"-N {cfg.ifname} {flow}").stdout + ntuple_id = int(output.split()[-1]) + defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}") + + return test_queue + + +def _run_gro_test(cfg, test_name, num_flows=None, ignore_fail=False, + order_check=False): + """Run gro binary with given test and return output.""" + if not hasattr(cfg, "bin_remote"): + cfg.bin_local = cfg.net_lib_dir / "gro" + cfg.bin_remote = cfg.remote.deploy(cfg.bin_local) + + ipver = cfg.addr_ipver + protocol = f"--ipv{ipver}" + dmac = _resolve_dmac(cfg, ipver) + + base_args = [ + protocol, + f"--dmac {dmac}", + f"--smac {cfg.remote_dev['address']}", + f"--daddr {cfg.addr}", + f"--saddr {cfg.remote_addr_v[ipver]}", + f"--test {test_name}", + ] + if num_flows: + base_args.append(f"--num-flows {num_flows}") + if order_check: + base_args.append("--order-check") + + args = " ".join(base_args) + + rx_cmd = f"{cfg.bin_local} {args} --rx --iface {cfg.ifname}" + tx_cmd = f"{cfg.bin_remote} {args} --iface {cfg.remote_ifname}" + + with bkg(rx_cmd, ksft_ready=True, exit_wait=True, fail=False) as rx_proc: + cmd(tx_cmd, host=cfg.remote) + + if not ignore_fail: + ksft_eq(rx_proc.ret, 0) + if rx_proc.ret != 0: + ksft_pr(rx_proc) + + return rx_proc.stdout + + +def _require_hw_gro_stats(cfg, queue_id): + """Check if device reports HW GRO stats for the queue.""" + stats = _get_queue_stats(cfg, queue_id) + required = ['rx-packets', 'rx-hw-gro-packets', 'rx-hw-gro-wire-packets'] + for stat in required: + if stat not in stats: + raise KsftSkipEx(f"Driver does not report '{stat}' via qstats") + + +def _set_ethtool_feat(cfg, current, feats): + """Set ethtool features with defer to restore original state.""" + s2n = {True: "on", False: "off"} + + new = ["-K", cfg.ifname] + old = ["-K", cfg.ifname] + no_change = True + for name, state in feats.items(): + new += [name, s2n[state]] + old += [name, s2n[current[name]["active"]]] + + if current[name]["active"] != state: + no_change = False + if current[name]["fixed"]: + raise KsftSkipEx(f"Device does not support {name}") + if no_change: + return + + eth_cmd = ethtool(" ".join(new)) + defer(ethtool, " ".join(old)) + + # If ethtool printed something kernel must have modified some features + if eth_cmd.stdout: + ksft_pr(eth_cmd) + + +def _setup_hw_gro(cfg): + """Enable HW GRO on the device, disabling SW GRO.""" + feat = ethtool(f"-k {cfg.ifname}", json=True)[0] + + # Try to disable SW GRO and enable HW GRO + _set_ethtool_feat(cfg, feat, + {"generic-receive-offload": False, + "rx-gro-hw": True, + "large-receive-offload": False}) + + # Some NICs treat HW GRO as a GRO sub-feature so disabling GRO + # will also clear HW GRO. Use a hack of installing XDP generic + # to skip SW GRO, even when enabled. + feat = ethtool(f"-k {cfg.ifname}", json=True)[0] + if not feat["rx-gro-hw"]["active"]: + ksft_pr("Driver clears HW GRO when SW GRO is cleared, using generic XDP workaround") + prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" + ip(f"link set dev {cfg.ifname} xdpgeneric obj {prog} sec xdp") + defer(ip, f"link set dev {cfg.ifname} xdpgeneric off") + + # Attaching XDP may change features, fetch the latest state + feat = ethtool(f"-k {cfg.ifname}", json=True)[0] + + _set_ethtool_feat(cfg, feat, + {"generic-receive-offload": True, + "rx-gro-hw": True, + "large-receive-offload": False}) + + +def _check_gro_stats(cfg, test_queue, stats_before, + expect_rx, expect_gro, expect_wire): + """Validate GRO stats against expected values.""" + stats_after = _get_queue_stats(cfg, test_queue) + + rx_delta = (stats_after.get('rx-packets', 0) - + stats_before.get('rx-packets', 0)) + gro_delta = (stats_after.get('rx-hw-gro-packets', 0) - + stats_before.get('rx-hw-gro-packets', 0)) + wire_delta = (stats_after.get('rx-hw-gro-wire-packets', 0) - + stats_before.get('rx-hw-gro-wire-packets', 0)) + + ksft_eq(rx_delta, expect_rx, comment="rx-packets") + ksft_eq(gro_delta, expect_gro, comment="rx-hw-gro-packets") + ksft_eq(wire_delta, expect_wire, comment="rx-hw-gro-wire-packets") + + +def test_gro_stats_single(cfg): + """ + Test that a single packet doesn't affect GRO stats. + + Send a single packet that cannot be coalesced (nothing to coalesce with). + GRO stats should not increase since no coalescing occurred. + rx-packets should increase by 2 (1 data + 1 FIN). + """ + _setup_hw_gro(cfg) + + test_queue = _setup_isolated_queue(cfg) + _require_hw_gro_stats(cfg, test_queue) + + stats_before = _get_queue_stats(cfg, test_queue) + + _run_gro_test(cfg, "single") + + # 1 data + 1 FIN = 2 rx-packets, no coalescing + _check_gro_stats(cfg, test_queue, stats_before, + expect_rx=2, expect_gro=0, expect_wire=0) + + +def test_gro_stats_full(cfg): + """ + Test GRO stats when overwhelming HW GRO capacity. + + Send 500 flows to exceed HW GRO flow capacity on a single queue. + This should result in some packets not being coalesced. + Validate that qstats match what gro.c observed. + """ + _setup_hw_gro(cfg) + + test_queue = _setup_isolated_queue(cfg) + _require_hw_gro_stats(cfg, test_queue) + + num_flows = 500 + stats_before = _get_queue_stats(cfg, test_queue) + + # Run capacity test - will likely fail because not all packets coalesce + output = _run_gro_test(cfg, "capacity", num_flows=num_flows, + ignore_fail=True) + + # Parse gro.c output: "STATS: received=X wire=Y coalesced=Z" + match = re.search(r'STATS: received=(\d+) wire=(\d+) coalesced=(\d+)', + output) + if not match: + raise KsftSkipEx(f"Could not parse gro.c output: {output}") + + rx_frames = int(match.group(2)) + gro_coalesced = int(match.group(3)) + + ksft_ge(gro_coalesced, 1, + comment="At least some packets should coalesce") + + # received + 1 FIN, coalesced super-packets, coalesced * 2 wire packets + _check_gro_stats(cfg, test_queue, stats_before, + expect_rx=rx_frames + 1, + expect_gro=gro_coalesced, + expect_wire=gro_coalesced * 2) + + +@ksft_variants([4, 32, 512]) +def test_gro_order(cfg, num_flows): + """ + Test that HW GRO preserves packet ordering between flows. + + Packets may get delayed until the aggregate is released, + but reordering between aggregates and packet terminating + the aggregate and normal packets should not happen. + + Note that this test is stricter than truly required. + Reordering packets between flows should not cause issues. + This test will also fail if traffic is run over an ECMP fabric. + """ + _setup_hw_gro(cfg) + _setup_isolated_queue(cfg) + + _run_gro_test(cfg, "capacity", num_flows=num_flows, order_check=True) + + +def main() -> None: + """ Ksft boiler plate main """ + + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + cfg.netnl = NetdevFamily() + ksft_run([test_gro_stats_single, + test_gro_stats_full, + test_gro_order], args=(cfg,)) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.c b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c new file mode 100644 index 000000000000..240d13dbc54e --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c @@ -0,0 +1,512 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <assert.h> +#include <errno.h> +#include <error.h> +#include <fcntl.h> +#include <limits.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <arpa/inet.h> +#include <linux/mman.h> +#include <linux/errqueue.h> +#include <linux/if_packet.h> +#include <linux/ipv6.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <net/ethernet.h> +#include <net/if.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/tcp.h> +#include <netinet/udp.h> +#include <sys/epoll.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/resource.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/un.h> +#include <sys/wait.h> + +#include <liburing.h> + +#define SKIP_CODE 42 + +struct t_io_uring_zcrx_ifq_reg { + __u32 if_idx; + __u32 if_rxq; + __u32 rq_entries; + __u32 flags; + + __u64 area_ptr; /* pointer to struct io_uring_zcrx_area_reg */ + __u64 region_ptr; /* struct io_uring_region_desc * */ + + struct io_uring_zcrx_offsets offsets; + __u32 zcrx_id; + __u32 rx_buf_len; + __u64 __resv[3]; +}; + +static long page_size; +#define AREA_SIZE (8192 * page_size) +#define SEND_SIZE (512 * 4096) +#define min(a, b) \ + ({ \ + typeof(a) _a = (a); \ + typeof(b) _b = (b); \ + _a < _b ? _a : _b; \ + }) +#define min_t(t, a, b) \ + ({ \ + t _ta = (a); \ + t _tb = (b); \ + min(_ta, _tb); \ + }) + +#define ALIGN_UP(v, align) (((v) + (align) - 1) & ~((align) - 1)) + +static int cfg_server; +static int cfg_client; +static int cfg_port = 8000; +static int cfg_payload_len; +static const char *cfg_ifname; +static int cfg_queue_id = -1; +static bool cfg_oneshot; +static int cfg_oneshot_recvs; +static int cfg_send_size = SEND_SIZE; +static struct sockaddr_in6 cfg_addr; +static unsigned int cfg_rx_buf_len; +static bool cfg_dry_run; + +static char *payload; +static void *area_ptr; +static void *ring_ptr; +static size_t ring_size; +static struct io_uring_zcrx_rq rq_ring; +static unsigned long area_token; +static int connfd; +static bool stop; +static size_t received; + +static unsigned long gettimeofday_ms(void) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000) + (tv.tv_usec / 1000); +} + +static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6) +{ + int ret; + + sin6->sin6_family = AF_INET6; + sin6->sin6_port = htons(port); + + ret = inet_pton(sin6->sin6_family, str, &sin6->sin6_addr); + if (ret != 1) { + /* fallback to plain IPv4 */ + ret = inet_pton(AF_INET, str, &sin6->sin6_addr.s6_addr32[3]); + if (ret != 1) + return -1; + + /* add ::ffff prefix */ + sin6->sin6_addr.s6_addr32[0] = 0; + sin6->sin6_addr.s6_addr32[1] = 0; + sin6->sin6_addr.s6_addr16[4] = 0; + sin6->sin6_addr.s6_addr16[5] = 0xffff; + } + + return 0; +} + +static inline size_t get_refill_ring_size(unsigned int rq_entries) +{ + size_t size; + + ring_size = rq_entries * sizeof(struct io_uring_zcrx_rqe); + /* add space for the header (head/tail/etc.) */ + ring_size += page_size; + return ALIGN_UP(ring_size, page_size); +} + +static void setup_zcrx(struct io_uring *ring) +{ + unsigned int ifindex; + unsigned int rq_entries = 4096; + int ret; + + ifindex = if_nametoindex(cfg_ifname); + if (!ifindex) + error(1, 0, "bad interface name: %s", cfg_ifname); + + if (cfg_rx_buf_len && cfg_rx_buf_len != page_size) { + area_ptr = mmap(NULL, + AREA_SIZE, + PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE | + MAP_HUGETLB | MAP_HUGE_2MB, + -1, + 0); + if (area_ptr == MAP_FAILED) { + printf("Can't allocate huge pages\n"); + exit(SKIP_CODE); + } + } else { + area_ptr = mmap(NULL, + AREA_SIZE, + PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, + 0, + 0); + if (area_ptr == MAP_FAILED) + error(1, 0, "mmap(): zero copy area"); + } + + ring_size = get_refill_ring_size(rq_entries); + ring_ptr = mmap(NULL, + ring_size, + PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, + 0, + 0); + + struct io_uring_region_desc region_reg = { + .size = ring_size, + .user_addr = (__u64)(unsigned long)ring_ptr, + .flags = IORING_MEM_REGION_TYPE_USER, + }; + + struct io_uring_zcrx_area_reg area_reg = { + .addr = (__u64)(unsigned long)area_ptr, + .len = AREA_SIZE, + .flags = 0, + }; + + struct t_io_uring_zcrx_ifq_reg reg = { + .if_idx = ifindex, + .if_rxq = cfg_queue_id, + .rq_entries = rq_entries, + .area_ptr = (__u64)(unsigned long)&area_reg, + .region_ptr = (__u64)(unsigned long)®ion_reg, + .rx_buf_len = cfg_rx_buf_len, + }; + + ret = io_uring_register_ifq(ring, (void *)®); + if (cfg_rx_buf_len && (ret == -EINVAL || ret == -EOPNOTSUPP || + ret == -ERANGE)) { + printf("Large chunks are not supported %i\n", ret); + exit(SKIP_CODE); + } else if (ret) { + error(1, 0, "io_uring_register_ifq(): %d", ret); + } + + rq_ring.khead = (unsigned int *)((char *)ring_ptr + reg.offsets.head); + rq_ring.ktail = (unsigned int *)((char *)ring_ptr + reg.offsets.tail); + rq_ring.rqes = (struct io_uring_zcrx_rqe *)((char *)ring_ptr + reg.offsets.rqes); + rq_ring.rq_tail = 0; + rq_ring.ring_entries = reg.rq_entries; + + area_token = area_reg.rq_area_token; +} + +static void add_accept(struct io_uring *ring, int sockfd) +{ + struct io_uring_sqe *sqe; + + sqe = io_uring_get_sqe(ring); + + io_uring_prep_accept(sqe, sockfd, NULL, NULL, 0); + sqe->user_data = 1; +} + +static void add_recvzc(struct io_uring *ring, int sockfd) +{ + struct io_uring_sqe *sqe; + + sqe = io_uring_get_sqe(ring); + + io_uring_prep_rw(IORING_OP_RECV_ZC, sqe, sockfd, NULL, 0, 0); + sqe->ioprio |= IORING_RECV_MULTISHOT; + sqe->user_data = 2; +} + +static void add_recvzc_oneshot(struct io_uring *ring, int sockfd, size_t len) +{ + struct io_uring_sqe *sqe; + + sqe = io_uring_get_sqe(ring); + + io_uring_prep_rw(IORING_OP_RECV_ZC, sqe, sockfd, NULL, len, 0); + sqe->ioprio |= IORING_RECV_MULTISHOT; + sqe->user_data = 2; +} + +static void process_accept(struct io_uring *ring, struct io_uring_cqe *cqe) +{ + if (cqe->res < 0) + error(1, 0, "accept()"); + if (connfd) + error(1, 0, "Unexpected second connection"); + + connfd = cqe->res; + if (cfg_oneshot) + add_recvzc_oneshot(ring, connfd, page_size); + else + add_recvzc(ring, connfd); +} + +static void process_recvzc(struct io_uring *ring, struct io_uring_cqe *cqe) +{ + unsigned rq_mask = rq_ring.ring_entries - 1; + struct io_uring_zcrx_cqe *rcqe; + struct io_uring_zcrx_rqe *rqe; + struct io_uring_sqe *sqe; + uint64_t mask; + char *data; + ssize_t n; + int i; + + if (cqe->res == 0 && cqe->flags == 0 && cfg_oneshot_recvs == 0) { + stop = true; + return; + } + + if (cqe->res < 0) + error(1, 0, "recvzc(): %d", cqe->res); + + if (cfg_oneshot) { + if (cqe->res == 0 && cqe->flags == 0 && cfg_oneshot_recvs) { + add_recvzc_oneshot(ring, connfd, page_size); + cfg_oneshot_recvs--; + } + } else if (!(cqe->flags & IORING_CQE_F_MORE)) { + add_recvzc(ring, connfd); + } + + rcqe = (struct io_uring_zcrx_cqe *)(cqe + 1); + + n = cqe->res; + mask = (1ULL << IORING_ZCRX_AREA_SHIFT) - 1; + data = (char *)area_ptr + (rcqe->off & mask); + + for (i = 0; i < n; i++) { + if (*(data + i) != payload[(received + i)]) + error(1, 0, "payload mismatch at %d", i); + } + received += n; + + rqe = &rq_ring.rqes[(rq_ring.rq_tail & rq_mask)]; + rqe->off = (rcqe->off & ~IORING_ZCRX_AREA_MASK) | area_token; + rqe->len = cqe->res; + io_uring_smp_store_release(rq_ring.ktail, ++rq_ring.rq_tail); +} + +static void server_loop(struct io_uring *ring) +{ + struct io_uring_cqe *cqe; + unsigned int count = 0; + unsigned int head; + int i, ret; + + io_uring_submit_and_wait(ring, 1); + + io_uring_for_each_cqe(ring, head, cqe) { + if (cqe->user_data == 1) + process_accept(ring, cqe); + else if (cqe->user_data == 2) + process_recvzc(ring, cqe); + else + error(1, 0, "unknown cqe"); + count++; + } + io_uring_cq_advance(ring, count); +} + +static void run_server(void) +{ + unsigned int flags = 0; + struct io_uring ring; + int fd, enable, ret; + uint64_t tstop; + + fd = socket(AF_INET6, SOCK_STREAM, 0); + if (fd == -1) + error(1, 0, "socket()"); + + enable = 1; + ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(int)); + if (ret < 0) + error(1, 0, "setsockopt(SO_REUSEADDR)"); + + ret = bind(fd, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr)); + if (ret < 0) + error(1, 0, "bind()"); + + if (listen(fd, 1024) < 0) + error(1, 0, "listen()"); + + flags |= IORING_SETUP_COOP_TASKRUN; + flags |= IORING_SETUP_SINGLE_ISSUER; + flags |= IORING_SETUP_DEFER_TASKRUN; + flags |= IORING_SETUP_SUBMIT_ALL; + flags |= IORING_SETUP_CQE32; + + io_uring_queue_init(512, &ring, flags); + + setup_zcrx(&ring); + if (cfg_dry_run) + return; + + add_accept(&ring, fd); + + tstop = gettimeofday_ms() + 5000; + while (!stop && gettimeofday_ms() < tstop) + server_loop(&ring); + + if (!stop) + error(1, 0, "test failed\n"); +} + +static void run_client(void) +{ + ssize_t to_send = cfg_send_size; + ssize_t sent = 0; + ssize_t chunk, res; + int fd; + + fd = socket(AF_INET6, SOCK_STREAM, 0); + if (fd == -1) + error(1, 0, "socket()"); + + if (connect(fd, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr))) + error(1, 0, "connect()"); + + while (to_send) { + void *src = &payload[sent]; + + chunk = min_t(ssize_t, cfg_payload_len, to_send); + res = send(fd, src, chunk, 0); + if (res < 0) + error(1, 0, "send(): %zd", sent); + sent += res; + to_send -= res; + } + + close(fd); +} + +static void usage(const char *filepath) +{ + error(1, 0, "Usage: %s (-4|-6) (-s|-c) -h<server_ip> -p<port> " + "-l<payload_size> -i<ifname> -q<rxq_id>", filepath); +} + +static void parse_opts(int argc, char **argv) +{ + const int max_payload_len = SEND_SIZE - + sizeof(struct ipv6hdr) - + sizeof(struct tcphdr) - + 40 /* max tcp options */; + struct sockaddr_in6 *addr6 = (void *) &cfg_addr; + char *addr = NULL; + int ret; + int c; + + if (argc <= 1) + usage(argv[0]); + cfg_payload_len = max_payload_len; + + while ((c = getopt(argc, argv, "sch:p:l:i:q:o:z:x:d")) != -1) { + switch (c) { + case 's': + if (cfg_client) + error(1, 0, "Pass one of -s or -c"); + cfg_server = 1; + break; + case 'c': + if (cfg_server) + error(1, 0, "Pass one of -s or -c"); + cfg_client = 1; + break; + case 'h': + addr = optarg; + break; + case 'p': + cfg_port = strtoul(optarg, NULL, 0); + break; + case 'l': + cfg_payload_len = strtoul(optarg, NULL, 0); + break; + case 'i': + cfg_ifname = optarg; + break; + case 'q': + cfg_queue_id = strtoul(optarg, NULL, 0); + break; + case 'o': { + cfg_oneshot = true; + cfg_oneshot_recvs = strtoul(optarg, NULL, 0); + break; + } + case 'z': + cfg_send_size = strtoul(optarg, NULL, 0); + break; + case 'x': + cfg_rx_buf_len = page_size * strtoul(optarg, NULL, 0); + break; + case 'd': + cfg_dry_run = true; + break; + } + } + + if (cfg_server && addr) + error(1, 0, "Receiver cannot have -h specified"); + + memset(addr6, 0, sizeof(*addr6)); + addr6->sin6_family = AF_INET6; + addr6->sin6_port = htons(cfg_port); + addr6->sin6_addr = in6addr_any; + if (addr) { + ret = parse_address(addr, cfg_port, addr6); + if (ret) + error(1, 0, "receiver address parse error: %s", addr); + } + + if (cfg_payload_len > max_payload_len) + error(1, 0, "-l: payload exceeds max (%d)", max_payload_len); +} + +int main(int argc, char **argv) +{ + const char *cfg_test = argv[argc - 1]; + int i; + + page_size = sysconf(_SC_PAGESIZE); + if (page_size < 0) + return 1; + + if (posix_memalign((void **)&payload, page_size, SEND_SIZE)) + return 1; + + parse_opts(argc, argv); + + for (i = 0; i < SEND_SIZE; i++) + payload[i] = 'a' + (i % 26); + + if (cfg_server) + run_server(); + else if (cfg_client) + run_client(); + + return 0; +} diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py new file mode 100755 index 000000000000..e81724cb5542 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py @@ -0,0 +1,174 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import re +import time +from os import path +from lib.py import ksft_run, ksft_exit, KsftSkipEx, ksft_variants, KsftNamedVariant +from lib.py import NetDrvEpEnv +from lib.py import bkg, cmd, defer, ethtool, rand_port, wait_port_listen +from lib.py import EthtoolFamily, NetdevFamily + +SKIP_CODE = 42 + + +def mp_clear_wait(cfg): + """Wait for io_uring memory providers to clear from all device queues.""" + deadline = time.time() + 5 + while time.time() < deadline: + queues = cfg.netnl.queue_get({'ifindex': cfg.ifindex}, dump=True) + if not any('io-uring' in q for q in queues): + return + time.sleep(0.1) + raise TimeoutError("Timed out waiting for memory provider to clear") + + +def create_rss_ctx(cfg): + output = ethtool(f"-X {cfg.ifname} context new start {cfg.target} equal 1").stdout + values = re.search(r'New RSS context is (\d+)', output).group(1) + return int(values) + + +def set_flow_rule(cfg): + output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {cfg.port} action {cfg.target}").stdout + values = re.search(r'ID (\d+)', output).group(1) + return int(values) + + +def set_flow_rule_rss(cfg, rss_ctx_id): + output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {cfg.port} context {rss_ctx_id}").stdout + values = re.search(r'ID (\d+)', output).group(1) + return int(values) + + +def single(cfg): + channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}}) + channels = channels['combined-count'] + if channels < 2: + raise KsftSkipEx('Test requires NETIF with at least 2 combined channels') + + rings = cfg.ethnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + rx_rings = rings['rx'] + hds_thresh = rings.get('hds-thresh', 0) + + cfg.ethnl.rings_set({'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'enabled', + 'hds-thresh': 0, + 'rx': 64}) + defer(cfg.ethnl.rings_set, {'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'unknown', + 'hds-thresh': hds_thresh, + 'rx': rx_rings}) + defer(mp_clear_wait, cfg) + + cfg.target = channels - 1 + ethtool(f"-X {cfg.ifname} equal {cfg.target}") + defer(ethtool, f"-X {cfg.ifname} default") + + flow_rule_id = set_flow_rule(cfg) + defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}") + + +def rss(cfg): + channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}}) + channels = channels['combined-count'] + if channels < 2: + raise KsftSkipEx('Test requires NETIF with at least 2 combined channels') + + rings = cfg.ethnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + rx_rings = rings['rx'] + hds_thresh = rings.get('hds-thresh', 0) + + cfg.ethnl.rings_set({'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'enabled', + 'hds-thresh': 0, + 'rx': 64}) + defer(cfg.ethnl.rings_set, {'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'unknown', + 'hds-thresh': hds_thresh, + 'rx': rx_rings}) + defer(mp_clear_wait, cfg) + + cfg.target = channels - 1 + ethtool(f"-X {cfg.ifname} equal {cfg.target}") + defer(ethtool, f"-X {cfg.ifname} default") + + rss_ctx_id = create_rss_ctx(cfg) + defer(ethtool, f"-X {cfg.ifname} delete context {rss_ctx_id}") + + flow_rule_id = set_flow_rule_rss(cfg, rss_ctx_id) + defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}") + + +@ksft_variants([ + KsftNamedVariant("single", single), + KsftNamedVariant("rss", rss), +]) +def test_zcrx(cfg, setup) -> None: + cfg.require_ipver('6') + + setup(cfg) + rx_cmd = f"{cfg.bin_local} -s -p {cfg.port} -i {cfg.ifname} -q {cfg.target}" + tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {cfg.port} -l 12840" + with bkg(rx_cmd, exit_wait=True): + wait_port_listen(cfg.port, proto="tcp") + cmd(tx_cmd, host=cfg.remote) + + +@ksft_variants([ + KsftNamedVariant("single", single), + KsftNamedVariant("rss", rss), +]) +def test_zcrx_oneshot(cfg, setup) -> None: + cfg.require_ipver('6') + + setup(cfg) + rx_cmd = f"{cfg.bin_local} -s -p {cfg.port} -i {cfg.ifname} -q {cfg.target} -o 4" + tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {cfg.port} -l 4096 -z 16384" + with bkg(rx_cmd, exit_wait=True): + wait_port_listen(cfg.port, proto="tcp") + cmd(tx_cmd, host=cfg.remote) + + +def test_zcrx_large_chunks(cfg) -> None: + """Test zcrx with large buffer chunks.""" + + cfg.require_ipver('6') + + hp_file = "/proc/sys/vm/nr_hugepages" + with open(hp_file, 'r+', encoding='utf-8') as f: + nr_hugepages = int(f.read().strip()) + if nr_hugepages < 64: + f.seek(0) + f.write("64") + defer(lambda: open(hp_file, 'w', encoding='utf-8').write(str(nr_hugepages))) + + single(cfg) + rx_cmd = f"{cfg.bin_local} -s -p {cfg.port} -i {cfg.ifname} -q {cfg.target} -x 2" + tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {cfg.port} -l 12840" + + probe = cmd(rx_cmd + " -d", fail=False) + if probe.ret == SKIP_CODE: + raise KsftSkipEx(probe.stdout.strip()) + + mp_clear_wait(cfg) + with bkg(rx_cmd, exit_wait=True): + wait_port_listen(cfg.port, proto="tcp") + cmd(tx_cmd, host=cfg.remote) + + +def main() -> None: + with NetDrvEpEnv(__file__) as cfg: + cfg.bin_local = path.abspath(path.dirname(__file__) + "/../../../drivers/net/hw/iou-zcrx") + cfg.bin_remote = cfg.remote.deploy(cfg.bin_local) + + cfg.ethnl = EthtoolFamily() + cfg.netnl = NetdevFamily() + cfg.port = rand_port() + ksft_run(globs=globals(), cases=[test_zcrx, test_zcrx_oneshot, + test_zcrx_large_chunks], args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/irq.py b/tools/testing/selftests/drivers/net/hw/irq.py new file mode 100755 index 000000000000..0699d6a8b4e2 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/irq.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +from lib.py import ksft_run, ksft_exit +from lib.py import ksft_ge, ksft_eq +from lib.py import KsftSkipEx +from lib.py import ksft_disruptive +from lib.py import EthtoolFamily, NetdevFamily +from lib.py import NetDrvEnv +from lib.py import cmd, ip, defer + + +def read_affinity(irq) -> str: + with open(f'/proc/irq/{irq}/smp_affinity', 'r') as fp: + return fp.read().lstrip("0,").strip() + + +def write_affinity(irq, what) -> str: + if what != read_affinity(irq): + with open(f'/proc/irq/{irq}/smp_affinity', 'w') as fp: + fp.write(what) + + +def check_irqs_reported(cfg) -> None: + """ Check that device reports IRQs for NAPI instances """ + napis = cfg.netnl.napi_get({"ifindex": cfg.ifindex}, dump=True) + irqs = sum(['irq' in x for x in napis]) + + ksft_ge(irqs, 1) + ksft_eq(irqs, len(napis)) + + +def _check_reconfig(cfg, reconfig_cb) -> None: + napis = cfg.netnl.napi_get({"ifindex": cfg.ifindex}, dump=True) + for n in reversed(napis): + if 'irq' in n: + break + else: + raise KsftSkipEx(f"Device has no NAPI with IRQ attribute (#napis: {len(napis)}") + + old = read_affinity(n['irq']) + # pick an affinity that's not the current one + new = "3" if old != "3" else "5" + write_affinity(n['irq'], new) + defer(write_affinity, n['irq'], old) + + reconfig_cb(cfg) + + ksft_eq(read_affinity(n['irq']), new, comment="IRQ affinity changed after reconfig") + + +def check_reconfig_queues(cfg) -> None: + def reconfig(cfg) -> None: + channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}}) + if channels['combined-count'] == 0: + rx_type = 'rx' + else: + rx_type = 'combined' + cur_queue_cnt = channels[f'{rx_type}-count'] + max_queue_cnt = channels[f'{rx_type}-max'] + + cmd(f"ethtool -L {cfg.ifname} {rx_type} 1") + cmd(f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}") + cmd(f"ethtool -L {cfg.ifname} {rx_type} {cur_queue_cnt}") + + _check_reconfig(cfg, reconfig) + + +def check_reconfig_xdp(cfg) -> None: + def reconfig(cfg) -> None: + ip(f"link set dev %s xdp obj %s sec xdp" % + (cfg.ifname, cfg.net_lib_dir / "xdp_dummy.bpf.o")) + ip(f"link set dev %s xdp off" % cfg.ifname) + + _check_reconfig(cfg, reconfig) + + +@ksft_disruptive +def check_down(cfg) -> None: + def reconfig(cfg) -> None: + ip("link set dev %s down" % cfg.ifname) + ip("link set dev %s up" % cfg.ifname) + + _check_reconfig(cfg, reconfig) + + +def main() -> None: + with NetDrvEnv(__file__, nsim_test=False) as cfg: + cfg.ethnl = EthtoolFamily() + cfg.netnl = NetdevFamily() + + ksft_run([check_irqs_reported, check_reconfig_queues, + check_reconfig_xdp, check_down], + args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py index 399789a9676a..84a4dab6c649 100644 --- a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py @@ -1,5 +1,14 @@ # SPDX-License-Identifier: GPL-2.0 +""" +Driver test environment (hardware-only tests). +NetDrvEnv and NetDrvEpEnv are the main environment classes. +NetDrvContEnv extends NetDrvEpEnv with netkit container support. +Former is for local host only tests, latter creates / connects +to a remote endpoint. See NIPA wiki for more information about +running and writing driver tests. +""" + import sys from pathlib import Path @@ -7,11 +16,41 @@ KSFT_DIR = (Path(__file__).parent / "../../../../..").resolve() try: sys.path.append(KSFT_DIR.as_posix()) - from net.lib.py import * - from drivers.net.lib.py import * - from .linkconfig import LinkConfig + + # Import one by one to avoid pylint false positives + from net.lib.py import NetNS, NetNSEnter, NetdevSimDev + from net.lib.py import EthtoolFamily, NetdevFamily, NetshaperFamily, \ + NlError, RtnlFamily, DevlinkFamily, PSPFamily, Netlink + from net.lib.py import CmdExitFailure + from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \ + fd_read_timeout, ip, rand_port, rand_ports, wait_port_listen, \ + wait_file, tool + from net.lib.py import bpf_map_set, bpf_map_dump, bpf_prog_map_ids + from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx + from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \ + ksft_setup, ksft_variants, KsftNamedVariant + from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \ + ksft_ne, ksft_not_in, ksft_raises, ksft_true, ksft_gt, ksft_not_none + from drivers.net.lib.py import GenerateTraffic, Remote, Iperf3Runner + from drivers.net.lib.py import NetDrvEnv, NetDrvEpEnv, NetDrvContEnv + + __all__ = ["NetNS", "NetNSEnter", "NetdevSimDev", + "EthtoolFamily", "NetdevFamily", "NetshaperFamily", + "NlError", "RtnlFamily", "DevlinkFamily", "PSPFamily", "Netlink", + "CmdExitFailure", + "bkg", "cmd", "bpftool", "bpftrace", "defer", "ethtool", + "fd_read_timeout", "ip", "rand_port", "rand_ports", + "wait_port_listen", "wait_file", "tool", + "bpf_map_set", "bpf_map_dump", "bpf_prog_map_ids", + "KsftSkipEx", "KsftFailEx", "KsftXfailEx", + "ksft_disruptive", "ksft_exit", "ksft_pr", "ksft_run", + "ksft_setup", "ksft_variants", "KsftNamedVariant", + "ksft_eq", "ksft_ge", "ksft_in", "ksft_is", "ksft_lt", + "ksft_ne", "ksft_not_in", "ksft_raises", "ksft_true", "ksft_gt", + "ksft_not_none", "ksft_not_none", + "NetDrvEnv", "NetDrvEpEnv", "NetDrvContEnv", "GenerateTraffic", + "Remote", "Iperf3Runner"] except ModuleNotFoundError as e: - ksft_pr("Failed importing `net` library from kernel sources") - ksft_pr(str(e)) - ktap_result(True, comment="SKIP") + print("Failed importing `net` library from kernel sources") + print(str(e)) sys.exit(4) diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/linkconfig.py b/tools/testing/selftests/drivers/net/hw/lib/py/linkconfig.py deleted file mode 100644 index 79fde603cbbc..000000000000 --- a/tools/testing/selftests/drivers/net/hw/lib/py/linkconfig.py +++ /dev/null @@ -1,222 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 - -from lib.py import cmd, ethtool, ip -from lib.py import ksft_pr, ksft_eq, KsftSkipEx -from typing import Optional -import re -import time -import json - -#The LinkConfig class is implemented to handle the link layer configurations. -#Required minimum ethtool version is 6.10 - -class LinkConfig: - """Class for handling the link layer configurations""" - def __init__(self, cfg: object) -> None: - self.cfg = cfg - self.partner_netif = self.get_partner_netif_name() - - """Get the initial link configuration of local interface""" - self.common_link_modes = self.get_common_link_modes() - - def get_partner_netif_name(self) -> Optional[str]: - partner_netif = None - try: - if not self.verify_link_up(): - return None - """Get partner interface name""" - partner_json_output = ip("addr show", json=True, host=self.cfg.remote) - for interface in partner_json_output: - for addr in interface.get('addr_info', []): - if addr.get('local') == self.cfg.remote_addr: - partner_netif = interface['ifname'] - ksft_pr(f"Partner Interface name: {partner_netif}") - if partner_netif is None: - ksft_pr("Unable to get the partner interface name") - except Exception as e: - print(f"Unexpected error occurred while getting partner interface name: {e}") - self.partner_netif = partner_netif - return partner_netif - - def verify_link_up(self) -> bool: - """Verify whether the local interface link is up""" - with open(f"/sys/class/net/{self.cfg.ifname}/operstate", "r") as fp: - link_state = fp.read().strip() - - if link_state == "down": - ksft_pr(f"Link state of interface {self.cfg.ifname} is DOWN") - return False - else: - return True - - def reset_interface(self, local: bool = True, remote: bool = True) -> bool: - ksft_pr("Resetting interfaces in local and remote") - if remote: - if self.verify_link_up(): - if self.partner_netif is not None: - ifname = self.partner_netif - link_up_cmd = f"ip link set up {ifname}" - link_down_cmd = f"ip link set down {ifname}" - reset_cmd = f"{link_down_cmd} && sleep 5 && {link_up_cmd}" - try: - cmd(reset_cmd, host=self.cfg.remote) - except Exception as e: - ksft_pr(f"Unexpected error occurred while resetting remote: {e}") - else: - ksft_pr("Partner interface not available") - if local: - ifname = self.cfg.ifname - link_up_cmd = f"ip link set up {ifname}" - link_down_cmd = f"ip link set down {ifname}" - reset_cmd = f"{link_down_cmd} && sleep 5 && {link_up_cmd}" - try: - cmd(reset_cmd) - except Exception as e: - ksft_pr(f"Unexpected error occurred while resetting local: {e}") - time.sleep(10) - if self.verify_link_up() and self.get_ethtool_field("link-detected"): - ksft_pr("Local and remote interfaces reset to original state") - return True - else: - ksft_pr("Error occurred after resetting interfaces. Link is DOWN.") - return False - - def set_speed_and_duplex(self, speed: str, duplex: str, autoneg: bool = True) -> bool: - """Set the speed and duplex state for the interface""" - autoneg_state = "on" if autoneg is True else "off" - process = None - try: - process = ethtool(f"--change {self.cfg.ifname} speed {speed} duplex {duplex} autoneg {autoneg_state}") - except Exception as e: - ksft_pr(f"Unexpected error occurred while setting speed/duplex: {e}") - if process is None or process.ret != 0: - return False - else: - ksft_pr(f"Speed: {speed} Mbps, Duplex: {duplex} set for Interface: {self.cfg.ifname}") - return True - - def verify_speed_and_duplex(self, expected_speed: str, expected_duplex: str) -> bool: - if not self.verify_link_up(): - return False - """Verifying the speed and duplex state for the interface""" - with open(f"/sys/class/net/{self.cfg.ifname}/speed", "r") as fp: - actual_speed = fp.read().strip() - with open(f"/sys/class/net/{self.cfg.ifname}/duplex", "r") as fp: - actual_duplex = fp.read().strip() - - ksft_eq(actual_speed, expected_speed) - ksft_eq(actual_duplex, expected_duplex) - return True - - def set_autonegotiation_state(self, state: str, remote: bool = False) -> bool: - common_link_modes = self.common_link_modes - speeds, duplex_modes = self.get_speed_duplex_values(self.common_link_modes) - speed = speeds[0] - duplex = duplex_modes[0] - if not speed or not duplex: - ksft_pr("No speed or duplex modes found") - return False - - speed_duplex_cmd = f"speed {speed} duplex {duplex}" if state == "off" else "" - if remote: - if not self.verify_link_up(): - return False - """Set the autonegotiation state for the partner""" - command = f"-s {self.partner_netif} {speed_duplex_cmd} autoneg {state}" - partner_autoneg_change = None - """Set autonegotiation state for interface in remote pc""" - try: - partner_autoneg_change = ethtool(command, host=self.cfg.remote) - except Exception as e: - ksft_pr(f"Unexpected error occurred while changing auto-neg in remote: {e}") - if partner_autoneg_change is None or partner_autoneg_change.ret != 0: - ksft_pr(f"Not able to set autoneg parameter for interface {self.partner_netif}.") - return False - ksft_pr(f"Autoneg set as {state} for {self.partner_netif}") - else: - """Set the autonegotiation state for the interface""" - try: - process = ethtool(f"-s {self.cfg.ifname} {speed_duplex_cmd} autoneg {state}") - if process.ret != 0: - ksft_pr(f"Not able to set autoneg parameter for interface {self.cfg.ifname}") - return False - except Exception as e: - ksft_pr(f"Unexpected error occurred while changing auto-neg in local: {e}") - return False - ksft_pr(f"Autoneg set as {state} for {self.cfg.ifname}") - return True - - def check_autoneg_supported(self, remote: bool = False) -> bool: - if not remote: - local_autoneg = self.get_ethtool_field("supports-auto-negotiation") - if local_autoneg is None: - ksft_pr(f"Unable to fetch auto-negotiation status for interface {self.cfg.ifname}") - """Return autoneg status of the local interface""" - return local_autoneg - else: - if not self.verify_link_up(): - raise KsftSkipEx("Link is DOWN") - """Check remote auto-negotiation support status""" - partner_autoneg = False - if self.partner_netif is not None: - partner_autoneg = self.get_ethtool_field("supports-auto-negotiation", remote=True) - if partner_autoneg is None: - ksft_pr(f"Unable to fetch auto-negotiation status for interface {self.partner_netif}") - return partner_autoneg - - def get_common_link_modes(self) -> set[str]: - common_link_modes = [] - """Populate common link modes""" - link_modes = self.get_ethtool_field("supported-link-modes") - partner_link_modes = self.get_ethtool_field("link-partner-advertised-link-modes") - if link_modes is None: - raise KsftSkipEx(f"Link modes not available for {self.cfg.ifname}") - if partner_link_modes is None: - raise KsftSkipEx(f"Partner link modes not available for {self.cfg.ifname}") - common_link_modes = set(link_modes) and set(partner_link_modes) - return common_link_modes - - def get_speed_duplex_values(self, link_modes: list[str]) -> tuple[list[str], list[str]]: - speed = [] - duplex = [] - """Check the link modes""" - for data in link_modes: - parts = data.split('/') - speed_value = re.match(r'\d+', parts[0]) - if speed_value: - speed.append(speed_value.group()) - else: - ksft_pr(f"No speed value found for interface {self.ifname}") - return None, None - duplex.append(parts[1].lower()) - return speed, duplex - - def get_ethtool_field(self, field: str, remote: bool = False) -> Optional[str]: - process = None - if not remote: - """Get the ethtool field value for the local interface""" - try: - process = ethtool(self.cfg.ifname, json=True) - except Exception as e: - ksft_pr("Required minimum ethtool version is 6.10") - ksft_pr(f"Unexpected error occurred while getting ethtool field in local: {e}") - return None - else: - if not self.verify_link_up(): - return None - """Get the ethtool field value for the remote interface""" - self.cfg.require_cmd("ethtool", remote=True) - if self.partner_netif is None: - ksft_pr(f"Partner interface name is unavailable.") - return None - try: - process = ethtool(self.partner_netif, json=True, host=self.cfg.remote) - except Exception as e: - ksft_pr("Required minimum ethtool version is 6.10") - ksft_pr(f"Unexpected error occurred while getting ethtool field in remote: {e}") - return None - json_data = process[0] - """Check if the field exist in the json data""" - if field not in json_data: - raise KsftSkipEx(f'Field {field} does not exist in the output of interface {json_data["ifname"]}') - return json_data[field] diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c index 19a6969643f4..e098d6534c3c 100644 --- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c +++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c @@ -9,27 +9,37 @@ * ncdevmem -s <server IP> [-c <client IP>] -f eth1 -l -p 5201 * * On client: - * echo -n "hello\nworld" | nc -s <server IP> 5201 -p 5201 + * echo -n "hello\nworld" | \ + * ncdevmem -s <server IP> [-c <client IP>] -p 5201 -f eth1 * - * Test data validation: + * Note this is compatible with regular netcat. i.e. the sender or receiver can + * be replaced with regular netcat to test the RX or TX path in isolation. + * + * Test data validation (devmem TCP on RX only): * * On server: * ncdevmem -s <server IP> [-c <client IP>] -f eth1 -l -p 5201 -v 7 * * On client: * yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06) | \ - * tr \\n \\0 | \ - * head -c 5G | \ + * head -c 1G | \ * nc <server IP> 5201 -p 5201 * + * Test data validation (devmem TCP on RX and TX, validation happens on RX): * - * Note this is compatible with regular netcat. i.e. the sender or receiver can - * be replaced with regular netcat to test the RX or TX path in isolation. + * On server: + * ncdevmem -s <server IP> [-c <client IP>] -l -p 5201 -v 8 -f eth1 + * + * On client: + * yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06\\x07) | \ + * head -c 1M | \ + * ncdevmem -s <server IP> [-c <client IP>] -p 5201 -f eth1 */ #define _GNU_SOURCE #define __EXPORTED_HEADERS__ #include <linux/uio.h> +#include <stdarg.h> #include <stdio.h> #include <stdlib.h> #include <unistd.h> @@ -38,19 +48,22 @@ #include <errno.h> #define __iovec_defined #include <fcntl.h> +#include <limits.h> #include <malloc.h> #include <error.h> +#include <poll.h> #include <arpa/inet.h> #include <sys/socket.h> #include <sys/mman.h> #include <sys/ioctl.h> #include <sys/syscall.h> +#include <sys/time.h> #include <linux/memfd.h> #include <linux/dma-buf.h> +#include <linux/errqueue.h> #include <linux/udmabuf.h> -#include <libmnl/libmnl.h> #include <linux/types.h> #include <linux/netlink.h> #include <linux/genetlink.h> @@ -71,6 +84,9 @@ #define MSG_SOCK_DEVMEM 0x2000000 #endif +#define MAX_IOV 1024 + +static size_t max_chunk; static char *server_ip; static char *client_ip; static char *port; @@ -80,6 +96,13 @@ static int num_queues = -1; static char *ifname; static unsigned int ifindex; static unsigned int dmabuf_id; +static uint32_t tx_dmabuf_id; +static int waittime_ms = 500; +static bool fail_on_linear; + +/* System state loaded by current_config_load() */ +#define MAX_FLOWS 8 +static int ntuple_ids[MAX_FLOWS] = { -1, -1, -1, -1, -1, -1, -1, -1, }; struct memory_buffer { int fd; @@ -93,10 +116,27 @@ struct memory_buffer { struct memory_provider { struct memory_buffer *(*alloc)(size_t size); void (*free)(struct memory_buffer *ctx); + void (*memcpy_to_device)(struct memory_buffer *dst, size_t off, + void *src, int n); void (*memcpy_from_device)(void *dst, struct memory_buffer *src, size_t off, int n); }; +static void pr_err(const char *fmt, ...) +{ + va_list args; + + fprintf(stderr, "%s: ", TEST_PREFIX); + + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); + + if (errno != 0) + fprintf(stderr, ": %s", strerror(errno)); + fprintf(stderr, "\n"); +} + static struct memory_buffer *udmabuf_alloc(size_t size) { struct udmabuf_create create; @@ -105,27 +145,33 @@ static struct memory_buffer *udmabuf_alloc(size_t size) ctx = malloc(sizeof(*ctx)); if (!ctx) - error(1, ENOMEM, "malloc failed"); + return NULL; ctx->size = size; ctx->devfd = open("/dev/udmabuf", O_RDWR); - if (ctx->devfd < 0) - error(1, errno, - "%s: [skip,no-udmabuf: Unable to access DMA buffer device file]\n", - TEST_PREFIX); + if (ctx->devfd < 0) { + pr_err("[skip,no-udmabuf: Unable to access DMA buffer device file]"); + goto err_free_ctx; + } ctx->memfd = memfd_create("udmabuf-test", MFD_ALLOW_SEALING); - if (ctx->memfd < 0) - error(1, errno, "%s: [skip,no-memfd]\n", TEST_PREFIX); + if (ctx->memfd < 0) { + pr_err("[skip,no-memfd]"); + goto err_close_dev; + } ret = fcntl(ctx->memfd, F_ADD_SEALS, F_SEAL_SHRINK); - if (ret < 0) - error(1, errno, "%s: [skip,fcntl-add-seals]\n", TEST_PREFIX); + if (ret < 0) { + pr_err("[skip,fcntl-add-seals]"); + goto err_close_memfd; + } ret = ftruncate(ctx->memfd, size); - if (ret == -1) - error(1, errno, "%s: [FAIL,memfd-truncate]\n", TEST_PREFIX); + if (ret == -1) { + pr_err("[FAIL,memfd-truncate]"); + goto err_close_memfd; + } memset(&create, 0, sizeof(create)); @@ -133,15 +179,29 @@ static struct memory_buffer *udmabuf_alloc(size_t size) create.offset = 0; create.size = size; ctx->fd = ioctl(ctx->devfd, UDMABUF_CREATE, &create); - if (ctx->fd < 0) - error(1, errno, "%s: [FAIL, create udmabuf]\n", TEST_PREFIX); + if (ctx->fd < 0) { + pr_err("[FAIL, create udmabuf]"); + goto err_close_fd; + } ctx->buf_mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, ctx->fd, 0); - if (ctx->buf_mem == MAP_FAILED) - error(1, errno, "%s: [FAIL, map udmabuf]\n", TEST_PREFIX); + if (ctx->buf_mem == MAP_FAILED) { + pr_err("[FAIL, map udmabuf]"); + goto err_close_fd; + } return ctx; + +err_close_fd: + close(ctx->fd); +err_close_memfd: + close(ctx->memfd); +err_close_dev: + close(ctx->devfd); +err_free_ctx: + free(ctx); + return NULL; } static void udmabuf_free(struct memory_buffer *ctx) @@ -153,6 +213,20 @@ static void udmabuf_free(struct memory_buffer *ctx) free(ctx); } +static void udmabuf_memcpy_to_device(struct memory_buffer *dst, size_t off, + void *src, int n) +{ + struct dma_buf_sync sync = {}; + + sync.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE; + ioctl(dst->fd, DMA_BUF_IOCTL_SYNC, &sync); + + memcpy(dst->buf_mem + off, src, n); + + sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE; + ioctl(dst->fd, DMA_BUF_IOCTL_SYNC, &sync); +} + static void udmabuf_memcpy_from_device(void *dst, struct memory_buffer *src, size_t off, int n) { @@ -170,6 +244,7 @@ static void udmabuf_memcpy_from_device(void *dst, struct memory_buffer *src, static struct memory_provider udmabuf_memory_provider = { .alloc = udmabuf_alloc, .free = udmabuf_free, + .memcpy_to_device = udmabuf_memcpy_to_device, .memcpy_from_device = udmabuf_memcpy_from_device, }; @@ -184,21 +259,25 @@ static void print_nonzero_bytes(void *ptr, size_t size) putchar(p[i]); } -void validate_buffer(void *line, size_t size) +int validate_buffer(void *line, size_t size) { static unsigned char seed = 1; unsigned char *ptr = line; - int errors = 0; + unsigned char expected; + static int errors; size_t i; for (i = 0; i < size; i++) { - if (ptr[i] != seed) { + expected = seed ? seed : '\n'; + if (ptr[i] != expected) { fprintf(stderr, "Failed validation: expected=%u, actual=%u, index=%lu\n", - seed, ptr[i], i); + expected, ptr[i], i); errors++; - if (errors > 20) - error(1, 0, "validation failed."); + if (errors > 20) { + pr_err("validation failed"); + return -1; + } } seed++; if (seed == do_validation) @@ -206,6 +285,86 @@ void validate_buffer(void *line, size_t size) } fprintf(stdout, "Validated buffer\n"); + return 0; +} + +static int +__run_command(char *out, size_t outlen, const char *cmd, va_list args) +{ + char command[256]; + FILE *fp; + + vsnprintf(command, sizeof(command), cmd, args); + + fprintf(stderr, "Running: %s\n", command); + fp = popen(command, "r"); + if (!fp) + return -1; + if (out) { + size_t len; + + if (!fgets(out, outlen, fp)) + return -1; + + /* Remove trailing newline if present */ + len = strlen(out); + if (len && out[len - 1] == '\n') + out[len - 1] = '\0'; + } + return pclose(fp); +} + +static int run_command(const char *cmd, ...) +{ + va_list args; + int ret; + + va_start(args, cmd); + ret = __run_command(NULL, 0, cmd, args); + va_end(args); + + return ret; +} + +static int ethtool_add_flow(const char *format, ...) +{ + char local_output[256], cmd[256]; + const char *id_start; + int flow_idx, ret; + char *endptr; + long flow_id; + va_list args; + + for (flow_idx = 0; flow_idx < MAX_FLOWS; flow_idx++) + if (ntuple_ids[flow_idx] == -1) + break; + if (flow_idx == MAX_FLOWS) { + fprintf(stderr, "Error: too many flows\n"); + return -1; + } + + snprintf(cmd, sizeof(cmd), "ethtool -N %s %s", ifname, format); + + va_start(args, format); + ret = __run_command(local_output, sizeof(local_output), cmd, args); + va_end(args); + + if (ret != 0) + return ret; + + /* Extract the ID from the output */ + id_start = strstr(local_output, "Added rule with ID "); + if (!id_start) + return -1; + id_start += strlen("Added rule with ID "); + + flow_id = strtol(id_start, &endptr, 10); + if (endptr == id_start || flow_id < 0 || flow_id > INT_MAX) + return -1; + + fprintf(stderr, "Added flow rule with ID %ld\n", flow_id); + ntuple_ids[flow_idx] = flow_id; + return flow_id; } static int rxq_num(int ifindex) @@ -235,29 +394,17 @@ static int rxq_num(int ifindex) return num; } -#define run_command(cmd, ...) \ - ({ \ - char command[256]; \ - memset(command, 0, sizeof(command)); \ - snprintf(command, sizeof(command), cmd, ##__VA_ARGS__); \ - fprintf(stderr, "Running: %s\n", command); \ - system(command); \ - }) - -static int reset_flow_steering(void) -{ - /* Depending on the NIC, toggling ntuple off and on might not - * be allowed. Additionally, attempting to delete existing filters - * will fail if no filters are present. Therefore, do not enforce - * the exit status. - */ - - run_command("sudo ethtool -K %s ntuple off >&2", ifname); - run_command("sudo ethtool -K %s ntuple on >&2", ifname); - run_command( - "sudo ethtool -n %s | grep 'Filter:' | awk '{print $2}' | xargs -n1 ethtool -N %s delete >&2", - ifname, ifname); - return 0; +static void reset_flow_steering(void) +{ + int i; + + for (i = 0; i < MAX_FLOWS; i++) { + if (ntuple_ids[i] == -1) + continue; + run_command("ethtool -N %s delete %d", + ifname, ntuple_ids[i]); + ntuple_ids[i] = -1; + } } static const char *tcp_data_split_str(int val) @@ -274,7 +421,81 @@ static const char *tcp_data_split_str(int val) } } -static int configure_headersplit(bool on) +static struct ethtool_rings_get_rsp *get_ring_config(void) +{ + struct ethtool_rings_get_req *get_req; + struct ethtool_rings_get_rsp *get_rsp; + struct ynl_error yerr; + struct ynl_sock *ys; + + ys = ynl_sock_create(&ynl_ethtool_family, &yerr); + if (!ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return NULL; + } + + get_req = ethtool_rings_get_req_alloc(); + ethtool_rings_get_req_set_header_dev_index(get_req, ifindex); + get_rsp = ethtool_rings_get(ys, get_req); + ethtool_rings_get_req_free(get_req); + + ynl_sock_destroy(ys); + + return get_rsp; +} + +static void restore_ring_config(const struct ethtool_rings_get_rsp *config) +{ + struct ethtool_rings_get_req *get_req; + struct ethtool_rings_get_rsp *get_rsp; + struct ethtool_rings_set_req *req; + struct ynl_error yerr; + struct ynl_sock *ys; + int ret; + + if (!config) + return; + + ys = ynl_sock_create(&ynl_ethtool_family, &yerr); + if (!ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return; + } + + req = ethtool_rings_set_req_alloc(); + ethtool_rings_set_req_set_header_dev_index(req, ifindex); + ethtool_rings_set_req_set_tcp_data_split(req, + ETHTOOL_TCP_DATA_SPLIT_UNKNOWN); + if (config->_present.hds_thresh) + ethtool_rings_set_req_set_hds_thresh(req, config->hds_thresh); + + ret = ethtool_rings_set(ys, req); + if (ret < 0) + fprintf(stderr, "YNL restoring HDS cfg: %s\n", ys->err.msg); + + get_req = ethtool_rings_get_req_alloc(); + ethtool_rings_get_req_set_header_dev_index(get_req, ifindex); + get_rsp = ethtool_rings_get(ys, get_req); + ethtool_rings_get_req_free(get_req); + + /* use explicit value if UKNOWN didn't give us the previous */ + if (get_rsp->tcp_data_split != config->tcp_data_split) { + ethtool_rings_set_req_set_tcp_data_split(req, + config->tcp_data_split); + ret = ethtool_rings_set(ys, req); + if (ret < 0) + fprintf(stderr, "YNL restoring expl HDS cfg: %s\n", + ys->err.msg); + } + + ethtool_rings_get_rsp_free(get_rsp); + ethtool_rings_set_req_free(req); + + ynl_sock_destroy(ys); +} + +static int +configure_headersplit(const struct ethtool_rings_get_rsp *old, bool on) { struct ethtool_rings_get_req *get_req; struct ethtool_rings_get_rsp *get_rsp; @@ -291,8 +512,15 @@ static int configure_headersplit(bool on) req = ethtool_rings_set_req_alloc(); ethtool_rings_set_req_set_header_dev_index(req, ifindex); - /* 0 - off, 1 - auto, 2 - on */ - ethtool_rings_set_req_set_tcp_data_split(req, on ? 2 : 0); + if (on) { + ethtool_rings_set_req_set_tcp_data_split(req, + ETHTOOL_TCP_DATA_SPLIT_ENABLED); + if (old->_present.hds_thresh) + ethtool_rings_set_req_set_hds_thresh(req, 0); + } else { + ethtool_rings_set_req_set_tcp_data_split(req, + ETHTOOL_TCP_DATA_SPLIT_UNKNOWN); + } ret = ethtool_rings_set(ys, req); if (ret < 0) fprintf(stderr, "YNL failed: %s\n", ys->err.msg); @@ -316,12 +544,103 @@ static int configure_headersplit(bool on) static int configure_rss(void) { - return run_command("sudo ethtool -X %s equal %d >&2", ifname, start_queue); + return run_command("ethtool -X %s equal %d >&2", ifname, start_queue); +} + +static void reset_rss(void) +{ + run_command("ethtool -X %s default >&2", ifname, start_queue); } -static int configure_channels(unsigned int rx, unsigned int tx) +static int check_changing_channels(unsigned int rx, unsigned int tx) { - return run_command("sudo ethtool -L %s rx %u tx %u", ifname, rx, tx); + struct ethtool_channels_get_req *gchan; + struct ethtool_channels_set_req *schan; + struct ethtool_channels_get_rsp *chan; + struct ynl_error yerr; + struct ynl_sock *ys; + int ret; + + fprintf(stderr, "setting channel count rx:%u tx:%u\n", rx, tx); + + ys = ynl_sock_create(&ynl_ethtool_family, &yerr); + if (!ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return -1; + } + + gchan = ethtool_channels_get_req_alloc(); + if (!gchan) { + ret = -1; + goto exit_close_sock; + } + + ethtool_channels_get_req_set_header_dev_index(gchan, ifindex); + chan = ethtool_channels_get(ys, gchan); + ethtool_channels_get_req_free(gchan); + if (!chan) { + fprintf(stderr, "YNL get channels: %s\n", ys->err.msg); + ret = -1; + goto exit_close_sock; + } + + schan = ethtool_channels_set_req_alloc(); + if (!schan) { + ret = -1; + goto exit_free_chan; + } + + ethtool_channels_set_req_set_header_dev_index(schan, ifindex); + + if (chan->_present.combined_count) { + if (chan->_present.rx_count || chan->_present.tx_count) { + ethtool_channels_set_req_set_rx_count(schan, 0); + ethtool_channels_set_req_set_tx_count(schan, 0); + } + + if (rx == tx) { + ethtool_channels_set_req_set_combined_count(schan, rx); + } else if (rx > tx) { + ethtool_channels_set_req_set_combined_count(schan, tx); + ethtool_channels_set_req_set_rx_count(schan, rx - tx); + } else { + ethtool_channels_set_req_set_combined_count(schan, rx); + ethtool_channels_set_req_set_tx_count(schan, tx - rx); + } + + } else if (chan->_present.rx_count) { + ethtool_channels_set_req_set_rx_count(schan, rx); + ethtool_channels_set_req_set_tx_count(schan, tx); + } else { + fprintf(stderr, "Error: device has neither combined nor rx channels\n"); + ret = -1; + goto exit_free_schan; + } + + ret = ethtool_channels_set(ys, schan); + if (ret) { + fprintf(stderr, "YNL set channels: %s\n", ys->err.msg); + } else { + /* We were expecting a failure, go back to previous settings */ + ethtool_channels_set_req_set_combined_count(schan, + chan->combined_count); + ethtool_channels_set_req_set_rx_count(schan, chan->rx_count); + ethtool_channels_set_req_set_tx_count(schan, chan->tx_count); + + ret = ethtool_channels_set(ys, schan); + if (ret) + fprintf(stderr, "YNL un-setting channels: %s\n", + ys->err.msg); + } + +exit_free_schan: + ethtool_channels_set_req_free(schan); +exit_free_chan: + ethtool_channels_get_rsp_free(chan); +exit_close_sock: + ynl_sock_destroy(ys); + + return ret; } static int configure_flow_steering(struct sockaddr_in6 *server_sin) @@ -329,6 +648,7 @@ static int configure_flow_steering(struct sockaddr_in6 *server_sin) const char *type = "tcp6"; const char *server_addr; char buf[40]; + int flow_id; inet_ntop(AF_INET6, &server_sin->sin6_addr, buf, sizeof(buf)); server_addr = buf; @@ -338,15 +658,25 @@ static int configure_flow_steering(struct sockaddr_in6 *server_sin) server_addr = strrchr(server_addr, ':') + 1; } - return run_command("sudo ethtool -N %s flow-type %s %s %s dst-ip %s %s %s dst-port %s queue %d >&2", - ifname, - type, - client_ip ? "src-ip" : "", - client_ip ?: "", - server_addr, - client_ip ? "src-port" : "", - client_ip ? port : "", - port, start_queue); + /* Try configure 5-tuple */ + flow_id = ethtool_add_flow("flow-type %s %s %s dst-ip %s %s %s dst-port %s queue %d", + type, + client_ip ? "src-ip" : "", + client_ip ?: "", + server_addr, + client_ip ? "src-port" : "", + client_ip ? port : "", + port, start_queue); + if (flow_id < 0) { + /* If that fails, try configure 3-tuple */ + flow_id = ethtool_add_flow("flow-type %s dst-ip %s dst-port %s queue %d", + type, server_addr, port, start_queue); + if (flow_id < 0) + /* If that fails, return error */ + return -1; + } + + return 0; } static int bind_rx_queue(unsigned int ifindex, unsigned int dmabuf_fd, @@ -359,6 +689,7 @@ static int bind_rx_queue(unsigned int ifindex, unsigned int dmabuf_fd, *ys = ynl_sock_create(&ynl_netdev_family, &yerr); if (!*ys) { + netdev_queue_id_free(queues); fprintf(stderr, "YNL: %s\n", yerr.msg); return -1; } @@ -394,18 +725,67 @@ err_close: return -1; } -static void enable_reuseaddr(int fd) +static int bind_tx_queue(unsigned int ifindex, unsigned int dmabuf_fd, + struct ynl_sock **ys) +{ + struct netdev_bind_tx_req *req = NULL; + struct netdev_bind_tx_rsp *rsp = NULL; + struct ynl_error yerr; + + *ys = ynl_sock_create(&ynl_netdev_family, &yerr); + if (!*ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return -1; + } + + req = netdev_bind_tx_req_alloc(); + netdev_bind_tx_req_set_ifindex(req, ifindex); + netdev_bind_tx_req_set_fd(req, dmabuf_fd); + + rsp = netdev_bind_tx(*ys, req); + if (!rsp) { + perror("netdev_bind_tx"); + goto err_close; + } + + if (!rsp->_present.id) { + perror("id not present"); + goto err_close; + } + + fprintf(stderr, "got tx dmabuf id=%d\n", rsp->id); + tx_dmabuf_id = rsp->id; + + netdev_bind_tx_req_free(req); + netdev_bind_tx_rsp_free(rsp); + + return 0; + +err_close: + fprintf(stderr, "YNL failed: %s\n", (*ys)->err.msg); + netdev_bind_tx_req_free(req); + ynl_sock_destroy(*ys); + return -1; +} + +static int enable_reuseaddr(int fd) { int opt = 1; int ret; ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)); - if (ret) - error(1, errno, "%s: [FAIL, SO_REUSEPORT]\n", TEST_PREFIX); + if (ret) { + pr_err("SO_REUSEPORT failed"); + return -1; + } ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)); - if (ret) - error(1, errno, "%s: [FAIL, SO_REUSEADDR]\n", TEST_PREFIX); + if (ret) { + pr_err("SO_REUSEADDR failed"); + return -1; + } + + return 0; } static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6) @@ -432,10 +812,24 @@ static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6) return 0; } -int do_server(struct memory_buffer *mem) +static struct netdev_queue_id *create_queues(void) { - char ctrl_data[sizeof(int) * 20000]; struct netdev_queue_id *queues; + size_t i = 0; + + queues = netdev_queue_id_alloc(num_queues); + for (i = 0; i < num_queues; i++) { + netdev_queue_id_set_type(&queues[i], NETDEV_QUEUE_TYPE_RX); + netdev_queue_id_set_id(&queues[i], start_queue + i); + } + + return queues; +} + +static int do_server(struct memory_buffer *mem) +{ + struct ethtool_rings_get_rsp *ring_config; + char ctrl_data[sizeof(int) * 20000]; size_t non_page_aligned_frags = 0; struct sockaddr_in6 client_addr; struct sockaddr_in6 server_sin; @@ -446,64 +840,72 @@ int do_server(struct memory_buffer *mem) char *tmp_mem = NULL; struct ynl_sock *ys; char iobuf[819200]; + int ret, err = -1; char buffer[256]; int socket_fd; int client_fd; - size_t i = 0; - int ret; ret = parse_address(server_ip, atoi(port), &server_sin); - if (ret < 0) - error(1, 0, "parse server address"); + if (ret < 0) { + pr_err("parse server address"); + return -1; + } - if (reset_flow_steering()) - error(1, 0, "Failed to reset flow steering\n"); + ring_config = get_ring_config(); + if (!ring_config) { + pr_err("Failed to get current ring configuration"); + return -1; + } - if (configure_headersplit(1)) - error(1, 0, "Failed to enable TCP header split\n"); + if (configure_headersplit(ring_config, 1)) { + pr_err("Failed to enable TCP header split"); + goto err_free_ring_config; + } /* Configure RSS to divert all traffic from our devmem queues */ - if (configure_rss()) - error(1, 0, "Failed to configure rss\n"); + if (configure_rss()) { + pr_err("Failed to configure rss"); + goto err_reset_headersplit; + } /* Flow steer our devmem flows to start_queue */ - if (configure_flow_steering(&server_sin)) - error(1, 0, "Failed to configure flow steering\n"); - - sleep(1); - - queues = malloc(sizeof(*queues) * num_queues); - - for (i = 0; i < num_queues; i++) { - queues[i]._present.type = 1; - queues[i]._present.id = 1; - queues[i].type = NETDEV_QUEUE_TYPE_RX; - queues[i].id = start_queue + i; + if (configure_flow_steering(&server_sin)) { + pr_err("Failed to configure flow steering"); + goto err_reset_rss; } - if (bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) - error(1, 0, "Failed to bind\n"); + if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) { + pr_err("Failed to bind"); + goto err_reset_flow_steering; + } tmp_mem = malloc(mem->size); if (!tmp_mem) - error(1, ENOMEM, "malloc failed"); + goto err_unbind; socket_fd = socket(AF_INET6, SOCK_STREAM, 0); - if (socket_fd < 0) - error(1, errno, "%s: [FAIL, create socket]\n", TEST_PREFIX); + if (socket_fd < 0) { + pr_err("Failed to create socket"); + goto err_free_tmp; + } - enable_reuseaddr(socket_fd); + if (enable_reuseaddr(socket_fd)) + goto err_close_socket; fprintf(stderr, "binding to address %s:%d\n", server_ip, ntohs(server_sin.sin6_port)); ret = bind(socket_fd, &server_sin, sizeof(server_sin)); - if (ret) - error(1, errno, "%s: [FAIL, bind]\n", TEST_PREFIX); + if (ret) { + pr_err("Failed to bind"); + goto err_close_socket; + } ret = listen(socket_fd, 1); - if (ret) - error(1, errno, "%s: [FAIL, listen]\n", TEST_PREFIX); + if (ret) { + pr_err("Failed to listen"); + goto err_close_socket; + } client_addr_len = sizeof(client_addr); @@ -512,6 +914,10 @@ int do_server(struct memory_buffer *mem) fprintf(stderr, "Waiting or connection on %s:%d\n", buffer, ntohs(server_sin.sin6_port)); client_fd = accept(socket_fd, &client_addr, &client_addr_len); + if (client_fd < 0) { + pr_err("Failed to accept"); + goto err_close_socket; + } inet_ntop(AF_INET6, &client_addr.sin6_addr, buffer, sizeof(buffer)); @@ -539,14 +945,18 @@ int do_server(struct memory_buffer *mem) continue; if (ret < 0) { perror("recvmsg"); + if (errno == EFAULT) { + pr_err("received EFAULT, won't recover"); + goto err_close_client; + } continue; } if (ret == 0) { - fprintf(stderr, "client exited\n"); + errno = 0; + pr_err("client exited"); goto cleanup; } - i++; for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) { if (cm->cmsg_level != SOL_SOCKET || (cm->cmsg_type != SCM_DEVMEM_DMABUF && @@ -566,6 +976,11 @@ int do_server(struct memory_buffer *mem) "SCM_DEVMEM_LINEAR. dmabuf_cmsg->frag_size=%u\n", dmabuf_cmsg->frag_size); + if (fail_on_linear) { + pr_err("received SCM_DEVMEM_LINEAR but --fail-on-linear (-L) set"); + goto err_close_client; + } + continue; } @@ -581,9 +996,10 @@ int do_server(struct memory_buffer *mem) dmabuf_cmsg->frag_size, dmabuf_cmsg->frag_token, total_received, dmabuf_cmsg->dmabuf_id); - if (dmabuf_cmsg->dmabuf_id != dmabuf_id) - error(1, 0, - "received on wrong dmabuf_id: flow steering error\n"); + if (dmabuf_cmsg->dmabuf_id != dmabuf_id) { + pr_err("received on wrong dmabuf_id: flow steering error"); + goto err_close_client; + } if (dmabuf_cmsg->frag_size % getpagesize()) non_page_aligned_frags++; @@ -594,22 +1010,27 @@ int do_server(struct memory_buffer *mem) dmabuf_cmsg->frag_offset, dmabuf_cmsg->frag_size); - if (do_validation) - validate_buffer(tmp_mem, - dmabuf_cmsg->frag_size); - else + if (do_validation) { + if (validate_buffer(tmp_mem, + dmabuf_cmsg->frag_size)) + goto err_close_client; + } else { print_nonzero_bytes(tmp_mem, dmabuf_cmsg->frag_size); + } ret = setsockopt(client_fd, SOL_SOCKET, SO_DEVMEM_DONTNEED, &token, sizeof(token)); - if (ret != 1) - error(1, 0, - "SO_DEVMEM_DONTNEED not enough tokens"); + if (ret != 1) { + pr_err("SO_DEVMEM_DONTNEED not enough tokens"); + goto err_close_client; + } + } + if (!is_devmem) { + pr_err("flow steering error"); + goto err_close_client; } - if (!is_devmem) - error(1, 0, "flow steering error\n"); fprintf(stderr, "total_received=%lu\n", total_received); } @@ -620,80 +1041,374 @@ int do_server(struct memory_buffer *mem) page_aligned_frags, non_page_aligned_frags); cleanup: + err = 0; - free(tmp_mem); +err_close_client: close(client_fd); +err_close_socket: close(socket_fd); +err_free_tmp: + free(tmp_mem); +err_unbind: ynl_sock_destroy(ys); - - return 0; +err_reset_flow_steering: + reset_flow_steering(); +err_reset_rss: + reset_rss(); +err_reset_headersplit: + restore_ring_config(ring_config); +err_free_ring_config: + ethtool_rings_get_rsp_free(ring_config); + return err; } -void run_devmem_tests(void) +int run_devmem_tests(void) { + struct ethtool_rings_get_rsp *ring_config; struct netdev_queue_id *queues; struct memory_buffer *mem; struct ynl_sock *ys; - size_t i = 0; + int err = -1; mem = provider->alloc(getpagesize() * NUM_PAGES); + if (!mem) { + pr_err("Failed to allocate memory buffer"); + return -1; + } + + ring_config = get_ring_config(); + if (!ring_config) { + pr_err("Failed to get current ring configuration"); + goto err_free_mem; + } /* Configure RSS to divert all traffic from our devmem queues */ - if (configure_rss()) - error(1, 0, "rss error\n"); + if (configure_rss()) { + pr_err("rss error"); + goto err_free_ring_config; + } - queues = calloc(num_queues, sizeof(*queues)); + if (configure_headersplit(ring_config, 1)) { + pr_err("Failed to configure header split"); + goto err_reset_rss; + } - if (configure_headersplit(1)) - error(1, 0, "Failed to configure header split\n"); + queues = netdev_queue_id_alloc(num_queues); + if (!queues) { + pr_err("Failed to allocate empty queues array"); + goto err_reset_headersplit; + } - if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) - error(1, 0, "Binding empty queues array should have failed\n"); + if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) { + pr_err("Binding empty queues array should have failed"); + goto err_unbind; + } - for (i = 0; i < num_queues; i++) { - queues[i]._present.type = 1; - queues[i]._present.id = 1; - queues[i].type = NETDEV_QUEUE_TYPE_RX; - queues[i].id = start_queue + i; + if (configure_headersplit(ring_config, 0)) { + pr_err("Failed to configure header split"); + goto err_reset_headersplit; } - if (configure_headersplit(0)) - error(1, 0, "Failed to configure header split\n"); + queues = create_queues(); + if (!queues) { + pr_err("Failed to create queues"); + goto err_reset_headersplit; + } - if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) - error(1, 0, "Configure dmabuf with header split off should have failed\n"); + if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) { + pr_err("Configure dmabuf with header split off should have failed"); + goto err_unbind; + } - if (configure_headersplit(1)) - error(1, 0, "Failed to configure header split\n"); + if (configure_headersplit(ring_config, 1)) { + pr_err("Failed to configure header split"); + goto err_reset_headersplit; + } - for (i = 0; i < num_queues; i++) { - queues[i]._present.type = 1; - queues[i]._present.id = 1; - queues[i].type = NETDEV_QUEUE_TYPE_RX; - queues[i].id = start_queue + i; + queues = create_queues(); + if (!queues) { + pr_err("Failed to create queues"); + goto err_reset_headersplit; } - if (bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) - error(1, 0, "Failed to bind\n"); + if (bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) { + pr_err("Failed to bind"); + goto err_reset_headersplit; + } /* Deactivating a bound queue should not be legal */ - if (!configure_channels(num_queues, num_queues - 1)) - error(1, 0, "Deactivating a bound queue should be illegal.\n"); + if (!check_changing_channels(num_queues, num_queues)) { + pr_err("Deactivating a bound queue should be illegal"); + goto err_unbind; + } - /* Closing the netlink socket does an implicit unbind */ - ynl_sock_destroy(ys); + err = 0; + goto err_unbind; +err_unbind: + ynl_sock_destroy(ys); +err_reset_headersplit: + restore_ring_config(ring_config); +err_reset_rss: + reset_rss(); +err_free_ring_config: + ethtool_rings_get_rsp_free(ring_config); +err_free_mem: provider->free(mem); + return err; +} + +static uint64_t gettimeofday_ms(void) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000ULL) + (tv.tv_usec / 1000ULL); +} + +static int do_poll(int fd) +{ + struct pollfd pfd; + int ret; + + pfd.revents = 0; + pfd.fd = fd; + + ret = poll(&pfd, 1, waittime_ms); + if (ret == -1) { + pr_err("poll"); + return -1; + } + + return ret && (pfd.revents & POLLERR); +} + +static int wait_compl(int fd) +{ + int64_t tstop = gettimeofday_ms() + waittime_ms; + char control[CMSG_SPACE(100)] = {}; + struct sock_extended_err *serr; + struct msghdr msg = {}; + struct cmsghdr *cm; + __u32 hi, lo; + int ret; + + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + while (gettimeofday_ms() < tstop) { + ret = do_poll(fd); + if (ret < 0) + return ret; + if (!ret) + continue; + + ret = recvmsg(fd, &msg, MSG_ERRQUEUE); + if (ret < 0) { + if (errno == EAGAIN) + continue; + pr_err("recvmsg(MSG_ERRQUEUE)"); + return -1; + } + if (msg.msg_flags & MSG_CTRUNC) { + pr_err("MSG_CTRUNC"); + return -1; + } + + for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) { + if (cm->cmsg_level != SOL_IP && + cm->cmsg_level != SOL_IPV6) + continue; + if (cm->cmsg_level == SOL_IP && + cm->cmsg_type != IP_RECVERR) + continue; + if (cm->cmsg_level == SOL_IPV6 && + cm->cmsg_type != IPV6_RECVERR) + continue; + + serr = (void *)CMSG_DATA(cm); + if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) { + pr_err("wrong origin %u", serr->ee_origin); + return -1; + } + if (serr->ee_errno != 0) { + pr_err("wrong errno %d", serr->ee_errno); + return -1; + } + + hi = serr->ee_data; + lo = serr->ee_info; + + fprintf(stderr, "tx complete [%d,%d]\n", lo, hi); + return 0; + } + } + + pr_err("did not receive tx completion"); + return -1; +} + +static int do_client(struct memory_buffer *mem) +{ + char ctrl_data[CMSG_SPACE(sizeof(__u32))]; + struct sockaddr_in6 server_sin; + struct sockaddr_in6 client_sin; + struct ynl_sock *ys = NULL; + struct iovec iov[MAX_IOV]; + struct msghdr msg = {}; + ssize_t line_size = 0; + struct cmsghdr *cmsg; + char *line = NULL; + int ret, err = -1; + size_t len = 0; + int socket_fd; + __u32 ddmabuf; + int opt = 1; + + ret = parse_address(server_ip, atoi(port), &server_sin); + if (ret < 0) { + pr_err("parse server address"); + return -1; + } + + if (client_ip) { + ret = parse_address(client_ip, atoi(port), &client_sin); + if (ret < 0) { + pr_err("parse client address"); + return ret; + } + } + + socket_fd = socket(AF_INET6, SOCK_STREAM, 0); + if (socket_fd < 0) { + pr_err("create socket"); + return -1; + } + + if (enable_reuseaddr(socket_fd)) + goto err_close_socket; + + ret = setsockopt(socket_fd, SOL_SOCKET, SO_BINDTODEVICE, ifname, + strlen(ifname) + 1); + if (ret) { + pr_err("bindtodevice"); + goto err_close_socket; + } + + if (bind_tx_queue(ifindex, mem->fd, &ys)) { + pr_err("Failed to bind"); + goto err_close_socket; + } + + if (client_ip) { + ret = bind(socket_fd, &client_sin, sizeof(client_sin)); + if (ret) { + pr_err("bind"); + goto err_unbind; + } + } + + ret = setsockopt(socket_fd, SOL_SOCKET, SO_ZEROCOPY, &opt, sizeof(opt)); + if (ret) { + pr_err("set sock opt"); + goto err_unbind; + } + + fprintf(stderr, "Connect to %s %d (via %s)\n", server_ip, + ntohs(server_sin.sin6_port), ifname); + + ret = connect(socket_fd, &server_sin, sizeof(server_sin)); + if (ret) { + pr_err("connect"); + goto err_unbind; + } + + while (1) { + free(line); + line = NULL; + line_size = getline(&line, &len, stdin); + + if (line_size < 0) + break; + + if (max_chunk) { + msg.msg_iovlen = + (line_size + max_chunk - 1) / max_chunk; + if (msg.msg_iovlen > MAX_IOV) { + pr_err("can't partition %zd bytes into maximum of %d chunks", + line_size, MAX_IOV); + goto err_free_line; + } + + for (int i = 0; i < msg.msg_iovlen; i++) { + iov[i].iov_base = (void *)(i * max_chunk); + iov[i].iov_len = max_chunk; + } + + iov[msg.msg_iovlen - 1].iov_len = + line_size - (msg.msg_iovlen - 1) * max_chunk; + } else { + iov[0].iov_base = 0; + iov[0].iov_len = line_size; + msg.msg_iovlen = 1; + } + + msg.msg_iov = iov; + provider->memcpy_to_device(mem, 0, line, line_size); + + msg.msg_control = ctrl_data; + msg.msg_controllen = sizeof(ctrl_data); + + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_DEVMEM_DMABUF; + cmsg->cmsg_len = CMSG_LEN(sizeof(__u32)); + + ddmabuf = tx_dmabuf_id; + + *((__u32 *)CMSG_DATA(cmsg)) = ddmabuf; + + ret = sendmsg(socket_fd, &msg, MSG_ZEROCOPY); + if (ret < 0) { + pr_err("Failed sendmsg"); + goto err_free_line; + } + + fprintf(stderr, "sendmsg_ret=%d\n", ret); + + if (ret != line_size) { + pr_err("Did not send all bytes %d vs %zd", ret, line_size); + goto err_free_line; + } + + if (wait_compl(socket_fd)) + goto err_free_line; + } + + fprintf(stderr, "%s: tx ok\n", TEST_PREFIX); + + err = 0; + +err_free_line: + free(line); +err_unbind: + ynl_sock_destroy(ys); +err_close_socket: + close(socket_fd); + return err; } int main(int argc, char *argv[]) { struct memory_buffer *mem; int is_server = 0, opt; - int ret; + int ret, err = 1; - while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:")) != -1) { + while ((opt = getopt(argc, argv, "Lls:c:p:v:q:t:f:z:")) != -1) { switch (opt) { + case 'L': + fail_on_linear = true; + break; case 'l': is_server = 1; break; @@ -718,47 +1433,62 @@ int main(int argc, char *argv[]) case 'f': ifname = optarg; break; + case 'z': + max_chunk = atoi(optarg); + break; case '?': fprintf(stderr, "unknown option: %c\n", optopt); break; } } - if (!ifname) - error(1, 0, "Missing -f argument\n"); + if (!ifname) { + pr_err("Missing -f argument"); + return 1; + } ifindex = if_nametoindex(ifname); + fprintf(stderr, "using ifindex=%u\n", ifindex); + if (!server_ip && !client_ip) { if (start_queue < 0 && num_queues < 0) { num_queues = rxq_num(ifindex); - if (num_queues < 0) - error(1, 0, "couldn't detect number of queues\n"); - if (num_queues < 2) - error(1, 0, - "number of device queues is too low\n"); + if (num_queues < 0) { + pr_err("couldn't detect number of queues"); + return 1; + } + if (num_queues < 2) { + pr_err("number of device queues is too low"); + return 1; + } /* make sure can bind to multiple queues */ start_queue = num_queues / 2; num_queues /= 2; } - if (start_queue < 0 || num_queues < 0) - error(1, 0, "Both -t and -q are required\n"); + if (start_queue < 0 || num_queues < 0) { + pr_err("Both -t and -q are required"); + return 1; + } - run_devmem_tests(); - return 0; + return run_devmem_tests(); } if (start_queue < 0 && num_queues < 0) { num_queues = rxq_num(ifindex); - if (num_queues < 2) - error(1, 0, "number of device queues is too low\n"); + if (num_queues < 2) { + pr_err("number of device queues is too low"); + return 1; + } num_queues = 1; start_queue = rxq_num(ifindex) - num_queues; - if (start_queue < 0) - error(1, 0, "couldn't detect number of queues\n"); + if (start_queue < 0) { + pr_err("couldn't detect number of queues"); + return 1; + } fprintf(stderr, "using queues %d..%d\n", start_queue, start_queue + num_queues); } @@ -766,21 +1496,39 @@ int main(int argc, char *argv[]) for (; optind < argc; optind++) fprintf(stderr, "extra arguments: %s\n", argv[optind]); - if (start_queue < 0) - error(1, 0, "Missing -t argument\n"); + if (start_queue < 0) { + pr_err("Missing -t argument"); + return 1; + } - if (num_queues < 0) - error(1, 0, "Missing -q argument\n"); + if (num_queues < 0) { + pr_err("Missing -q argument"); + return 1; + } - if (!server_ip) - error(1, 0, "Missing -s argument\n"); + if (!server_ip) { + pr_err("Missing -s argument"); + return 1; + } - if (!port) - error(1, 0, "Missing -p argument\n"); + if (!port) { + pr_err("Missing -p argument"); + return 1; + } mem = provider->alloc(getpagesize() * NUM_PAGES); - ret = is_server ? do_server(mem) : 1; - provider->free(mem); + if (!mem) { + pr_err("Failed to allocate memory buffer"); + return 1; + } - return ret; + ret = is_server ? do_server(mem) : do_client(mem); + if (ret) + goto err_free_mem; + + err = 0; + +err_free_mem: + provider->free(mem); + return err; } diff --git a/tools/testing/selftests/drivers/net/hw/nic_link_layer.py b/tools/testing/selftests/drivers/net/hw/nic_link_layer.py deleted file mode 100644 index efd921180532..000000000000 --- a/tools/testing/selftests/drivers/net/hw/nic_link_layer.py +++ /dev/null @@ -1,113 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0 - -#Introduction: -#This file has basic link layer tests for generic NIC drivers. -#The test comprises of auto-negotiation, speed and duplex checks. -# -#Setup: -#Connect the DUT PC with NIC card to partner pc back via ethernet medium of your choice(RJ45, T1) -# -# DUT PC Partner PC -#┌───────────────────────┐ ┌──────────────────────────┐ -#│ │ │ │ -#│ │ │ │ -#│ ┌───────────┐ │ │ -#│ │DUT NIC │ Eth │ │ -#│ │Interface ─┼─────────────────────────┼─ any eth Interface │ -#│ └───────────┘ │ │ -#│ │ │ │ -#│ │ │ │ -#└───────────────────────┘ └──────────────────────────┘ -# -#Configurations: -#Required minimum ethtool version is 6.10 (supports json) -#Default values: -#time_delay = 8 #time taken to wait for transitions to happen, in seconds. - -import time -import argparse -from lib.py import ksft_run, ksft_exit, ksft_pr, ksft_eq -from lib.py import KsftFailEx, KsftSkipEx -from lib.py import NetDrvEpEnv -from lib.py import LinkConfig - -def _pre_test_checks(cfg: object, link_config: LinkConfig) -> None: - if link_config.partner_netif is None: - KsftSkipEx("Partner interface is not available") - if not link_config.check_autoneg_supported() or not link_config.check_autoneg_supported(remote=True): - KsftSkipEx(f"Auto-negotiation not supported for interface {cfg.ifname} or {link_config.partner_netif}") - if not link_config.verify_link_up(): - raise KsftSkipEx(f"Link state of interface {cfg.ifname} is DOWN") - -def verify_autonegotiation(cfg: object, expected_state: str, link_config: LinkConfig) -> None: - if not link_config.verify_link_up(): - raise KsftSkipEx(f"Link state of interface {cfg.ifname} is DOWN") - """Verifying the autonegotiation state in partner""" - partner_autoneg_output = link_config.get_ethtool_field("auto-negotiation", remote=True) - if partner_autoneg_output is None: - KsftSkipEx(f"Auto-negotiation state not available for interface {link_config.partner_netif}") - partner_autoneg_state = "on" if partner_autoneg_output is True else "off" - - ksft_eq(partner_autoneg_state, expected_state) - - """Verifying the autonegotiation state of local""" - autoneg_output = link_config.get_ethtool_field("auto-negotiation") - if autoneg_output is None: - KsftSkipEx(f"Auto-negotiation state not available for interface {cfg.ifname}") - actual_state = "on" if autoneg_output is True else "off" - - ksft_eq(actual_state, expected_state) - - """Verifying the link establishment""" - link_available = link_config.get_ethtool_field("link-detected") - if link_available is None: - KsftSkipEx(f"Link status not available for interface {cfg.ifname}") - if link_available != True: - raise KsftSkipEx("Link not established at interface {cfg.ifname} after changing auto-negotiation") - -def test_autonegotiation(cfg: object, link_config: LinkConfig, time_delay: int) -> None: - _pre_test_checks(cfg, link_config) - for state in ["off", "on"]: - if not link_config.set_autonegotiation_state(state, remote=True): - raise KsftSkipEx(f"Unable to set auto-negotiation state for interface {link_config.partner_netif}") - if not link_config.set_autonegotiation_state(state): - raise KsftSkipEx(f"Unable to set auto-negotiation state for interface {cfg.ifname}") - time.sleep(time_delay) - verify_autonegotiation(cfg, state, link_config) - -def test_network_speed(cfg: object, link_config: LinkConfig, time_delay: int) -> None: - _pre_test_checks(cfg, link_config) - common_link_modes = link_config.common_link_modes - if not common_link_modes: - KsftSkipEx("No common link modes exist") - speeds, duplex_modes = link_config.get_speed_duplex_values(common_link_modes) - - if speeds and duplex_modes and len(speeds) == len(duplex_modes): - for idx in range(len(speeds)): - speed = speeds[idx] - duplex = duplex_modes[idx] - if not link_config.set_speed_and_duplex(speed, duplex): - raise KsftFailEx(f"Unable to set speed and duplex parameters for {cfg.ifname}") - time.sleep(time_delay) - if not link_config.verify_speed_and_duplex(speed, duplex): - raise KsftSkipEx(f"Error occurred while verifying speed and duplex states for interface {cfg.ifname}") - else: - if not speeds or not duplex_modes: - KsftSkipEx(f"No supported speeds or duplex modes found for interface {cfg.ifname}") - else: - KsftSkipEx("Mismatch in the number of speeds and duplex modes") - -def main() -> None: - parser = argparse.ArgumentParser(description="Run basic link layer tests for NIC driver") - parser.add_argument('--time-delay', type=int, default=8, help='Time taken to wait for transitions to happen(in seconds). Default is 8 seconds.') - args = parser.parse_args() - time_delay = args.time_delay - with NetDrvEpEnv(__file__, nsim_test=False) as cfg: - link_config = LinkConfig(cfg) - ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, link_config, time_delay,)) - link_config.reset_interface() - ksft_exit() - -if __name__ == "__main__": - main() diff --git a/tools/testing/selftests/drivers/net/hw/nic_performance.py b/tools/testing/selftests/drivers/net/hw/nic_performance.py deleted file mode 100644 index 201403b76ea3..000000000000 --- a/tools/testing/selftests/drivers/net/hw/nic_performance.py +++ /dev/null @@ -1,137 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0 - -#Introduction: -#This file has basic performance test for generic NIC drivers. -#The test comprises of throughput check for TCP and UDP streams. -# -#Setup: -#Connect the DUT PC with NIC card to partner pc back via ethernet medium of your choice(RJ45, T1) -# -# DUT PC Partner PC -#┌───────────────────────┐ ┌──────────────────────────┐ -#│ │ │ │ -#│ │ │ │ -#│ ┌───────────┐ │ │ -#│ │DUT NIC │ Eth │ │ -#│ │Interface ─┼─────────────────────────┼─ any eth Interface │ -#│ └───────────┘ │ │ -#│ │ │ │ -#│ │ │ │ -#└───────────────────────┘ └──────────────────────────┘ -# -#Configurations: -#To prevent interruptions, Add ethtool, ip to the sudoers list in remote PC and get the ssh key from remote. -#Required minimum ethtool version is 6.10 -#Change the below configuration based on your hw needs. -# """Default values""" -#time_delay = 8 #time taken to wait for transitions to happen, in seconds. -#test_duration = 10 #performance test duration for the throughput check, in seconds. -#send_throughput_threshold = 80 #percentage of send throughput required to pass the check -#receive_throughput_threshold = 50 #percentage of receive throughput required to pass the check - -import time -import json -import argparse -from lib.py import ksft_run, ksft_exit, ksft_pr, ksft_true -from lib.py import KsftFailEx, KsftSkipEx, GenerateTraffic -from lib.py import NetDrvEpEnv, bkg, wait_port_listen -from lib.py import cmd -from lib.py import LinkConfig - -class TestConfig: - def __init__(self, time_delay: int, test_duration: int, send_throughput_threshold: int, receive_throughput_threshold: int) -> None: - self.time_delay = time_delay - self.test_duration = test_duration - self.send_throughput_threshold = send_throughput_threshold - self.receive_throughput_threshold = receive_throughput_threshold - -def _pre_test_checks(cfg: object, link_config: LinkConfig) -> None: - if not link_config.verify_link_up(): - KsftSkipEx(f"Link state of interface {cfg.ifname} is DOWN") - common_link_modes = link_config.common_link_modes - if common_link_modes is None: - KsftSkipEx("No common link modes found") - if link_config.partner_netif == None: - KsftSkipEx("Partner interface is not available") - if link_config.check_autoneg_supported(): - KsftSkipEx("Auto-negotiation not supported by local") - if link_config.check_autoneg_supported(remote=True): - KsftSkipEx("Auto-negotiation not supported by remote") - cfg.require_cmd("iperf3", remote=True) - -def check_throughput(cfg: object, link_config: LinkConfig, test_config: TestConfig, protocol: str, traffic: GenerateTraffic) -> None: - common_link_modes = link_config.common_link_modes - speeds, duplex_modes = link_config.get_speed_duplex_values(common_link_modes) - """Test duration in seconds""" - duration = test_config.test_duration - - ksft_pr(f"{protocol} test") - test_type = "-u" if protocol == "UDP" else "" - - send_throughput = [] - receive_throughput = [] - for idx in range(0, len(speeds)): - if link_config.set_speed_and_duplex(speeds[idx], duplex_modes[idx]) == False: - raise KsftFailEx(f"Not able to set speed and duplex parameters for {cfg.ifname}") - time.sleep(test_config.time_delay) - if not link_config.verify_link_up(): - raise KsftSkipEx(f"Link state of interface {cfg.ifname} is DOWN") - - send_command=f"{test_type} -b 0 -t {duration} --json" - receive_command=f"{test_type} -b 0 -t {duration} --reverse --json" - - send_result = traffic.run_remote_test(cfg, command=send_command) - if send_result.ret != 0: - raise KsftSkipEx("Error occurred during data transmit: {send_result.stdout}") - - send_output = send_result.stdout - send_data = json.loads(send_output) - - """Convert throughput to Mbps""" - send_throughput.append(round(send_data['end']['sum_sent']['bits_per_second'] / 1e6, 2)) - ksft_pr(f"{protocol}: Send throughput: {send_throughput[idx]} Mbps") - - receive_result = traffic.run_remote_test(cfg, command=receive_command) - if receive_result.ret != 0: - raise KsftSkipEx("Error occurred during data receive: {receive_result.stdout}") - - receive_output = receive_result.stdout - receive_data = json.loads(receive_output) - - """Convert throughput to Mbps""" - receive_throughput.append(round(receive_data['end']['sum_received']['bits_per_second'] / 1e6, 2)) - ksft_pr(f"{protocol}: Receive throughput: {receive_throughput[idx]} Mbps") - - """Check whether throughput is not below the threshold (default values set at start)""" - for idx in range(0, len(speeds)): - send_threshold = float(speeds[idx]) * float(test_config.send_throughput_threshold / 100) - receive_threshold = float(speeds[idx]) * float(test_config.receive_throughput_threshold / 100) - ksft_true(send_throughput[idx] >= send_threshold, f"{protocol}: Send throughput is below threshold for {speeds[idx]} Mbps in {duplex_modes[idx]} duplex") - ksft_true(receive_throughput[idx] >= receive_threshold, f"{protocol}: Receive throughput is below threshold for {speeds[idx]} Mbps in {duplex_modes[idx]} duplex") - -def test_tcp_throughput(cfg: object, link_config: LinkConfig, test_config: TestConfig, traffic: GenerateTraffic) -> None: - _pre_test_checks(cfg, link_config) - check_throughput(cfg, link_config, test_config, 'TCP', traffic) - -def test_udp_throughput(cfg: object, link_config: LinkConfig, test_config: TestConfig, traffic: GenerateTraffic) -> None: - _pre_test_checks(cfg, link_config) - check_throughput(cfg, link_config, test_config, 'UDP', traffic) - -def main() -> None: - parser = argparse.ArgumentParser(description="Run basic performance test for NIC driver") - parser.add_argument('--time-delay', type=int, default=8, help='Time taken to wait for transitions to happen(in seconds). Default is 8 seconds.') - parser.add_argument('--test-duration', type=int, default=10, help='Performance test duration for the throughput check, in seconds. Default is 10 seconds.') - parser.add_argument('--stt', type=int, default=80, help='Send throughput Threshold: Percentage of send throughput upon actual throughput required to pass the throughput check (in percentage). Default is 80.') - parser.add_argument('--rtt', type=int, default=50, help='Receive throughput Threshold: Percentage of receive throughput upon actual throughput required to pass the throughput check (in percentage). Default is 50.') - args=parser.parse_args() - test_config = TestConfig(args.time_delay, args.test_duration, args.stt, args.rtt) - with NetDrvEpEnv(__file__, nsim_test=False) as cfg: - traffic = GenerateTraffic(cfg) - link_config = LinkConfig(cfg) - ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, link_config, test_config, traffic, )) - link_config.reset_interface() - ksft_exit() - -if __name__ == "__main__": - main() diff --git a/tools/testing/selftests/drivers/net/hw/nic_timestamp.py b/tools/testing/selftests/drivers/net/hw/nic_timestamp.py new file mode 100755 index 000000000000..c632b41e7a23 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/nic_timestamp.py @@ -0,0 +1,225 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# pylint: disable=locally-disabled, invalid-name, attribute-defined-outside-init, too-few-public-methods + +""" +Tests related to configuration of HW timestamping +""" + +import errno +import ctypes +import fcntl +import socket +from lib.py import ksft_run, ksft_exit, ksft_ge, ksft_eq, KsftSkipEx +from lib.py import NetDrvEnv, EthtoolFamily, NlError + + +SIOCSHWTSTAMP = 0x89b0 +SIOCGHWTSTAMP = 0x89b1 +class hwtstamp_config(ctypes.Structure): + """ Python copy of struct hwtstamp_config """ + _fields_ = [ + ("flags", ctypes.c_int), + ("tx_type", ctypes.c_int), + ("rx_filter", ctypes.c_int), + ] + + +class ifreq(ctypes.Structure): + """ Python copy of struct ifreq """ + _fields_ = [ + ("ifr_name", ctypes.c_char * 16), + ("ifr_data", ctypes.POINTER(hwtstamp_config)), + ] + + +def __get_hwtimestamp_support(cfg): + """ Retrieve supported configuration information """ + + try: + tsinfo = cfg.ethnl.tsinfo_get({'header': {'dev-name': cfg.ifname}}) + except NlError as e: + if e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("timestamping configuration is not supported") from e + raise + + ctx = {} + tx = tsinfo.get('tx-types', {}) + rx = tsinfo.get('rx-filters', {}) + + bits = tx.get('bits', {}) + ctx['tx'] = bits.get('bit', []) + bits = rx.get('bits', {}) + ctx['rx'] = bits.get('bit', []) + return ctx + + +def __get_hwtimestamp_config_ioctl(cfg): + """ Retrieve current TS configuration information (via ioctl) """ + + config = hwtstamp_config() + + req = ifreq() + req.ifr_name = cfg.ifname.encode() + req.ifr_data = ctypes.pointer(config) + + try: + sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + fcntl.ioctl(sock.fileno(), SIOCGHWTSTAMP, req) + sock.close() + + except OSError as e: + if e.errno == errno.EOPNOTSUPP: + raise KsftSkipEx("timestamping configuration is not supported via ioctl") from e + raise + return config + + +def __get_hwtimestamp_config(cfg): + """ Retrieve current TS configuration information (via netLink) """ + + try: + tscfg = cfg.ethnl.tsconfig_get({'header': {'dev-name': cfg.ifname}}) + except NlError as e: + if e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("timestamping configuration is not supported via netlink") from e + raise + return tscfg + + +def __set_hwtimestamp_config_ioctl(cfg, ts): + """ Setup new TS configuration information (via ioctl) """ + config = hwtstamp_config() + config.rx_filter = ts['rx-filters']['bits']['bit'][0]['index'] + config.tx_type = ts['tx-types']['bits']['bit'][0]['index'] + req = ifreq() + req.ifr_name = cfg.ifname.encode() + req.ifr_data = ctypes.pointer(config) + try: + sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + fcntl.ioctl(sock.fileno(), SIOCSHWTSTAMP, req) + sock.close() + + except OSError as e: + if e.errno == errno.EOPNOTSUPP: + raise KsftSkipEx("timestamping configuration is not supported via ioctl") from e + raise + + +def __set_hwtimestamp_config(cfg, ts): + """ Setup new TS configuration information (via netlink) """ + + ts['header'] = {'dev-name': cfg.ifname} + try: + res = cfg.ethnl.tsconfig_set(ts) + except NlError as e: + if e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("timestamping configuration is not supported via netlink") from e + raise + return res + + +def __perform_hwtstamp_tx(cfg, is_ioctl): + """ + Test TX timestamp configuration via either netlink or ioctl. + The driver should apply provided config and report back proper state. + """ + + orig_tscfg = __get_hwtimestamp_config(cfg) + ts = __get_hwtimestamp_support(cfg) + tx = ts['tx'] + for t in tx: + res = None + tscfg = orig_tscfg + tscfg['tx-types']['bits']['bit'] = [t] + if is_ioctl: + __set_hwtimestamp_config_ioctl(cfg, tscfg) + else: + res = __set_hwtimestamp_config(cfg, tscfg) + if res is None: + res = __get_hwtimestamp_config(cfg) + resioctl = __get_hwtimestamp_config_ioctl(cfg) + ksft_eq(res['tx-types']['bits']['bit'], [t]) + ksft_eq(resioctl.tx_type, t['index']) + __set_hwtimestamp_config(cfg, orig_tscfg) + +def test_hwtstamp_tx_netlink(cfg): + """ + Test TX timestamp configuration setup via netlink. + The driver should apply provided config and report back proper state. + """ + __perform_hwtstamp_tx(cfg, False) + + +def test_hwtstamp_tx_ioctl(cfg): + """ + Test TX timestamp configuration setup via ioctl. + The driver should apply provided config and report back proper state. + """ + __perform_hwtstamp_tx(cfg, True) + + +def __perform_hwtstamp_rx(cfg, is_ioctl): + """ + Test RX timestamp configuration. + The filter configuration is taken from the list of supported filters. + The driver should apply the config without error and report back proper state. + Some extension of the timestamping scope is allowed for PTP filters. + """ + + orig_tscfg = __get_hwtimestamp_config(cfg) + ts = __get_hwtimestamp_support(cfg) + rx = ts['rx'] + for r in rx: + res = None + tscfg = orig_tscfg + tscfg['rx-filters']['bits']['bit'] = [r] + if is_ioctl: + __set_hwtimestamp_config_ioctl(cfg, tscfg) + else: + res = __set_hwtimestamp_config(cfg, tscfg) + if res is None: + res = __get_hwtimestamp_config(cfg) + resioctl = __get_hwtimestamp_config_ioctl(cfg) + ksft_eq(resioctl.rx_filter, res['rx-filters']['bits']['bit'][0]['index']) + if r['index'] == 0 or r['index'] == 1: + ksft_eq(res['rx-filters']['bits']['bit'][0]['index'], r['index']) + else: + # the driver can fallback to some value which has higher coverage for timestamping + ksft_ge(res['rx-filters']['bits']['bit'][0]['index'], r['index']) + __set_hwtimestamp_config(cfg, orig_tscfg) + + +def test_hwtstamp_rx_netlink(cfg): + """ + Test RX timestamp configuration via netlink. + The filter configuration is taken from the list of supported filters. + The driver should apply the config without error and report back proper state. + Some extension of the timestamping scope is allowed for PTP filters. + """ + __perform_hwtstamp_rx(cfg, False) + + +def test_hwtstamp_rx_ioctl(cfg): + """ + Test RX timestamp configuration via ioctl. + The filter configuration is taken from the list of supported filters. + The driver should apply the config without error and report back proper state. + Some extension of the timestamping scope is allowed for PTP filters. + """ + __perform_hwtstamp_rx(cfg, True) + + +def main() -> None: + """ Ksft boiler plate main """ + + with NetDrvEnv(__file__, nsim_test=False) as cfg: + cfg.ethnl = EthtoolFamily() + ksft_run([test_hwtstamp_tx_ioctl, test_hwtstamp_tx_netlink, + test_hwtstamp_rx_ioctl, test_hwtstamp_rx_netlink], + args=(cfg,)) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/nk_forward.bpf.c b/tools/testing/selftests/drivers/net/hw/nk_forward.bpf.c new file mode 100644 index 000000000000..86ebfc1445b6 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/nk_forward.bpf.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <linux/pkt_cls.h> +#include <linux/if_ether.h> +#include <linux/ipv6.h> +#include <linux/in6.h> +#include <bpf/bpf_endian.h> +#include <bpf/bpf_helpers.h> + +#define TC_ACT_OK 0 +#define ETH_P_IPV6 0x86DD + +#define ctx_ptr(field) ((void *)(long)(field)) + +#define v6_p64_equal(a, b) (a.s6_addr32[0] == b.s6_addr32[0] && \ + a.s6_addr32[1] == b.s6_addr32[1]) + +volatile __u32 netkit_ifindex; +volatile __u8 ipv6_prefix[16]; + +SEC("tc/ingress") +int tc_redirect_peer(struct __sk_buff *skb) +{ + void *data_end = ctx_ptr(skb->data_end); + void *data = ctx_ptr(skb->data); + struct in6_addr *peer_addr; + struct ipv6hdr *ip6h; + struct ethhdr *eth; + + peer_addr = (struct in6_addr *)ipv6_prefix; + + if (skb->protocol != bpf_htons(ETH_P_IPV6)) + return TC_ACT_OK; + + eth = data; + if ((void *)(eth + 1) > data_end) + return TC_ACT_OK; + + ip6h = data + sizeof(struct ethhdr); + if ((void *)(ip6h + 1) > data_end) + return TC_ACT_OK; + + if (!v6_p64_equal(ip6h->daddr, (*peer_addr))) + return TC_ACT_OK; + + return bpf_redirect_peer(netkit_ifindex, 0); +} + +char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/drivers/net/hw/nk_netns.py b/tools/testing/selftests/drivers/net/hw/nk_netns.py new file mode 100755 index 000000000000..8b7ab75aa27f --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/nk_netns.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Test exercising NetDrvContEnv() itself, a NetDrvContEnv() selftest. +""" + +from lib.py import ksft_run, ksft_exit +from lib.py import NetDrvContEnv +from lib.py import cmd + + +def test_ping(cfg) -> None: + """ Run ping between the container and the remote system. """ + cfg.require_ipver("6") + + cmd(f"ping -c 1 -W5 {cfg.nk_guest_ipv6}", host=cfg.remote) + cmd(f"ping -c 1 -W5 {cfg.remote_addr_v['6']}", ns=cfg.netns) + + +def main() -> None: + """ Ksft boiler plate main """ + with NetDrvContEnv(__file__) as cfg: + ksft_run([test_ping], args=(cfg,)) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/nk_qlease.py b/tools/testing/selftests/drivers/net/hw/nk_qlease.py new file mode 100755 index 000000000000..aa83dc321328 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/nk_qlease.py @@ -0,0 +1,265 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import re +import time +import threading +from os import path +from lib.py import ( + ksft_run, + ksft_exit, + ksft_eq, + ksft_in, + ksft_not_in, + ksft_raises, +) +from lib.py import ( + NetDrvContEnv, + NetNSEnter, + EthtoolFamily, + NetdevFamily, +) +from lib.py import ( + bkg, + cmd, + defer, + ethtool, + ip, + rand_port, + wait_port_listen, +) +from lib.py import KsftSkipEx, CmdExitFailure + + +def set_flow_rule(cfg): + output = ethtool( + f"-N {cfg.ifname} flow-type tcp6 dst-port {cfg.port} action {cfg.src_queue}" + ).stdout + values = re.search(r"ID (\d+)", output).group(1) + return int(values) + + +def test_iou_zcrx(cfg) -> None: + cfg.require_ipver("6") + ethnl = EthtoolFamily() + + rings = ethnl.rings_get({"header": {"dev-index": cfg.ifindex}}) + rx_rings = rings["rx"] + hds_thresh = rings.get("hds-thresh", 0) + + ethnl.rings_set( + { + "header": {"dev-index": cfg.ifindex}, + "tcp-data-split": "enabled", + "hds-thresh": 0, + "rx": 64, + } + ) + defer( + ethnl.rings_set, + { + "header": {"dev-index": cfg.ifindex}, + "tcp-data-split": "unknown", + "hds-thresh": hds_thresh, + "rx": rx_rings, + }, + ) + + ethtool(f"-X {cfg.ifname} equal {cfg.src_queue}") + defer(ethtool, f"-X {cfg.ifname} default") + + flow_rule_id = set_flow_rule(cfg) + defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}") + + rx_cmd = f"ip netns exec {cfg.netns.name} {cfg.bin_local} -s -p {cfg.port} -i {cfg._nk_guest_ifname} -q {cfg.nk_queue}" + tx_cmd = f"{cfg.bin_remote} -c -h {cfg.nk_guest_ipv6} -p {cfg.port} -l 12840" + with bkg(rx_cmd, exit_wait=True): + wait_port_listen(cfg.port, proto="tcp", ns=cfg.netns) + cmd(tx_cmd, host=cfg.remote) + + +def test_attrs(cfg) -> None: + cfg.require_ipver("6") + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": cfg.ifindex, "id": cfg.src_queue, "type": "rx"} + ) + + ksft_eq(queue_info["id"], cfg.src_queue) + ksft_eq(queue_info["type"], "rx") + ksft_eq(queue_info["ifindex"], cfg.ifindex) + + ksft_in("lease", queue_info) + lease = queue_info["lease"] + ksft_eq(lease["ifindex"], cfg.nk_guest_ifindex) + ksft_eq(lease["queue"]["id"], cfg.nk_queue) + ksft_eq(lease["queue"]["type"], "rx") + ksft_in("netns-id", lease) + + +def test_attach_xdp_with_mp(cfg) -> None: + cfg.require_ipver("6") + ethnl = EthtoolFamily() + + rings = ethnl.rings_get({"header": {"dev-index": cfg.ifindex}}) + rx_rings = rings["rx"] + hds_thresh = rings.get("hds-thresh", 0) + + ethnl.rings_set( + { + "header": {"dev-index": cfg.ifindex}, + "tcp-data-split": "enabled", + "hds-thresh": 0, + "rx": 64, + } + ) + defer( + ethnl.rings_set, + { + "header": {"dev-index": cfg.ifindex}, + "tcp-data-split": "unknown", + "hds-thresh": hds_thresh, + "rx": rx_rings, + }, + ) + + ethtool(f"-X {cfg.ifname} equal {cfg.src_queue}") + defer(ethtool, f"-X {cfg.ifname} default") + + netdevnl = NetdevFamily() + + rx_cmd = f"ip netns exec {cfg.netns.name} {cfg.bin_local} -s -p {cfg.port} -i {cfg._nk_guest_ifname} -q {cfg.nk_queue}" + with bkg(rx_cmd): + wait_port_listen(cfg.port, proto="tcp", ns=cfg.netns) + + time.sleep(0.1) + queue_info = netdevnl.queue_get( + {"ifindex": cfg.ifindex, "id": cfg.src_queue, "type": "rx"} + ) + ksft_in("io-uring", queue_info) + + prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" + with ksft_raises(CmdExitFailure): + ip(f"link set dev {cfg.ifname} xdp obj {prog} sec xdp.frags") + + time.sleep(0.1) + queue_info = netdevnl.queue_get( + {"ifindex": cfg.ifindex, "id": cfg.src_queue, "type": "rx"} + ) + ksft_not_in("io-uring", queue_info) + + +def test_destroy(cfg) -> None: + cfg.require_ipver("6") + ethnl = EthtoolFamily() + + rings = ethnl.rings_get({"header": {"dev-index": cfg.ifindex}}) + rx_rings = rings["rx"] + hds_thresh = rings.get("hds-thresh", 0) + + ethnl.rings_set( + { + "header": {"dev-index": cfg.ifindex}, + "tcp-data-split": "enabled", + "hds-thresh": 0, + "rx": 64, + } + ) + defer( + ethnl.rings_set, + { + "header": {"dev-index": cfg.ifindex}, + "tcp-data-split": "unknown", + "hds-thresh": hds_thresh, + "rx": rx_rings, + }, + ) + + ethtool(f"-X {cfg.ifname} equal {cfg.src_queue}") + defer(ethtool, f"-X {cfg.ifname} default") + + rx_cmd = f"ip netns exec {cfg.netns.name} {cfg.bin_local} -s -p {cfg.port} -i {cfg._nk_guest_ifname} -q {cfg.nk_queue}" + rx_proc = cmd(rx_cmd, background=True) + wait_port_listen(cfg.port, proto="tcp", ns=cfg.netns) + + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": cfg.ifindex, "id": cfg.src_queue, "type": "rx"} + ) + ksft_in("io-uring", queue_info) + + # ip link del will wait for all refs to drop first, but iou-zcrx is holding + # onto a ref. Terminate iou-zcrx async via a thread after a delay. + kill_timer = threading.Timer(1, rx_proc.proc.terminate) + kill_timer.start() + + ip(f"link del dev {cfg._nk_host_ifname}") + kill_timer.join() + cfg._nk_host_ifname = None + cfg._nk_guest_ifname = None + + queue_info = netdevnl.queue_get( + {"ifindex": cfg.ifindex, "id": cfg.src_queue, "type": "rx"} + ) + ksft_not_in("io-uring", queue_info) + + cmd(f"tc filter del dev {cfg.ifname} ingress pref {cfg._bpf_prog_pref}") + cfg._tc_attached = False + + flow_rule_id = set_flow_rule(cfg) + defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}") + + rx_cmd = f"{cfg.bin_local} -s -p {cfg.port} -i {cfg.ifname} -q {cfg.src_queue}" + tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {cfg.port} -l 12840" + with bkg(rx_cmd, exit_wait=True): + wait_port_listen(cfg.port, proto="tcp") + cmd(tx_cmd, host=cfg.remote) + # Short delay since iou cleanup is async and takes a bit of time. + time.sleep(0.1) + queue_info = netdevnl.queue_get( + {"ifindex": cfg.ifindex, "id": cfg.src_queue, "type": "rx"} + ) + ksft_not_in("io-uring", queue_info) + + +def main() -> None: + with NetDrvContEnv(__file__, rxqueues=2) as cfg: + cfg.bin_local = path.abspath( + path.dirname(__file__) + "/../../../drivers/net/hw/iou-zcrx" + ) + cfg.bin_remote = cfg.remote.deploy(cfg.bin_local) + cfg.port = rand_port() + + ethnl = EthtoolFamily() + channels = ethnl.channels_get({"header": {"dev-index": cfg.ifindex}}) + channels = channels["combined-count"] + if channels < 2: + raise KsftSkipEx("Test requires NETIF with at least 2 combined channels") + + cfg.src_queue = channels - 1 + + with NetNSEnter(str(cfg.netns)): + netdevnl = NetdevFamily() + bind_result = netdevnl.queue_create( + { + "ifindex": cfg.nk_guest_ifindex, + "type": "rx", + "lease": { + "ifindex": cfg.ifindex, + "queue": {"id": cfg.src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + cfg.nk_queue = bind_result["id"] + + # test_destroy must be last because it destroys the netkit devices + ksft_run( + [test_iou_zcrx, test_attrs, test_attach_xdp_with_mp, test_destroy], + args=(cfg,), + ) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/ntuple.py b/tools/testing/selftests/drivers/net/hw/ntuple.py new file mode 100755 index 000000000000..232733142c02 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/ntuple.py @@ -0,0 +1,162 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +"""Test ethtool NFC (ntuple) flow steering rules.""" + +import random +from enum import Enum, auto +from lib.py import ksft_run, ksft_exit +from lib.py import ksft_eq, ksft_ge +from lib.py import ksft_variants, KsftNamedVariant +from lib.py import EthtoolFamily, NetDrvEpEnv, NetdevFamily +from lib.py import KsftSkipEx +from lib.py import cmd, ethtool, defer, rand_ports, bkg, wait_port_listen + + +class NtupleField(Enum): + SRC_IP = auto() + DST_IP = auto() + SRC_PORT = auto() + DST_PORT = auto() + + +def _require_ntuple(cfg): + features = ethtool(f"-k {cfg.ifname}", json=True)[0] + if not features["ntuple-filters"]["active"]: + raise KsftSkipEx("Ntuple filters not enabled on the device: " + str(features["ntuple-filters"])) + + +def _get_rx_cnts(cfg, prev=None): + """Get Rx packet counts for all queues, as a simple list of integers + if @prev is specified the prev counts will be subtracted""" + cfg.wait_hw_stats_settle() + data = cfg.netdevnl.qstats_get({"ifindex": cfg.ifindex, "scope": ["queue"]}, dump=True) + data = [x for x in data if x['queue-type'] == "rx"] + max_q = max([x["queue-id"] for x in data]) + queue_stats = [0] * (max_q + 1) + for q in data: + queue_stats[q["queue-id"]] = q["rx-packets"] + if prev and q["queue-id"] < len(prev): + queue_stats[q["queue-id"]] -= prev[q["queue-id"]] + return queue_stats + + +def _ntuple_rule_add(cfg, flow_spec): + """Install an NFC rule via ethtool.""" + + output = ethtool(f"-N {cfg.ifname} {flow_spec}").stdout + rule_id = int(output.split()[-1]) + defer(ethtool, f"-N {cfg.ifname} delete {rule_id}") + + +def _setup_isolated_queue(cfg): + """Default all traffic to queue 0, and pick a random queue to + steer NFC traffic to.""" + + channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}}) + ch_max = channels['combined-max'] + qcnt = channels['combined-count'] + + if ch_max < 2: + raise KsftSkipEx(f"Need at least 2 combined channels, max is {ch_max}") + + desired_queues = min(ch_max, 4) + if qcnt >= desired_queues: + desired_queues = qcnt + else: + ethtool(f"-L {cfg.ifname} combined {desired_queues}") + defer(ethtool, f"-L {cfg.ifname} combined {qcnt}") + + ethtool(f"-X {cfg.ifname} equal 1") + defer(ethtool, f"-X {cfg.ifname} default") + + return random.randint(1, desired_queues - 1) + + +def _send_traffic(cfg, ipver, proto, dst_port, src_port, pkt_cnt=40): + """Generate traffic with the desired flow signature.""" + + cfg.require_cmd("socat", remote=True) + + socat_proto = proto.upper() + dst_addr = f"[{cfg.addr_v['6']}]" if ipver == '6' else cfg.addr_v['4'] + + extra_opts = ",nodelay" if proto == "tcp" else ",shut-null" + + listen_cmd = (f"socat -{ipver} -t 2 -u " + f"{socat_proto}-LISTEN:{dst_port},reuseport /dev/null") + with bkg(listen_cmd, exit_wait=True): + wait_port_listen(dst_port, proto=proto) + send_cmd = f""" + bash -c 'for i in $(seq {pkt_cnt}); do echo msg; sleep 0.02; done' | + socat -{ipver} -u - \ + {socat_proto}:{dst_addr}:{dst_port},sourceport={src_port},reuseaddr{extra_opts} + """ + cmd(send_cmd, shell=True, host=cfg.remote) + + +def _add_ntuple_rule_and_send_traffic(cfg, ipver, proto, fields, test_queue): + ports = rand_ports(2) + src_port = ports[0] + dst_port = ports[1] + flow_parts = [f"flow-type {proto}{ipver}"] + + for field in fields: + if field == NtupleField.SRC_IP: + flow_parts.append(f"src-ip {cfg.remote_addr_v[ipver]}") + elif field == NtupleField.DST_IP: + flow_parts.append(f"dst-ip {cfg.addr_v[ipver]}") + elif field == NtupleField.SRC_PORT: + flow_parts.append(f"src-port {src_port}") + elif field == NtupleField.DST_PORT: + flow_parts.append(f"dst-port {dst_port}") + + flow_parts.append(f"action {test_queue}") + _ntuple_rule_add(cfg, " ".join(flow_parts)) + _send_traffic(cfg, ipver, proto, dst_port=dst_port, src_port=src_port) + + +def _ntuple_variants(): + for ipver in ["4", "6"]: + for proto in ["tcp", "udp"]: + for fields in [[NtupleField.SRC_IP], + [NtupleField.DST_IP], + [NtupleField.SRC_PORT], + [NtupleField.DST_PORT], + [NtupleField.SRC_IP, NtupleField.DST_IP], + [NtupleField.SRC_IP, NtupleField.DST_IP, + NtupleField.SRC_PORT, NtupleField.DST_PORT]]: + name = ".".join(f.name.lower() for f in fields) + yield KsftNamedVariant(f"{proto}{ipver}.{name}", + ipver, proto, fields) + + +@ksft_variants(_ntuple_variants()) +def queue(cfg, ipver, proto, fields): + """Test that an NFC rule steers traffic to the correct queue.""" + + cfg.require_ipver(ipver) + _require_ntuple(cfg) + + test_queue = _setup_isolated_queue(cfg) + + cnts = _get_rx_cnts(cfg) + _add_ntuple_rule_and_send_traffic(cfg, ipver, proto, fields, test_queue) + cnts = _get_rx_cnts(cfg, prev=cnts) + + ksft_ge(cnts[test_queue], 40, f"Traffic on test queue {test_queue}: {cnts}") + sum_idle = sum(cnts) - cnts[0] - cnts[test_queue] + ksft_eq(sum_idle, 0, f"Traffic on idle queues: {cnts}") + + +def main() -> None: + """Ksft boilerplate main.""" + + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + cfg.ethnl = EthtoolFamily() + cfg.netdevnl = NetdevFamily() + ksft_run([queue], args=(cfg,)) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py index ad192fef3117..2a51b60df8a1 100755 --- a/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py +++ b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py @@ -1,8 +1,13 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 +""" +Test driver resilience vs page pool allocation failures. +""" + import errno import time +import math import os from lib.py import ksft_run, ksft_exit, ksft_pr from lib.py import KsftSkipEx, KsftFailEx @@ -13,7 +18,8 @@ from lib.py import cmd, tool, GenerateTraffic def _write_fail_config(config): for key, value in config.items(): - with open("/sys/kernel/debug/fail_function/" + key, "w") as fp: + path = "/sys/kernel/debug/fail_function/" + with open(path + key, "w", encoding='ascii') as fp: fp.write(str(value) + "\n") @@ -22,8 +28,7 @@ def _enable_pp_allocation_fail(): raise KsftSkipEx("Kernel built without function error injection (or DebugFS)") if not os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_netmems"): - with open("/sys/kernel/debug/fail_function/inject", "w") as fp: - fp.write("page_pool_alloc_netmems\n") + _write_fail_config({"inject": "page_pool_alloc_netmems"}) _write_fail_config({ "verbose": 0, @@ -38,8 +43,7 @@ def _disable_pp_allocation_fail(): return if os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_netmems"): - with open("/sys/kernel/debug/fail_function/inject", "w") as fp: - fp.write("\n") + _write_fail_config({"inject": ""}) _write_fail_config({ "probability": 0, @@ -48,6 +52,10 @@ def _disable_pp_allocation_fail(): def test_pp_alloc(cfg, netdevnl): + """ + Configure page pool allocation fail injection while traffic is running. + """ + def get_stats(): return netdevnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] @@ -55,7 +63,7 @@ def test_pp_alloc(cfg, netdevnl): stat1 = get_stats() time.sleep(1) stat2 = get_stats() - if stat2['rx-packets'] - stat1['rx-packets'] < 15000: + if stat2['rx-packets'] - stat1['rx-packets'] < 4000: raise KsftFailEx("Traffic seems low:", stat2['rx-packets'] - stat1['rx-packets']) @@ -82,11 +90,16 @@ def test_pp_alloc(cfg, netdevnl): time.sleep(3) s2 = get_stats() - if s2['rx-alloc-fail'] - s1['rx-alloc-fail'] < 1: + seen_fails = s2['rx-alloc-fail'] - s1['rx-alloc-fail'] + if seen_fails < 1: raise KsftSkipEx("Allocation failures not increasing") - if s2['rx-alloc-fail'] - s1['rx-alloc-fail'] < 100: - raise KsftSkipEx("Allocation increasing too slowly", s2['rx-alloc-fail'] - s1['rx-alloc-fail'], - "packets:", s2['rx-packets'] - s1['rx-packets']) + pkts = s2['rx-packets'] - s1['rx-packets'] + # Expecting one failure per 512 buffers, 3.1x safety margin + want_fails = math.floor(pkts / 512 / 3.1) + if seen_fails < want_fails: + raise KsftSkipEx("Allocation increasing too slowly", seen_fails, + "packets:", pkts) + ksft_pr(f"Seen: pkts:{pkts} fails:{seen_fails} (pass thrs:{want_fails})") # Basic failures are fine, try to wobble some settings to catch extra failures check_traffic_flowing() @@ -105,7 +118,7 @@ def test_pp_alloc(cfg, netdevnl): else: ksft_pr("ethtool -G change retval: did not succeed", new_g) else: - ksft_pr("ethtool -G change retval: did not try") + ksft_pr("ethtool -G change retval: did not try") time.sleep(0.1) check_traffic_flowing() @@ -119,6 +132,7 @@ def test_pp_alloc(cfg, netdevnl): def main() -> None: + """ Ksft boiler plate main """ netdevnl = NetdevFamily() with NetDrvEpEnv(__file__, nsim_test=False) as cfg: diff --git a/tools/testing/selftests/drivers/net/hw/rss_api.py b/tools/testing/selftests/drivers/net/hw/rss_api.py new file mode 100755 index 000000000000..19847f3d4a00 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/rss_api.py @@ -0,0 +1,476 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +API level tests for RSS (mostly Netlink vs IOCTL). +""" + +import errno +import glob +import random +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_is, ksft_ne, ksft_raises +from lib.py import KsftSkipEx, KsftFailEx +from lib.py import defer, ethtool, CmdExitFailure +from lib.py import EthtoolFamily, NlError +from lib.py import NetDrvEnv + + +def _require_2qs(cfg): + qcnt = len(glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*")) + if qcnt < 2: + raise KsftSkipEx(f"Local has only {qcnt} queues") + return qcnt + + +def _ethtool_create(cfg, act, opts): + output = ethtool(f"{act} {cfg.ifname} {opts}").stdout + # Output will be something like: "New RSS context is 1" or + # "Added rule with ID 7", we want the integer from the end + return int(output.split()[-1]) + + +def _ethtool_get_cfg(cfg, fl_type, to_nl=False): + descr = ethtool(f"-n {cfg.ifname} rx-flow-hash {fl_type}").stdout + + if to_nl: + converter = { + "IP SA": "ip-src", + "IP DA": "ip-dst", + "L4 bytes 0 & 1 [TCP/UDP src port]": "l4-b-0-1", + "L4 bytes 2 & 3 [TCP/UDP dst port]": "l4-b-2-3", + } + + ret = set() + else: + converter = { + "IP SA": "s", + "IP DA": "d", + "L3 proto": "t", + "L4 bytes 0 & 1 [TCP/UDP src port]": "f", + "L4 bytes 2 & 3 [TCP/UDP dst port]": "n", + } + + ret = "" + + for line in descr.split("\n")[1:-2]: + # if this raises we probably need to add more keys to converter above + if to_nl: + ret.add(converter[line]) + else: + ret += converter[line] + return ret + + +def test_rxfh_nl_set_fail(cfg): + """ + Test error path of Netlink SET. + """ + _require_2qs(cfg) + + ethnl = EthtoolFamily() + ethnl.ntf_subscribe("monitor") + + with ksft_raises(NlError): + ethnl.rss_set({"header": {"dev-name": "lo"}, + "indir": None}) + + with ksft_raises(NlError): + ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "indir": [100000]}) + ntf = next(ethnl.poll_ntf(duration=0.2), None) + ksft_is(ntf, None) + + +def test_rxfh_nl_set_indir(cfg): + """ + Test setting indirection table via Netlink. + """ + qcnt = _require_2qs(cfg) + + # Test some SETs with a value + reset = defer(cfg.ethnl.rss_set, + {"header": {"dev-index": cfg.ifindex}, "indir": None}) + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "indir": [1]}) + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(set(rss.get("indir", [-1])), {1}) + + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "indir": [0, 1]}) + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(set(rss.get("indir", [-1])), {0, 1}) + + # Make sure we can't set the queue count below max queue used + with ksft_raises(CmdExitFailure): + ethtool(f"-L {cfg.ifname} combined 0 rx 1") + with ksft_raises(CmdExitFailure): + ethtool(f"-L {cfg.ifname} combined 1 rx 0") + + # Test reset back to default + reset.exec() + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(set(rss.get("indir", [-1])), set(range(qcnt))) + + +def test_rxfh_nl_set_indir_ctx(cfg): + """ + Test setting indirection table for a custom context via Netlink. + """ + _require_2qs(cfg) + + # Get setting for ctx 0, we'll make sure they don't get clobbered + dflt = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + + # Create context + ctx_id = _ethtool_create(cfg, "-X", "context new") + defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete") + + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "context": ctx_id, "indir": [1]}) + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}, + "context": ctx_id}) + ksft_eq(set(rss.get("indir", [-1])), {1}) + + ctx0 = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(ctx0, dflt) + + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "context": ctx_id, "indir": [0, 1]}) + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}, + "context": ctx_id}) + ksft_eq(set(rss.get("indir", [-1])), {0, 1}) + + ctx0 = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(ctx0, dflt) + + # Make sure we can't set the queue count below max queue used + with ksft_raises(CmdExitFailure): + ethtool(f"-L {cfg.ifname} combined 0 rx 1") + with ksft_raises(CmdExitFailure): + ethtool(f"-L {cfg.ifname} combined 1 rx 0") + + +def test_rxfh_indir_ntf(cfg): + """ + Check that Netlink notifications are generated when RSS indirection + table was modified. + """ + _require_2qs(cfg) + + ethnl = EthtoolFamily() + ethnl.ntf_subscribe("monitor") + + ethtool(f"--disable-netlink -X {cfg.ifname} weight 0 1") + reset = defer(ethtool, f"-X {cfg.ifname} default") + + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("No notification received") + ksft_eq(ntf["name"], "rss-ntf") + ksft_eq(set(ntf["msg"]["indir"]), {1}) + + reset.exec() + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("No notification received after reset") + ksft_eq(ntf["name"], "rss-ntf") + ksft_is(ntf["msg"].get("context"), None) + ksft_ne(set(ntf["msg"]["indir"]), {1}) + + +def test_rxfh_indir_ctx_ntf(cfg): + """ + Check that Netlink notifications are generated when RSS indirection + table was modified on an additional RSS context. + """ + _require_2qs(cfg) + + ctx_id = _ethtool_create(cfg, "-X", "context new") + defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete") + + ethnl = EthtoolFamily() + ethnl.ntf_subscribe("monitor") + + ethtool(f"--disable-netlink -X {cfg.ifname} context {ctx_id} weight 0 1") + + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("No notification received") + ksft_eq(ntf["name"], "rss-ntf") + ksft_eq(ntf["msg"].get("context"), ctx_id) + ksft_eq(set(ntf["msg"]["indir"]), {1}) + + +def test_rxfh_nl_set_key(cfg): + """ + Test setting hashing key via Netlink. + """ + + dflt = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + defer(cfg.ethnl.rss_set, + {"header": {"dev-index": cfg.ifindex}, + "hkey": dflt["hkey"], "indir": None}) + + # Empty key should error out + with ksft_raises(NlError) as cm: + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "hkey": None}) + ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.hkey') + + # Set key to random + mod = random.randbytes(len(dflt["hkey"])) + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "hkey": mod}) + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(rss.get("hkey", [-1]), mod) + + # Set key to random and indir tbl to something at once + mod = random.randbytes(len(dflt["hkey"])) + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "indir": [0, 1], "hkey": mod}) + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(rss.get("hkey", [-1]), mod) + ksft_eq(set(rss.get("indir", [-1])), {0, 1}) + + +def test_rxfh_fields(cfg): + """ + Test reading Rx Flow Hash over Netlink. + """ + + flow_types = ["tcp4", "tcp6", "udp4", "udp6"] + ethnl = EthtoolFamily() + + cfg_nl = ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + for fl_type in flow_types: + one = _ethtool_get_cfg(cfg, fl_type, to_nl=True) + ksft_eq(one, cfg_nl["flow-hash"][fl_type], + comment="Config for " + fl_type) + + +def test_rxfh_fields_set(cfg): + """ Test configuring Rx Flow Hash over Netlink. """ + + flow_types = ["tcp4", "tcp6", "udp4", "udp6"] + + # Collect current settings + cfg_old = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + # symmetric hashing is config-order-sensitive make sure we leave + # symmetric mode, or make the flow-hash sym-compatible first + changes = [{"flow-hash": cfg_old["flow-hash"],}, + {"input-xfrm": cfg_old.get("input-xfrm", {}),}] + if cfg_old.get("input-xfrm"): + changes = list(reversed(changes)) + for old in changes: + defer(cfg.ethnl.rss_set, {"header": {"dev-index": cfg.ifindex},} | old) + + # symmetric hashing prevents some of the configs below + if cfg_old.get("input-xfrm"): + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "input-xfrm": {}}) + + for fl_type in flow_types: + cur = _ethtool_get_cfg(cfg, fl_type) + if cur == "sdfn": + change_nl = {"ip-src", "ip-dst"} + change_ic = "sd" + else: + change_nl = {"l4-b-0-1", "l4-b-2-3", "ip-src", "ip-dst"} + change_ic = "sdfn" + + cfg.ethnl.rss_set({ + "header": {"dev-index": cfg.ifindex}, + "flow-hash": {fl_type: change_nl} + }) + reset = defer(ethtool, f"--disable-netlink -N {cfg.ifname} " + f"rx-flow-hash {fl_type} {cur}") + + cfg_nl = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(change_nl, cfg_nl["flow-hash"][fl_type], + comment=f"Config for {fl_type} over Netlink") + cfg_ic = _ethtool_get_cfg(cfg, fl_type) + ksft_eq(change_ic, cfg_ic, + comment=f"Config for {fl_type} over IOCTL") + + reset.exec() + cfg_nl = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(cfg_old["flow-hash"][fl_type], cfg_nl["flow-hash"][fl_type], + comment=f"Un-config for {fl_type} over Netlink") + cfg_ic = _ethtool_get_cfg(cfg, fl_type) + ksft_eq(cur, cfg_ic, comment=f"Un-config for {fl_type} over IOCTL") + + # Try to set multiple at once, the defer was already installed at the start + change = {"ip-src"} + if change == cfg_old["flow-hash"]["tcp4"]: + change = {"ip-dst"} + cfg.ethnl.rss_set({ + "header": {"dev-index": cfg.ifindex}, + "flow-hash": {x: change for x in flow_types} + }) + + cfg_nl = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + for fl_type in flow_types: + ksft_eq(change, cfg_nl["flow-hash"][fl_type], + comment=f"multi-config for {fl_type} over Netlink") + + +def test_rxfh_fields_set_xfrm(cfg): + """ Test changing Rx Flow Hash vs xfrm_input at once. """ + + def set_rss(cfg, xfrm, fh): + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "input-xfrm": xfrm, "flow-hash": fh}) + + # Install the reset handler + cfg_old = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + # symmetric hashing is config-order-sensitive make sure we leave + # symmetric mode, or make the flow-hash sym-compatible first + changes = [{"flow-hash": cfg_old["flow-hash"],}, + {"input-xfrm": cfg_old.get("input-xfrm", {}),}] + if cfg_old.get("input-xfrm"): + changes = list(reversed(changes)) + for old in changes: + defer(cfg.ethnl.rss_set, {"header": {"dev-index": cfg.ifindex},} | old) + + # Make sure we start with input-xfrm off, and tcp4 config non-sym + set_rss(cfg, {}, {}) + set_rss(cfg, {}, {"tcp4": {"ip-src"}}) + + # Setting sym and fixing tcp4 config not expected to pass right now + with ksft_raises(NlError): + set_rss(cfg, {"sym-xor"}, {"tcp4": {"ip-src", "ip-dst"}}) + # One at a time should work, hopefully + set_rss(cfg, 0, {"tcp4": {"ip-src", "ip-dst"}}) + no_support = False + try: + set_rss(cfg, {"sym-xor"}, {}) + except NlError: + try: + set_rss(cfg, {"sym-or-xor"}, {}) + except NlError: + no_support = True + if no_support: + raise KsftSkipEx("no input-xfrm supported") + # Disabling two at once should not work either without kernel changes + with ksft_raises(NlError): + set_rss(cfg, {}, {"tcp4": {"ip-src"}}) + + +def test_rxfh_fields_ntf(cfg): + """ Test Rx Flow Hash notifications. """ + + cur = _ethtool_get_cfg(cfg, "tcp4") + if cur == "sdfn": + change = {"ip-src", "ip-dst"} + else: + change = {"l4-b-0-1", "l4-b-2-3", "ip-src", "ip-dst"} + + ethnl = EthtoolFamily() + ethnl.ntf_subscribe("monitor") + + ethnl.rss_set({ + "header": {"dev-index": cfg.ifindex}, + "flow-hash": {"tcp4": change} + }) + reset = defer(ethtool, + f"--disable-netlink -N {cfg.ifname} rx-flow-hash tcp4 {cur}") + + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("No notification received after IOCTL change") + ksft_eq(ntf["name"], "rss-ntf") + ksft_eq(ntf["msg"]["flow-hash"]["tcp4"], change) + ksft_eq(next(ethnl.poll_ntf(duration=0.01), None), None) + + reset.exec() + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("No notification received after Netlink change") + ksft_eq(ntf["name"], "rss-ntf") + ksft_ne(ntf["msg"]["flow-hash"]["tcp4"], change) + ksft_eq(next(ethnl.poll_ntf(duration=0.01), None), None) + + +def test_rss_ctx_add(cfg): + """ Test creating an additional RSS context via Netlink """ + + _require_2qs(cfg) + + # Test basic creation + ctx = cfg.ethnl.rss_create_act({"header": {"dev-index": cfg.ifindex}}) + d = defer(ethtool, f"-X {cfg.ifname} context {ctx.get('context')} delete") + ksft_ne(ctx.get("context", 0), 0) + ksft_ne(set(ctx.get("indir", [0])), {0}, + comment="Driver should init the indirection table") + + # Try requesting the ID we just got allocated + with ksft_raises(NlError) as cm: + ctx = cfg.ethnl.rss_create_act({ + "header": {"dev-index": cfg.ifindex}, + "context": ctx.get("context"), + }) + ethtool(f"-X {cfg.ifname} context {ctx.get('context')} delete") + d.exec() + ksft_eq(cm.exception.nl_msg.error, -errno.EBUSY) + + # Test creating with a specified RSS table, and context ID + ctx_id = ctx.get("context") + ctx = cfg.ethnl.rss_create_act({ + "header": {"dev-index": cfg.ifindex}, + "context": ctx_id, + "indir": [1], + }) + ethtool(f"-X {cfg.ifname} context {ctx.get('context')} delete") + ksft_eq(ctx.get("context"), ctx_id) + ksft_eq(set(ctx.get("indir", [0])), {1}) + + +def test_rss_ctx_ntf(cfg): + """ Test notifications for creating additional RSS contexts """ + + ethnl = EthtoolFamily() + ethnl.ntf_subscribe("monitor") + + # Create / delete via Netlink + ctx = cfg.ethnl.rss_create_act({"header": {"dev-index": cfg.ifindex}}) + cfg.ethnl.rss_delete_act({ + "header": {"dev-index": cfg.ifindex}, + "context": ctx["context"], + }) + + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("[NL] No notification after context creation") + ksft_eq(ntf["name"], "rss-create-ntf") + ksft_eq(ctx, ntf["msg"]) + + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("[NL] No notification after context deletion") + ksft_eq(ntf["name"], "rss-delete-ntf") + + # Create / deleve via IOCTL + ctx_id = _ethtool_create(cfg, "--disable-netlink -X", "context new") + ethtool(f"--disable-netlink -X {cfg.ifname} context {ctx_id} delete") + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("[IOCTL] No notification after context creation") + ksft_eq(ntf["name"], "rss-create-ntf") + + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("[IOCTL] No notification after context deletion") + ksft_eq(ntf["name"], "rss-delete-ntf") + + +def main() -> None: + """ Ksft boiler plate main """ + + with NetDrvEnv(__file__, nsim_test=False) as cfg: + cfg.ethnl = EthtoolFamily() + ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py index 319aaa004c40..51f4e7bc3e5d 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py +++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py @@ -4,12 +4,16 @@ import datetime import random import re -from lib.py import ksft_run, ksft_pr, ksft_exit, ksft_eq, ksft_ne, ksft_ge, ksft_lt, ksft_true +import time +from lib.py import ksft_disruptive +from lib.py import ksft_run, ksft_pr, ksft_exit +from lib.py import ksft_eq, ksft_ne, ksft_ge, ksft_in, ksft_lt, ksft_true, ksft_raises from lib.py import NetDrvEpEnv from lib.py import EthtoolFamily, NetdevFamily from lib.py import KsftSkipEx, KsftFailEx -from lib.py import rand_port -from lib.py import ethtool, ip, defer, GenerateTraffic, CmdExitFailure +from lib.py import rand_port, rand_ports +from lib.py import cmd, ethtool, ip, defer, CmdExitFailure, wait_file +from lib.py import GenerateTraffic def _rss_key_str(key): @@ -58,6 +62,14 @@ def require_ntuple(cfg): raise KsftSkipEx("Ntuple filters not enabled on the device: " + str(features["ntuple-filters"])) +def require_context_cnt(cfg, need_cnt): + # There's no good API to get the context count, so the tests + # which try to add a lot opportunisitically set the count they + # discovered. Careful with test ordering! + if need_cnt and cfg.context_cnt and cfg.context_cnt < need_cnt: + raise KsftSkipEx(f"Test requires at least {need_cnt} contexts, but device only has {cfg.context_cnt}") + + # Get Rx packet counts for all queues, as a simple list of integers # if @prev is specified the prev counts will be subtracted def _get_rx_cnts(cfg, prev=None): @@ -109,7 +121,7 @@ def test_rss_key_indir(cfg): qcnt = len(_get_rx_cnts(cfg)) if qcnt < 3: - KsftSkipEx("Device has fewer than 3 queues (or doesn't support queue stats)") + raise KsftSkipEx("Device has fewer than 3 queues (or doesn't support queue stats)") data = get_rss(cfg) want_keys = ['rss-hash-key', 'rss-hash-function', 'rss-indirection-table'] @@ -154,9 +166,17 @@ def test_rss_key_indir(cfg): ksft_eq(1, max(data['rss-indirection-table'])) # Check we only get traffic on the first 2 queues - cnts = _get_rx_cnts(cfg) - GenerateTraffic(cfg).wait_pkts_and_stop(20000) - cnts = _get_rx_cnts(cfg, prev=cnts) + + # Retry a few times in case the flows skew to a single queue. + attempts = 3 + for attempt in range(attempts): + cnts = _get_rx_cnts(cfg) + GenerateTraffic(cfg).wait_pkts_and_stop(20000) + cnts = _get_rx_cnts(cfg, prev=cnts) + if cnts[0] >= 5000 and cnts[1] >= 5000: + break + ksft_pr(f"Skewed queue distribution, attempt {attempt + 1}/{attempts}: " + str(cnts)) + # 2 queues, 20k packets, must be at least 5k per queue ksft_ge(cnts[0], 5000, "traffic on main context (1/2): " + str(cnts)) ksft_ge(cnts[1], 5000, "traffic on main context (2/2): " + str(cnts)) @@ -166,11 +186,25 @@ def test_rss_key_indir(cfg): # Restore, and check traffic gets spread again reset_indir.exec() - cnts = _get_rx_cnts(cfg) - GenerateTraffic(cfg).wait_pkts_and_stop(20000) - cnts = _get_rx_cnts(cfg, prev=cnts) - # First two queues get less traffic than all the rest - ksft_lt(sum(cnts[:2]), sum(cnts[2:]), "traffic distributed: " + str(cnts)) + for attempt in range(attempts): + cnts = _get_rx_cnts(cfg) + GenerateTraffic(cfg).wait_pkts_and_stop(20000) + cnts = _get_rx_cnts(cfg, prev=cnts) + if qcnt > 4: + if sum(cnts[:2]) < sum(cnts[2:]): + break + else: + if cnts[2] >= 3500: + break + ksft_pr(f"Skewed queue distribution, attempt {attempt + 1}/{attempts}: " + str(cnts)) + + if qcnt > 4: + # First two queues get less traffic than all the rest + ksft_lt(sum(cnts[:2]), sum(cnts[2:]), + "traffic distributed: " + str(cnts)) + else: + # When queue count is low make sure third queue got significant pkts + ksft_ge(cnts[2], 3500, "traffic distributed: " + str(cnts)) def test_rss_queue_reconfigure(cfg, main_ctx=True): @@ -326,20 +360,21 @@ def test_hitless_key_update(cfg): data = get_rss(cfg) key_len = len(data['rss-hash-key']) - key = _rss_key_rand(key_len) + ethnl = EthtoolFamily() + key = random.randbytes(key_len) tgen = GenerateTraffic(cfg) try: errors0, carrier0 = get_drop_err_sum(cfg) t0 = datetime.datetime.now() - ethtool(f"-X {cfg.ifname} hkey " + _rss_key_str(key)) + ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, "hkey": key}) t1 = datetime.datetime.now() errors1, carrier1 = get_drop_err_sum(cfg) finally: tgen.wait_pkts_and_stop(5000) - ksft_lt((t1 - t0).total_seconds(), 0.2) - ksft_eq(errors1 - errors1, 0) + ksft_lt((t1 - t0).total_seconds(), 0.15) + ksft_eq(errors1 - errors0, 0) ksft_eq(carrier1 - carrier0, 0) @@ -383,7 +418,7 @@ def test_rss_context_dump(cfg): # Sanity-check the results for data in ctxs: - ksft_ne(set(data['indir']), {0}, "indir table is all zero") + ksft_ne(set(data.get('indir', [1])), {0}, "indir table is all zero") ksft_ne(set(data.get('hkey', [1])), {0}, "key is all zero") # More specific checks @@ -437,7 +472,7 @@ def test_rss_context(cfg, ctx_cnt=1, create_with_cfg=None): except: raise KsftSkipEx("Not enough queues for the test") - ports = [] + ports = rand_ports(ctx_cnt) # Use queues 0 and 1 for normal traffic ethtool(f"-X {cfg.ifname} equal 2") @@ -456,6 +491,8 @@ def test_rss_context(cfg, ctx_cnt=1, create_with_cfg=None): raise ksft_pr(f"Failed to create context {i + 1}, trying to test what we got") ctx_cnt = i + if cfg.context_cnt is None: + cfg.context_cnt = ctx_cnt break _rss_key_check(cfg, context=ctx_id) @@ -469,7 +506,6 @@ def test_rss_context(cfg, ctx_cnt=1, create_with_cfg=None): ksft_eq(min(data['rss-indirection-table']), 2 + i * 2, "Unexpected context cfg: " + str(data)) ksft_eq(max(data['rss-indirection-table']), 2 + i * 2 + 1, "Unexpected context cfg: " + str(data)) - ports.append(rand_port()) flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {ports[i]} context {ctx_id}" ntuple = ethtool_create(cfg, "-N", flow) defer(ethtool, f"-N {cfg.ifname} delete {ntuple}") @@ -511,8 +547,7 @@ def test_rss_context_out_of_order(cfg, ctx_cnt=4): """ require_ntuple(cfg) - - requested_ctx_cnt = ctx_cnt + require_context_cnt(cfg, 4) # Try to allocate more queues when necessary qcnt = len(_get_rx_cnts(cfg)) @@ -526,7 +561,7 @@ def test_rss_context_out_of_order(cfg, ctx_cnt=4): ntuple = [] ctx = [] - ports = [] + ports = rand_ports(ctx_cnt) def remove_ctx(idx): ntuple[idx].exec() @@ -558,7 +593,6 @@ def test_rss_context_out_of_order(cfg, ctx_cnt=4): ctx_id = ethtool_create(cfg, "-X", f"context new start {2 + i * 2} equal 2") ctx.append(defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")) - ports.append(rand_port()) flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {ports[i]} context {ctx_id}" ntuple_id = ethtool_create(cfg, "-N", flow) ntuple.append(defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}")) @@ -577,9 +611,6 @@ def test_rss_context_out_of_order(cfg, ctx_cnt=4): remove_ctx(-1) check_traffic() - if requested_ctx_cnt != ctx_cnt: - raise KsftSkipEx(f"Tested only {ctx_cnt} contexts, wanted {requested_ctx_cnt}") - def test_rss_context_overlap(cfg, other_ctx=0): """ @@ -588,6 +619,8 @@ def test_rss_context_overlap(cfg, other_ctx=0): """ require_ntuple(cfg) + if other_ctx: + require_context_cnt(cfg, 2) queue_cnt = len(_get_rx_cnts(cfg)) if queue_cnt < 4: @@ -649,6 +682,29 @@ def test_rss_context_overlap2(cfg): test_rss_context_overlap(cfg, True) +def test_flow_add_context_missing(cfg): + """ + Test that we are not allowed to add a rule pointing to an RSS context + which was never created. + """ + + require_ntuple(cfg) + + # Find a context which doesn't exist + for ctx_id in range(1, 100): + try: + get_rss(cfg, context=ctx_id) + except CmdExitFailure: + break + + with ksft_raises(CmdExitFailure) as cm: + flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port 1234 context {ctx_id}" + ntuple_id = ethtool_create(cfg, "-N", flow) + ethtool(f"-N {cfg.ifname} delete {ntuple_id}") + if cm.exception: + ksft_in('Invalid argument', cm.exception.cmd.stderr) + + def test_delete_rss_context_busy(cfg): """ Test that deletion returns -EBUSY when an rss context is being used @@ -715,8 +771,158 @@ def test_rss_ntuple_addition(cfg): 'noise' : (0,) }) +def test_rss_default_context_rule(cfg): + """ + Allocate a port, direct this port to context 0, then create a new RSS + context and steer all TCP traffic to it (context 1). Verify that: + * Traffic to the specific port continues to use queues of the main + context (0/1). + * Traffic to any other TCP port is redirected to the new context + (queues 2/3). + """ + + require_ntuple(cfg) + + queue_cnt = len(_get_rx_cnts(cfg)) + if queue_cnt < 4: + try: + ksft_pr(f"Increasing queue count {queue_cnt} -> 4") + ethtool(f"-L {cfg.ifname} combined 4") + defer(ethtool, f"-L {cfg.ifname} combined {queue_cnt}") + except Exception as exc: + raise KsftSkipEx("Not enough queues for the test") from exc + + # Use queues 0 and 1 for the main context + ethtool(f"-X {cfg.ifname} equal 2") + defer(ethtool, f"-X {cfg.ifname} default") + + # Create a new RSS context that uses queues 2 and 3 + ctx_id = ethtool_create(cfg, "-X", "context new start 2 equal 2") + defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete") + + # Generic low-priority rule: redirect all TCP traffic to the new context. + # Give it an explicit higher location number (lower priority). + flow_generic = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} context {ctx_id} loc 1" + ethtool(f"-N {cfg.ifname} {flow_generic}") + defer(ethtool, f"-N {cfg.ifname} delete 1") + + ports = rand_ports(2) + # Specific high-priority rule for a random port that should stay on context 0. + # Assign loc 0 so it is evaluated before the generic rule. + port_main = ports[0] + flow_main = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port_main} context 0 loc 0" + ethtool(f"-N {cfg.ifname} {flow_main}") + defer(ethtool, f"-N {cfg.ifname} delete 0") + + _ntuple_rule_check(cfg, 1, ctx_id) + + # Verify that traffic matching the specific rule still goes to queues 0/1 + _send_traffic_check(cfg, port_main, "context 0", + { 'target': (0, 1), + 'empty' : (2, 3) }) + + # And that traffic for any other port is steered to the new context + port_other = ports[1] + _send_traffic_check(cfg, port_other, f"context {ctx_id}", + { 'target': (2, 3), + 'noise' : (0, 1) }) + + +@ksft_disruptive +def test_rss_context_persist_ifupdown(cfg, pre_down=False): + """ + Test that RSS contexts and their associated ntuple filters persist across + an interface down/up cycle. + + """ + + require_ntuple(cfg) + + qcnt = len(_get_rx_cnts(cfg)) + if qcnt < 6: + try: + ethtool(f"-L {cfg.ifname} combined 6") + defer(ethtool, f"-L {cfg.ifname} combined {qcnt}") + except Exception as exc: + raise KsftSkipEx("Not enough queues for the test") from exc + + ethtool(f"-X {cfg.ifname} equal 2") + defer(ethtool, f"-X {cfg.ifname} default") + + ifup = defer(ip, f"link set dev {cfg.ifname} up") + if pre_down: + ip(f"link set dev {cfg.ifname} down") + + try: + ctx1_id = ethtool_create(cfg, "-X", "context new start 2 equal 2") + defer(ethtool, f"-X {cfg.ifname} context {ctx1_id} delete") + except CmdExitFailure as exc: + raise KsftSkipEx("Create context not supported with interface down") from exc + + ctx2_id = ethtool_create(cfg, "-X", "context new start 4 equal 2") + defer(ethtool, f"-X {cfg.ifname} context {ctx2_id} delete") + + port_ctx2 = rand_port() + flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port_ctx2} context {ctx2_id}" + ntuple_id = ethtool_create(cfg, "-N", flow) + defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}") + + if not pre_down: + ip(f"link set dev {cfg.ifname} down") + ifup.exec() + + wait_file(f"/sys/class/net/{cfg.ifname}/carrier", + lambda x: x.strip() == "1", deadline=20) + + remote_addr = cfg.remote_addr_v[cfg.addr_ipver] + for _ in range(10): + if cmd(f"ping -c 1 -W 1 {remote_addr}", fail=False).ret == 0: + break + time.sleep(1) + else: + raise KsftSkipEx("Cannot reach remote host after interface up") + + ctxs = cfg.ethnl.rss_get({'header': {'dev-name': cfg.ifname}}, dump=True) + + data1 = [c for c in ctxs if c.get('context') == ctx1_id] + ksft_eq(len(data1), 1, f"Context {ctx1_id} should persist after ifup") + + data2 = [c for c in ctxs if c.get('context') == ctx2_id] + ksft_eq(len(data2), 1, f"Context {ctx2_id} should persist after ifup") + + _ntuple_rule_check(cfg, ntuple_id, ctx2_id) + + cnts = _get_rx_cnts(cfg) + GenerateTraffic(cfg).wait_pkts_and_stop(20000) + cnts = _get_rx_cnts(cfg, prev=cnts) + + main_traffic = sum(cnts[0:2]) + ksft_ge(main_traffic, 18000, f"Main context traffic distribution: {cnts}") + ksft_lt(sum(cnts[2:6]), 500, f"Other context queues should be mostly empty: {cnts}") + + _send_traffic_check(cfg, port_ctx2, f"context {ctx2_id}", + {'target': (4, 5), + 'noise': (0, 1), + 'empty': (2, 3)}) + + +def test_rss_context_persist_create_and_ifdown(cfg): + """ + Create RSS contexts then cycle the interface down and up. + """ + test_rss_context_persist_ifupdown(cfg, pre_down=False) + + +def test_rss_context_persist_ifdown_and_create(cfg): + """ + Bring interface down first, then create RSS contexts and bring up. + """ + test_rss_context_persist_ifupdown(cfg, pre_down=True) + + def main() -> None: with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + cfg.context_cnt = None cfg.ethnl = EthtoolFamily() cfg.netdevnl = NetdevFamily() @@ -726,7 +932,11 @@ def main() -> None: test_rss_context_dump, test_rss_context_queue_reconfigure, test_rss_context_overlap, test_rss_context_overlap2, test_rss_context_out_of_order, test_rss_context4_create_with_cfg, - test_delete_rss_context_busy, test_rss_ntuple_addition], + test_flow_add_context_missing, + test_delete_rss_context_busy, test_rss_ntuple_addition, + test_rss_default_context_rule, + test_rss_context_persist_create_and_ifdown, + test_rss_context_persist_ifdown_and_create], args=(cfg, )) ksft_exit() diff --git a/tools/testing/selftests/drivers/net/hw/rss_drv.py b/tools/testing/selftests/drivers/net/hw/rss_drv.py new file mode 100755 index 000000000000..bd59dace6e15 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/rss_drv.py @@ -0,0 +1,313 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Driver-related behavior tests for RSS. +""" + +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge +from lib.py import ksft_variants, KsftNamedVariant, KsftSkipEx, ksft_raises +from lib.py import defer, ethtool, CmdExitFailure +from lib.py import EthtoolFamily, NlError +from lib.py import NetDrvEnv + + +def _is_power_of_two(n): + return n > 0 and (n & (n - 1)) == 0 + + +def _get_rss(cfg, context=0): + return ethtool(f"-x {cfg.ifname} context {context}", json=True)[0] + + +def _test_rss_indir_size(cfg, qcnt, context=0): + """Test that indirection table size is at least 4x queue count.""" + ethtool(f"-L {cfg.ifname} combined {qcnt}") + + rss = _get_rss(cfg, context=context) + indir = rss['rss-indirection-table'] + ksft_ge(len(indir), 4 * qcnt, "Table smaller than 4x") + return len(indir) + + +def _maybe_create_context(cfg, create_context): + """ Either create a context and return its ID or return 0 for main ctx """ + if not create_context: + return 0 + try: + ctx = cfg.ethnl.rss_create_act({'header': {'dev-index': cfg.ifindex}}) + ctx_id = ctx['context'] + defer(cfg.ethnl.rss_delete_act, + {'header': {'dev-index': cfg.ifindex}, 'context': ctx_id}) + except NlError: + raise KsftSkipEx("Device does not support additional RSS contexts") + + return ctx_id + + +def _require_dynamic_indir_size(cfg, ch_max): + """Skip if the device does not dynamically size its indirection table.""" + ethtool(f"-X {cfg.ifname} default") + ethtool(f"-L {cfg.ifname} combined 2") + small = len(_get_rss(cfg)['rss-indirection-table']) + ethtool(f"-L {cfg.ifname} combined {ch_max}") + large = len(_get_rss(cfg)['rss-indirection-table']) + + if small == large: + raise KsftSkipEx("Device does not dynamically size indirection table") + + +@ksft_variants([ + KsftNamedVariant("main", False), + KsftNamedVariant("ctx", True), +]) +def indir_size_4x(cfg, create_context): + """ + Test that the indirection table has at least 4 entries per queue. + Empirically network-heavy workloads like memcache suffer with the 33% + imbalance of a 2x indirection table size. + 4x table translates to a 16% imbalance. + """ + channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}}) + ch_max = channels.get('combined-max', 0) + qcnt = channels['combined-count'] + + if ch_max < 3: + raise KsftSkipEx(f"Not enough queues for the test: max={ch_max}") + + defer(ethtool, f"-L {cfg.ifname} combined {qcnt}") + ethtool(f"-L {cfg.ifname} combined 3") + + ctx_id = _maybe_create_context(cfg, create_context) + + indir_sz = _test_rss_indir_size(cfg, 3, context=ctx_id) + + # Test with max queue count (max - 1 if max is a power of two) + test_max = ch_max - 1 if _is_power_of_two(ch_max) else ch_max + if test_max > 3 and indir_sz < test_max * 4: + _test_rss_indir_size(cfg, test_max, context=ctx_id) + + +@ksft_variants([ + KsftNamedVariant("main", False), + KsftNamedVariant("ctx", True), +]) +def resize_periodic(cfg, create_context): + """Test that a periodic indirection table survives channel changes. + + Set a non-default periodic table ([3, 2, 1, 0] x N) via netlink, + reduce channels to trigger a fold, then increase to trigger an + unfold. Using a reversed pattern (instead of [0, 1, 2, 3]) ensures + the test can distinguish a correct fold from a driver that silently + resets the table to defaults. Verify the exact pattern is preserved + and the size tracks the channel count. + """ + channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}}) + ch_max = channels.get('combined-max', 0) + qcnt = channels['combined-count'] + + if ch_max < 4: + raise KsftSkipEx(f"Not enough queues for the test: max={ch_max}") + + defer(ethtool, f"-L {cfg.ifname} combined {qcnt}") + + _require_dynamic_indir_size(cfg, ch_max) + + ctx_id = _maybe_create_context(cfg, create_context) + + # Set a non-default periodic pattern via netlink. + # Send only 4 entries (user_size=4) so the kernel replicates it + # to fill the device table. This allows folding down to 4 entries. + rss = _get_rss(cfg, context=ctx_id) + orig_size = len(rss['rss-indirection-table']) + pattern = [3, 2, 1, 0] + req = {'header': {'dev-index': cfg.ifindex}, 'indir': pattern} + if ctx_id: + req['context'] = ctx_id + else: + defer(ethtool, f"-X {cfg.ifname} default") + cfg.ethnl.rss_set(req) + + # Shrink — should fold + ethtool(f"-L {cfg.ifname} combined 4") + rss = _get_rss(cfg, context=ctx_id) + indir = rss['rss-indirection-table'] + + ksft_ge(orig_size, len(indir), "Table did not shrink") + ksft_eq(indir, [3, 2, 1, 0] * (len(indir) // 4), + "Folded table has wrong pattern") + + # Grow back — should unfold + ethtool(f"-L {cfg.ifname} combined {ch_max}") + rss = _get_rss(cfg, context=ctx_id) + indir = rss['rss-indirection-table'] + + ksft_eq(len(indir), orig_size, "Table size not restored") + ksft_eq(indir, [3, 2, 1, 0] * (len(indir) // 4), + "Unfolded table has wrong pattern") + + +@ksft_variants([ + KsftNamedVariant("main", False), + KsftNamedVariant("ctx", True), +]) +def resize_below_user_size_reject(cfg, create_context): + """Test that shrinking below user_size is rejected. + + Send a table via netlink whose size (user_size) sits between + the small and large device table sizes. The table is periodic, + so folding would normally succeed, but the user_size floor must + prevent it. + """ + channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}}) + ch_max = channels.get('combined-max', 0) + qcnt = channels['combined-count'] + + if ch_max < 4: + raise KsftSkipEx(f"Not enough queues for the test: max={ch_max}") + + defer(ethtool, f"-L {cfg.ifname} combined {qcnt}") + + _require_dynamic_indir_size(cfg, ch_max) + + ctx_id = _maybe_create_context(cfg, create_context) + + # Measure the table size at max channels + rss = _get_rss(cfg, context=ctx_id) + big_size = len(rss['rss-indirection-table']) + + # Measure the table size at reduced channels + ethtool(f"-L {cfg.ifname} combined 4") + rss = _get_rss(cfg, context=ctx_id) + small_size = len(rss['rss-indirection-table']) + ethtool(f"-L {cfg.ifname} combined {ch_max}") + + if small_size >= big_size: + raise KsftSkipEx("Table did not shrink at reduced channels") + + # Find a user_size + user_size = None + for div in [2, 4]: + candidate = big_size // div + if candidate > small_size and big_size % candidate == 0: + user_size = candidate + break + if user_size is None: + raise KsftSkipEx("No suitable user_size between small and big table") + + # Send a periodic sub-table of exactly user_size entries. + # Pattern safe for 4 channels. + pattern = [0, 1, 2, 3] * (user_size // 4) + if len(pattern) != user_size: + raise KsftSkipEx(f"user_size ({user_size}) not divisible by 4") + req = {'header': {'dev-index': cfg.ifindex}, 'indir': pattern} + if ctx_id: + req['context'] = ctx_id + else: + defer(ethtool, f"-X {cfg.ifname} default") + cfg.ethnl.rss_set(req) + + # Shrink channels — table would go to small_size < user_size. + # The table is periodic so folding would work, but user_size + # floor must reject it. + with ksft_raises(CmdExitFailure): + ethtool(f"-L {cfg.ifname} combined 4") + + +@ksft_variants([ + KsftNamedVariant("main", False), + KsftNamedVariant("ctx", True), +]) +def resize_nonperiodic_reject(cfg, create_context): + """Test that a non-periodic table blocks channel reduction. + + Set equal weight across all queues so the table is not periodic + at any smaller size, then verify channel reduction is rejected. + An additional context with a periodic table is created to verify + that validation catches the non-periodic one even when others + are fine. + """ + channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}}) + ch_max = channels.get('combined-max', 0) + qcnt = channels['combined-count'] + + if ch_max < 4: + raise KsftSkipEx(f"Not enough queues for the test: max={ch_max}") + + defer(ethtool, f"-L {cfg.ifname} combined {qcnt}") + + _require_dynamic_indir_size(cfg, ch_max) + + ctx_id = _maybe_create_context(cfg, create_context) + ctx_ref = f"context {ctx_id}" if ctx_id else "" + + # Create an extra context with a periodic (foldable) table so that + # the validation must iterate all contexts to find the bad one. + extra_ctx = _maybe_create_context(cfg, True) + ethtool(f"-X {cfg.ifname} context {extra_ctx} equal 2") + + ethtool(f"-X {cfg.ifname} {ctx_ref} equal {ch_max}") + if not create_context: + defer(ethtool, f"-X {cfg.ifname} default") + + with ksft_raises(CmdExitFailure): + ethtool(f"-L {cfg.ifname} combined 2") + + +@ksft_variants([ + KsftNamedVariant("main", False), + KsftNamedVariant("ctx", True), +]) +def resize_nonperiodic_no_corruption(cfg, create_context): + """Test that a failed resize does not corrupt table or channel count. + + Set a non-periodic table, attempt a channel reduction (which must + fail), then verify both the indirection table contents and the + channel count are unchanged. + """ + channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}}) + ch_max = channels.get('combined-max', 0) + qcnt = channels['combined-count'] + + if ch_max < 4: + raise KsftSkipEx(f"Not enough queues for the test: max={ch_max}") + + defer(ethtool, f"-L {cfg.ifname} combined {qcnt}") + + _require_dynamic_indir_size(cfg, ch_max) + + ctx_id = _maybe_create_context(cfg, create_context) + ctx_ref = f"context {ctx_id}" if ctx_id else "" + + ethtool(f"-X {cfg.ifname} {ctx_ref} equal {ch_max}") + if not create_context: + defer(ethtool, f"-X {cfg.ifname} default") + + rss_before = _get_rss(cfg, context=ctx_id) + + with ksft_raises(CmdExitFailure): + ethtool(f"-L {cfg.ifname} combined 2") + + rss_after = _get_rss(cfg, context=ctx_id) + ksft_eq(rss_after['rss-indirection-table'], + rss_before['rss-indirection-table'], + "Indirection table corrupted after failed resize") + + channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}}) + ksft_eq(channels['combined-count'], ch_max, + "Channel count changed after failed resize") + + +def main() -> None: + """ Ksft boiler plate main """ + with NetDrvEnv(__file__) as cfg: + cfg.ethnl = EthtoolFamily() + ksft_run([indir_size_4x, resize_periodic, + resize_below_user_size_reject, + resize_nonperiodic_reject, + resize_nonperiodic_no_corruption], args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/rss_flow_label.py b/tools/testing/selftests/drivers/net/hw/rss_flow_label.py new file mode 100755 index 000000000000..7dc80070884a --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/rss_flow_label.py @@ -0,0 +1,172 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Tests for RSS hashing on IPv6 Flow Label. +""" + +import glob +import os +import socket +from lib.py import CmdExitFailure +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, ksft_in, \ + ksft_not_in, ksft_raises, KsftSkipEx +from lib.py import bkg, cmd, defer, fd_read_timeout, rand_port +from lib.py import NetDrvEpEnv + + +def _check_system(cfg): + if not hasattr(socket, "SO_INCOMING_CPU"): + raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11") + + qcnt = len(glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*")) + if qcnt < 2: + raise KsftSkipEx(f"Local has only {qcnt} queues") + + for f in [f"/sys/class/net/{cfg.ifname}/queues/rx-0/rps_flow_cnt", + f"/sys/class/net/{cfg.ifname}/queues/rx-0/rps_cpus"]: + try: + with open(f, 'r') as fp: + setting = fp.read().strip() + # CPU mask will be zeros and commas + if setting.replace("0", "").replace(",", ""): + raise KsftSkipEx(f"RPS/RFS is configured: {f}: {setting}") + except FileNotFoundError: + pass + + # 1 is the default, if someone changed it we probably shouldn"t mess with it + af = cmd("cat /proc/sys/net/ipv6/auto_flowlabels", host=cfg.remote).stdout + if af.strip() != "1": + raise KsftSkipEx("Remote does not have auto_flowlabels enabled") + + +def _ethtool_get_cfg(cfg, fl_type): + descr = cmd(f"ethtool -n {cfg.ifname} rx-flow-hash {fl_type}").stdout + + converter = { + "IP SA": "s", + "IP DA": "d", + "L3 proto": "t", + "L4 bytes 0 & 1 [TCP/UDP src port]": "f", + "L4 bytes 2 & 3 [TCP/UDP dst port]": "n", + "IPv6 Flow Label": "l", + } + + ret = "" + for line in descr.split("\n")[1:-2]: + # if this raises we probably need to add more keys to converter above + ret += converter[line] + return ret + + +def _traffic(cfg, one_sock, one_cpu): + local_port = rand_port(socket.SOCK_DGRAM) + remote_port = rand_port(socket.SOCK_DGRAM) + + sock = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM) + sock.bind(("", local_port)) + sock.connect((cfg.remote_addr_v["6"], 0)) + if one_sock: + send = f"exec 5<>/dev/udp/{cfg.addr_v['6']}/{local_port}; " \ + "for i in `seq 20`; do echo a >&5; sleep 0.02; done; exec 5>&-" + else: + send = "for i in `seq 20`; do echo a | socat -t0.02 - UDP6:" \ + f"[{cfg.addr_v['6']}]:{local_port},sourceport={remote_port}; done" + + cpus = set() + with bkg(send, shell=True, host=cfg.remote, exit_wait=True): + for _ in range(20): + fd_read_timeout(sock.fileno(), 1) + cpu = sock.getsockopt(socket.SOL_SOCKET, socket.SO_INCOMING_CPU) + cpus.add(cpu) + + if one_cpu: + ksft_eq(len(cpus), 1, + f"{one_sock=} - expected one CPU, got traffic on: {cpus=}") + else: + ksft_ge(len(cpus), 2, + f"{one_sock=} - expected many CPUs, got traffic on: {cpus=}") + + +def test_rss_flow_label(cfg): + """ + Test hashing on IPv6 flow label. Send traffic over a single socket + and over multiple sockets. Depend on the remote having auto-label + enabled so that it randomizes the label per socket. + """ + + cfg.require_ipver("6") + cfg.require_cmd("socat", remote=True) + _check_system(cfg) + + # Enable flow label hashing for UDP6 + initial = _ethtool_get_cfg(cfg, "udp6") + no_lbl = initial.replace("l", "") + if "l" not in initial: + try: + cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 l{no_lbl}") + except CmdExitFailure as exc: + raise KsftSkipEx("Device doesn't support Flow Label for UDP6") from exc + + defer(cmd, f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {initial}") + + _traffic(cfg, one_sock=True, one_cpu=True) + _traffic(cfg, one_sock=False, one_cpu=False) + + # Disable it, we should see no hashing (reset was already defer()ed) + cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {no_lbl}") + + _traffic(cfg, one_sock=False, one_cpu=True) + + +def _check_v4_flow_types(cfg): + for fl_type in ["tcp4", "udp4", "ah4", "esp4", "sctp4"]: + try: + cur = cmd(f"ethtool -n {cfg.ifname} rx-flow-hash {fl_type}").stdout + ksft_not_in("Flow Label", cur, + comment=f"{fl_type=} has Flow Label:" + cur) + except CmdExitFailure: + # Probably does not support this flow type + pass + + +def test_rss_flow_label_6only(cfg): + """ + Test interactions with IPv4 flow types. It should not be possible to set + IPv6 Flow Label hashing for an IPv4 flow type. The Flow Label should also + not appear in the IPv4 "current config". + """ + + with ksft_raises(CmdExitFailure) as cm: + cmd(f"ethtool -N {cfg.ifname} rx-flow-hash tcp4 sdfnl") + ksft_in("Invalid argument", cm.exception.cmd.stderr) + + _check_v4_flow_types(cfg) + + # Try to enable Flow Labels and check again, in case it leaks thru + initial = _ethtool_get_cfg(cfg, "udp6") + no_lbl = initial.replace("l", "") + if "l" not in initial: + try: + cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 l{no_lbl}") + except CmdExitFailure as exc: + raise KsftSkipEx("Device doesn't support Flow Label for UDP6") from exc + else: + cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {no_lbl}") + restore = defer(cmd, f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {initial}") + + _check_v4_flow_types(cfg) + restore.exec() + _check_v4_flow_types(cfg) + + +def main() -> None: + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + ksft_run([test_rss_flow_label, + test_rss_flow_label_6only], + args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py new file mode 100755 index 000000000000..503f1a2a2872 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import multiprocessing +import socket +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, cmd, fd_read_timeout +from lib.py import NetDrvEpEnv +from lib.py import EthtoolFamily, NetdevFamily, NlError +from lib.py import KsftSkipEx, KsftFailEx +from lib.py import defer, ksft_pr, rand_port + + +def traffic(cfg, local_port, remote_port, ipver): + af_inet = socket.AF_INET if ipver == "4" else socket.AF_INET6 + sock = socket.socket(af_inet, socket.SOCK_DGRAM) + sock.bind(("", local_port)) + sock.connect((cfg.remote_addr_v[ipver], remote_port)) + tgt = f"{ipver}:[{cfg.addr_v[ipver]}]:{local_port},sourceport={remote_port}" + cmd("echo a | socat - UDP" + tgt, host=cfg.remote) + fd_read_timeout(sock.fileno(), 5) + return sock.getsockopt(socket.SOL_SOCKET, socket.SO_INCOMING_CPU) + + +def _rss_input_xfrm_try_enable(cfg): + """ + Check if symmetric input-xfrm is already enabled, if not try to enable it + and register a cleanup. + """ + rss = cfg.ethnl.rss_get({'header': {'dev-name': cfg.ifname}}) + orig_xfrm = rss.get('input-xfrm', set()) + sym_xfrm = set(filter(lambda x: 'sym' in x, orig_xfrm)) + + if sym_xfrm: + ksft_pr("Sym input xfrm already enabled:", sym_xfrm) + return sym_xfrm + + for xfrm in cfg.ethnl.consts["input-xfrm"].entries: + # Skip non-symmetric transforms + if "sym" not in xfrm: + continue + + try_xfrm = {xfrm} | orig_xfrm + try: + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "input-xfrm": try_xfrm}) + except NlError: + continue + + ksft_pr("Sym input xfrm configured:", try_xfrm) + defer(cfg.ethnl.rss_set, + {"header": {"dev-index": cfg.ifindex}, + "input-xfrm": orig_xfrm}) + return {xfrm} + + return set() + + +def test_rss_input_xfrm(cfg, ipver): + """ + Test symmetric input_xfrm. + If symmetric RSS hash is configured, send traffic twice, swapping the + src/dst UDP ports, and verify that the same queue is receiving the traffic + in both cases (IPs are constant). + """ + + if multiprocessing.cpu_count() < 2: + raise KsftSkipEx("Need at least two CPUs to test symmetric RSS hash") + + cfg.require_cmd("socat", local=False, remote=True) + + if not hasattr(socket, "SO_INCOMING_CPU"): + raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11") + + # Check for symmetric xor/or-xor + input_xfrm = _rss_input_xfrm_try_enable(cfg) + if not input_xfrm: + raise KsftSkipEx("Symmetric RSS hash not supported by device") + + cpus = set() + successful = 0 + for _ in range(100): + try: + port1 = rand_port(socket.SOCK_DGRAM) + port2 = rand_port(socket.SOCK_DGRAM) + cpu1 = traffic(cfg, port1, port2, ipver) + cpu2 = traffic(cfg, port2, port1, ipver) + cpus.update([cpu1, cpu2]) + ksft_eq( + cpu1, cpu2, comment=f"Received traffic on different cpus with ports ({port1 = }, {port2 = }) while symmetric hash is configured") + + successful += 1 + if successful == 10: + break + except: + continue + else: + raise KsftFailEx("Failed to run traffic") + + ksft_ge(len(cpus), 2, + comment=f"Received traffic on less than two cpus {cpus = }") + + +def test_rss_input_xfrm_ipv4(cfg): + cfg.require_ipver("4") + test_rss_input_xfrm(cfg, "4") + + +def test_rss_input_xfrm_ipv6(cfg): + cfg.require_ipver("6") + test_rss_input_xfrm(cfg, "6") + + +def main() -> None: + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + cfg.ethnl = EthtoolFamily() + cfg.netdevnl = NetdevFamily() + + ksft_run([test_rss_input_xfrm_ipv4, test_rss_input_xfrm_ipv6], + args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/toeplitz.c b/tools/testing/selftests/drivers/net/hw/toeplitz.c new file mode 100644 index 000000000000..035bf908d8d9 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/toeplitz.c @@ -0,0 +1,673 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Toeplitz test + * + * 1. Read packets and their rx_hash using PF_PACKET/TPACKET_V3 + * 2. Compute the rx_hash in software based on the packet contents + * 3. Compare the two + * + * Optionally, either '-C $rx_irq_cpu_list' or '-r $rps_bitmap' may be given. + * + * If '-C $rx_irq_cpu_list' is given, also + * + * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU + * 5. Compute the rxqueue that RSS would select based on this rx_hash + * 6. Using the $rx_irq_cpu_list map, identify the arriving cpu based on rxq irq + * 7. Compare the cpus from 4 and 6 + * + * Else if '-r $rps_bitmap' is given, also + * + * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU + * 5. Compute the cpu that RPS should select based on rx_hash and $rps_bitmap + * 6. Compare the cpus from 4 and 5 + */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <errno.h> +#include <error.h> +#include <fcntl.h> +#include <getopt.h> +#include <linux/filter.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <net/if.h> +#include <netdb.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/tcp.h> +#include <netinet/udp.h> +#include <poll.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/sysinfo.h> +#include <sys/time.h> +#include <sys/types.h> +#include <unistd.h> + +#include <ynl.h> +#include "ethtool-user.h" + +#include "kselftest.h" +#include "../../../net/lib/ksft.h" + +#define TOEPLITZ_KEY_MIN_LEN 40 +#define TOEPLITZ_KEY_MAX_LEN 256 + +#define TOEPLITZ_STR_LEN(K) (((K) * 3) - 1) /* hex encoded: AA:BB:CC:...:ZZ */ +#define TOEPLITZ_STR_MIN_LEN TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MIN_LEN) +#define TOEPLITZ_STR_MAX_LEN TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MAX_LEN) + +#define FOUR_TUPLE_MAX_LEN ((sizeof(struct in6_addr) * 2) + (sizeof(uint16_t) * 2)) + +#define RSS_MAX_CPUS (1 << 16) /* real constraint is PACKET_FANOUT_MAX */ +#define RSS_MAX_INDIR (1 << 16) + +#define RPS_MAX_CPUS 16UL /* must be a power of 2 */ + +#define MIN_PKT_SAMPLES 40 /* minimum number of packets to receive */ + +/* configuration options (cmdline arguments) */ +static uint16_t cfg_dport = 8000; +static int cfg_family = AF_INET6; +static char *cfg_ifname = "eth0"; +static int cfg_num_queues; +static int cfg_num_rps_cpus; +static bool cfg_sink; +static int cfg_type = SOCK_STREAM; +static int cfg_timeout_msec = 1000; +static bool cfg_verbose; + +/* global vars */ +static int num_cpus; +static int ring_block_nr; +static int ring_block_sz; + +/* stats */ +static int frames_received; +static int frames_nohash; +static int frames_error; + +#define log_verbose(args...) do { if (cfg_verbose) fprintf(stderr, args); } while (0) + +/* tpacket ring */ +struct ring_state { + int fd; + char *mmap; + int idx; + int cpu; +}; + +static unsigned int rx_irq_cpus[RSS_MAX_CPUS]; /* map from rxq to cpu */ +static int rps_silo_to_cpu[RPS_MAX_CPUS]; +static unsigned char toeplitz_key[TOEPLITZ_KEY_MAX_LEN]; +static unsigned int rss_indir_tbl[RSS_MAX_INDIR]; +static unsigned int rss_indir_tbl_size; +static struct ring_state rings[RSS_MAX_CPUS]; + +static inline uint32_t toeplitz(const unsigned char *four_tuple, + const unsigned char *key) +{ + int i, bit, ret = 0; + uint32_t key32; + + key32 = ntohl(*((uint32_t *)key)); + key += 4; + + for (i = 0; i < FOUR_TUPLE_MAX_LEN; i++) { + for (bit = 7; bit >= 0; bit--) { + if (four_tuple[i] & (1 << bit)) + ret ^= key32; + + key32 <<= 1; + key32 |= !!(key[0] & (1 << bit)); + } + key++; + } + + return ret; +} + +/* Compare computed cpu with arrival cpu from packet_fanout_cpu */ +static void verify_rss(uint32_t rx_hash, int cpu) +{ + int queue; + + if (rss_indir_tbl_size) + queue = rss_indir_tbl[rx_hash % rss_indir_tbl_size]; + else + queue = rx_hash % cfg_num_queues; + + log_verbose(" rxq %d (cpu %d)", queue, rx_irq_cpus[queue]); + if (rx_irq_cpus[queue] != cpu) { + log_verbose(". error: rss cpu mismatch (%d)", cpu); + frames_error++; + } +} + +static void verify_rps(uint64_t rx_hash, int cpu) +{ + int silo = (rx_hash * cfg_num_rps_cpus) >> 32; + + log_verbose(" silo %d (cpu %d)", silo, rps_silo_to_cpu[silo]); + if (rps_silo_to_cpu[silo] != cpu) { + log_verbose(". error: rps cpu mismatch (%d)", cpu); + frames_error++; + } +} + +static void log_rxhash(int cpu, uint32_t rx_hash, + const char *addrs, int addr_len) +{ + char saddr[INET6_ADDRSTRLEN], daddr[INET6_ADDRSTRLEN]; + uint16_t *ports; + + if (!inet_ntop(cfg_family, addrs, saddr, sizeof(saddr)) || + !inet_ntop(cfg_family, addrs + addr_len, daddr, sizeof(daddr))) + error(1, 0, "address parse error"); + + ports = (void *)addrs + (addr_len * 2); + log_verbose("cpu %d: rx_hash 0x%08x [saddr %s daddr %s sport %02hu dport %02hu]", + cpu, rx_hash, saddr, daddr, + ntohs(ports[0]), ntohs(ports[1])); +} + +/* Compare computed rxhash with rxhash received from tpacket_v3 */ +static void verify_rxhash(const char *pkt, uint32_t rx_hash, int cpu) +{ + unsigned char four_tuple[FOUR_TUPLE_MAX_LEN] = {0}; + uint32_t rx_hash_sw; + const char *addrs; + int addr_len; + + if (cfg_family == AF_INET) { + addr_len = sizeof(struct in_addr); + addrs = pkt + offsetof(struct iphdr, saddr); + } else { + addr_len = sizeof(struct in6_addr); + addrs = pkt + offsetof(struct ip6_hdr, ip6_src); + } + + memcpy(four_tuple, addrs, (addr_len * 2) + (sizeof(uint16_t) * 2)); + rx_hash_sw = toeplitz(four_tuple, toeplitz_key); + + if (cfg_verbose) + log_rxhash(cpu, rx_hash, addrs, addr_len); + + if (rx_hash != rx_hash_sw) { + log_verbose(" != expected 0x%x\n", rx_hash_sw); + frames_error++; + return; + } + + log_verbose(" OK"); + if (cfg_num_queues) + verify_rss(rx_hash, cpu); + else if (cfg_num_rps_cpus) + verify_rps(rx_hash, cpu); + log_verbose("\n"); +} + +static char *recv_frame(const struct ring_state *ring, char *frame) +{ + struct tpacket3_hdr *hdr = (void *)frame; + + if (hdr->hv1.tp_rxhash) + verify_rxhash(frame + hdr->tp_net, hdr->hv1.tp_rxhash, + ring->cpu); + else + frames_nohash++; + + return frame + hdr->tp_next_offset; +} + +/* A single TPACKET_V3 block can hold multiple frames */ +static bool recv_block(struct ring_state *ring) +{ + struct tpacket_block_desc *block; + char *frame; + int i; + + block = (void *)(ring->mmap + ring->idx * ring_block_sz); + if (!(block->hdr.bh1.block_status & TP_STATUS_USER)) + return false; + + frame = (char *)block; + frame += block->hdr.bh1.offset_to_first_pkt; + + for (i = 0; i < block->hdr.bh1.num_pkts; i++) { + frame = recv_frame(ring, frame); + frames_received++; + } + + block->hdr.bh1.block_status = TP_STATUS_KERNEL; + ring->idx = (ring->idx + 1) % ring_block_nr; + + return true; +} + +/* simple test: process all rings until MIN_PKT_SAMPLES packets are received, + * or the test times out. + */ +static void process_rings(void) +{ + struct timeval start, now; + bool pkts_found = true; + long elapsed_usec; + int i; + + gettimeofday(&start, NULL); + + do { + if (!pkts_found) + usleep(100); + + pkts_found = false; + for (i = 0; i < num_cpus; i++) + pkts_found |= recv_block(&rings[i]); + + gettimeofday(&now, NULL); + elapsed_usec = (now.tv_sec - start.tv_sec) * 1000000 + + (now.tv_usec - start.tv_usec); + } while (frames_received - frames_nohash < MIN_PKT_SAMPLES && + elapsed_usec < cfg_timeout_msec * 1000); + + fprintf(stderr, "count: pass=%u nohash=%u fail=%u\n", + frames_received - frames_nohash - frames_error, + frames_nohash, frames_error); +} + +static char *setup_ring(int fd) +{ + struct tpacket_req3 req3 = {0}; + void *ring; + + req3.tp_retire_blk_tov = cfg_timeout_msec / 8; + req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH; + + req3.tp_frame_size = 2048; + req3.tp_frame_nr = 1 << 10; + req3.tp_block_nr = 16; + + req3.tp_block_size = req3.tp_frame_size * req3.tp_frame_nr; + req3.tp_block_size /= req3.tp_block_nr; + + if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req3, sizeof(req3))) + error(1, errno, "setsockopt PACKET_RX_RING"); + + ring_block_sz = req3.tp_block_size; + ring_block_nr = req3.tp_block_nr; + + ring = mmap(0, req3.tp_block_size * req3.tp_block_nr, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_LOCKED | MAP_POPULATE, fd, 0); + if (ring == MAP_FAILED) + error(1, 0, "mmap failed"); + + return ring; +} + +static void __set_filter(int fd, int off_proto, uint8_t proto, int off_dport) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4), + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, off_proto), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, proto, 0, 2), + BPF_STMT(BPF_LD + BPF_H + BPF_ABS, off_dport), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_dport, 1, 0), + BPF_STMT(BPF_RET + BPF_K, 0), + BPF_STMT(BPF_RET + BPF_K, 0xFFFF), + }; + struct sock_fprog prog = {}; + + prog.filter = filter; + prog.len = ARRAY_SIZE(filter); + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog))) + error(1, errno, "setsockopt filter"); +} + +/* filter on transport protocol and destination port */ +static void set_filter(int fd) +{ + const int off_dport = offsetof(struct tcphdr, dest); /* same for udp */ + uint8_t proto; + + proto = cfg_type == SOCK_STREAM ? IPPROTO_TCP : IPPROTO_UDP; + if (cfg_family == AF_INET) + __set_filter(fd, offsetof(struct iphdr, protocol), proto, + sizeof(struct iphdr) + off_dport); + else + __set_filter(fd, offsetof(struct ip6_hdr, ip6_nxt), proto, + sizeof(struct ip6_hdr) + off_dport); +} + +/* drop everything: used temporarily during setup */ +static void set_filter_null(int fd) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET + BPF_K, 0), + }; + struct sock_fprog prog = {}; + + prog.filter = filter; + prog.len = ARRAY_SIZE(filter); + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog))) + error(1, errno, "setsockopt filter"); +} + +static int create_ring(char **ring) +{ + struct fanout_args args = { + .id = 1, + .type_flags = PACKET_FANOUT_CPU, + .max_num_members = RSS_MAX_CPUS + }; + struct sockaddr_ll ll = { 0 }; + int fd, val; + + fd = socket(PF_PACKET, SOCK_DGRAM, 0); + if (fd == -1) + error(1, errno, "socket creation failed"); + + val = TPACKET_V3; + if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, &val, sizeof(val))) + error(1, errno, "setsockopt PACKET_VERSION"); + *ring = setup_ring(fd); + + /* block packets until all rings are added to the fanout group: + * else packets can arrive during setup and get misclassified + */ + set_filter_null(fd); + + ll.sll_family = AF_PACKET; + ll.sll_ifindex = if_nametoindex(cfg_ifname); + ll.sll_protocol = cfg_family == AF_INET ? htons(ETH_P_IP) : + htons(ETH_P_IPV6); + if (bind(fd, (void *)&ll, sizeof(ll))) + error(1, errno, "bind"); + + /* must come after bind: verifies all programs in group match */ + if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &args, sizeof(args))) { + /* on failure, retry using old API if that is sufficient: + * it has a hard limit of 256 sockets, so only try if + * (a) only testing rxhash, not RSS or (b) <= 256 cpus. + * in this API, the third argument is left implicit. + */ + if (cfg_num_queues || num_cpus > 256 || + setsockopt(fd, SOL_PACKET, PACKET_FANOUT, + &args, sizeof(uint32_t))) + error(1, errno, "setsockopt PACKET_FANOUT cpu"); + } + + return fd; +} + +/* setup inet(6) socket to blackhole the test traffic, if arg '-s' */ +static int setup_sink(void) +{ + int fd, val; + + fd = socket(cfg_family, cfg_type, 0); + if (fd == -1) + error(1, errno, "socket %d.%d", cfg_family, cfg_type); + + val = 1 << 20; + if (setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &val, sizeof(val))) + error(1, errno, "setsockopt rcvbuf"); + + return fd; +} + +static void setup_rings(void) +{ + int i; + + for (i = 0; i < num_cpus; i++) { + rings[i].cpu = i; + rings[i].fd = create_ring(&rings[i].mmap); + } + + /* accept packets once all rings in the fanout group are up */ + for (i = 0; i < num_cpus; i++) + set_filter(rings[i].fd); +} + +static void cleanup_rings(void) +{ + int i; + + for (i = 0; i < num_cpus; i++) { + if (munmap(rings[i].mmap, ring_block_nr * ring_block_sz)) + error(1, errno, "munmap"); + if (close(rings[i].fd)) + error(1, errno, "close"); + } +} + +static void parse_cpulist(const char *arg) +{ + do { + rx_irq_cpus[cfg_num_queues++] = strtol(arg, NULL, 10); + + arg = strchr(arg, ','); + if (!arg) + break; + arg++; // skip ',' + } while (1); +} + +static void show_cpulist(void) +{ + int i; + + for (i = 0; i < cfg_num_queues; i++) + fprintf(stderr, "rxq %d: cpu %d\n", i, rx_irq_cpus[i]); +} + +static void show_silos(void) +{ + int i; + + for (i = 0; i < cfg_num_rps_cpus; i++) + fprintf(stderr, "silo %d: cpu %d\n", i, rps_silo_to_cpu[i]); +} + +static void parse_toeplitz_key(const char *str, int slen, unsigned char *key) +{ + int i, ret, off; + + if (slen < TOEPLITZ_STR_MIN_LEN || + slen > TOEPLITZ_STR_MAX_LEN + 1) + error(1, 0, "invalid toeplitz key"); + + for (i = 0, off = 0; off < slen; i++, off += 3) { + ret = sscanf(str + off, "%hhx", &key[i]); + if (ret != 1) + error(1, 0, "key parse error at %d off %d len %d", + i, off, slen); + } +} + +static void parse_rps_bitmap(const char *arg) +{ + unsigned long bitmap; + int i; + + bitmap = strtoul(arg, NULL, 0); + + if (bitmap & ~((1UL << RPS_MAX_CPUS) - 1)) + error(1, 0, "rps bitmap 0x%lx out of bounds, max cpu %lu", + bitmap, RPS_MAX_CPUS - 1); + + for (i = 0; i < RPS_MAX_CPUS; i++) + if (bitmap & 1UL << i) + rps_silo_to_cpu[cfg_num_rps_cpus++] = i; +} + +static void read_rss_dev_info_ynl(void) +{ + struct ethtool_rss_get_req *req; + struct ethtool_rss_get_rsp *rsp; + struct ynl_sock *ys; + + ys = ynl_sock_create(&ynl_ethtool_family, NULL); + if (!ys) + error(1, errno, "ynl_sock_create failed"); + + req = ethtool_rss_get_req_alloc(); + if (!req) + error(1, errno, "ethtool_rss_get_req_alloc failed"); + + ethtool_rss_get_req_set_header_dev_name(req, cfg_ifname); + + rsp = ethtool_rss_get(ys, req); + if (!rsp) + error(1, ys->err.code, "YNL: %s", ys->err.msg); + + if (!rsp->_len.hkey) + error(1, 0, "RSS key not available for %s", cfg_ifname); + + if (rsp->_len.hkey < TOEPLITZ_KEY_MIN_LEN || + rsp->_len.hkey > TOEPLITZ_KEY_MAX_LEN) + error(1, 0, "RSS key length %u out of bounds [%u, %u]", + rsp->_len.hkey, TOEPLITZ_KEY_MIN_LEN, + TOEPLITZ_KEY_MAX_LEN); + + memcpy(toeplitz_key, rsp->hkey, rsp->_len.hkey); + + if (rsp->_count.indir > RSS_MAX_INDIR) + error(1, 0, "RSS indirection table too large (%u > %u)", + rsp->_count.indir, RSS_MAX_INDIR); + + /* If indir table not available we'll fallback to simple modulo math */ + if (rsp->_count.indir) { + memcpy(rss_indir_tbl, rsp->indir, + rsp->_count.indir * sizeof(rss_indir_tbl[0])); + rss_indir_tbl_size = rsp->_count.indir; + + log_verbose("RSS indirection table size: %u\n", + rss_indir_tbl_size); + } + + ethtool_rss_get_rsp_free(rsp); + ethtool_rss_get_req_free(req); + ynl_sock_destroy(ys); +} + +static void parse_opts(int argc, char **argv) +{ + static struct option long_options[] = { + {"dport", required_argument, 0, 'd'}, + {"cpus", required_argument, 0, 'C'}, + {"key", required_argument, 0, 'k'}, + {"iface", required_argument, 0, 'i'}, + {"ipv4", no_argument, 0, '4'}, + {"ipv6", no_argument, 0, '6'}, + {"sink", no_argument, 0, 's'}, + {"tcp", no_argument, 0, 't'}, + {"timeout", required_argument, 0, 'T'}, + {"udp", no_argument, 0, 'u'}, + {"verbose", no_argument, 0, 'v'}, + {"rps", required_argument, 0, 'r'}, + {0, 0, 0, 0} + }; + bool have_toeplitz = false; + int index, c; + + while ((c = getopt_long(argc, argv, "46C:d:i:k:r:stT:uv", long_options, &index)) != -1) { + switch (c) { + case '4': + cfg_family = AF_INET; + break; + case '6': + cfg_family = AF_INET6; + break; + case 'C': + parse_cpulist(optarg); + break; + case 'd': + cfg_dport = strtol(optarg, NULL, 0); + break; + case 'i': + cfg_ifname = optarg; + break; + case 'k': + parse_toeplitz_key(optarg, strlen(optarg), + toeplitz_key); + have_toeplitz = true; + break; + case 'r': + parse_rps_bitmap(optarg); + break; + case 's': + cfg_sink = true; + break; + case 't': + cfg_type = SOCK_STREAM; + break; + case 'T': + cfg_timeout_msec = strtol(optarg, NULL, 0); + break; + case 'u': + cfg_type = SOCK_DGRAM; + break; + case 'v': + cfg_verbose = true; + break; + + default: + error(1, 0, "unknown option %c", optopt); + break; + } + } + + if (!have_toeplitz) + read_rss_dev_info_ynl(); + + num_cpus = get_nprocs(); + if (num_cpus > RSS_MAX_CPUS) + error(1, 0, "increase RSS_MAX_CPUS"); + + if (cfg_num_queues && cfg_num_rps_cpus) + error(1, 0, + "Can't supply both RSS cpus ('-C') and RPS map ('-r')"); + if (cfg_verbose) { + show_cpulist(); + show_silos(); + } +} + +int main(int argc, char **argv) +{ + const int min_tests = 10; + int fd_sink = -1; + + parse_opts(argc, argv); + + if (cfg_sink) + fd_sink = setup_sink(); + + setup_rings(); + + /* Signal to test framework that we're ready to receive */ + ksft_ready(); + + process_rings(); + cleanup_rings(); + + if (cfg_sink && close(fd_sink)) + error(1, errno, "close sink"); + + if (frames_received - frames_nohash < min_tests) + error(1, 0, "too few frames for verification"); + + return frames_error; +} diff --git a/tools/testing/selftests/drivers/net/hw/toeplitz.py b/tools/testing/selftests/drivers/net/hw/toeplitz.py new file mode 100755 index 000000000000..cd7e080e6f84 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/toeplitz.py @@ -0,0 +1,216 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Toeplitz Rx hashing test: + - rxhash (the hash value calculation itself); + - RSS mapping from rxhash to rx queue; + - RPS mapping from rxhash to cpu. +""" + +import glob +import os +import socket +from lib.py import ksft_run, ksft_exit, ksft_pr +from lib.py import NetDrvEpEnv, EthtoolFamily, NetdevFamily +from lib.py import cmd, bkg, rand_port, defer +from lib.py import ksft_in +from lib.py import ksft_variants, KsftNamedVariant, KsftSkipEx, KsftFailEx + +# "define" for the ID of the Toeplitz hash function +ETH_RSS_HASH_TOP = 1 +# Must match RPS_MAX_CPUS in toeplitz.c +RPS_MAX_CPUS = 16 + + +def _check_rps_and_rfs_not_configured(cfg): + """Verify that RPS is not already configured.""" + + for rps_file in glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*/rps_cpus"): + with open(rps_file, "r", encoding="utf-8") as fp: + val = fp.read().strip() + if set(val) - {"0", ","}: + raise KsftSkipEx(f"RPS already configured on {rps_file}: {val}") + + rfs_file = "/proc/sys/net/core/rps_sock_flow_entries" + with open(rfs_file, "r", encoding="utf-8") as fp: + val = fp.read().strip() + if val != "0": + raise KsftSkipEx(f"RFS already configured {rfs_file}: {val}") + + +def _get_cpu_for_irq(irq): + with open(f"/proc/irq/{irq}/smp_affinity_list", "r", + encoding="utf-8") as fp: + data = fp.read().strip() + if "," in data or "-" in data: + raise KsftFailEx(f"IRQ{irq} is not mapped to a single core: {data}") + return int(data) + + +def _get_irq_cpus(cfg): + """ + Read the list of IRQs for the device Rx queues. + """ + queues = cfg.netnl.queue_get({"ifindex": cfg.ifindex}, dump=True) + napis = cfg.netnl.napi_get({"ifindex": cfg.ifindex}, dump=True) + + # Remap into ID-based dicts + napis = {n["id"]: n for n in napis} + queues = {f"{q['type']}{q['id']}": q for q in queues} + + cpus = [] + for rx in range(9999): + name = f"rx{rx}" + if name not in queues: + break + cpus.append(_get_cpu_for_irq(napis[queues[name]["napi-id"]]["irq"])) + + return cpus + + +def _get_unused_rps_cpus(cfg, count=2): + """ + Get CPUs that are not used by Rx queues for RPS. + Returns a list of at least 'count' CPU numbers within + the RPS_MAX_CPUS supported range. + """ + + # Get CPUs used by Rx queues + rx_cpus = set(_get_irq_cpus(cfg)) + + # Get total number of CPUs, capped by RPS_MAX_CPUS + num_cpus = min(os.cpu_count(), RPS_MAX_CPUS) + + # Find unused CPUs + unused_cpus = [cpu for cpu in range(num_cpus) if cpu not in rx_cpus] + + if len(unused_cpus) < count: + raise KsftSkipEx(f"Need at least {count} CPUs in range 0..{num_cpus - 1} not used by Rx queues, found {len(unused_cpus)}") + + return unused_cpus[:count] + + +def _configure_rps(cfg, rps_cpus): + """Configure RPS for all Rx queues.""" + + mask = 0 + for cpu in rps_cpus: + mask |= (1 << cpu) + + mask = hex(mask) + + # Set RPS bitmap for all rx queues + for rps_file in glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*/rps_cpus"): + with open(rps_file, "w", encoding="utf-8") as fp: + # sysfs expects hex without '0x' prefix, toeplitz.c needs the prefix + fp.write(mask[2:]) + + return mask + + +def _send_traffic(cfg, proto_flag, ipver, port): + """Send 20 packets of requested type.""" + + # Determine protocol and IP version for socat + if proto_flag == "-u": + proto = "UDP" + else: + proto = "TCP" + + baddr = f"[{cfg.addr_v['6']}]" if ipver == "6" else cfg.addr_v["4"] + + # Run socat in a loop to send traffic periodically + # Use sh -c with a loop similar to toeplitz_client.sh + socat_cmd = f""" + for i in `seq 20`; do + echo "msg $i" | socat -{ipver} -t 0.1 - {proto}:{baddr}:{port}; + sleep 0.001; + done + """ + + cmd(socat_cmd, shell=True, host=cfg.remote) + + +def _test_variants(): + for grp in ["", "rss", "rps"]: + for l4 in ["tcp", "udp"]: + for l3 in ["4", "6"]: + name = f"{l4}_ipv{l3}" + if grp: + name = f"{grp}_{name}" + yield KsftNamedVariant(name, "-" + l4[0], l3, grp) + + +@ksft_variants(_test_variants()) +def test(cfg, proto_flag, ipver, grp): + """Run a single toeplitz test.""" + + cfg.require_ipver(ipver) + + # Check that rxhash is enabled + ksft_in("receive-hashing: on", cmd(f"ethtool -k {cfg.ifname}").stdout) + + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + # Make sure NIC is configured to use Toeplitz hash, and no key xfrm. + if rss.get('hfunc') != ETH_RSS_HASH_TOP or rss.get('input-xfrm'): + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "hfunc": ETH_RSS_HASH_TOP, + "input-xfrm": {}}) + defer(cfg.ethnl.rss_set, {"header": {"dev-index": cfg.ifindex}, + "hfunc": rss.get('hfunc'), + "input-xfrm": rss.get('input-xfrm', {}) + }) + + port = rand_port(socket.SOCK_DGRAM) + + toeplitz_path = cfg.test_dir / "toeplitz" + rx_cmd = [ + str(toeplitz_path), + "-" + ipver, + proto_flag, + "-d", str(port), + "-i", cfg.ifname, + "-T", "4000", + "-s", + "-v" + ] + + if grp: + _check_rps_and_rfs_not_configured(cfg) + if grp == "rss": + irq_cpus = ",".join([str(x) for x in _get_irq_cpus(cfg)]) + rx_cmd += ["-C", irq_cpus] + ksft_pr(f"RSS using CPUs: {irq_cpus}") + elif grp == "rps": + # Get CPUs not used by Rx queues and configure them for RPS + rps_cpus = _get_unused_rps_cpus(cfg, count=2) + rps_mask = _configure_rps(cfg, rps_cpus) + defer(_configure_rps, cfg, []) + rx_cmd += ["-r", rps_mask] + ksft_pr(f"RPS using CPUs: {rps_cpus}, mask: {rps_mask}") + + # Run rx in background, it will exit once it has seen enough packets + with bkg(" ".join(rx_cmd), ksft_ready=True, exit_wait=True) as rx_proc: + while rx_proc.proc.poll() is None: + _send_traffic(cfg, proto_flag, ipver, port) + + # Check rx result + ksft_pr("Receiver output:") + ksft_pr(rx_proc.stdout.strip().replace('\n', '\n# ')) + if rx_proc.stderr: + ksft_pr(rx_proc.stderr.strip().replace('\n', '\n# ')) + + +def main() -> None: + """Ksft boilerplate main.""" + + with NetDrvEpEnv(__file__) as cfg: + cfg.ethnl = EthtoolFamily() + cfg.netnl = NetdevFamily() + ksft_run(cases=[test], args=(cfg,)) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/tso.py b/tools/testing/selftests/drivers/net/hw/tso.py new file mode 100755 index 000000000000..bb675e3dac88 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/tso.py @@ -0,0 +1,264 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +"""Run the tools/testing/selftests/net/csum testsuite.""" + +import fcntl +import socket +import struct +import termios +import time + +from lib.py import ksft_pr, ksft_run, ksft_exit, KsftSkipEx, KsftXfailEx +from lib.py import ksft_eq, ksft_ge, ksft_lt +from lib.py import EthtoolFamily, NetdevFamily, NetDrvEpEnv +from lib.py import bkg, cmd, defer, ethtool, ip, rand_port, wait_port_listen + + +def sock_wait_drain(sock, max_wait=1000): + """Wait for all pending write data on the socket to get ACKed.""" + for _ in range(max_wait): + one = b'\0' * 4 + outq = fcntl.ioctl(sock.fileno(), termios.TIOCOUTQ, one) + outq = struct.unpack("I", outq)[0] + if outq == 0: + break + time.sleep(0.01) + ksft_eq(outq, 0) + + +def tcp_sock_get_retrans(sock): + """Get the number of retransmissions for the TCP socket.""" + info = sock.getsockopt(socket.SOL_TCP, socket.TCP_INFO, 512) + return struct.unpack("I", info[100:104])[0] + + +def run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso): + cfg.require_cmd("socat", local=False, remote=True) + + # Set recv window clamp to avoid overwhelming receiver on debug kernels + # the 200k clamp should still let use reach > 15Gbps on real HW + port = rand_port() + listen_opts = f"{port},reuseport,tcp-window-clamp=200000" + listen_cmd = f"socat -{ipver} -t 2 -u TCP-LISTEN:{listen_opts} /dev/null,ignoreeof" + + with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as nc: + wait_port_listen(port, host=cfg.remote) + + if ipver == "4": + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.connect((remote_v4, port)) + else: + sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM) + sock.connect((remote_v6, port)) + + # Small send to make sure the connection is working. + sock.send("ping".encode()) + sock_wait_drain(sock) + + # Send 4MB of data, record the LSO packet count. + qstat_old = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] + buf = b"0" * 1024 * 1024 * 4 + sock.send(buf) + sock_wait_drain(sock) + qstat_new = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] + + # Check that at least 90% of the data was sent as LSO packets. + # System noise may cause false negatives. Also header overheads + # will add up to 5% of extra packes... The check is best effort. + total_lso_wire = len(buf) * 0.90 // cfg.dev["mtu"] + total_lso_super = len(buf) * 0.90 // cfg.dev["tso_max_size"] + + # Make sure we have order of magnitude more LSO packets than + # retransmits, in case TCP retransmitted all the LSO packets. + ksft_lt(tcp_sock_get_retrans(sock), total_lso_wire / 16) + sock.close() + + if should_lso: + if cfg.have_stat_super_count: + ksft_ge(qstat_new['tx-hw-gso-packets'] - + qstat_old['tx-hw-gso-packets'], + total_lso_super, + comment="Number of LSO super-packets with LSO enabled") + if cfg.have_stat_wire_count: + ksft_ge(qstat_new['tx-hw-gso-wire-packets'] - + qstat_old['tx-hw-gso-wire-packets'], + total_lso_wire, + comment="Number of LSO wire-packets with LSO enabled") + else: + if cfg.have_stat_super_count: + ksft_lt(qstat_new['tx-hw-gso-packets'] - + qstat_old['tx-hw-gso-packets'], + 15, comment="Number of LSO super-packets with LSO disabled") + if cfg.have_stat_wire_count: + ksft_lt(qstat_new['tx-hw-gso-wire-packets'] - + qstat_old['tx-hw-gso-wire-packets'], + 500, comment="Number of LSO wire-packets with LSO disabled") + + +def build_tunnel(cfg, outer_ipver, tun_info): + local_v4 = NetDrvEpEnv.nsim_v4_pfx + "1" + local_v6 = NetDrvEpEnv.nsim_v6_pfx + "1" + remote_v4 = NetDrvEpEnv.nsim_v4_pfx + "2" + remote_v6 = NetDrvEpEnv.nsim_v6_pfx + "2" + + local_addr = cfg.addr_v[outer_ipver] + remote_addr = cfg.remote_addr_v[outer_ipver] + + tun_type = tun_info[0] + tun_arg = tun_info[1] + ip(f"link add {tun_type}-ksft type {tun_type} {tun_arg} local {local_addr} remote {remote_addr} dev {cfg.ifname}") + defer(ip, f"link del {tun_type}-ksft") + ip(f"link set dev {tun_type}-ksft up") + ip(f"addr add {local_v4}/24 dev {tun_type}-ksft") + ip(f"addr add {local_v6}/64 dev {tun_type}-ksft") + + ip(f"link add {tun_type}-ksft type {tun_type} {tun_arg} local {remote_addr} remote {local_addr} dev {cfg.remote_ifname}", + host=cfg.remote) + defer(ip, f"link del {tun_type}-ksft", host=cfg.remote) + ip(f"link set dev {tun_type}-ksft up", host=cfg.remote) + ip(f"addr add {remote_v4}/24 dev {tun_type}-ksft", host=cfg.remote) + ip(f"addr add {remote_v6}/64 dev {tun_type}-ksft", host=cfg.remote) + + return remote_v4, remote_v6 + + +def restore_wanted_features(cfg): + features_cmd = "" + for feature in cfg.hw_features: + setting = "on" if feature in cfg.wanted_features else "off" + features_cmd += f" {feature} {setting}" + try: + ethtool(f"-K {cfg.ifname} {features_cmd}") + except Exception as e: + ksft_pr(f"WARNING: failure restoring wanted features: {e}") + + +def test_builder(name, cfg, outer_ipver, feature, tun=None, inner_ipver=None): + """Construct specific tests from the common template.""" + def f(cfg): + cfg.require_ipver(outer_ipver) + defer(restore_wanted_features, cfg) + + if not cfg.have_stat_super_count and \ + not cfg.have_stat_wire_count: + raise KsftSkipEx(f"Device does not support LSO queue stats") + + if feature not in cfg.hw_features: + raise KsftSkipEx(f"Device does not support {feature}") + + ipver = outer_ipver + if tun: + remote_v4, remote_v6 = build_tunnel(cfg, ipver, tun) + ipver = inner_ipver + else: + remote_v4 = cfg.remote_addr_v["4"] + remote_v6 = cfg.remote_addr_v["6"] + + # First test without the feature enabled. + ethtool(f"-K {cfg.ifname} {feature} off") + run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=False) + + ethtool(f"-K {cfg.ifname} tx-gso-partial off") + ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation off") + if feature in cfg.partial_features: + ethtool(f"-K {cfg.ifname} tx-gso-partial on") + if ipver == "4": + ksft_pr("Testing with mangleid enabled") + ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation on") + + # Full feature enabled. + ethtool(f"-K {cfg.ifname} {feature} on") + run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=True) + + f.__name__ = name + ((outer_ipver + "_") if tun else "") + "ipv" + inner_ipver + return f + + +def query_nic_features(cfg) -> None: + """Query and cache the NIC features.""" + cfg.have_stat_super_count = False + cfg.have_stat_wire_count = False + + features = cfg.ethnl.features_get({"header": {"dev-index": cfg.ifindex}}) + + cfg.wanted_features = set() + for f in features["wanted"]["bits"]["bit"]: + cfg.wanted_features.add(f["name"]) + + cfg.hw_features = set() + hw_all_features_cmd = "" + for f in features["hw"]["bits"]["bit"]: + if f.get("value", False): + feature = f["name"] + cfg.hw_features.add(feature) + hw_all_features_cmd += f" {feature} on" + try: + ethtool(f"-K {cfg.ifname} {hw_all_features_cmd}") + except Exception as e: + ksft_pr(f"WARNING: failure enabling all hw features: {e}") + ksft_pr("partial gso feature detection may be impacted") + + # Check which features are supported via GSO partial + cfg.partial_features = set() + if 'tx-gso-partial' in cfg.hw_features: + ethtool(f"-K {cfg.ifname} tx-gso-partial off") + + no_partial = set() + features = cfg.ethnl.features_get({"header": {"dev-index": cfg.ifindex}}) + for f in features["active"]["bits"]["bit"]: + no_partial.add(f["name"]) + cfg.partial_features = cfg.hw_features - no_partial + ethtool(f"-K {cfg.ifname} tx-gso-partial on") + + restore_wanted_features(cfg) + + stats = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True) + if stats: + if 'tx-hw-gso-packets' in stats[0]: + ksft_pr("Detected qstat for LSO super-packets") + cfg.have_stat_super_count = True + if 'tx-hw-gso-wire-packets' in stats[0]: + ksft_pr("Detected qstat for LSO wire-packets") + cfg.have_stat_wire_count = True + + +def main() -> None: + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + cfg.ethnl = EthtoolFamily() + cfg.netnl = NetdevFamily() + + query_nic_features(cfg) + + test_info = ( + # name, v4/v6 ethtool_feature tun:(type, args, inner ip versions) + ("", "4", "tx-tcp-segmentation", None), + ("", "6", "tx-tcp6-segmentation", None), + ("vxlan", "4", "tx-udp_tnl-segmentation", ("vxlan", "id 100 dstport 4789 noudpcsum", ("4", "6"))), + ("vxlan", "6", "tx-udp_tnl-segmentation", ("vxlan", "id 100 dstport 4789 udp6zerocsumtx udp6zerocsumrx", ("4", "6"))), + ("vxlan_csum", "", "tx-udp_tnl-csum-segmentation", ("vxlan", "id 100 dstport 4789 udpcsum", ("4", "6"))), + ("gre", "4", "tx-gre-segmentation", ("gre", "", ("4", "6"))), + ("gre", "6", "tx-gre-segmentation", ("ip6gre","", ("4", "6"))), + ) + + cases = [] + for outer_ipver in ["4", "6"]: + for info in test_info: + # Skip if test which only works for a specific IP version + if info[1] and outer_ipver != info[1]: + continue + + if info[3]: + cases += [ + test_builder(info[0], cfg, outer_ipver, info[2], info[3], inner_ipver) + for inner_ipver in info[3][2] + ] + else: + cases.append(test_builder(info[0], cfg, outer_ipver, info[2], None, outer_ipver)) + + ksft_run(cases=cases, args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/uso.py b/tools/testing/selftests/drivers/net/hw/uso.py new file mode 100755 index 000000000000..6d61e56cab3c --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/uso.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +"""Test USO + +Sends large UDP datagrams with UDP_SEGMENT and verifies that the peer +receives the expected total payload and that the NIC transmitted at least +the expected number of segments. +""" +import random +import socket +import string + +from lib.py import ksft_run, ksft_exit, KsftSkipEx +from lib.py import ksft_eq, ksft_ge, ksft_variants, KsftNamedVariant +from lib.py import NetDrvEpEnv +from lib.py import bkg, defer, ethtool, ip, rand_port, wait_port_listen + +# python doesn't expose this constant, so we need to hardcode it to enable UDP +# segmentation for large payloads +UDP_SEGMENT = 103 + + +def _send_uso(cfg, ipver, mss, total_payload, port): + if ipver == "4": + sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + dst = (cfg.remote_addr_v["4"], port) + else: + sock = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM) + dst = (cfg.remote_addr_v["6"], port) + + sock.setsockopt(socket.IPPROTO_UDP, UDP_SEGMENT, mss) + payload = ''.join(random.choice(string.ascii_lowercase) + for _ in range(total_payload)) + sock.sendto(payload.encode(), dst) + sock.close() + + +def _get_tx_packets(cfg): + stats = ip(f"-s link show dev {cfg.ifname}", json=True)[0] + return stats['stats64']['tx']['packets'] + + +def _test_uso(cfg, ipver, mss, total_payload): + cfg.require_ipver(ipver) + cfg.require_cmd("socat", remote=True) + + features = ethtool(f"-k {cfg.ifname}", json=True) + uso_was_on = features[0]["tx-udp-segmentation"]["active"] + + try: + ethtool(f"-K {cfg.ifname} tx-udp-segmentation on") + except Exception as exc: + raise KsftSkipEx( + "Device does not support tx-udp-segmentation") from exc + if not uso_was_on: + defer(ethtool, f"-K {cfg.ifname} tx-udp-segmentation off") + + expected_segs = (total_payload + mss - 1) // mss + + port = rand_port(stype=socket.SOCK_DGRAM) + rx_cmd = f"socat -{ipver} -T 2 -u UDP-LISTEN:{port},reuseport STDOUT" + + tx_before = _get_tx_packets(cfg) + + with bkg(rx_cmd, host=cfg.remote, exit_wait=True) as rx: + wait_port_listen(port, proto="udp", host=cfg.remote) + _send_uso(cfg, ipver, mss, total_payload, port) + + ksft_eq(len(rx.stdout), total_payload, + comment=f"Received {len(rx.stdout)}B, expected {total_payload}B") + + cfg.wait_hw_stats_settle() + + tx_after = _get_tx_packets(cfg) + tx_delta = tx_after - tx_before + + ksft_ge(tx_delta, expected_segs, + comment=f"Expected >= {expected_segs} tx packets, got {tx_delta}") + + +def _uso_variants(): + for ipver in ["4", "6"]: + yield KsftNamedVariant(f"v{ipver}_partial", ipver, 1400, 1400 * 10 + 500) + yield KsftNamedVariant(f"v{ipver}_exact", ipver, 1400, 1400 * 5) + + +@ksft_variants(_uso_variants()) +def test_uso(cfg, ipver, mss, total_payload): + """Send a USO datagram and verify the peer receives the expected segments.""" + _test_uso(cfg, ipver, mss, total_payload) + + +def main() -> None: + """Run USO tests.""" + with NetDrvEpEnv(__file__) as cfg: + ksft_run([test_uso], + args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/xdp_metadata.py b/tools/testing/selftests/drivers/net/hw/xdp_metadata.py new file mode 100644 index 000000000000..33a1985356d9 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/xdp_metadata.py @@ -0,0 +1,146 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Tests for XDP metadata kfuncs (e.g. bpf_xdp_metadata_rx_hash). + +These tests load device-bound XDP programs from xdp_metadata.bpf.o +that call metadata kfuncs, send traffic, and verify the extracted +metadata via BPF maps. +""" +from lib.py import ksft_run, ksft_eq, ksft_exit, ksft_ge, ksft_ne, ksft_pr +from lib.py import KsftNamedVariant, ksft_variants +from lib.py import CmdExitFailure, KsftSkipEx, NetDrvEpEnv +from lib.py import NetdevFamily +from lib.py import bkg, cmd, rand_port, wait_port_listen +from lib.py import ip, bpftool, defer +from lib.py import bpf_map_set, bpf_map_dump, bpf_prog_map_ids + + +def _load_xdp_metadata_prog(cfg, prog_name, bpf_file="xdp_metadata.bpf.o"): + """Load a device-bound XDP metadata program and return prog/map info. + + Returns: + dict with 'id', 'name', and 'maps' (name -> map_id). + """ + abs_path = cfg.net_lib_dir / bpf_file + pin_dir = "/sys/fs/bpf/xdp_metadata_test" + + cmd(f"rm -rf {pin_dir}", shell=True, fail=False) + cmd(f"mkdir -p {pin_dir}", shell=True) + + try: + bpftool(f"prog loadall {abs_path} {pin_dir} type xdp " + f"xdpmeta_dev {cfg.ifname}") + except CmdExitFailure as e: + cmd(f"rm -rf {pin_dir}", shell=True, fail=False) + raise KsftSkipEx( + f"Failed to load device-bound XDP program '{prog_name}'" + ) from e + defer(cmd, f"rm -rf {pin_dir}", shell=True, fail=False) + + pin_path = f"{pin_dir}/{prog_name}" + ip(f"link set dev {cfg.ifname} xdpdrv pinned {pin_path}") + defer(ip, f"link set dev {cfg.ifname} xdpdrv off") + + xdp_info = ip(f"-d link show dev {cfg.ifname}", json=True)[0] + prog_id = xdp_info["xdp"]["prog"]["id"] + + return {"id": prog_id, + "name": xdp_info["xdp"]["prog"]["name"], + "maps": bpf_prog_map_ids(prog_id)} + + +def _send_probe(cfg, port, proto="tcp"): + """Send a single payload from the remote end using socat. + + Args: + cfg: Configuration object containing network settings. + port: Port number for the exchange. + proto: Protocol to use, either "tcp" or "udp". + """ + cfg.require_cmd("socat", remote=True) + + if proto == "tcp": + rx_cmd = f"socat -{cfg.addr_ipver} -T 2 TCP-LISTEN:{port},reuseport STDOUT" + tx_cmd = f"echo -n rss_hash_test | socat -t 2 -u STDIN TCP:{cfg.baddr}:{port}" + else: + rx_cmd = f"socat -{cfg.addr_ipver} -T 2 -u UDP-RECV:{port},reuseport STDOUT" + tx_cmd = f"echo -n rss_hash_test | socat -t 2 -u STDIN UDP:{cfg.baddr}:{port}" + + with bkg(rx_cmd, exit_wait=True): + wait_port_listen(port, proto=proto) + cmd(tx_cmd, host=cfg.remote, shell=True) + + +# BPF map keys matching the enums in xdp_metadata.bpf.c +_SETUP_KEY_PORT = 1 + +_RSS_KEY_HASH = 0 +_RSS_KEY_TYPE = 1 +_RSS_KEY_PKT_CNT = 2 +_RSS_KEY_ERR_CNT = 3 + +XDP_RSS_L4 = 0x8 # BIT(3) from enum xdp_rss_hash_type + + +@ksft_variants([ + KsftNamedVariant("tcp", "tcp"), + KsftNamedVariant("udp", "udp"), +]) +def test_xdp_rss_hash(cfg, proto): + """Test RSS hash metadata extraction via bpf_xdp_metadata_rx_hash(). + + This test will only run on devices that support xdp-rx-metadata-features. + + Loads the xdp_rss_hash program from xdp_metadata, sends a packet using + the specified protocol, and verifies that the program extracted a non-zero + hash with an L4 hash type. + """ + dev_info = cfg.netnl.dev_get({"ifindex": cfg.ifindex}) + rx_meta = dev_info.get("xdp-rx-metadata-features", []) + if "hash" not in rx_meta: + raise KsftSkipEx("device does not support XDP rx hash metadata") + + prog_info = _load_xdp_metadata_prog(cfg, "xdp_rss_hash") + + port = rand_port() + bpf_map_set("map_xdp_setup", _SETUP_KEY_PORT, port) + + rss_map_id = prog_info["maps"]["map_rss"] + + _send_probe(cfg, port, proto=proto) + + rss = bpf_map_dump(rss_map_id) + + pkt_cnt = rss.get(_RSS_KEY_PKT_CNT, 0) + err_cnt = rss.get(_RSS_KEY_ERR_CNT, 0) + hash_val = rss.get(_RSS_KEY_HASH, 0) + hash_type = rss.get(_RSS_KEY_TYPE, 0) + + ksft_ge(pkt_cnt, 1, comment="should have received at least one packet") + ksft_eq(err_cnt, 0, comment=f"RSS hash error count: {err_cnt}") + + ksft_ne(hash_val, 0, + f"RSS hash should be non-zero for {proto.upper()} traffic") + ksft_pr(f" RSS hash: {hash_val:#010x}") + + ksft_pr(f" RSS hash type: {hash_type:#06x}") + ksft_ne(hash_type & XDP_RSS_L4, 0, + f"RSS hash type should include L4 for {proto.upper()} traffic") + + +def main(): + """Run XDP metadata kfunc tests against a real device.""" + with NetDrvEpEnv(__file__) as cfg: + cfg.netnl = NetdevFamily() + ksft_run( + [ + test_xdp_rss_hash, + ], + args=(cfg,)) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/xsk_reconfig.py b/tools/testing/selftests/drivers/net/hw/xsk_reconfig.py new file mode 100755 index 000000000000..d19d1d518208 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/xsk_reconfig.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +# This is intended to be run on a virtio-net guest interface. +# The test binds the XDP socket to the interface without setting +# the fill ring to trigger delayed refill_work. This helps to +# make it easier to reproduce the deadlock when XDP program, +# XDP socket bind/unbind, rx ring resize race with refill_work on +# the buggy kernel. +# +# The Qemu command to setup virtio-net +# -netdev tap,id=hostnet1,vhost=on,script=no,downscript=no +# -device virtio-net-pci,netdev=hostnet1,iommu_platform=on,disable-legacy=on + +from lib.py import ksft_exit, ksft_run +from lib.py import KsftSkipEx, KsftFailEx +from lib.py import NetDrvEnv +from lib.py import bkg, ip, cmd, ethtool +import time + +def _get_rx_ring_entries(cfg): + output = ethtool(f"-g {cfg.ifname}", json=True) + return output[0]["rx"] + +def setup_xsk(cfg, xdp_queue_id = 0) -> bkg: + # Probe for support + xdp = cmd(f'{cfg.net_lib_dir / "xdp_helper"} - -', fail=False) + if xdp.ret == 255: + raise KsftSkipEx('AF_XDP unsupported') + elif xdp.ret > 0: + raise KsftFailEx('unable to create AF_XDP socket') + + try: + return bkg(f'{cfg.net_lib_dir / "xdp_helper"} {cfg.ifindex} ' \ + '{xdp_queue_id} -z', ksft_wait=3) + except: + raise KsftSkipEx('Failed to bind XDP socket in zerocopy.\n' \ + 'Please consider adding iommu_platform=on ' \ + 'when setting up virtio-net-pci') + +def check_xdp_bind(cfg): + with setup_xsk(cfg): + ip(f"link set dev %s xdp obj %s sec xdp" % + (cfg.ifname, cfg.net_lib_dir / "xdp_dummy.bpf.o")) + ip(f"link set dev %s xdp off" % cfg.ifname) + +def check_rx_resize(cfg): + with setup_xsk(cfg): + rx_ring = _get_rx_ring_entries(cfg) + ethtool(f"-G %s rx %d" % (cfg.ifname, rx_ring // 2)) + ethtool(f"-G %s rx %d" % (cfg.ifname, rx_ring)) + +def main(): + with NetDrvEnv(__file__, nsim_test=False) as cfg: + ksft_run([check_xdp_bind, check_rx_resize], + args=(cfg, )) + ksft_exit() + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/lib/py/__init__.py b/tools/testing/selftests/drivers/net/lib/py/__init__.py index 401e70f7f136..2b5ec0505672 100644 --- a/tools/testing/selftests/drivers/net/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/lib/py/__init__.py @@ -1,5 +1,14 @@ # SPDX-License-Identifier: GPL-2.0 +""" +Driver test environment. +NetDrvEnv and NetDrvEpEnv are the main environment classes. +NetDrvContEnv extends NetDrvEpEnv with netkit container support. +Former is for local host only tests, latter creates / connects +to a remote endpoint. See NIPA wiki for more information about +running and writing driver tests. +""" + import sys from pathlib import Path @@ -7,13 +16,43 @@ KSFT_DIR = (Path(__file__).parent / "../../../..").resolve() try: sys.path.append(KSFT_DIR.as_posix()) - from net.lib.py import * + + # Import one by one to avoid pylint false positives + from net.lib.py import NetNS, NetNSEnter, NetdevSimDev + from net.lib.py import EthtoolFamily, NetdevFamily, NetshaperFamily, \ + NlError, RtnlFamily, DevlinkFamily, PSPFamily, Netlink + from net.lib.py import CmdExitFailure + from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \ + fd_read_timeout, ip, rand_port, rand_ports, wait_port_listen, wait_file + from net.lib.py import bpf_map_set, bpf_map_dump, bpf_prog_map_ids + from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx + from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \ + ksft_setup, ksft_variants, KsftNamedVariant + from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \ + ksft_ne, ksft_not_in, ksft_raises, ksft_true, ksft_gt, ksft_not_none + + __all__ = ["NetNS", "NetNSEnter", "NetdevSimDev", + "EthtoolFamily", "NetdevFamily", "NetshaperFamily", + "NlError", "RtnlFamily", "DevlinkFamily", "PSPFamily", "Netlink", + "CmdExitFailure", + "bkg", "cmd", "bpftool", "bpftrace", "defer", "ethtool", + "fd_read_timeout", "ip", "rand_port", "rand_ports", + "wait_port_listen", "wait_file", + "bpf_map_set", "bpf_map_dump", "bpf_prog_map_ids", + "KsftSkipEx", "KsftFailEx", "KsftXfailEx", + "ksft_disruptive", "ksft_exit", "ksft_pr", "ksft_run", + "ksft_setup", "ksft_variants", "KsftNamedVariant", + "ksft_eq", "ksft_ge", "ksft_in", "ksft_is", "ksft_lt", + "ksft_ne", "ksft_not_in", "ksft_raises", "ksft_true", "ksft_gt", + "ksft_not_none", "ksft_not_none"] + + from .env import NetDrvEnv, NetDrvEpEnv, NetDrvContEnv + from .load import GenerateTraffic, Iperf3Runner + from .remote import Remote + + __all__ += ["NetDrvEnv", "NetDrvEpEnv", "NetDrvContEnv", "GenerateTraffic", + "Remote", "Iperf3Runner"] except ModuleNotFoundError as e: - ksft_pr("Failed importing `net` library from kernel sources") - ksft_pr(str(e)) - ktap_result(True, comment="SKIP") + print("Failed importing `net` library from kernel sources") + print(str(e)) sys.exit(4) - -from .env import * -from .load import * -from .remote import Remote diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py index 987e452d3a45..24ce122abd9c 100644 --- a/tools/testing/selftests/drivers/net/lib/py/env.py +++ b/tools/testing/selftests/drivers/net/lib/py/env.py @@ -1,74 +1,106 @@ # SPDX-License-Identifier: GPL-2.0 +import ipaddress import os import time +import json from pathlib import Path from lib.py import KsftSkipEx, KsftXfailEx -from lib.py import ksft_setup +from lib.py import ksft_setup, wait_file from lib.py import cmd, ethtool, ip, CmdExitFailure from lib.py import NetNS, NetdevSimDev from .remote import Remote +from . import bpftool, RtnlFamily, Netlink -def _load_env_file(src_path): - env = os.environ.copy() +class NetDrvEnvBase: + """ + Base class for a NIC / host environments - src_dir = Path(src_path).parent.resolve() - if not (src_dir / "net.config").exists(): + Attributes: + test_dir: Path to the source directory of the test + net_lib_dir: Path to the net/lib directory + """ + def __init__(self, src_path): + self.src_path = Path(src_path) + self.test_dir = self.src_path.parent.resolve() + self.net_lib_dir = (Path(__file__).parent / "../../../../net/lib").resolve() + + self.env = self._load_env_file() + + # Following attrs must be set be inheriting classes + self.dev = None + + def _load_env_file(self): + env = os.environ.copy() + + src_dir = Path(self.src_path).parent.resolve() + if not (src_dir / "net.config").exists(): + return ksft_setup(env) + + with open((src_dir / "net.config").as_posix(), 'r') as fp: + for line in fp.readlines(): + full_file = line + # Strip comments + pos = line.find("#") + if pos >= 0: + line = line[:pos] + line = line.strip() + if not line: + continue + pair = line.split('=', maxsplit=1) + if len(pair) != 2: + raise Exception("Can't parse configuration line:", full_file) + env[pair[0]] = pair[1] return ksft_setup(env) - with open((src_dir / "net.config").as_posix(), 'r') as fp: - for line in fp.readlines(): - full_file = line - # Strip comments - pos = line.find("#") - if pos >= 0: - line = line[:pos] - line = line.strip() - if not line: - continue - pair = line.split('=', maxsplit=1) - if len(pair) != 2: - raise Exception("Can't parse configuration line:", full_file) - env[pair[0]] = pair[1] - return ksft_setup(env) + def __del__(self): + pass + + def __enter__(self): + ip(f"link set dev {self.dev['ifname']} up") + wait_file(f"/sys/class/net/{self.dev['ifname']}/carrier", + lambda x: x.strip() == "1") + + return self + + def __exit__(self, ex_type, ex_value, ex_tb): + """ + __exit__ gets called at the end of a "with" block. + """ + self.__del__() -class NetDrvEnv: +class NetDrvEnv(NetDrvEnvBase): """ Class for a single NIC / host env, with no remote end """ - def __init__(self, src_path, **kwargs): - self._ns = None + def __init__(self, src_path, nsim_test=None, **kwargs): + super().__init__(src_path) - self.env = _load_env_file(src_path) + self._ns = None if 'NETIF' in self.env: - self.dev = ip("link show dev " + self.env['NETIF'], json=True)[0] + if nsim_test is True: + raise KsftXfailEx("Test only works on netdevsim") + + self.dev = ip("-d link show dev " + self.env['NETIF'], json=True)[0] else: + if nsim_test is False: + raise KsftXfailEx("Test does not work on netdevsim") + self._ns = NetdevSimDev(**kwargs) self.dev = self._ns.nsims[0].dev self.ifname = self.dev['ifname'] self.ifindex = self.dev['ifindex'] - def __enter__(self): - ip(f"link set dev {self.dev['ifname']} up") - - return self - - def __exit__(self, ex_type, ex_value, ex_tb): - """ - __exit__ gets called at the end of a "with" block. - """ - self.__del__() - def __del__(self): if self._ns: self._ns.remove() self._ns = None -class NetDrvEpEnv: +class NetDrvEpEnv(NetDrvEnvBase): """ Class for an environment with a local device and "remote endpoint" which can be used to send traffic in. @@ -82,8 +114,7 @@ class NetDrvEpEnv: nsim_v6_pfx = "2001:db8::" def __init__(self, src_path, nsim_test=None): - - self.env = _load_env_file(src_path) + super().__init__(src_path) self._stats_settle_time = None @@ -94,17 +125,20 @@ class NetDrvEpEnv: self._ns = None self._ns_peer = None + self.addr_v = { "4": None, "6": None } + self.remote_addr_v = { "4": None, "6": None } + if "NETIF" in self.env: if nsim_test is True: raise KsftXfailEx("Test only works on netdevsim") self._check_env() - self.dev = ip("link show dev " + self.env['NETIF'], json=True)[0] + self.dev = ip("-d link show dev " + self.env['NETIF'], json=True)[0] - self.v4 = self.env.get("LOCAL_V4") - self.v6 = self.env.get("LOCAL_V6") - self.remote_v4 = self.env.get("REMOTE_V4") - self.remote_v6 = self.env.get("REMOTE_V6") + self.addr_v["4"] = self.env.get("LOCAL_V4") + self.addr_v["6"] = self.env.get("LOCAL_V6") + self.remote_addr_v["4"] = self.env.get("REMOTE_V4") + self.remote_addr_v["6"] = self.env.get("REMOTE_V6") kind = self.env["REMOTE_TYPE"] args = self.env["REMOTE_ARGS"] else: @@ -115,26 +149,32 @@ class NetDrvEpEnv: self.dev = self._ns.nsims[0].dev - self.v4 = self.nsim_v4_pfx + "1" - self.v6 = self.nsim_v6_pfx + "1" - self.remote_v4 = self.nsim_v4_pfx + "2" - self.remote_v6 = self.nsim_v6_pfx + "2" + self.addr_v["4"] = self.nsim_v4_pfx + "1" + self.addr_v["6"] = self.nsim_v6_pfx + "1" + self.remote_addr_v["4"] = self.nsim_v4_pfx + "2" + self.remote_addr_v["6"] = self.nsim_v6_pfx + "2" kind = "netns" args = self._netns.name self.remote = Remote(kind, args, src_path) - self.addr = self.v6 if self.v6 else self.v4 - self.remote_addr = self.remote_v6 if self.remote_v6 else self.remote_v4 + self.addr_ipver = "6" if self.addr_v["6"] else "4" + self.addr = self.addr_v[self.addr_ipver] + self.remote_addr = self.remote_addr_v[self.addr_ipver] - self.addr_ipver = "6" if self.v6 else "4" # Bracketed addresses, some commands need IPv6 to be inside [] - self.baddr = f"[{self.v6}]" if self.v6 else self.v4 - self.remote_baddr = f"[{self.remote_v6}]" if self.remote_v6 else self.remote_v4 + self.baddr = f"[{self.addr_v['6']}]" if self.addr_v["6"] else self.addr_v["4"] + self.remote_baddr = f"[{self.remote_addr_v['6']}]" if self.remote_addr_v["6"] else self.remote_addr_v["4"] self.ifname = self.dev['ifname'] self.ifindex = self.dev['ifindex'] + # resolve remote interface name + self.remote_ifname = self.resolve_remote_ifc() + self.remote_dev = ip("-d link show dev " + self.remote_ifname, + host=self.remote, json=True)[0] + self.remote_ifindex = self.remote_dev['ifindex'] + self._required_cmd = {} def create_local(self): @@ -181,14 +221,17 @@ class NetDrvEpEnv: raise Exception("Invalid environment, missing configuration:", missing, "Please see tools/testing/selftests/drivers/net/README.rst") - def __enter__(self): - return self - - def __exit__(self, ex_type, ex_value, ex_tb): - """ - __exit__ gets called at the end of a "with" block. - """ - self.__del__() + def resolve_remote_ifc(self): + v4 = v6 = None + if self.remote_addr_v["4"]: + v4 = ip("addr show to " + self.remote_addr_v["4"], json=True, host=self.remote) + if self.remote_addr_v["6"]: + v6 = ip("addr show to " + self.remote_addr_v["6"], json=True, host=self.remote) + if v4 and v6 and v4[0]["ifname"] != v6[0]["ifname"]: + raise Exception("Can't resolve remote interface name, v4 and v6 don't match") + if (v4 and len(v4) > 1) or (v6 and len(v6) > 1): + raise Exception("Can't resolve remote interface name, multiple interfaces match") + return v6[0]["ifname"] if v6 else v4[0]["ifname"] def __del__(self): if self._ns: @@ -204,13 +247,25 @@ class NetDrvEpEnv: del self.remote self.remote = None - def require_v4(self): - if not self.v4 or not self.remote_v4: - raise KsftSkipEx("Test requires IPv4 connectivity") - - def require_v6(self): - if not self.v6 or not self.remote_v6: - raise KsftSkipEx("Test requires IPv6 connectivity") + def require_ipver(self, ipver): + if not self.addr_v[ipver] or not self.remote_addr_v[ipver]: + raise KsftSkipEx(f"Test requires IPv{ipver} connectivity") + + def require_nsim(self, nsim_test=True): + """Require or exclude netdevsim for this test""" + if nsim_test and self._ns is None: + raise KsftXfailEx("Test only works on netdevsim") + if nsim_test is False and self._ns is not None: + raise KsftXfailEx("Test does not work on netdevsim") + + def get_local_nsim_dev(self): + """Returns the local netdevsim device or None. + Using this method is discouraged, as it makes tests nsim-specific. + Standard interfaces available on all HW should ideally be used. + This method is intended for the few cases where nsim-specific + assertions need to be verified which cannot be verified otherwise. + """ + return self._ns def _require_cmd(self, comm, key, host=None): cached = self._required_cmd.get(comm, {}) @@ -225,7 +280,7 @@ class NetDrvEpEnv: if not self._require_cmd(comm, "local"): raise KsftSkipEx("Test requires command: " + comm) if remote: - if not self._require_cmd(comm, "remote"): + if not self._require_cmd(comm, "remote", host=self.remote): raise KsftSkipEx("Test requires (remote) command: " + comm) def wait_hw_stats_settle(self): @@ -242,7 +297,211 @@ class NetDrvEpEnv: if "Operation not supported" not in e.cmd.stderr: raise - self._stats_settle_time = 0.025 + \ - data.get('stats-block-usecs', 0) / 1000 / 1000 + self._stats_settle_time = \ + 1.25 * data.get('stats-block-usecs', 20000) / 1000 / 1000 time.sleep(self._stats_settle_time) + + +class NetDrvContEnv(NetDrvEpEnv): + """ + Class for an environment with a netkit pair setup for forwarding traffic + between the physical interface and a network namespace. + NETIF = "eth0" + LOCAL_V6 = "2001:db8:1::1" + REMOTE_V6 = "2001:db8:1::2" + LOCAL_PREFIX_V6 = "2001:db8:2::0/64" + + +-----------------------------+ +------------------------------+ + dst | INIT NS | | TEST NS | + 2001: | +---------------+ | | | + db8:2::2| | NETIF | | bpf | | + +---|>| 2001:db8:1::1 | |redirect| +-------------------------+ | + | | | |-----------|--------|>| Netkit | | + | | +---------------+ | _peer | | nk_guest | | + | | +-------------+ Netkit pair | | | fe80::2/64 | | + | | | Netkit |.............|........|>| 2001:db8:2::2/64 | | + | | | nk_host | | | +-------------------------+ | + | | | fe80::1/64 | | | | + | | +-------------+ | | route: | + | | | | default | + | | route: | | via fe80::1 dev nk_guest | + | | 2001:db8:2::2/128 | +------------------------------+ + | | via fe80::2 dev nk_host | + | +-----------------------------+ + | + | +---------------+ + | | REMOTE | + +---| 2001:db8:1::2 | + +---------------+ + """ + + def __init__(self, src_path, rxqueues=1, **kwargs): + self.netns = None + self._nk_host_ifname = None + self._nk_guest_ifname = None + self._tc_clsact_added = False + self._tc_attached = False + self._bpf_prog_pref = None + self._bpf_prog_id = None + self._init_ns_attached = False + self._old_fwd = None + self._old_accept_ra = None + + super().__init__(src_path, **kwargs) + + self.require_ipver("6") + local_prefix = self.env.get("LOCAL_PREFIX_V6") + if not local_prefix: + raise KsftSkipEx("LOCAL_PREFIX_V6 required") + + net = ipaddress.IPv6Network(local_prefix, strict=False) + self.ipv6_prefix = str(net.network_address) + self.nk_host_ipv6 = f"{self.ipv6_prefix}2:1" + self.nk_guest_ipv6 = f"{self.ipv6_prefix}2:2" + + local_v6 = ipaddress.IPv6Address(self.addr_v["6"]) + if local_v6 in net: + raise KsftSkipEx("LOCAL_V6 must not fall within LOCAL_PREFIX_V6") + + rtnl = RtnlFamily() + rtnl.newlink( + { + "linkinfo": { + "kind": "netkit", + "data": { + "mode": "l2", + "policy": "forward", + "peer-policy": "forward", + }, + }, + "num-rx-queues": rxqueues, + }, + flags=[Netlink.NLM_F_CREATE, Netlink.NLM_F_EXCL], + ) + + all_links = ip("-d link show", json=True) + netkit_links = [link for link in all_links + if link.get('linkinfo', {}).get('info_kind') == 'netkit' + and 'UP' not in link.get('flags', [])] + + if len(netkit_links) != 2: + raise KsftSkipEx("Failed to create netkit pair") + + netkit_links.sort(key=lambda x: x['ifindex']) + self._nk_host_ifname = netkit_links[1]['ifname'] + self._nk_guest_ifname = netkit_links[0]['ifname'] + self.nk_host_ifindex = netkit_links[1]['ifindex'] + self.nk_guest_ifindex = netkit_links[0]['ifindex'] + + self._setup_ns() + self._attach_bpf() + + def __del__(self): + if self._tc_attached: + cmd(f"tc filter del dev {self.ifname} ingress pref {self._bpf_prog_pref}") + self._tc_attached = False + + if self._tc_clsact_added: + cmd(f"tc qdisc del dev {self.ifname} clsact") + self._tc_clsact_added = False + + if self._nk_host_ifname: + cmd(f"ip link del dev {self._nk_host_ifname}") + self._nk_host_ifname = None + self._nk_guest_ifname = None + + if self._init_ns_attached: + cmd("ip netns del init", fail=False) + self._init_ns_attached = False + + if self.netns: + del self.netns + self.netns = None + + if self._old_fwd is not None: + with open("/proc/sys/net/ipv6/conf/all/forwarding", "w", + encoding="utf-8") as f: + f.write(self._old_fwd) + self._old_fwd = None + if self._old_accept_ra is not None: + with open("/proc/sys/net/ipv6/conf/all/accept_ra", "w", + encoding="utf-8") as f: + f.write(self._old_accept_ra) + self._old_accept_ra = None + + super().__del__() + + def _setup_ns(self): + fwd_path = "/proc/sys/net/ipv6/conf/all/forwarding" + ra_path = "/proc/sys/net/ipv6/conf/all/accept_ra" + with open(fwd_path, encoding="utf-8") as f: + self._old_fwd = f.read().strip() + with open(ra_path, encoding="utf-8") as f: + self._old_accept_ra = f.read().strip() + with open(fwd_path, "w", encoding="utf-8") as f: + f.write("1") + with open(ra_path, "w", encoding="utf-8") as f: + f.write("2") + + self.netns = NetNS() + cmd("ip netns attach init 1") + self._init_ns_attached = True + ip("netns set init 0", ns=self.netns) + ip(f"link set dev {self._nk_guest_ifname} netns {self.netns.name}") + ip(f"link set dev {self._nk_host_ifname} up") + ip(f"-6 addr add fe80::1/64 dev {self._nk_host_ifname} nodad") + ip(f"-6 route add {self.nk_guest_ipv6}/128 via fe80::2 dev {self._nk_host_ifname}") + + ip("link set lo up", ns=self.netns) + ip(f"link set dev {self._nk_guest_ifname} up", ns=self.netns) + ip(f"-6 addr add fe80::2/64 dev {self._nk_guest_ifname}", ns=self.netns) + ip(f"-6 addr add {self.nk_guest_ipv6}/64 dev {self._nk_guest_ifname} nodad", ns=self.netns) + ip(f"-6 route add default via fe80::1 dev {self._nk_guest_ifname}", ns=self.netns) + + def _tc_ensure_clsact(self): + qdisc = json.loads(cmd(f"tc -j qdisc show dev {self.ifname}").stdout) + for q in qdisc: + if q['kind'] == 'clsact': + return + cmd(f"tc qdisc add dev {self.ifname} clsact") + self._tc_clsact_added = True + + def _get_bpf_prog_ids(self): + filters = json.loads(cmd(f"tc -j filter show dev {self.ifname} ingress").stdout) + for bpf in filters: + if 'options' not in bpf: + continue + if bpf['options']['bpf_name'].startswith('nk_forward.bpf'): + return (bpf['pref'], bpf['options']['prog']['id']) + raise Exception("Failed to get BPF prog ID") + + def _attach_bpf(self): + bpf_obj = self.test_dir / "nk_forward.bpf.o" + if not bpf_obj.exists(): + raise KsftSkipEx("BPF prog not found") + + self._tc_ensure_clsact() + cmd(f"tc filter add dev {self.ifname} ingress bpf obj {bpf_obj}" + " sec tc/ingress direct-action") + self._tc_attached = True + + (self._bpf_prog_pref, self._bpf_prog_id) = self._get_bpf_prog_ids() + prog_info = bpftool(f"prog show id {self._bpf_prog_id}", json=True) + map_ids = prog_info.get("map_ids", []) + + bss_map_id = None + for map_id in map_ids: + map_info = bpftool(f"map show id {map_id}", json=True) + if map_info.get("name").endswith("bss"): + bss_map_id = map_id + + if bss_map_id is None: + raise Exception("Failed to find .bss map") + + ipv6_addr = ipaddress.IPv6Address(self.ipv6_prefix) + ipv6_bytes = ipv6_addr.packed + ifindex_bytes = self.nk_host_ifindex.to_bytes(4, byteorder='little') + value = ipv6_bytes + ifindex_bytes + value_hex = ' '.join(f'{b:02x}' for b in value) + bpftool(f"map update id {bss_map_id} key hex 00 00 00 00 value hex {value_hex}") diff --git a/tools/testing/selftests/drivers/net/lib/py/load.py b/tools/testing/selftests/drivers/net/lib/py/load.py index da5af2c680fa..f181fa2d38fc 100644 --- a/tools/testing/selftests/drivers/net/lib/py/load.py +++ b/tools/testing/selftests/drivers/net/lib/py/load.py @@ -1,46 +1,96 @@ # SPDX-License-Identifier: GPL-2.0 +import re import time +import json -from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen, bkg +from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen -class GenerateTraffic: - def __init__(self, env, port=None): - env.require_cmd("iperf3", remote=True) +class Iperf3Runner: + """ + Sets up and runs iperf3 traffic. + """ + def __init__(self, env, port=None, server_ip=None, client_ip=None): + env.require_cmd("iperf3", local=True, remote=True) self.env = env + self.port = rand_port() if port is None else port + self.server_ip = server_ip + self.client_ip = client_ip + + def _build_server(self): + cmdline = f"iperf3 -s -1 -p {self.port}" + if self.server_ip: + cmdline += f" -B {self.server_ip}" + return cmdline + + def _build_client(self, streams, duration, reverse): + host = self.env.addr if self.server_ip is None else self.server_ip + cmdline = f"iperf3 -c {host} -p {self.port} -P {streams} -t {duration} -J" + if self.client_ip: + cmdline += f" -B {self.client_ip}" + if reverse: + cmdline += " --reverse" + return cmdline - if port is None: - port = rand_port() - self._iperf_server = cmd(f"iperf3 -s -1 -p {port}", background=True) - wait_port_listen(port) + def start_server(self): + """ + Starts an iperf3 server with optional bind IP. + """ + cmdline = self._build_server() + proc = cmd(cmdline, background=True) + wait_port_listen(self.port) time.sleep(0.1) - self._iperf_client = cmd(f"iperf3 -c {env.addr} -P 16 -p {port} -t 86400", - background=True, host=env.remote) + return proc + + def start_client(self, background=False, streams=1, duration=10, reverse=False): + """ + Starts the iperf3 client with the configured options. + """ + cmdline = self._build_client(streams, duration, reverse) + return cmd(cmdline, background=background, host=self.env.remote) + + def measure_bandwidth(self, reverse=False): + """ + Runs an iperf3 measurement and returns the average bandwidth (Gbps). + Discards the first and last few reporting intervals and uses only the + middle part of the run where throughput is typically stable. + """ + self.start_server() + result = self.start_client(duration=10, reverse=reverse) + + if result.ret != 0: + raise RuntimeError("iperf3 failed to run successfully") + try: + out = json.loads(result.stdout) + except json.JSONDecodeError as exc: + raise ValueError("Failed to parse iperf3 JSON output") from exc + + intervals = out.get("intervals", []) + samples = [i["sum"]["bits_per_second"] / 1e9 for i in intervals] + if len(samples) < 10: + raise ValueError(f"iperf3 returned too few intervals: {len(samples)}") + # Discard potentially unstable first and last 3 seconds. + stable = samples[3:-3] + + avg = sum(stable) / len(stable) + + return avg + + +class GenerateTraffic: + def __init__(self, env, port=None): + self.env = env + self.runner = Iperf3Runner(env, port) + + self._iperf_server = self.runner.start_server() + self._iperf_client = self.runner.start_client(background=True, streams=16, duration=86400) # Wait for traffic to ramp up if not self._wait_pkts(pps=1000): self.stop(verbose=True) raise Exception("iperf3 traffic did not ramp up") - def run_remote_test(self, env: object, port=None, command=None): - if port is None: - port = rand_port() - try: - server_cmd = f"iperf3 -s 1 -p {port} --one-off" - with bkg(server_cmd, host=env.remote): - #iperf3 opens TCP connection as default in server - #-u to be specified in client command for UDP - wait_port_listen(port, host=env.remote) - except Exception as e: - raise Exception(f"Unexpected error occurred while running server command: {e}") - try: - client_cmd = f"iperf3 -c {env.remote_addr} -p {port} {command}" - proc = cmd(client_cmd) - return proc - except Exception as e: - raise Exception(f"Unexpected error occurred while running client command: {e}") - def _wait_pkts(self, pkt_cnt=None, pps=None): """ Wait until we've seen pkt_cnt or until traffic ramps up to pps. @@ -74,3 +124,16 @@ class GenerateTraffic: ksft_pr(">> Server:") ksft_pr(self._iperf_server.stdout) ksft_pr(self._iperf_server.stderr) + self._wait_client_stopped() + + def _wait_client_stopped(self, sleep=0.005, timeout=5): + end = time.monotonic() + timeout + + live_port_pattern = re.compile(fr":{self.runner.port:04X} 0[^6] ") + + while time.monotonic() < end: + data = cmd("cat /proc/net/tcp*", host=self.env.remote).stdout + if not live_port_pattern.search(data): + return + time.sleep(sleep) + raise Exception(f"Waiting for client to stop timed out after {timeout}s") diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh index 3acaba41ac7b..a9a01a64b7b3 100644 --- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh +++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh @@ -11,9 +11,13 @@ set -euo pipefail LIBDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") SRCIF="" # to be populated later -SRCIP=192.0.2.1 +SRCIP="" # to be populated later +SRCIP4="192.0.2.1" +SRCIP6="fc00::1" DSTIF="" # to be populated later -DSTIP=192.0.2.2 +DSTIP="" # to be populated later +DSTIP4="192.0.2.2" +DSTIP6="fc00::2" PORT="6666" MSG="netconsole selftest" @@ -26,18 +30,23 @@ NETCONS_PATH="${NETCONS_CONFIGFS}"/"${TARGET}" # NAMESPACE will be populated by setup_ns with a random value NAMESPACE="" -# IDs for netdevsim +# IDs for netdevsim. We either use NSIM_DEV_{1,2}_ID for standard test +# or NSIM_BOND_{T,R}X_{1,2} for the bonding tests. Not both at the +# same time. NSIM_DEV_1_ID=$((256 + RANDOM % 256)) NSIM_DEV_2_ID=$((512 + RANDOM % 256)) +NSIM_BOND_TX_1=$((768 + RANDOM % 256)) +NSIM_BOND_TX_2=$((1024 + RANDOM % 256)) +NSIM_BOND_RX_1=$((1280 + RANDOM % 256)) +NSIM_BOND_RX_2=$((1536 + RANDOM % 256)) NSIM_DEV_SYS_NEW="/sys/bus/netdevsim/new_device" +NSIM_DEV_SYS_LINK="/sys/bus/netdevsim/link_device" # Used to create and delete namespaces source "${LIBDIR}"/../../../../net/lib.sh -source "${LIBDIR}"/../../../../net/net_helper.sh # Create netdevsim interfaces create_ifaces() { - echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_NEW" echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_NEW" udevadm settle 2> /dev/null || true @@ -81,7 +90,23 @@ function configure_ip() { ip link set "${SRCIF}" up } +function select_ipv4_or_ipv6() +{ + local VERSION=${1} + + if [[ "$VERSION" == "ipv6" ]] + then + DSTIP="${DSTIP6}" + SRCIP="${SRCIP6}" + else + DSTIP="${DSTIP4}" + SRCIP="${SRCIP4}" + fi +} + function set_network() { + local IP_VERSION=${1:-"ipv4"} + # setup_ns function is coming from lib.sh setup_ns NAMESPACE @@ -92,33 +117,77 @@ function set_network() { # Link both interfaces back to back link_ifaces + select_ipv4_or_ipv6 "${IP_VERSION}" configure_ip } -function create_dynamic_target() { +function _create_dynamic_target() { + local FORMAT="${1:?FORMAT parameter required}" + local NCPATH="${2:?NCPATH parameter required}" + DSTMAC=$(ip netns exec "${NAMESPACE}" \ ip link show "${DSTIF}" | awk '/ether/ {print $2}') # Create a dynamic target - mkdir "${NETCONS_PATH}" + mkdir "${NCPATH}" - echo "${DSTIP}" > "${NETCONS_PATH}"/remote_ip - echo "${SRCIP}" > "${NETCONS_PATH}"/local_ip - echo "${DSTMAC}" > "${NETCONS_PATH}"/remote_mac - echo "${SRCIF}" > "${NETCONS_PATH}"/dev_name + echo "${DSTIP}" > "${NCPATH}"/remote_ip + echo "${SRCIP}" > "${NCPATH}"/local_ip + echo "${DSTMAC}" > "${NCPATH}"/remote_mac + echo "${SRCIF}" > "${NCPATH}"/dev_name - echo 1 > "${NETCONS_PATH}"/enabled + if [ "${FORMAT}" == "basic" ] + then + # Basic target does not support release + echo 0 > "${NCPATH}"/release + echo 0 > "${NCPATH}"/extended + elif [ "${FORMAT}" == "extended" ] + then + echo 1 > "${NCPATH}"/extended + fi } -function cleanup() { - local NSIM_DEV_SYS_DEL="/sys/bus/netdevsim/del_device" +function create_dynamic_target() { + local FORMAT=${1:-"extended"} + local NCPATH=${2:-"$NETCONS_PATH"} + _create_dynamic_target "${FORMAT}" "${NCPATH}" - # delete netconsole dynamic reconfiguration + echo 1 > "${NCPATH}"/enabled + + # This will make sure that the kernel was able to + # load the netconsole driver configuration. The console message + # gets more organized/sequential as well. + sleep 1 +} + +# Generate the command line argument for netconsole following: +# netconsole=[+][src-port]@[src-ip]/[<dev>],[tgt-port]@<tgt-ip>/[tgt-macaddr] +function create_cmdline_str() { + local BINDMODE=${1:-"ifname"} + if [ "${BINDMODE}" == "ifname" ] + then + SRCDEV=${SRCIF} + else + SRCDEV=$(mac_get "${SRCIF}") + fi + + DSTMAC=$(ip netns exec "${NAMESPACE}" \ + ip link show "${DSTIF}" | awk '/ether/ {print $2}') + SRCPORT="1514" + TGTPORT="6666" + + echo "netconsole=\"+${SRCPORT}@${SRCIP}/${SRCDEV},${TGTPORT}@${DSTIP}/${DSTMAC}\"" +} + +# Do not append the release to the header of the message +function disable_release_append() { echo 0 > "${NETCONS_PATH}"/enabled - # Remove all the keys that got created during the selftest - find "${NETCONS_PATH}/userdata/" -mindepth 1 -type d -delete - # Remove the configfs entry - rmdir "${NETCONS_PATH}" + echo 0 > "${NETCONS_PATH}"/release + echo 1 > "${NETCONS_PATH}"/enabled +} + +function do_cleanup() { + local NSIM_DEV_SYS_DEL="/sys/bus/netdevsim/del_device" # Delete netdevsim devices echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_DEL" @@ -131,6 +200,31 @@ function cleanup() { echo "${DEFAULT_PRINTK_VALUES}" > /proc/sys/kernel/printk } +function cleanup_netcons() { + # delete netconsole dynamic reconfiguration + # do not fail if the target is already disabled + local TARGET_PATH=${1:-${NETCONS_PATH}} + + if [[ ! -d "${TARGET_PATH}" ]] + then + # in some cases this is called before netcons path is created + return + fi + if [[ $(cat "${TARGET_PATH}"/enabled) != 0 ]] + then + echo 0 > "${TARGET_PATH}"/enabled || true + fi + # Remove all the keys that got created during the selftest + find "${TARGET_PATH}/userdata/" -mindepth 1 -type d -delete + # Remove the configfs entry + rmdir "${TARGET_PATH}" +} + +function cleanup() { + cleanup_netcons + do_cleanup +} + function set_user_data() { if [[ ! -d "${NETCONS_PATH}""/userdata" ]] then @@ -146,18 +240,24 @@ function set_user_data() { function listen_port_and_save_to() { local OUTPUT=${1} - # Just wait for 2 seconds - timeout 2 ip netns exec "${NAMESPACE}" \ - socat UDP-LISTEN:"${PORT}",fork "${OUTPUT}" + local IPVERSION=${2:-"ipv4"} + + if [ "${IPVERSION}" == "ipv4" ] + then + SOCAT_MODE="UDP-LISTEN" + else + SOCAT_MODE="UDP6-LISTEN" + fi + + # Just wait for 3 seconds + timeout 3 ip netns exec "${NAMESPACE}" \ + socat "${SOCAT_MODE}":"${PORT}",fork,shut-none "${OUTPUT}" 2> /dev/null } -function validate_result() { +# Only validate that the message arrived properly +function validate_msg() { local TMPFILENAME="$1" - # TMPFILENAME will contain something like: - # 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM - # key=value - # Check if the file exists if [ ! -f "$TMPFILENAME" ]; then echo "FAIL: File was not generated." >&2 @@ -169,17 +269,32 @@ function validate_result() { cat "${TMPFILENAME}" >&2 exit "${ksft_fail}" fi +} - if ! grep -q "${USERDATA_KEY}=${USERDATA_VALUE}" "${TMPFILENAME}"; then - echo "FAIL: ${USERDATA_KEY}=${USERDATA_VALUE} not found in ${TMPFILENAME}" >&2 - cat "${TMPFILENAME}" >&2 - exit "${ksft_fail}" +# Validate the message and userdata +function validate_result() { + local TMPFILENAME="$1" + + # TMPFILENAME will contain something like: + # 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM + # key=value + + validate_msg "${TMPFILENAME}" + + # userdata is not supported on basic format target, + # thus, do not validate it. + if [ "${FORMAT}" != "basic" ]; + then + if ! grep -q "${USERDATA_KEY}=${USERDATA_VALUE}" "${TMPFILENAME}"; then + echo "FAIL: ${USERDATA_KEY}=${USERDATA_VALUE} not found in ${TMPFILENAME}" >&2 + cat "${TMPFILENAME}" >&2 + exit "${ksft_fail}" + fi fi # Delete the file once it is validated, otherwise keep it # for debugging purposes rm "${TMPFILENAME}" - exit "${ksft_pass}" } function check_for_dependencies() { @@ -203,6 +318,11 @@ function check_for_dependencies() { exit "${ksft_skip}" fi + if [ ! -f /proc/net/if_inet6 ]; then + echo "SKIP: IPv6 not configured. Check if CONFIG_IPV6 is enabled" >&2 + exit "${ksft_skip}" + fi + if [ ! -f "${NSIM_DEV_SYS_NEW}" ]; then echo "SKIP: file ${NSIM_DEV_SYS_NEW} does not exist. Check if CONFIG_NETDEVSIM is enabled" >&2 exit "${ksft_skip}" @@ -218,8 +338,107 @@ function check_for_dependencies() { exit "${ksft_skip}" fi - if ip addr list | grep -E "inet.*(${SRCIP}|${DSTIP})" 2> /dev/null; then - echo "SKIP: IPs already in use. Skipping it" >&2 + REGEXP4="inet.*(${SRCIP4}|${DSTIP4})" + REGEXP6="inet.*(${SRCIP6}|${DSTIP6})" + if ip addr list | grep -E "${REGEXP4}" 2> /dev/null; then + echo "SKIP: IPv4s already in use. Skipping it" >&2 + exit "${ksft_skip}" + fi + + if ip addr list | grep -E "${REGEXP6}" 2> /dev/null; then + echo "SKIP: IPv6s already in use. Skipping it" >&2 exit "${ksft_skip}" fi } + +function check_for_taskset() { + if ! which taskset > /dev/null ; then + echo "SKIP: taskset(1) is not available" >&2 + exit "${ksft_skip}" + fi +} + +# This is necessary if running multiple tests in a row +function pkill_socat() { + PROCESS_NAME4="socat UDP-LISTEN:6666,fork,shut-none ${OUTPUT_FILE}" + PROCESS_NAME6="socat UDP6-LISTEN:6666,fork,shut-none ${OUTPUT_FILE}" + # socat runs under timeout(1), kill it if it is still alive + # do not fail if socat doesn't exist anymore + set +e + pkill -f "${PROCESS_NAME4}" + pkill -f "${PROCESS_NAME6}" + set -e +} + +# Check if netconsole was compiled as a module, otherwise exit +function check_netconsole_module() { + if modinfo netconsole | grep filename: | grep -q builtin + then + echo "SKIP: netconsole should be compiled as a module" >&2 + exit "${ksft_skip}" + fi +} + +function wait_target_state() { + local TARGET=${1} + local STATE=${2} + local TARGET_PATH="${NETCONS_CONFIGFS}"/"${TARGET}" + local ENABLED=0 + + if [ "${STATE}" == "enabled" ] + then + ENABLED=1 + fi + + if [ ! -d "$TARGET_PATH" ]; then + echo "FAIL: Target does not exist." >&2 + exit "${ksft_fail}" + fi + + local CHECK_CMD="grep \"$ENABLED\" \"$TARGET_PATH/enabled\"" + slowwait 2 sh -c "test -n \"\$($CHECK_CMD)\"" || { + echo "FAIL: ${TARGET} is not ${STATE}." >&2 + exit "${ksft_fail}" + } +} + +# A wrapper to translate protocol version to udp version +function wait_for_port() { + local NAMESPACE=${1} + local PORT=${2} + IP_VERSION=${3} + + if [ "${IP_VERSION}" == "ipv6" ] + then + PROTOCOL="udp6" + else + PROTOCOL="udp" + fi + + wait_local_port_listen "${NAMESPACE}" "${PORT}" "${PROTOCOL}" + # even after the port is open, let's wait 1 second before writing + # otherwise the packet could be missed, and the test will fail. Happens + # more frequently on IPv6 + sleep 1 +} + +# Clean up netdevsim ifaces created for bonding test +function cleanup_bond_nsim() { + ip -n "${TXNS}" \ + link delete "${BOND_TX_MAIN_IF}" type bond || true + ip -n "${RXNS}" \ + link delete "${BOND_RX_MAIN_IF}" type bond || true + + cleanup_netdevsim "$NSIM_BOND_TX_1" + cleanup_netdevsim "$NSIM_BOND_TX_2" + cleanup_netdevsim "$NSIM_BOND_RX_1" + cleanup_netdevsim "$NSIM_BOND_RX_2" +} + +# cleanup tests that use bonding interfaces +function cleanup_bond() { + cleanup_netcons + cleanup_bond_nsim + cleanup_all_ns + ip link delete "${VETH0}" || true +} diff --git a/tools/testing/selftests/drivers/net/macsec.py b/tools/testing/selftests/drivers/net/macsec.py new file mode 100755 index 000000000000..9a83d9542e04 --- /dev/null +++ b/tools/testing/selftests/drivers/net/macsec.py @@ -0,0 +1,343 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +"""MACsec tests.""" + +import os + +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_raises +from lib.py import ksft_variants, KsftNamedVariant +from lib.py import CmdExitFailure, KsftSkipEx +from lib.py import NetDrvEpEnv +from lib.py import cmd, ip, defer, ethtool + +MACSEC_KEY = "12345678901234567890123456789012" +MACSEC_VLAN_VID = 10 + +# Unique prefix per run to avoid collisions in the shared netns. +# Keep it short: IFNAMSIZ is 16 (incl. NUL), and VLAN names append ".<vid>". +MACSEC_PFX = f"ms{os.getpid()}_" + + +def _macsec_name(idx=0): + return f"{MACSEC_PFX}{idx}" + + +def _get_macsec_offload(dev): + """Returns macsec offload mode string from ip -d link show.""" + info = ip(f"-d link show dev {dev}", json=True)[0] + return info.get("linkinfo", {}).get("info_data", {}).get("offload") + + +def _get_features(dev): + """Returns ethtool features dict for a device.""" + return ethtool(f"-k {dev}", json=True)[0] + + +def _require_ip_macsec(cfg): + """SKIP if iproute2 on local or remote lacks 'ip macsec' support.""" + for host in [None, cfg.remote]: + out = cmd("ip macsec help", fail=False, host=host) + if "Usage" not in out.stdout + out.stderr: + where = "remote" if host else "local" + raise KsftSkipEx(f"iproute2 too old on {where}," + " missing macsec support") + + +def _require_ip_macsec_offload(): + """SKIP if local iproute2 doesn't understand 'ip macsec offload'.""" + out = cmd("ip macsec help", fail=False) + if "offload" not in out.stdout + out.stderr: + raise KsftSkipEx("iproute2 too old, missing macsec offload") + + +def _require_macsec_offload(cfg): + """SKIP if local device doesn't support macsec-hw-offload.""" + _require_ip_macsec_offload() + try: + feat = ethtool(f"-k {cfg.ifname}", json=True)[0] + except (CmdExitFailure, IndexError) as e: + raise KsftSkipEx( + f"can't query features: {e}") from e + if not feat.get("macsec-hw-offload", {}).get("active"): + raise KsftSkipEx("macsec-hw-offload not supported") + + +def _get_mac(ifname, host=None): + """Gets MAC address of an interface.""" + dev = ip(f"link show dev {ifname}", json=True, host=host) + return dev[0]["address"] + + +def _setup_macsec_sa(cfg, name): + """Adds matching TX/RX SAs on both ends.""" + local_mac = _get_mac(name) + remote_mac = _get_mac(name, host=cfg.remote) + + ip(f"macsec add {name} tx sa 0 pn 1 on key 01 {MACSEC_KEY}") + ip(f"macsec add {name} rx port 1 address {remote_mac}") + ip(f"macsec add {name} rx port 1 address {remote_mac} " + f"sa 0 pn 1 on key 02 {MACSEC_KEY}") + + ip(f"macsec add {name} tx sa 0 pn 1 on key 02 {MACSEC_KEY}", + host=cfg.remote) + ip(f"macsec add {name} rx port 1 address {local_mac}", host=cfg.remote) + ip(f"macsec add {name} rx port 1 address {local_mac} " + f"sa 0 pn 1 on key 01 {MACSEC_KEY}", host=cfg.remote) + + +def _setup_macsec_devs(cfg, name, offload): + """Creates macsec devices on both ends. + + Only the local device gets HW offload; the remote always uses software + MACsec since it may not support offload at all. + """ + offload_arg = "mac" if offload else "off" + + ip(f"link add link {cfg.ifname} {name} " + f"type macsec encrypt on offload {offload_arg}") + defer(ip, f"link del {name}") + ip(f"link add link {cfg.remote_ifname} {name} " + f"type macsec encrypt on", host=cfg.remote) + defer(ip, f"link del {name}", host=cfg.remote) + + +def _set_offload(name, offload): + """Sets offload on the local macsec device only.""" + offload_arg = "mac" if offload else "off" + + ip(f"link set {name} type macsec encrypt on offload {offload_arg}") + + +def _setup_vlans(cfg, name, vid): + """Adds VLANs on top of existing macsec devs.""" + vlan_name = f"{name}.{vid}" + + ip(f"link add link {name} {vlan_name} type vlan id {vid}") + defer(ip, f"link del {vlan_name}") + ip(f"link add link {name} {vlan_name} type vlan id {vid}", host=cfg.remote) + defer(ip, f"link del {vlan_name}", host=cfg.remote) + + +def _setup_vlan_ips(cfg, name, vid): + """Adds VLANs and IPs and brings up the macsec + VLAN devices.""" + local_ip = "198.51.100.1" + remote_ip = "198.51.100.2" + vlan_name = f"{name}.{vid}" + + ip(f"addr add {local_ip}/24 dev {vlan_name}") + ip(f"addr add {remote_ip}/24 dev {vlan_name}", host=cfg.remote) + ip(f"link set {name} up") + ip(f"link set {name} up", host=cfg.remote) + ip(f"link set {vlan_name} up") + ip(f"link set {vlan_name} up", host=cfg.remote) + + return vlan_name, remote_ip + + +def test_offload_api(cfg) -> None: + """MACsec offload API: create SecY, add SA/rx, toggle offload.""" + + _require_macsec_offload(cfg) + ms0 = _macsec_name(0) + ms1 = _macsec_name(1) + ms2 = _macsec_name(2) + + # Create 3 SecY with offload + ip(f"link add link {cfg.ifname} {ms0} type macsec " + f"port 4 encrypt on offload mac") + defer(ip, f"link del {ms0}") + + ip(f"link add link {cfg.ifname} {ms1} type macsec " + f"address aa:bb:cc:dd:ee:ff port 5 encrypt on offload mac") + defer(ip, f"link del {ms1}") + + ip(f"link add link {cfg.ifname} {ms2} type macsec " + f"sci abbacdde01020304 encrypt on offload mac") + defer(ip, f"link del {ms2}") + + # Add TX SA + ip(f"macsec add {ms0} tx sa 0 pn 1024 on " + "key 01 12345678901234567890123456789012") + + # Add RX SC + SA + ip(f"macsec add {ms0} rx port 1234 address 1c:ed:de:ad:be:ef") + ip(f"macsec add {ms0} rx port 1234 address 1c:ed:de:ad:be:ef " + "sa 0 pn 1 on key 00 0123456789abcdef0123456789abcdef") + + # Can't disable offload when SAs are configured + with ksft_raises(CmdExitFailure): + ip(f"link set {ms0} type macsec offload off") + with ksft_raises(CmdExitFailure): + ip(f"macsec offload {ms0} off") + + # Toggle offload via rtnetlink on SA-free device + ip(f"link set {ms2} type macsec offload off") + ip(f"link set {ms2} type macsec encrypt on offload mac") + + # Toggle offload via genetlink + ip(f"macsec offload {ms2} off") + ip(f"macsec offload {ms2} mac") + + +def test_max_secy(cfg) -> None: + """nsim-only test for max number of SecYs.""" + + cfg.require_nsim() + _require_ip_macsec_offload() + ms0 = _macsec_name(0) + ms1 = _macsec_name(1) + ms2 = _macsec_name(2) + ms3 = _macsec_name(3) + + ip(f"link add link {cfg.ifname} {ms0} type macsec " + f"port 4 encrypt on offload mac") + defer(ip, f"link del {ms0}") + + ip(f"link add link {cfg.ifname} {ms1} type macsec " + f"address aa:bb:cc:dd:ee:ff port 5 encrypt on offload mac") + defer(ip, f"link del {ms1}") + + ip(f"link add link {cfg.ifname} {ms2} type macsec " + f"sci abbacdde01020304 encrypt on offload mac") + defer(ip, f"link del {ms2}") + with ksft_raises(CmdExitFailure): + ip(f"link add link {cfg.ifname} {ms3} " + f"type macsec port 8 encrypt on offload mac") + + +def test_max_sc(cfg) -> None: + """nsim-only test for max number of SCs.""" + + cfg.require_nsim() + _require_ip_macsec_offload() + ms0 = _macsec_name(0) + + ip(f"link add link {cfg.ifname} {ms0} type macsec " + f"port 4 encrypt on offload mac") + defer(ip, f"link del {ms0}") + ip(f"macsec add {ms0} rx port 1234 address 1c:ed:de:ad:be:ef") + with ksft_raises(CmdExitFailure): + ip(f"macsec add {ms0} rx port 1235 address 1c:ed:de:ad:be:ef") + + +def test_offload_state(cfg) -> None: + """Offload state reflects configuration changes.""" + + _require_macsec_offload(cfg) + ms0 = _macsec_name(0) + + # Create with offload on + ip(f"link add link {cfg.ifname} {ms0} type macsec " + f"encrypt on offload mac") + cleanup = defer(ip, f"link del {ms0}") + + ksft_eq(_get_macsec_offload(ms0), "mac", + "created with offload: should be mac") + feats_on_1 = _get_features(ms0) + + ip(f"link set {ms0} type macsec offload off") + ksft_eq(_get_macsec_offload(ms0), "off", + "offload disabled: should be off") + feats_off_1 = _get_features(ms0) + + ip(f"link set {ms0} type macsec encrypt on offload mac") + ksft_eq(_get_macsec_offload(ms0), "mac", + "offload re-enabled: should be mac") + ksft_eq(_get_features(ms0), feats_on_1, + "features should match first offload-on snapshot") + + # Delete and recreate without offload + cleanup.exec() + ip(f"link add link {cfg.ifname} {ms0} type macsec") + defer(ip, f"link del {ms0}") + ksft_eq(_get_macsec_offload(ms0), "off", + "created without offload: should be off") + ksft_eq(_get_features(ms0), feats_off_1, + "features should match first offload-off snapshot") + + ip(f"link set {ms0} type macsec encrypt on offload mac") + ksft_eq(_get_macsec_offload(ms0), "mac", + "offload enabled after create: should be mac") + ksft_eq(_get_features(ms0), feats_on_1, + "features should match first offload-on snapshot") + + +def _check_nsim_vid(cfg, vid, expected) -> None: + """Checks if a VLAN is present. Only works on netdevsim.""" + + nsim = cfg.get_local_nsim_dev() + if not nsim: + return + + vlan_path = os.path.join(nsim.nsims[0].dfs_dir, "vlan") + with open(vlan_path, encoding="utf-8") as f: + vids = f.read() + found = f"ctag {vid}\n" in vids + ksft_eq(found, expected, + f"VLAN {vid} {'expected' if expected else 'not expected'}" + f" in debugfs") + + +@ksft_variants([ + KsftNamedVariant("offloaded", True), + KsftNamedVariant("software", False), +]) +def test_vlan(cfg, offload) -> None: + """Ping through VLAN-over-macsec.""" + + _require_ip_macsec(cfg) + if offload: + _require_macsec_offload(cfg) + else: + _require_ip_macsec_offload() + name = _macsec_name() + _setup_macsec_devs(cfg, name, offload=offload) + _setup_macsec_sa(cfg, name) + _setup_vlans(cfg, name, MACSEC_VLAN_VID) + vlan_name, remote_ip = _setup_vlan_ips(cfg, name, MACSEC_VLAN_VID) + _check_nsim_vid(cfg, MACSEC_VLAN_VID, offload) + # nsim doesn't handle the data path for offloaded macsec, so skip + # the ping when offloaded on nsim. + if not offload or not cfg.get_local_nsim_dev(): + cmd(f"ping -I {vlan_name} -c 1 -W 5 {remote_ip}") + + +@ksft_variants([ + KsftNamedVariant("on_to_off", True), + KsftNamedVariant("off_to_on", False), +]) +def test_vlan_toggle(cfg, offload) -> None: + """Toggle offload: VLAN filters propagate/remove correctly.""" + + _require_ip_macsec(cfg) + _require_macsec_offload(cfg) + name = _macsec_name() + _setup_macsec_devs(cfg, name, offload=offload) + _setup_vlans(cfg, name, MACSEC_VLAN_VID) + _check_nsim_vid(cfg, MACSEC_VLAN_VID, offload) + _set_offload(name, offload=not offload) + _check_nsim_vid(cfg, MACSEC_VLAN_VID, not offload) + vlan_name, remote_ip = _setup_vlan_ips(cfg, name, MACSEC_VLAN_VID) + _setup_macsec_sa(cfg, name) + # nsim doesn't handle the data path for offloaded macsec, so skip + # the ping when the final state is offloaded on nsim. + if offload or not cfg.get_local_nsim_dev(): + cmd(f"ping -I {vlan_name} -c 1 -W 5 {remote_ip}") + + +def main() -> None: + """Main program.""" + with NetDrvEpEnv(__file__) as cfg: + ksft_run([test_offload_api, + test_max_secy, + test_max_sc, + test_offload_state, + test_vlan, + test_vlan_toggle, + ], args=(cfg,)) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh index 29a672c2270f..e212ad8ccef6 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh @@ -44,8 +44,7 @@ source $lib_dir/devlink_lib.sh h1_create() { - simple_if_init $h1 192.0.2.1/24 - defer simple_if_fini $h1 192.0.2.1/24 + adf_simple_if_init $h1 192.0.2.1/24 mtu_set $h1 10000 defer mtu_restore $h1 @@ -56,8 +55,7 @@ h1_create() h2_create() { - simple_if_init $h2 198.51.100.1/24 - defer simple_if_fini $h2 198.51.100.1/24 + adf_simple_if_init $h2 198.51.100.1/24 mtu_set $h2 10000 defer mtu_restore $h2 @@ -106,8 +104,7 @@ setup_prepare() # Reload to ensure devlink-trap settings are back to default. defer devlink_reload - vrf_prepare - defer vrf_cleanup + adf_vrf_prepare h1_create h2_create diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh index d5b6f2cc9a29..9ca340c5f3a6 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh @@ -57,8 +57,7 @@ source qos_lib.sh h1_create() { - simple_if_init $h1 - defer simple_if_fini $h1 + adf_simple_if_init $h1 mtu_set $h1 10000 defer mtu_restore $h1 @@ -70,8 +69,7 @@ h1_create() h2_create() { - simple_if_init $h2 - defer simple_if_fini $h2 + adf_simple_if_init $h2 mtu_set $h2 10000 defer mtu_restore $h2 @@ -83,8 +81,7 @@ h2_create() h3_create() { - simple_if_init $h3 - defer simple_if_fini $h3 + adf_simple_if_init $h3 mtu_set $h3 10000 defer mtu_restore $h3 @@ -225,8 +222,7 @@ setup_prepare() h3mac=$(mac_get $h3) - vrf_prepare - defer vrf_cleanup + adf_vrf_prepare h1_create h2_create diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh index 2b5d2c2751d5..a4a25637fe2a 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh @@ -68,8 +68,7 @@ mlxsw_only_on_spectrum 2+ || exit h1_create() { - simple_if_init $h1 - defer simple_if_fini $h1 + adf_simple_if_init $h1 vlan_create $h1 111 v$h1 192.0.2.33/28 defer vlan_destroy $h1 111 @@ -78,8 +77,7 @@ h1_create() h2_create() { - simple_if_init $h2 - defer simple_if_fini $h2 + adf_simple_if_init $h2 vlan_create $h2 111 v$h2 192.0.2.34/28 defer vlan_destroy $h2 111 @@ -178,8 +176,7 @@ setup_prepare() h2mac=$(mac_get $h2) - vrf_prepare - defer vrf_cleanup + adf_vrf_prepare h1_create h2_create diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh index cd4a5c21360c..d8f8ae8533cd 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh @@ -72,8 +72,7 @@ source qos_lib.sh h1_create() { - simple_if_init $h1 192.0.2.65/28 - defer simple_if_fini $h1 192.0.2.65/28 + adf_simple_if_init $h1 192.0.2.65/28 mtu_set $h1 10000 defer mtu_restore $h1 @@ -81,8 +80,7 @@ h1_create() h2_create() { - simple_if_init $h2 - defer simple_if_fini $h2 + adf_simple_if_init $h2 mtu_set $h2 10000 defer mtu_restore $h2 @@ -94,8 +92,7 @@ h2_create() h3_create() { - simple_if_init $h3 192.0.2.66/28 - defer simple_if_fini $h3 192.0.2.66/28 + adf_simple_if_init $h3 192.0.2.66/28 mtu_set $h3 10000 defer mtu_restore $h3 @@ -196,8 +193,7 @@ setup_prepare() h3mac=$(mac_get $h3) - vrf_prepare - defer vrf_cleanup + adf_vrf_prepare h1_create h2_create diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh index 537d6baa77b7..47d2ffcf366e 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh @@ -100,8 +100,7 @@ host_create() local dev=$1; shift local host=$1; shift - simple_if_init $dev - defer simple_if_fini $dev + adf_simple_if_init $dev mtu_set $dev 10000 defer mtu_restore $dev @@ -250,8 +249,7 @@ setup_prepare() h3_mac=$(mac_get $h3) - vrf_prepare - defer vrf_cleanup + adf_vrf_prepare h1_create h2_create diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh index 899b6892603f..d7505b933aef 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh @@ -51,7 +51,7 @@ for current_test in ${TESTS:-$ALL_TESTS}; do fi ${current_test}_setup_prepare - setup_wait $num_netifs + setup_wait_n $num_netifs # Update target in case occupancy of a certain resource changed # following the test setup. target=$(${current_test}_get_target "$should_fail") diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh index 482ebb744eba..7b98cdd0580d 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh @@ -55,7 +55,7 @@ for current_test in ${TESTS:-$ALL_TESTS}; do continue fi ${current_test}_setup_prepare - setup_wait $num_netifs + setup_wait_n $num_netifs # Update target in case occupancy of a certain resource # changed following the test setup. target=$(${current_test}_get_target "$should_fail") diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh index 0441a18f098b..aac8ef490feb 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh @@ -317,7 +317,7 @@ police_limits_test() tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \ flower skip_sw \ - action police rate 0.5kbit burst 1m conform-exceed drop/ok + action police rate 0.5kbit burst 2k conform-exceed drop/ok check_fail $? "Incorrect success to add police action with too low rate" tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \ @@ -327,7 +327,7 @@ police_limits_test() tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \ flower skip_sw \ - action police rate 1.5kbit burst 1m conform-exceed drop/ok + action police rate 1.5kbit burst 2k conform-exceed drop/ok check_err $? "Failed to add police action with low rate" tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower diff --git a/tools/testing/selftests/drivers/net/napi_id.py b/tools/testing/selftests/drivers/net/napi_id.py new file mode 100755 index 000000000000..d05eddcad539 --- /dev/null +++ b/tools/testing/selftests/drivers/net/napi_id.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +from lib.py import ksft_run, ksft_exit +from lib.py import ksft_eq, NetDrvEpEnv +from lib.py import bkg, cmd, rand_port, NetNSEnter + +def test_napi_id(cfg) -> None: + port = rand_port() + listen_cmd = f"{cfg.test_dir}/napi_id_helper {cfg.addr} {port}" + + with bkg(listen_cmd, ksft_wait=3) as server: + cmd(f"echo a | socat - TCP:{cfg.baddr}:{port}", host=cfg.remote, shell=True) + + ksft_eq(0, server.ret) + +def main() -> None: + with NetDrvEpEnv(__file__) as cfg: + ksft_run([test_napi_id], args=(cfg,)) + ksft_exit() + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/napi_id_helper.c b/tools/testing/selftests/drivers/net/napi_id_helper.c new file mode 100644 index 000000000000..7f49ca6c8637 --- /dev/null +++ b/tools/testing/selftests/drivers/net/napi_id_helper.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <arpa/inet.h> +#include <sys/socket.h> +#include <netdb.h> + +#include "../../net/lib/ksft.h" + +int main(int argc, char *argv[]) +{ + struct sockaddr_storage address; + struct addrinfo *result; + struct addrinfo hints; + unsigned int napi_id; + socklen_t addr_len; + socklen_t optlen; + char buf[1024]; + int opt = 1; + int family; + int server; + int client; + int ret; + + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + hints.ai_flags = AI_PASSIVE; + + ret = getaddrinfo(argv[1], argv[2], &hints, &result); + if (ret != 0) { + fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(ret)); + return 1; + } + + family = result->ai_family; + addr_len = result->ai_addrlen; + + server = socket(family, SOCK_STREAM, IPPROTO_TCP); + if (server < 0) { + perror("socket creation failed"); + freeaddrinfo(result); + if (errno == EAFNOSUPPORT) + return -1; + return 1; + } + + if (setsockopt(server, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt))) { + perror("setsockopt"); + freeaddrinfo(result); + return 1; + } + + memcpy(&address, result->ai_addr, result->ai_addrlen); + freeaddrinfo(result); + + if (bind(server, (struct sockaddr *)&address, addr_len) < 0) { + perror("bind failed"); + return 1; + } + + if (listen(server, 1) < 0) { + perror("listen"); + return 1; + } + + ksft_ready(); + + client = accept(server, NULL, 0); + if (client < 0) { + perror("accept"); + return 1; + } + + optlen = sizeof(napi_id); + ret = getsockopt(client, SOL_SOCKET, SO_INCOMING_NAPI_ID, &napi_id, + &optlen); + if (ret != 0) { + perror("getsockopt"); + return 1; + } + + read(client, buf, 1024); + + ksft_wait(); + + if (napi_id == 0) { + fprintf(stderr, "napi ID is 0\n"); + return 1; + } + + close(client); + close(server); + + return 0; +} diff --git a/tools/testing/selftests/drivers/net/napi_threaded.py b/tools/testing/selftests/drivers/net/napi_threaded.py new file mode 100755 index 000000000000..f4be72b2145a --- /dev/null +++ b/tools/testing/selftests/drivers/net/napi_threaded.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Test napi threaded states. +""" + +from lib.py import ksft_run, ksft_exit +from lib.py import ksft_eq, ksft_ne, ksft_ge +from lib.py import NetDrvEnv, NetdevFamily +from lib.py import cmd, defer, ethtool + + +def _assert_napi_threaded_enabled(nl, napi_id) -> None: + napi = nl.napi_get({'id': napi_id}) + ksft_eq(napi['threaded'], 'enabled') + ksft_ne(napi.get('pid'), None) + + +def _assert_napi_threaded_disabled(nl, napi_id) -> None: + napi = nl.napi_get({'id': napi_id}) + ksft_eq(napi['threaded'], 'disabled') + ksft_eq(napi.get('pid'), None) + + +def _set_threaded_state(cfg, threaded) -> None: + with open(f"/sys/class/net/{cfg.ifname}/threaded", "wb") as fp: + fp.write(str(threaded).encode('utf-8')) + + +def _setup_deferred_cleanup(cfg) -> None: + combined = ethtool(f"-l {cfg.ifname}", json=True)[0].get("combined", 0) + ksft_ge(combined, 2) + defer(ethtool, f"-L {cfg.ifname} combined {combined}") + + threaded = cmd(f"cat /sys/class/net/{cfg.ifname}/threaded").stdout + defer(_set_threaded_state, cfg, threaded) + + return combined + + +def napi_init(cfg, nl) -> None: + """ + Test that threaded state (in the persistent NAPI config) gets updated + even when NAPI with given ID is not allocated at the time. + """ + + qcnt = _setup_deferred_cleanup(cfg) + + _set_threaded_state(cfg, 1) + cmd(f"ethtool -L {cfg.ifname} combined 1") + _set_threaded_state(cfg, 0) + cmd(f"ethtool -L {cfg.ifname} combined {qcnt}") + + napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True) + for napi in napis: + ksft_eq(napi['threaded'], 'disabled') + ksft_eq(napi.get('pid'), None) + + cmd(f"ethtool -L {cfg.ifname} combined 1") + _set_threaded_state(cfg, 1) + cmd(f"ethtool -L {cfg.ifname} combined {qcnt}") + + napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True) + for napi in napis: + ksft_eq(napi['threaded'], 'enabled') + ksft_ne(napi.get('pid'), None) + + +def enable_dev_threaded_disable_napi_threaded(cfg, nl) -> None: + """ + Test that when napi threaded is enabled at device level and + then disabled at napi level for one napi, the threaded state + of all napis is preserved after a change in number of queues. + """ + + napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True) + ksft_ge(len(napis), 2) + + napi0_id = napis[0]['id'] + napi1_id = napis[1]['id'] + + qcnt = _setup_deferred_cleanup(cfg) + + # set threaded + _set_threaded_state(cfg, 1) + + # check napi threaded is set for both napis + _assert_napi_threaded_enabled(nl, napi0_id) + _assert_napi_threaded_enabled(nl, napi1_id) + + # disable threaded for napi1 + nl.napi_set({'id': napi1_id, 'threaded': 'disabled'}) + + cmd(f"ethtool -L {cfg.ifname} combined 1") + cmd(f"ethtool -L {cfg.ifname} combined {qcnt}") + _assert_napi_threaded_enabled(nl, napi0_id) + _assert_napi_threaded_disabled(nl, napi1_id) + + +def change_num_queues(cfg, nl) -> None: + """ + Test that when napi threaded is enabled at device level, + the napi threaded state is preserved after a change in + number of queues. + """ + + napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True) + ksft_ge(len(napis), 2) + + napi0_id = napis[0]['id'] + napi1_id = napis[1]['id'] + + qcnt = _setup_deferred_cleanup(cfg) + + # set threaded + _set_threaded_state(cfg, 1) + + # check napi threaded is set for both napis + _assert_napi_threaded_enabled(nl, napi0_id) + _assert_napi_threaded_enabled(nl, napi1_id) + + cmd(f"ethtool -L {cfg.ifname} combined 1") + cmd(f"ethtool -L {cfg.ifname} combined {qcnt}") + + # check napi threaded is set for both napis + _assert_napi_threaded_enabled(nl, napi0_id) + _assert_napi_threaded_enabled(nl, napi1_id) + + +def main() -> None: + """ Ksft boiler plate main """ + + with NetDrvEnv(__file__, queue_count=2) as cfg: + ksft_run([napi_init, + change_num_queues, + enable_dev_threaded_disable_napi_threaded], + args=(cfg, NetdevFamily())) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/netcons_basic.sh b/tools/testing/selftests/drivers/net/netcons_basic.sh deleted file mode 100755 index fe765da498e8..000000000000 --- a/tools/testing/selftests/drivers/net/netcons_basic.sh +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env bash -# SPDX-License-Identifier: GPL-2.0 - -# This test creates two netdevsim virtual interfaces, assigns one of them (the -# "destination interface") to a new namespace, and assigns IP addresses to both -# interfaces. -# -# It listens on the destination interface using socat and configures a dynamic -# target on netconsole, pointing to the destination IP address. -# -# Finally, it checks whether the message was received properly on the -# destination interface. Note that this test may pollute the kernel log buffer -# (dmesg) and relies on dynamic configuration and namespaces being configured. -# -# Author: Breno Leitao <leitao@debian.org> - -set -euo pipefail - -SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") - -source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh - -modprobe netdevsim 2> /dev/null || true -modprobe netconsole 2> /dev/null || true - -# The content of kmsg will be save to the following file -OUTPUT_FILE="/tmp/${TARGET}" - -# Check for basic system dependency and exit if not found -check_for_dependencies -# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5) -echo "6 5" > /proc/sys/kernel/printk -# Remove the namespace, interfaces and netconsole target on exit -trap cleanup EXIT -# Create one namespace and two interfaces -set_network -# Create a dynamic target for netconsole -create_dynamic_target -# Set userdata "key" with the "value" value -set_user_data -# Listed for netconsole port inside the namespace and destination interface -listen_port_and_save_to "${OUTPUT_FILE}" & -# Wait for socat to start and listen to the port. -wait_local_port_listen "${NAMESPACE}" "${PORT}" udp -# Send the message -echo "${MSG}: ${TARGET}" > /dev/kmsg -# Wait until socat saves the file to disk -busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" - -# Make sure the message was received in the dst part -# and exit -validate_result "${OUTPUT_FILE}" diff --git a/tools/testing/selftests/drivers/net/netconsole/Makefile b/tools/testing/selftests/drivers/net/netconsole/Makefile new file mode 100644 index 000000000000..b56c70b7e274 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netconsole/Makefile @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: GPL-2.0 + +TEST_INCLUDES := \ + ../../../net/lib.sh \ + ../lib/sh/lib_netcons.sh \ +# end of TEST_INCLUDES + +TEST_PROGS := \ + netcons_basic.sh \ + netcons_cmdline.sh \ + netcons_fragmented_msg.sh \ + netcons_overflow.sh \ + netcons_resume.sh \ + netcons_sysdata.sh \ + netcons_torture.sh \ +# end of TEST_PROGS + +include ../../../lib.mk + diff --git a/tools/testing/selftests/drivers/net/netconsole/config b/tools/testing/selftests/drivers/net/netconsole/config new file mode 100644 index 000000000000..a3f6b0fd44ef --- /dev/null +++ b/tools/testing/selftests/drivers/net/netconsole/config @@ -0,0 +1,6 @@ +CONFIG_CONFIGFS_FS=y +CONFIG_IPV6=y +CONFIG_NETCONSOLE=m +CONFIG_NETCONSOLE_DYNAMIC=y +CONFIG_NETCONSOLE_EXTENDED_LOG=y +CONFIG_NETDEVSIM=m diff --git a/tools/testing/selftests/drivers/net/netconsole/netcons_basic.sh b/tools/testing/selftests/drivers/net/netconsole/netcons_basic.sh new file mode 100755 index 000000000000..7976206523b2 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netconsole/netcons_basic.sh @@ -0,0 +1,78 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-2.0 + +# This test creates two netdevsim virtual interfaces, assigns one of them (the +# "destination interface") to a new namespace, and assigns IP addresses to both +# interfaces. +# +# It listens on the destination interface using socat and configures a dynamic +# target on netconsole, pointing to the destination IP address. +# +# Finally, it checks whether the message was received properly on the +# destination interface. Note that this test may pollute the kernel log buffer +# (dmesg) and relies on dynamic configuration and namespaces being configured. +# +# Author: Breno Leitao <leitao@debian.org> + +set -euo pipefail + +SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +source "${SCRIPTDIR}"/../lib/sh/lib_netcons.sh + +modprobe netdevsim 2> /dev/null || true +modprobe netconsole 2> /dev/null || true + +# The content of kmsg will be save to the following file +OUTPUT_FILE="/tmp/${TARGET}" + +# Check for basic system dependency and exit if not found +check_for_dependencies +# Remove the namespace, interfaces and netconsole target on exit +trap cleanup EXIT + +# Run the test twice, with different format modes +for FORMAT in "basic" "extended" +do + for IP_VERSION in "ipv6" "ipv4" + do + echo "Running with target mode: ${FORMAT} (${IP_VERSION})" + # Set current loglevel to KERN_INFO(6), and default to + # KERN_NOTICE(5) + echo "6 5" > /proc/sys/kernel/printk + # Create one namespace and two interfaces + set_network "${IP_VERSION}" + # Create a dynamic target for netconsole + create_dynamic_target "${FORMAT}" + # Only set userdata for extended format + if [ "$FORMAT" == "extended" ] + then + # Set userdata "key" with the "value" value + set_user_data + fi + # Listed for netconsole port inside the namespace and + # destination interface + listen_port_and_save_to "${OUTPUT_FILE}" "${IP_VERSION}" & + # Wait for socat to start and listen to the port. + wait_for_port "${NAMESPACE}" "${PORT}" "${IP_VERSION}" + # Send the message + echo "${MSG}: ${TARGET}" > /dev/kmsg + # Wait until socat saves the file to disk + if ! busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" + then + echo "FAIL: Timed out waiting (${BUSYWAIT_TIMEOUT} ms) for netconsole message in ${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi + + # Make sure the message was received in the dst part + # and exit + validate_result "${OUTPUT_FILE}" "${FORMAT}" + # kill socat in case it is still running + pkill_socat + cleanup + echo "${FORMAT} : ${IP_VERSION} : Test passed" >&2 + done +done + +trap - EXIT +exit "${ksft_pass}" diff --git a/tools/testing/selftests/drivers/net/netconsole/netcons_cmdline.sh b/tools/testing/selftests/drivers/net/netconsole/netcons_cmdline.sh new file mode 100755 index 000000000000..96d704b8d9d9 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netconsole/netcons_cmdline.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-2.0 + +# This is a selftest to test cmdline arguments on netconsole. +# It exercises loading of netconsole from cmdline instead of the dynamic +# reconfiguration. This includes parsing the long netconsole= line and all the +# flow through init_netconsole(). +# +# Author: Breno Leitao <leitao@debian.org> + +set -euo pipefail + +SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +source "${SCRIPTDIR}"/../lib/sh/lib_netcons.sh + +check_netconsole_module + +modprobe netdevsim 2> /dev/null || true +rmmod netconsole 2> /dev/null || true + +# Check for basic system dependency and exit if not found +# check_for_dependencies +# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5) +echo "6 5" > /proc/sys/kernel/printk +# Remove the namespace and network interfaces +trap do_cleanup EXIT +# Create one namespace and two interfaces +set_network + +# Run the test twice, with different cmdline parameters +for BINDMODE in "ifname" "mac" +do + echo "Running with bind mode: ${BINDMODE}" >&2 + # Create the command line for netconsole, with the configuration from + # the function above + CMDLINE=$(create_cmdline_str "${BINDMODE}") + + # The content of kmsg will be save to the following file + OUTPUT_FILE="/tmp/${TARGET}-${BINDMODE}" + + # Load the module, with the cmdline set + modprobe netconsole "${CMDLINE}" + + # Listed for netconsole port inside the namespace and destination + # interface + listen_port_and_save_to "${OUTPUT_FILE}" & + # Wait for socat to start and listen to the port. + wait_local_port_listen "${NAMESPACE}" "${PORT}" udp + # Send the message + echo "${MSG}: ${TARGET}" > /dev/kmsg + # Wait until socat saves the file to disk + busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" + # Make sure the message was received in the dst part + # and exit + validate_msg "${OUTPUT_FILE}" + + # kill socat in case it is still running + pkill_socat + # Unload the module + rmmod netconsole + echo "${BINDMODE} : Test passed" >&2 +done + +exit "${ksft_pass}" diff --git a/tools/testing/selftests/drivers/net/netconsole/netcons_fragmented_msg.sh b/tools/testing/selftests/drivers/net/netconsole/netcons_fragmented_msg.sh new file mode 100755 index 000000000000..0dc7280c3080 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netconsole/netcons_fragmented_msg.sh @@ -0,0 +1,122 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-2.0 + +# Test netconsole's message fragmentation functionality. +# +# When a message exceeds the maximum packet size, netconsole splits it into +# multiple fragments for transmission. This test verifies: +# - Correct fragmentation of large messages +# - Proper reassembly of fragments at the receiver +# - Preservation of userdata across fragments +# - Behavior with and without kernel release version appending +# +# Author: Breno Leitao <leitao@debian.org> + +set -euo pipefail + +SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +source "${SCRIPTDIR}"/../lib/sh/lib_netcons.sh + +modprobe netdevsim 2> /dev/null || true +modprobe netconsole 2> /dev/null || true + +# The content of kmsg will be save to the following file +OUTPUT_FILE="/tmp/${TARGET}" + +# set userdata to a long value. In this case, it is "1-2-3-4...50-" +USERDATA_VALUE=$(printf -- '%.2s-' {1..60}) + +# Convert the header string in a regexp, so, we can remove +# the second header as well. +# A header looks like "13,468,514729715,-,ncfrag=0/1135;". If +# release is appended, you might find something like:L +# "6.13.0-04048-g4f561a87745a,13,468,514729715,-,ncfrag=0/1135;" +function header_to_regex() { + # header is everything before ; + local HEADER="${1}" + REGEX=$(echo "${HEADER}" | cut -d'=' -f1) + echo "${REGEX}=[0-9]*\/[0-9]*;" +} + +# We have two headers in the message. Remove both to get the full message, +# and extract the full message. +function extract_msg() { + local MSGFILE="${1}" + # Extract the header, which is the very first thing that arrives in the + # first list. + HEADER=$(sed -n '1p' "${MSGFILE}" | cut -d';' -f1) + HEADER_REGEX=$(header_to_regex "${HEADER}") + + # Remove the two headers from the received message + # This will return the message without any header, similarly to what + # was sent. + sed "s/""${HEADER_REGEX}""//g" "${MSGFILE}" +} + +# Validate the message, which has two messages glued together. +# unwrap them to make sure all the characters were transmitted. +# File will look like the following: +# 13,468,514729715,-,ncfrag=0/1135;<message> +# key=<part of key>-13,468,514729715,-,ncfrag=967/1135;<rest of the key> +function validate_fragmented_result() { + # Discard the netconsole headers, and assemble the full message + RCVMSG=$(extract_msg "${1}") + + # check for the main message + if ! echo "${RCVMSG}" | grep -q "${MSG}"; then + echo "Message body doesn't match." >&2 + echo "msg received=" "${RCVMSG}" >&2 + exit "${ksft_fail}" + fi + + # check userdata + if ! echo "${RCVMSG}" | grep -q "${USERDATA_VALUE}"; then + echo "message userdata doesn't match" >&2 + echo "msg received=" "${RCVMSG}" >&2 + exit "${ksft_fail}" + fi + # test passed. hooray +} + +# Check for basic system dependency and exit if not found +check_for_dependencies +# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5) +echo "6 5" > /proc/sys/kernel/printk +# Remove the namespace, interfaces and netconsole target on exit +trap cleanup EXIT +# Create one namespace and two interfaces +set_network +# Create a dynamic target for netconsole +create_dynamic_target +# Set userdata "key" with the "value" value +set_user_data + + +# TEST 1: Send message and userdata. They will fragment +# ======= +MSG=$(printf -- 'MSG%.3s=' {1..150}) + +# Listen for netconsole port inside the namespace and destination interface +listen_port_and_save_to "${OUTPUT_FILE}" & +# Wait for socat to start and listen to the port. +wait_local_port_listen "${NAMESPACE}" "${PORT}" udp +# Send the message +echo "${MSG}: ${TARGET}" > /dev/kmsg +# Wait until socat saves the file to disk +busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" +# Check if the message was not corrupted +validate_fragmented_result "${OUTPUT_FILE}" + +# TEST 2: Test with smaller message, and without release appended +# ======= +MSG=$(printf -- 'FOOBAR%.3s=' {1..100}) +# Let's disable release and test again. +disable_release_append + +listen_port_and_save_to "${OUTPUT_FILE}" & +wait_local_port_listen "${NAMESPACE}" "${PORT}" udp +echo "${MSG}: ${TARGET}" > /dev/kmsg +busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" +validate_fragmented_result "${OUTPUT_FILE}" +exit "${ksft_pass}" diff --git a/tools/testing/selftests/drivers/net/netcons_overflow.sh b/tools/testing/selftests/drivers/net/netconsole/netcons_overflow.sh index 29bad56448a2..a8e43d08c166 100755 --- a/tools/testing/selftests/drivers/net/netcons_overflow.sh +++ b/tools/testing/selftests/drivers/net/netconsole/netcons_overflow.sh @@ -13,9 +13,9 @@ set -euo pipefail SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") -source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh +source "${SCRIPTDIR}"/../lib/sh/lib_netcons.sh # This is coming from netconsole code. Check for it in drivers/net/netconsole.c -MAX_USERDATA_ITEMS=16 +MAX_USERDATA_ITEMS=256 # Function to create userdata entries function create_userdata_max_entries() { diff --git a/tools/testing/selftests/drivers/net/netconsole/netcons_resume.sh b/tools/testing/selftests/drivers/net/netconsole/netcons_resume.sh new file mode 100755 index 000000000000..cb59cf436dd0 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netconsole/netcons_resume.sh @@ -0,0 +1,124 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-2.0 + +# This test validates that netconsole is able to resume a target that was +# deactivated when its interface was removed when the interface is brought +# back up. +# +# The test configures a netconsole target and then removes netdevsim module to +# cause the interface to disappear. Targets are configured via cmdline to ensure +# targets bound by interface name and mac address can be resumed. +# The test verifies that the target moved to disabled state before adding +# netdevsim and the interface back. +# +# Finally, the test verifies that the target is re-enabled automatically and +# the message is received on the destination interface. +# +# Author: Andre Carvalho <asantostc@gmail.com> + +set -euo pipefail + +SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +source "${SCRIPTDIR}"/../lib/sh/lib_netcons.sh + +SAVED_SRCMAC="" # to be populated later +SAVED_DSTMAC="" # to be populated later + +modprobe netdevsim 2> /dev/null || true +rmmod netconsole 2> /dev/null || true + +check_netconsole_module + +function cleanup() { + cleanup_netcons "${NETCONS_CONFIGFS}/cmdline0" + do_cleanup + rmmod netconsole +} + +function trigger_reactivation() { + # Add back low level module + modprobe netdevsim + # Recreate namespace and two interfaces + set_network + # Restore MACs + ip netns exec "${NAMESPACE}" ip link set "${DSTIF}" \ + address "${SAVED_DSTMAC}" + if [ "${BINDMODE}" == "mac" ]; then + ip link set dev "${SRCIF}" down + ip link set dev "${SRCIF}" address "${SAVED_SRCMAC}" + # Rename device in order to trigger target resume, as initial + # when device was recreated it didn't have correct mac address. + ip link set dev "${SRCIF}" name "${TARGET}" + fi +} + +function trigger_deactivation() { + # Start by storing mac addresses so we can be restored in reactivate + SAVED_DSTMAC=$(ip netns exec "${NAMESPACE}" \ + cat /sys/class/net/"$DSTIF"/address) + SAVED_SRCMAC=$(mac_get "${SRCIF}") + # Remove low level module + rmmod netdevsim +} + +trap cleanup EXIT + +# Run the test twice, with different cmdline parameters +for BINDMODE in "ifname" "mac" +do + echo "Running with bind mode: ${BINDMODE}" >&2 + # Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5) + echo "6 5" > /proc/sys/kernel/printk + + # Create one namespace and two interfaces + set_network + + # Create the command line for netconsole, with the configuration from + # the function above + CMDLINE=$(create_cmdline_str "${BINDMODE}") + + # The content of kmsg will be save to the following file + OUTPUT_FILE="/tmp/${TARGET}-${BINDMODE}" + + # Load the module, with the cmdline set + modprobe netconsole "${CMDLINE}" + # Expose cmdline target in configfs + mkdir "${NETCONS_CONFIGFS}/cmdline0" + + # Target should be enabled + wait_target_state "cmdline0" "enabled" + + # Trigger deactivation by unloading netdevsim module. Target should be + # disabled. + trigger_deactivation + wait_target_state "cmdline0" "disabled" + + # Trigger reactivation by loading netdevsim, recreating the network and + # restoring mac addresses. Target should be re-enabled. + trigger_reactivation + wait_target_state "cmdline0" "enabled" + + # Listen for netconsole port inside the namespace and destination + # interface + listen_port_and_save_to "${OUTPUT_FILE}" & + # Wait for socat to start and listen to the port. + wait_local_port_listen "${NAMESPACE}" "${PORT}" udp + # Send the message + echo "${MSG}: ${TARGET}" > /dev/kmsg + # Wait until socat saves the file to disk + busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" + # Make sure the message was received in the dst part + # and exit + validate_msg "${OUTPUT_FILE}" + + # kill socat in case it is still running + pkill_socat + # Cleanup & unload the module + cleanup + + echo "${BINDMODE} : Test passed" >&2 +done + +trap - EXIT +exit "${EXIT_STATUS}" diff --git a/tools/testing/selftests/drivers/net/netconsole/netcons_sysdata.sh b/tools/testing/selftests/drivers/net/netconsole/netcons_sysdata.sh new file mode 100755 index 000000000000..3fb8c4afe3d2 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netconsole/netcons_sysdata.sh @@ -0,0 +1,272 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-2.0 + +# A test that makes sure that sysdata runtime CPU data is properly set +# when a message is sent. +# +# There are 3 different tests, every time sent using a random CPU. +# - Test #1 +# * Only enable cpu_nr sysdata feature. +# - Test #2 +# * Keep cpu_nr sysdata feature enable and enable userdata. +# - Test #3 +# * keep userdata enabled, and disable sysdata cpu_nr feature. +# +# Author: Breno Leitao <leitao@debian.org> + +set -euo pipefail + +SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +source "${SCRIPTDIR}"/../lib/sh/lib_netcons.sh + +# Enable the sysdata cpu_nr feature +function set_cpu_nr() { + if [[ ! -f "${NETCONS_PATH}/userdata/cpu_nr_enabled" ]] + then + echo "Populate CPU configfs path not available in ${NETCONS_PATH}/userdata/cpu_nr_enabled" >&2 + exit "${ksft_skip}" + fi + + echo 1 > "${NETCONS_PATH}/userdata/cpu_nr_enabled" +} + +# Enable the taskname to be appended to sysdata +function set_taskname() { + if [[ ! -f "${NETCONS_PATH}/userdata/taskname_enabled" ]] + then + echo "Not able to enable taskname sysdata append. Configfs not available in ${NETCONS_PATH}/userdata/taskname_enabled" >&2 + exit "${ksft_skip}" + fi + + echo 1 > "${NETCONS_PATH}/userdata/taskname_enabled" +} + +# Enable the release to be appended to sysdata +function set_release() { + if [[ ! -f "${NETCONS_PATH}/userdata/release_enabled" ]] + then + echo "Not able to enable release sysdata append. Configfs not available in ${NETCONS_PATH}/userdata/release_enabled" >&2 + exit "${ksft_skip}" + fi + + echo 1 > "${NETCONS_PATH}/userdata/release_enabled" +} + +# Enable the msgid to be appended to sysdata +function set_msgid() { + if [[ ! -f "${NETCONS_PATH}/userdata/msgid_enabled" ]] + then + echo "Not able to enable msgid sysdata append. Configfs not available in ${NETCONS_PATH}/userdata/msgid_enabled" >&2 + exit "${ksft_skip}" + fi + + echo 1 > "${NETCONS_PATH}/userdata/msgid_enabled" +} + +# Disable the sysdata cpu_nr feature +function unset_cpu_nr() { + echo 0 > "${NETCONS_PATH}/userdata/cpu_nr_enabled" +} + +# Once called, taskname=<..> will not be appended anymore +function unset_taskname() { + echo 0 > "${NETCONS_PATH}/userdata/taskname_enabled" +} + +function unset_release() { + echo 0 > "${NETCONS_PATH}/userdata/release_enabled" +} + +function unset_msgid() { + echo 0 > "${NETCONS_PATH}/userdata/msgid_enabled" +} + +# Test if MSG contains sysdata +function validate_sysdata() { + # OUTPUT_FILE will contain something like: + # 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM + # userdatakey=userdatavalue + # cpu=X + # taskname=<taskname> + # msgid=<id> + + # Echo is what this test uses to create the message. See runtest() + # function + SENDER="echo" + + if [ ! -f "$OUTPUT_FILE" ]; then + echo "FAIL: File was not generated." >&2 + exit "${ksft_fail}" + fi + + if ! grep -q "${MSG}" "${OUTPUT_FILE}"; then + echo "FAIL: ${MSG} not found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi + + # Check if cpu=XX exists in the file and matches the one used + # in taskset(1) + if ! grep -q "cpu=${CPU}\+" "${OUTPUT_FILE}"; then + echo "FAIL: 'cpu=${CPU}' not found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi + + if ! grep -q "taskname=${SENDER}" "${OUTPUT_FILE}"; then + echo "FAIL: 'taskname=echo' not found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi + + if ! grep -q "msgid=[0-9]\+$" "${OUTPUT_FILE}"; then + echo "FAIL: 'msgid=<id>' not found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi + + rm "${OUTPUT_FILE}" + pkill_socat +} + +function validate_release() { + RELEASE=$(uname -r) + + if [ ! -f "$OUTPUT_FILE" ]; then + echo "FAIL: File was not generated." >&2 + exit "${ksft_fail}" + fi + + if ! grep -q "release=${RELEASE}" "${OUTPUT_FILE}"; then + echo "FAIL: 'release=${RELEASE}' not found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi +} + +# Test if MSG content exists in OUTPUT_FILE but no `cpu=` and `taskname=` +# strings +function validate_no_sysdata() { + if [ ! -f "$OUTPUT_FILE" ]; then + echo "FAIL: File was not generated." >&2 + exit "${ksft_fail}" + fi + + if ! grep -q "${MSG}" "${OUTPUT_FILE}"; then + echo "FAIL: ${MSG} not found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi + + if grep -q "cpu=" "${OUTPUT_FILE}"; then + echo "FAIL: 'cpu= found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi + + if grep -q "taskname=" "${OUTPUT_FILE}"; then + echo "FAIL: 'taskname= found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi + + if grep -q "release=" "${OUTPUT_FILE}"; then + echo "FAIL: 'release= found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi + + if grep -q "msgid=" "${OUTPUT_FILE}"; then + echo "FAIL: 'msgid= found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi + + rm "${OUTPUT_FILE}" +} + +# Start socat, send the message and wait for the file to show up in the file +# system +function runtest { + # Listen for netconsole port inside the namespace and destination + # interface + listen_port_and_save_to "${OUTPUT_FILE}" & + # Wait for socat to start and listen to the port. + wait_local_port_listen "${NAMESPACE}" "${PORT}" udp + # Send the message + taskset -c "${CPU}" echo "${MSG}: ${TARGET}" > /dev/kmsg + # Wait until socat saves the file to disk + busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" +} + +# ========== # +# Start here # +# ========== # + +modprobe netdevsim 2> /dev/null || true +modprobe netconsole 2> /dev/null || true + +# Check for basic system dependency and exit if not found +check_for_dependencies +# This test also depends on taskset(1). Check for it before starting the test +check_for_taskset + +# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5) +echo "6 5" > /proc/sys/kernel/printk +# Remove the namespace, interfaces and netconsole target on exit +trap cleanup EXIT +# Create one namespace and two interfaces +set_network +# Create a dynamic target for netconsole +create_dynamic_target + +#==================================================== +# TEST #1 +# Send message from a random CPU +#==================================================== +# Random CPU in the system +CPU=$((RANDOM % $(nproc))) +OUTPUT_FILE="/tmp/${TARGET}_1" +MSG="Test #1 from CPU${CPU}" +# Enable the auto population of cpu_nr +set_cpu_nr +# Enable taskname to be appended to sysdata +set_taskname +set_release +set_msgid +runtest +# Make sure the message was received in the dst part +# and exit +validate_release +validate_sysdata + +#==================================================== +# TEST #2 +# This test now adds userdata together with sysdata +# =================================================== +# Get a new random CPU +CPU=$((RANDOM % $(nproc))) +OUTPUT_FILE="/tmp/${TARGET}_2" +MSG="Test #2 from CPU${CPU}" +set_user_data +runtest +validate_release +validate_sysdata + +# =================================================== +# TEST #3 +# Unset all sysdata, fail if any userdata is set +# =================================================== +CPU=$((RANDOM % $(nproc))) +OUTPUT_FILE="/tmp/${TARGET}_3" +MSG="Test #3 from CPU${CPU}" +unset_cpu_nr +unset_taskname +unset_release +unset_msgid +runtest +# At this time, cpu= shouldn't be present in the msg +validate_no_sysdata + +exit "${ksft_pass}" diff --git a/tools/testing/selftests/drivers/net/netconsole/netcons_torture.sh b/tools/testing/selftests/drivers/net/netconsole/netcons_torture.sh new file mode 100755 index 000000000000..33a44adb6f8f --- /dev/null +++ b/tools/testing/selftests/drivers/net/netconsole/netcons_torture.sh @@ -0,0 +1,130 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-2.0 + +# Repeatedly send kernel messages, toggles netconsole targets on and off, +# creates and deletes targets in parallel, and toggles the source interface to +# simulate stress conditions. +# +# This test aims to verify the robustness of netconsole under dynamic +# configurations and concurrent operations. +# +# The major goal is to run this test with LOCKDEP, Kmemleak and KASAN to make +# sure no issues is reported. +# +# Author: Breno Leitao <leitao@debian.org> + +set -euo pipefail + +SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +source "${SCRIPTDIR}"/../lib/sh/lib_netcons.sh + +# Number of times the main loop run +ITERATIONS=${1:-150} + +# Only test extended format +FORMAT="extended" +# And ipv6 only +IP_VERSION="ipv6" + +# Create, enable and delete some targets. +create_and_delete_random_target() { + COUNT=2 + RND_PREFIX=$(mktemp -u netcons_rnd_XXXX_) + + if [ -d "${NETCONS_CONFIGFS}/${RND_PREFIX}${COUNT}" ] || \ + [ -d "${NETCONS_CONFIGFS}/${RND_PREFIX}0" ]; then + echo "Function didn't finish yet, skipping it." >&2 + return + fi + + # enable COUNT targets + for i in $(seq ${COUNT}) + do + RND_TARGET="${RND_PREFIX}"${i} + RND_TARGET_PATH="${NETCONS_CONFIGFS}"/"${RND_TARGET}" + + # Basic population so the target can come up + _create_dynamic_target "${FORMAT}" "${RND_TARGET_PATH}" + done + + echo "netconsole selftest: ${COUNT} additional targets were created" > /dev/kmsg + # disable them all + for i in $(seq ${COUNT}) + do + RND_TARGET="${RND_PREFIX}"${i} + RND_TARGET_PATH="${NETCONS_CONFIGFS}"/"${RND_TARGET}" + if [[ $(cat "${RND_TARGET_PATH}/enabled") -eq 1 ]] + then + echo 0 > "${RND_TARGET_PATH}"/enabled + fi + rmdir "${RND_TARGET_PATH}" + done +} + +# Disable and enable the target mid-air, while messages +# are being transmitted. +toggle_netcons_target() { + for i in $(seq 2) + do + if [ ! -d "${NETCONS_PATH}" ] + then + break + fi + echo 0 > "${NETCONS_PATH}"/enabled 2> /dev/null || true + # Try to enable a bit harder, given it might fail to enable + # Write to `enabled` might fail depending on the lock, which is + # highly contentious here + for _ in $(seq 5) + do + echo 1 > "${NETCONS_PATH}"/enabled 2> /dev/null || true + done + done +} + +toggle_iface(){ + ip link set "${SRCIF}" down + ip link set "${SRCIF}" up +} + +# Start here + +modprobe netdevsim 2> /dev/null || true +modprobe netconsole 2> /dev/null || true + +# Check for basic system dependency and exit if not found +check_for_dependencies +# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5) +echo "6 5" > /proc/sys/kernel/printk +# Remove the namespace, interfaces and netconsole target on exit +trap cleanup EXIT +# Create one namespace and two interfaces +set_network "${IP_VERSION}" +# Create a dynamic target for netconsole +create_dynamic_target "${FORMAT}" + +for i in $(seq "$ITERATIONS") +do + for _ in $(seq 10) + do + echo "${MSG}: ${TARGET} ${i}" > /dev/kmsg + done + wait + + if (( i % 30 == 0 )); then + toggle_netcons_target & + fi + + if (( i % 50 == 0 )); then + # create some targets, enable them, send msg and disable + # all in a parallel thread + create_and_delete_random_target & + fi + + if (( i % 70 == 0 )); then + toggle_iface & + fi +done +wait + +exit "${EXIT_STATUS}" diff --git a/tools/testing/selftests/drivers/net/netdevsim/Makefile b/tools/testing/selftests/drivers/net/netdevsim/Makefile index 07b7c46d3311..9808c2fbae9e 100644 --- a/tools/testing/selftests/drivers/net/netdevsim/Makefile +++ b/tools/testing/selftests/drivers/net/netdevsim/Makefile @@ -1,21 +1,25 @@ # SPDX-License-Identifier: GPL-2.0+ OR MIT -TEST_PROGS = devlink.sh \ +TEST_PROGS := \ + devlink.sh \ devlink_in_netns.sh \ devlink_trap.sh \ ethtool-coalesce.sh \ ethtool-features.sh \ ethtool-fec.sh \ ethtool-pause.sh \ - ethtool-ring.sh \ fib.sh \ fib_notifications.sh \ hw_stats_l3.sh \ - macsec-offload.sh \ nexthop.sh \ peer.sh \ psample.sh \ tc-mq-visibility.sh \ udp_tunnel_nic.sh \ +# end of TEST_PROGS + +TEST_FILES := \ + ethtool-common.sh +# end of TEST_FILES include ../../../lib.mk diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh index b5ea2526f23c..22a626c6cde3 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh @@ -3,8 +3,10 @@ lib_dir=$(dirname $0)/../../../net/forwarding -ALL_TESTS="fw_flash_test params_test regions_test reload_test \ - netns_reload_test resource_test dev_info_test \ +ALL_TESTS="fw_flash_test params_test \ + params_default_test regions_test reload_test \ + netns_reload_test resource_test resource_dump_test \ + port_resource_doit_test dev_info_test \ empty_reporter_test dummy_reporter_test rate_test" NUM_NETIFS=0 source $lib_dir/lib.sh @@ -40,6 +42,8 @@ fw_flash_test() return fi + echo "10"> $DEBUGFS_DIR/fw_update_flash_chunk_time_ms + devlink dev flash $DL_HANDLE file $DUMMYFILE check_err $? "Failed to flash with status updates on" @@ -76,17 +80,28 @@ fw_flash_test() param_get() { local name=$1 + local attr=${2:-value} + local cmode=${3:-driverinit} cmd_jq "devlink dev param show $DL_HANDLE name $name -j" \ - '.[][][].values[] | select(.cmode == "driverinit").value' + '.[][][].values[] | select(.cmode == "'"$cmode"'").'"$attr" } param_set() { local name=$1 local value=$2 + local cmode=${3:-driverinit} + + devlink dev param set $DL_HANDLE name $name cmode $cmode value $value +} + +param_set_default() +{ + local name=$1 + local cmode=${2:-driverinit} - devlink dev param set $DL_HANDLE name $name cmode driverinit value $value + devlink dev param set $DL_HANDLE name $name default cmode $cmode } check_value() @@ -95,12 +110,18 @@ check_value() local phase_name=$2 local expected_param_value=$3 local expected_debugfs_value=$4 + local cmode=${5:-driverinit} local value + local attr="value" - value=$(param_get $name) - check_err $? "Failed to get $name param value" + if [[ "$phase_name" == *"default"* ]]; then + attr="default" + fi + + value=$(param_get $name $attr $cmode) + check_err $? "Failed to get $name param $attr" [ "$value" == "$expected_param_value" ] - check_err $? "Unexpected $phase_name $name param value" + check_err $? "Unexpected $phase_name $name param $attr" value=$(<$DEBUGFS_DIR/$name) check_err $? "Failed to get $name debugfs value" [ "$value" == "$expected_debugfs_value" ] @@ -133,6 +154,92 @@ params_test() log_test "params test" } +value_to_debugfs() +{ + local value=$1 + + case "$value" in + true) + echo "Y" + ;; + false) + echo "N" + ;; + *) + echo "$value" + ;; + esac +} + +test_default() +{ + local param_name=$1 + local new_value=$2 + local expected_default=$3 + local cmode=${4:-driverinit} + local default_debugfs + local new_debugfs + local expected_debugfs + + default_debugfs=$(value_to_debugfs $expected_default) + new_debugfs=$(value_to_debugfs $new_value) + + expected_debugfs=$default_debugfs + check_value $param_name initial-default $expected_default $expected_debugfs $cmode + + param_set $param_name $new_value $cmode + check_err $? "Failed to set $param_name to $new_value" + + expected_debugfs=$([ "$cmode" == "runtime" ] && echo "$new_debugfs" || echo "$default_debugfs") + check_value $param_name post-set $new_value $expected_debugfs $cmode + + devlink dev reload $DL_HANDLE + check_err $? "Failed to reload device" + + expected_debugfs=$new_debugfs + check_value $param_name post-reload-new-value $new_value $expected_debugfs $cmode + + param_set_default $param_name $cmode + check_err $? "Failed to set $param_name to default" + + expected_debugfs=$([ "$cmode" == "runtime" ] && echo "$default_debugfs" || echo "$new_debugfs") + check_value $param_name post-set-default $expected_default $expected_debugfs $cmode + + devlink dev reload $DL_HANDLE + check_err $? "Failed to reload device" + + expected_debugfs=$default_debugfs + check_value $param_name post-reload-default $expected_default $expected_debugfs $cmode +} + +params_default_test() +{ + RET=0 + + if ! devlink dev param help 2>&1 | grep -q "value VALUE | default"; then + echo "SKIP: devlink cli missing default feature" + return + fi + + # Remove side effects of previous tests. Use plain param_set, because + # param_set_default is a feature under test here. + param_set max_macs 32 driverinit + check_err $? "Failed to reset max_macs to default value" + param_set test1 true driverinit + check_err $? "Failed to reset test1 to default value" + param_set test2 1234 runtime + check_err $? "Failed to reset test2 to default value" + + devlink dev reload $DL_HANDLE + check_err $? "Failed to reload device for clean state" + + test_default max_macs 16 32 driverinit + test_default test1 false true driverinit + test_default test2 100 1234 runtime + + log_test "params default test" +} + check_region_size() { local name=$1 @@ -376,6 +483,56 @@ resource_test() log_test "resource test" } +resource_dump_test() +{ + RET=0 + + local port_jq + local dev_jq + local dl_jq + local count + + dl_jq="with_entries(select(.key | startswith(\"$DL_HANDLE\")))" + port_jq="[.[] | $dl_jq | keys |" + port_jq+=" map(select(test(\"/.+/\"))) | length] | add" + dev_jq="[.[] | $dl_jq | keys |" + dev_jq+=" map(select(test(\"/.+/\")|not)) | length] | add" + + if ! devlink resource help 2>&1 | grep -q "scope"; then + echo "SKIP: devlink resource show not supported" + return + fi + + devlink resource show > /dev/null 2>&1 + check_err $? "Failed to dump all resources" + + count=$(cmd_jq "devlink resource show -j" "$port_jq") + [ "$count" -gt "0" ] + check_err $? "missing port resources in resource dump" + + count=$(cmd_jq "devlink resource show -j" "$dev_jq") + [ "$count" -gt "0" ] + check_err $? "missing device resources in resource dump" + + count=$(cmd_jq "devlink resource show scope dev -j" "$dev_jq") + [ "$count" -gt "0" ] + check_err $? "dev scope missing device resources" + + count=$(cmd_jq "devlink resource show scope dev -j" "$port_jq") + [ "$count" -eq "0" ] + check_err $? "dev scope returned port resources" + + count=$(cmd_jq "devlink resource show scope port -j" "$port_jq") + [ "$count" -gt "0" ] + check_err $? "port scope missing port resources" + + count=$(cmd_jq "devlink resource show scope port -j" "$dev_jq") + [ "$count" -eq "0" ] + check_err $? "port scope returned device resources" + + log_test "resource dump test" +} + info_get() { local name=$1 @@ -608,6 +765,46 @@ rate_attr_parent_check() check_err $? "Unexpected parent attr value $api_value != $parent" } +rate_attr_tc_bw_check() +{ + local handle=$1 + local tc_bw=$2 + local debug_file=$3 + + local tc_bw_str="" + for bw in $tc_bw; do + local tc=${bw%%:*} + local value=${bw##*:} + tc_bw_str="$tc_bw_str $tc:$value" + done + tc_bw_str=${tc_bw_str# } + + rate_attr_set "$handle" tc-bw "$tc_bw_str" + check_err $? "Failed to set tc-bw values" + + for bw in $tc_bw; do + local tc=${bw%%:*} + local value=${bw##*:} + local debug_value + debug_value=$(cat "$debug_file"/tc"${tc}"_bw) + check_err $? "Failed to read tc-bw value from debugfs for tc$tc" + [ "$debug_value" == "$value" ] + check_err $? "Unexpected tc-bw debug value for tc$tc: $debug_value != $value" + done + + for bw in $tc_bw; do + local tc=${bw%%:*} + local expected_value=${bw##*:} + local api_value + api_value=$(rate_attr_get "$handle" tc_"$tc") + if [ "$api_value" = "null" ]; then + api_value=0 + fi + [ "$api_value" == "$expected_value" ] + check_err $? "Unexpected tc-bw value for tc$tc: $api_value != $expected_value" + done +} + rate_node_add() { local handle=$1 @@ -622,6 +819,32 @@ rate_node_del() devlink port function rate del $handle } +port_resource_doit_test() +{ + RET=0 + + local port_handle="${DL_HANDLE}/0" + local name + local size + + if ! devlink resource help 2>&1 | grep -q "PORT_INDEX"; then + echo "SKIP: devlink resource show with port not supported" + return + fi + + name=$(cmd_jq "devlink resource show $port_handle -j" \ + '.[][][].name') + [ "$name" == "test_resource" ] + check_err $? "wrong port resource name (got $name)" + + size=$(cmd_jq "devlink resource show $port_handle -j" \ + '.[][][].size') + [ "$size" == "20" ] + check_err $? "wrong port resource size (got $size)" + + log_test "port resource doit test" +} + rate_test() { RET=0 @@ -649,6 +872,13 @@ rate_test() rate=$(($rate+100)) done + local tc_bw="0:0 1:40 2:0 3:0 4:0 5:0 6:60 7:0" + for r_obj in $leafs + do + rate_attr_tc_bw_check "$r_obj" "$tc_bw" \ + "$DEBUGFS_DIR"/ports/"${r_obj##*/}" + done + local node1_name='group1' local node1="$DL_HANDLE/$node1_name" rate_node_add "$node1" @@ -666,6 +896,12 @@ rate_test() rate_attr_tx_rate_check $node1 tx_max $node_tx_max \ $DEBUGFS_DIR/rate_nodes/${node1##*/}/tx_max + + local tc_bw="0:20 1:0 2:0 3:0 4:0 5:20 6:60 7:0" + rate_attr_tc_bw_check $node1 "$tc_bw" \ + "$DEBUGFS_DIR"/rate_nodes/"${node1##*/}" + + rate_node_del "$node1" check_err $? "Failed to delete node $node1" local num_nodes=`rate_nodes_get $DL_HANDLE | wc -w` diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh deleted file mode 100755 index c969559ffa7a..000000000000 --- a/tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh +++ /dev/null @@ -1,85 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0-only - -source ethtool-common.sh - -function get_value { - local query="${SETTINGS_MAP[$1]}" - - echo $(ethtool -g $NSIM_NETDEV | \ - tail -n +$CURR_SETT_LINE | \ - awk -F':' -v pattern="$query:" '$0 ~ pattern {gsub(/[\t ]/, "", $2); print $2}') -} - -function update_current_settings { - for key in ${!SETTINGS_MAP[@]}; do - CURRENT_SETTINGS[$key]=$(get_value $key) - done - echo ${CURRENT_SETTINGS[@]} -} - -if ! ethtool -h | grep -q set-ring >/dev/null; then - echo "SKIP: No --set-ring support in ethtool" - exit 4 -fi - -NSIM_NETDEV=$(make_netdev) - -set -o pipefail - -declare -A SETTINGS_MAP=( - ["rx"]="RX" - ["rx-mini"]="RX Mini" - ["rx-jumbo"]="RX Jumbo" - ["tx"]="TX" -) - -declare -A EXPECTED_SETTINGS=( - ["rx"]="" - ["rx-mini"]="" - ["rx-jumbo"]="" - ["tx"]="" -) - -declare -A CURRENT_SETTINGS=( - ["rx"]="" - ["rx-mini"]="" - ["rx-jumbo"]="" - ["tx"]="" -) - -MAX_VALUE=$((RANDOM % $((2**32-1)))) -RING_MAX_LIST=$(ls $NSIM_DEV_DFS/ethtool/ring/) - -for ring_max_entry in $RING_MAX_LIST; do - echo $MAX_VALUE > $NSIM_DEV_DFS/ethtool/ring/$ring_max_entry -done - -CURR_SETT_LINE=$(ethtool -g $NSIM_NETDEV | grep -i -m1 -n 'Current hardware settings' | cut -f1 -d:) - -# populate the expected settings map -for key in ${!SETTINGS_MAP[@]}; do - EXPECTED_SETTINGS[$key]=$(get_value $key) -done - -# test -for key in ${!SETTINGS_MAP[@]}; do - value=$((RANDOM % $MAX_VALUE)) - - ethtool -G $NSIM_NETDEV "$key" "$value" - - EXPECTED_SETTINGS[$key]="$value" - expected=${EXPECTED_SETTINGS[@]} - current=$(update_current_settings) - - check $? "$current" "$expected" - set +x -done - -if [ $num_errors -eq 0 ]; then - echo "PASSED all $((num_passes)) checks" - exit 0 -else - echo "FAILED $num_errors/$((num_errors+num_passes)) checks" - exit 1 -fi diff --git a/tools/testing/selftests/drivers/net/netdevsim/macsec-offload.sh b/tools/testing/selftests/drivers/net/netdevsim/macsec-offload.sh deleted file mode 100755 index 98033e6667d2..000000000000 --- a/tools/testing/selftests/drivers/net/netdevsim/macsec-offload.sh +++ /dev/null @@ -1,117 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0-only - -source ethtool-common.sh - -NSIM_NETDEV=$(make_netdev) -MACSEC_NETDEV=macsec_nsim - -set -o pipefail - -if ! ethtool -k $NSIM_NETDEV | grep -q 'macsec-hw-offload: on'; then - echo "SKIP: netdevsim doesn't support MACsec offload" - exit 4 -fi - -if ! ip link add link $NSIM_NETDEV $MACSEC_NETDEV type macsec offload mac 2>/dev/null; then - echo "SKIP: couldn't create macsec device" - exit 4 -fi -ip link del $MACSEC_NETDEV - -# -# test macsec offload API -# - -ip link add link $NSIM_NETDEV "${MACSEC_NETDEV}" type macsec port 4 offload mac -check $? - -ip link add link $NSIM_NETDEV "${MACSEC_NETDEV}2" type macsec address "aa:bb:cc:dd:ee:ff" port 5 offload mac -check $? - -ip link add link $NSIM_NETDEV "${MACSEC_NETDEV}3" type macsec sci abbacdde01020304 offload mac -check $? - -ip link add link $NSIM_NETDEV "${MACSEC_NETDEV}4" type macsec port 8 offload mac 2> /dev/null -check $? '' '' 1 - -ip macsec add "${MACSEC_NETDEV}" tx sa 0 pn 1024 on key 01 12345678901234567890123456789012 -check $? - -ip macsec add "${MACSEC_NETDEV}" rx port 1234 address "1c:ed:de:ad:be:ef" -check $? - -ip macsec add "${MACSEC_NETDEV}" rx port 1234 address "1c:ed:de:ad:be:ef" sa 0 pn 1 on \ - key 00 0123456789abcdef0123456789abcdef -check $? - -ip macsec add "${MACSEC_NETDEV}" rx port 1235 address "1c:ed:de:ad:be:ef" 2> /dev/null -check $? '' '' 1 - -# can't disable macsec offload when SAs are configured -ip link set "${MACSEC_NETDEV}" type macsec offload off 2> /dev/null -check $? '' '' 1 - -ip macsec offload "${MACSEC_NETDEV}" off 2> /dev/null -check $? '' '' 1 - -# toggle macsec offload via rtnetlink -ip link set "${MACSEC_NETDEV}2" type macsec offload off -check $? - -ip link set "${MACSEC_NETDEV}2" type macsec offload mac -check $? - -# toggle macsec offload via genetlink -ip macsec offload "${MACSEC_NETDEV}2" off -check $? - -ip macsec offload "${MACSEC_NETDEV}2" mac -check $? - -for dev in ${MACSEC_NETDEV}{,2,3} ; do - ip link del $dev - check $? -done - - -# -# test ethtool features when toggling offload -# - -ip link add link $NSIM_NETDEV $MACSEC_NETDEV type macsec offload mac -TMP_FEATS_ON_1="$(ethtool -k $MACSEC_NETDEV)" - -ip link set $MACSEC_NETDEV type macsec offload off -TMP_FEATS_OFF_1="$(ethtool -k $MACSEC_NETDEV)" - -ip link set $MACSEC_NETDEV type macsec offload mac -TMP_FEATS_ON_2="$(ethtool -k $MACSEC_NETDEV)" - -[ "$TMP_FEATS_ON_1" = "$TMP_FEATS_ON_2" ] -check $? - -ip link del $MACSEC_NETDEV - -ip link add link $NSIM_NETDEV $MACSEC_NETDEV type macsec -check $? - -TMP_FEATS_OFF_2="$(ethtool -k $MACSEC_NETDEV)" -[ "$TMP_FEATS_OFF_1" = "$TMP_FEATS_OFF_2" ] -check $? - -ip link set $MACSEC_NETDEV type macsec offload mac -check $? - -TMP_FEATS_ON_3="$(ethtool -k $MACSEC_NETDEV)" -[ "$TMP_FEATS_ON_1" = "$TMP_FEATS_ON_3" ] -check $? - - -if [ $num_errors -eq 0 ]; then - echo "PASSED all $((num_passes)) checks" - exit 0 -else - echo "FAILED $num_errors/$((num_errors+num_passes)) checks" - exit 1 -fi diff --git a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh index e8e0dc088d6a..01d0c044a5fc 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh @@ -1053,6 +1053,6 @@ trap cleanup EXIT setup_prepare -tests_run +xfail_on_slow tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/netdevsim/peer.sh b/tools/testing/selftests/drivers/net/netdevsim/peer.sh index aed62d9e6c0a..f4721f7636dd 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/peer.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/peer.sh @@ -1,7 +1,8 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0-only -source ../../../net/net_helper.sh +lib_dir=$(dirname $0)/../../../net +source $lib_dir/lib.sh NSIM_DEV_1_ID=$((256 + RANDOM % 256)) NSIM_DEV_1_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_DEV_1_ID @@ -51,6 +52,39 @@ cleanup_ns() ip netns del nssv } +is_carrier_up() +{ + local netns="$1" + local nsim_dev="$2" + + test "$(ip netns exec "$netns" \ + cat /sys/class/net/"$nsim_dev"/carrier 2>/dev/null)" -eq 1 +} + +assert_carrier_up() +{ + local netns="$1" + local nsim_dev="$2" + + if ! is_carrier_up "$netns" "$nsim_dev"; then + echo "$nsim_dev's carrier should be UP, but it isn't" + cleanup_ns + exit 1 + fi +} + +assert_carrier_down() +{ + local netns="$1" + local nsim_dev="$2" + + if is_carrier_up "$netns" "$nsim_dev"; then + echo "$nsim_dev's carrier should be DOWN, but it isn't" + cleanup_ns + exit 1 + fi +} + ### ### Code start ### @@ -112,6 +146,32 @@ if [ $? -eq 0 ]; then exit 1 fi +# netdevsim carrier state consistency checking +assert_carrier_up nssv "$NSIM_DEV_1_NAME" +assert_carrier_up nscl "$NSIM_DEV_2_NAME" + +echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX" > "$NSIM_DEV_SYS_UNLINK" + +assert_carrier_down nssv "$NSIM_DEV_1_NAME" +assert_carrier_down nscl "$NSIM_DEV_2_NAME" + +ip netns exec nssv ip link set dev "$NSIM_DEV_1_NAME" down +ip netns exec nssv ip link set dev "$NSIM_DEV_1_NAME" up + +assert_carrier_down nssv "$NSIM_DEV_1_NAME" +assert_carrier_down nscl "$NSIM_DEV_2_NAME" + +echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_2_FD:$NSIM_DEV_2_IFIDX" > $NSIM_DEV_SYS_LINK + +assert_carrier_up nssv "$NSIM_DEV_1_NAME" +assert_carrier_up nscl "$NSIM_DEV_2_NAME" + +ip netns exec nssv ip link set dev "$NSIM_DEV_1_NAME" down +ip netns exec nssv ip link set dev "$NSIM_DEV_1_NAME" up + +assert_carrier_up nssv "$NSIM_DEV_1_NAME" +assert_carrier_up nscl "$NSIM_DEV_2_NAME" + # send/recv packets tmp_file=$(mktemp) diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh index 92c2f0376c08..4c859ecdad94 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh @@ -266,7 +266,6 @@ for port in 0 1; do echo $NSIM_ID > /sys/bus/netdevsim/new_device else echo 1 > $NSIM_DEV_DFS/udp_ports_open_only - echo 1 > $NSIM_DEV_DFS/udp_ports_sleep echo 1 > $NSIM_DEV_SYS/new_port fi NSIM_NETDEV=`get_netdev_name old_netdevs` @@ -350,23 +349,11 @@ old_netdevs=$(ls /sys/class/net) port=0 echo $NSIM_ID > /sys/bus/netdevsim/new_device echo 0 > $NSIM_DEV_SYS/del_port -echo 1000 > $NSIM_DEV_DFS/udp_ports_sleep echo 0 > $NSIM_DEV_SYS/new_port NSIM_NETDEV=`get_netdev_name old_netdevs` msg="create VxLANs" -exp0=( 0 0 0 0 ) # sleep is longer than out wait -new_vxlan vxlan0 10000 $NSIM_NETDEV - -modprobe -r vxlan -modprobe -r udp_tunnel - -msg="remove tunnels" -exp0=( 0 0 0 0 ) -check_tables - -msg="create VxLANs" -exp0=( 0 0 0 0 ) # sleep is longer than out wait +exp0=( `mke 10000 1` 0 0 0 ) new_vxlan vxlan0 10000 $NSIM_NETDEV exp0=( 0 0 0 0 ) @@ -428,7 +415,6 @@ echo 0 > $NSIM_DEV_SYS/del_port for port in 0 1; do if [ $port -ne 0 ]; then echo 1 > $NSIM_DEV_DFS/udp_ports_open_only - echo 1 > $NSIM_DEV_DFS/udp_ports_sleep fi echo $port > $NSIM_DEV_SYS/new_port @@ -486,7 +472,6 @@ echo 1 > $NSIM_DEV_DFS/udp_ports_sync_all for port in 0 1; do if [ $port -ne 0 ]; then echo 1 > $NSIM_DEV_DFS/udp_ports_open_only - echo 1 > $NSIM_DEV_DFS/udp_ports_sleep fi echo $port > $NSIM_DEV_SYS/new_port @@ -543,7 +528,6 @@ echo 0 > $NSIM_DEV_SYS/del_port for port in 0 1; do if [ $port -ne 0 ]; then echo 1 > $NSIM_DEV_DFS/udp_ports_open_only - echo 1 > $NSIM_DEV_DFS/udp_ports_sleep fi echo $port > $NSIM_DEV_SYS/new_port @@ -573,7 +557,6 @@ echo 1 > $NSIM_DEV_DFS/udp_ports_ipv4_only for port in 0 1; do if [ $port -ne 0 ]; then echo 1 > $NSIM_DEV_DFS/udp_ports_open_only - echo 1 > $NSIM_DEV_DFS/udp_ports_sleep fi echo $port > $NSIM_DEV_SYS/new_port @@ -634,7 +617,6 @@ echo 0 > $NSIM_DEV_SYS/del_port for port in 0 1; do if [ $port -ne 0 ]; then echo 1 > $NSIM_DEV_DFS/udp_ports_open_only - echo 1 > $NSIM_DEV_DFS/udp_ports_sleep fi echo $port > $NSIM_DEV_SYS/new_port @@ -690,7 +672,6 @@ echo 0 > $NSIM_DEV_SYS/del_port for port in 0 1; do if [ $port -ne 0 ]; then echo 1 > $NSIM_DEV_DFS/udp_ports_open_only - echo 1 > $NSIM_DEV_DFS/udp_ports_sleep fi echo $port > $NSIM_DEV_SYS/new_port @@ -750,7 +731,6 @@ echo 0 > $NSIM_DEV_SYS/del_port for port in 0 1; do if [ $port -ne 0 ]; then echo 1 > $NSIM_DEV_DFS/udp_ports_open_only - echo 1 > $NSIM_DEV_DFS/udp_ports_sleep fi echo $port > $NSIM_DEV_SYS/new_port @@ -809,7 +789,6 @@ echo $NSIM_ID > /sys/bus/netdevsim/new_device echo 0 > $NSIM_DEV_SYS/del_port echo 0 > $NSIM_DEV_DFS/udp_ports_open_only -echo 1 > $NSIM_DEV_DFS/udp_ports_sleep echo 1 > $NSIM_DEV_DFS/udp_ports_shared old_netdevs=$(ls /sys/class/net) diff --git a/tools/testing/selftests/drivers/net/netpoll_basic.py b/tools/testing/selftests/drivers/net/netpoll_basic.py new file mode 100755 index 000000000000..408bd54d6779 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netpoll_basic.py @@ -0,0 +1,396 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Author: Breno Leitao <leitao@debian.org> +""" + This test aims to evaluate the netpoll polling mechanism (as in + netpoll_poll_dev()). It presents a complex scenario where the network + attempts to send a packet but fails, prompting it to poll the NIC from within + the netpoll TX side. + + This has been a crucial path in netpoll that was previously untested. Jakub + suggested using a single RX/TX queue, pushing traffic to the NIC, and then + sending netpoll messages (via netconsole) to trigger the poll. + + In parallel, bpftrace is used to detect if netpoll_poll_dev() was called. If + so, the test passes, otherwise it will be skipped. This test is very dependent on + the driver and environment, given we are trying to trigger a tricky scenario. +""" + +import errno +import logging +import os +import random +import string +import threading +import time +from typing import Optional + +from lib.py import ( + bpftrace, + CmdExitFailure, + defer, + ethtool, + GenerateTraffic, + ksft_exit, + ksft_pr, + ksft_run, + KsftFailEx, + KsftSkipEx, + NetDrvEpEnv, + KsftXfailEx, +) + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", +) + +NETCONSOLE_CONFIGFS_PATH: str = "/sys/kernel/config/netconsole" +NETCONS_REMOTE_PORT: int = 6666 +NETCONS_LOCAL_PORT: int = 1514 + +# Max number of netcons messages to send. Each iteration will setup +# netconsole and send MAX_WRITES messages +ITERATIONS: int = 20 +# Number of writes to /dev/kmsg per iteration +MAX_WRITES: int = 40 +# MAPS contains the information coming from bpftrace it will have only one +# key: "hits", which tells the number of times netpoll_poll_dev() was called +MAPS: dict[str, int] = {} +# Thread to run bpftrace in parallel +BPF_THREAD: Optional[threading.Thread] = None +# Time bpftrace will be running in parallel. +BPFTRACE_TIMEOUT: int = 10 + + +def ethtool_get_ringsize(interface_name: str) -> tuple[int, int]: + """ + Read the ringsize using ethtool. This will be used to restore it after the test + """ + try: + ethtool_result = ethtool(f"-g {interface_name}", json=True)[0] + rxs = ethtool_result["rx"] + txs = ethtool_result["tx"] + except (KeyError, IndexError) as exception: + raise KsftSkipEx( + f"Failed to read RX/TX ringsize: {exception}. Not going to mess with them." + ) from exception + + return rxs, txs + + +def ethtool_set_ringsize(interface_name: str, ring_size: tuple[int, int]) -> bool: + """Try to the number of RX and TX ringsize.""" + rxs = ring_size[0] + txs = ring_size[1] + + logging.debug("Setting ring size to %d/%d", rxs, txs) + try: + ethtool(f"-G {interface_name} rx {rxs} tx {txs}") + except CmdExitFailure: + # This might fail on real device, retry with a higher value, + # worst case, keep it as it is. + return False + + return True + + +def ethtool_get_queues_cnt(interface_name: str) -> tuple[int, int, int]: + """Read the number of RX, TX and combined queues using ethtool""" + + try: + ethtool_result = ethtool(f"-l {interface_name}", json=True)[0] + rxq = ethtool_result.get("rx", -1) + txq = ethtool_result.get("tx", -1) + combined = ethtool_result.get("combined", -1) + + except IndexError as exception: + raise KsftSkipEx( + f"Failed to read queues numbers: {exception}. Not going to mess with them." + ) from exception + + return rxq, txq, combined + + +def ethtool_set_queues_cnt(interface_name: str, queues: tuple[int, int, int]) -> None: + """Set the number of RX, TX and combined queues using ethtool""" + rxq, txq, combined = queues + + cmdline = f"-L {interface_name}" + + if rxq != -1: + cmdline += f" rx {rxq}" + if txq != -1: + cmdline += f" tx {txq}" + if combined != -1: + cmdline += f" combined {combined}" + + logging.debug("calling: ethtool %s", cmdline) + + try: + ethtool(cmdline) + except CmdExitFailure as exception: + raise KsftSkipEx( + f"Failed to configure RX/TX queues: {exception}. Ethtool not available?" + ) from exception + + +def netcons_generate_random_target_name() -> str: + """Generate a random target name starting with 'netcons'""" + random_suffix = "".join(random.choices(string.ascii_lowercase + string.digits, k=8)) + return f"netcons_{random_suffix}" + + +def netcons_create_target( + config_data: dict[str, str], + target_name: str, +) -> None: + """Create a netconsole dynamic target against the interfaces""" + logging.debug("Using netconsole name: %s", target_name) + try: + os.makedirs(f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}", exist_ok=True) + logging.debug( + "Created target directory: %s/%s", NETCONSOLE_CONFIGFS_PATH, target_name + ) + except OSError as exception: + if exception.errno != errno.EEXIST: + raise KsftFailEx( + f"Failed to create netconsole target directory: {exception}" + ) from exception + + try: + for key, value in config_data.items(): + path = f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}/{key}" + logging.debug("Writing %s to %s", key, path) + with open(path, "w", encoding="utf-8") as file: + # Always convert to string to write to file + file.write(str(value)) + + # Read all configuration values for debugging purposes + for debug_key in config_data.keys(): + with open( + f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}/{debug_key}", + "r", + encoding="utf-8", + ) as file: + content = file.read() + logging.debug( + "%s/%s/%s : %s", + NETCONSOLE_CONFIGFS_PATH, + target_name, + debug_key, + content.strip(), + ) + + except Exception as exception: + raise KsftFailEx( + f"Failed to configure netconsole target: {exception}" + ) from exception + + +def netcons_configure_target( + cfg: NetDrvEpEnv, interface_name: str, target_name: str +) -> None: + """Configure netconsole on the interface with the given target name""" + config_data = { + "extended": "1", + "dev_name": interface_name, + "local_port": NETCONS_LOCAL_PORT, + "remote_port": NETCONS_REMOTE_PORT, + "local_ip": cfg.addr, + "remote_ip": cfg.remote_addr, + "remote_mac": "00:00:00:00:00:00", # Not important for this test + "enabled": "1", + } + + netcons_create_target(config_data, target_name) + logging.debug( + "Created netconsole target: %s on interface %s", target_name, interface_name + ) + + +def netcons_delete_target(name: str) -> None: + """Delete a netconsole dynamic target""" + target_path = f"{NETCONSOLE_CONFIGFS_PATH}/{name}" + try: + if os.path.exists(target_path): + os.rmdir(target_path) + except OSError as exception: + raise KsftFailEx( + f"Failed to delete netconsole target: {exception}" + ) from exception + + +def netcons_load_module() -> None: + """Try to load the netconsole module""" + os.system("modprobe netconsole") + + +def bpftrace_call() -> None: + """Call bpftrace to find how many times netpoll_poll_dev() is called. + Output is saved in the global variable `maps`""" + + # This is going to update the global variable, that will be seen by the + # main function + global MAPS # pylint: disable=W0603 + + # This will be passed to bpftrace as in bpftrace -e "expr" + expr = "kprobe:netpoll_poll_dev { @hits = count(); }" + + MAPS = bpftrace(expr, timeout=BPFTRACE_TIMEOUT, json=True) + logging.debug("BPFtrace output: %s", MAPS) + + +def bpftrace_start(): + """Start a thread to call `call_bpf` in a parallel thread""" + global BPF_THREAD # pylint: disable=W0603 + + BPF_THREAD = threading.Thread(target=bpftrace_call) + BPF_THREAD.start() + if not BPF_THREAD.is_alive(): + raise KsftSkipEx("BPFtrace thread is not alive. Skipping test") + + +def bpftrace_stop() -> None: + """Stop the bpftrace thread""" + if BPF_THREAD: + BPF_THREAD.join() + + +def bpftrace_any_hit(join: bool) -> bool: + """Check if netpoll_poll_dev() was called by checking the global variable `maps`""" + if not BPF_THREAD: + raise KsftFailEx("BPFtrace didn't start") + + if BPF_THREAD.is_alive(): + if join: + # Wait for bpftrace to finish + BPF_THREAD.join() + else: + # bpftrace is still running, so, we will not check the result yet + return False + + logging.debug("MAPS coming from bpftrace = %s", MAPS) + if "hits" not in MAPS.keys(): + raise KsftFailEx(f"bpftrace failed to run!?: {MAPS}") + + logging.debug("Got a total of %d hits", MAPS["hits"]) + return MAPS["hits"] > 0 + + +def do_netpoll_flush_monitored(cfg: NetDrvEpEnv, ifname: str, target_name: str) -> None: + """Print messages to the console, trying to trigger a netpoll poll""" + # Start bpftrace in parallel, so, it is watching + # netpoll_poll_dev() while we are sending netconsole messages + bpftrace_start() + defer(bpftrace_stop) + + do_netpoll_flush(cfg, ifname, target_name) + + if bpftrace_any_hit(join=True): + ksft_pr("netpoll_poll_dev() was called. Success") + return + + raise KsftXfailEx("netpoll_poll_dev() was not called during the test...") + + +def do_netpoll_flush(cfg: NetDrvEpEnv, ifname: str, target_name: str) -> None: + """Print messages to the console, trying to trigger a netpoll poll""" + netcons_configure_target(cfg, ifname, target_name) + retry = 0 + + for i in range(int(ITERATIONS)): + if not BPF_THREAD.is_alive() or bpftrace_any_hit(join=False): + # bpftrace is done, stop sending messages + break + + msg = f"netcons test #{i}" + with open("/dev/kmsg", "w", encoding="utf-8") as kmsg: + for j in range(MAX_WRITES): + try: + kmsg.write(f"{msg}-{j}\n") + except OSError as exception: + # in some cases, kmsg can be busy, so, we will retry + time.sleep(1) + retry += 1 + if retry < 5: + logging.info("Failed to write to kmsg. Retrying") + # Just retry a few times + continue + raise KsftFailEx( + f"Failed to write to kmsg: {exception}" + ) from exception + + netcons_delete_target(target_name) + netcons_configure_target(cfg, ifname, target_name) + # If we sleep here, we will have a better chance of triggering + # This number is based on a few tests I ran while developing this test + time.sleep(0.4) + + +def configure_network(ifname: str) -> None: + """Configure ring size and queue numbers""" + + # Set defined queues to 1 to force congestion + prev_queues = ethtool_get_queues_cnt(ifname) + logging.debug("RX/TX/combined queues: %s", prev_queues) + # Only set the queues to 1 if they exists in the device. I.e, they are > 0 + ethtool_set_queues_cnt(ifname, tuple(1 if x > 0 else x for x in prev_queues)) + defer(ethtool_set_queues_cnt, ifname, prev_queues) + + # Try to set the ring size to some low value. + # Do not fail if the hardware do not accepted desired values + prev_ring_size = ethtool_get_ringsize(ifname) + for size in [(1, 1), (128, 128), (256, 256)]: + if ethtool_set_ringsize(ifname, size): + # hardware accepted the desired ringsize + logging.debug("Set RX/TX ringsize to: %s from %s", size, prev_ring_size) + break + defer(ethtool_set_ringsize, ifname, prev_ring_size) + + +def test_netpoll(cfg: NetDrvEpEnv) -> None: + """ + Test netpoll by sending traffic to the interface and then sending + netconsole messages to trigger a poll + """ + + ifname = cfg.ifname + configure_network(ifname) + target_name = netcons_generate_random_target_name() + traffic = None + + try: + traffic = GenerateTraffic(cfg) + do_netpoll_flush_monitored(cfg, ifname, target_name) + finally: + if traffic: + traffic.stop() + + # Revert RX/TX queues + netcons_delete_target(target_name) + + +def test_check_dependencies() -> None: + """Check if the dependencies are met""" + if not os.path.exists(NETCONSOLE_CONFIGFS_PATH): + raise KsftSkipEx( + f"Directory {NETCONSOLE_CONFIGFS_PATH} does not exist. CONFIG_NETCONSOLE_DYNAMIC might not be set." # pylint: disable=C0301 + ) + + +def main() -> None: + """Main function to run the test""" + netcons_load_module() + test_check_dependencies() + with NetDrvEpEnv(__file__) as cfg: + ksft_run( + [test_netpoll], + args=(cfg,), + ) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/ocelot/psfp.sh b/tools/testing/selftests/drivers/net/ocelot/psfp.sh index bed748dde4b0..8972f42dfe03 100755 --- a/tools/testing/selftests/drivers/net/ocelot/psfp.sh +++ b/tools/testing/selftests/drivers/net/ocelot/psfp.sh @@ -266,18 +266,14 @@ run_test() "${base_time}" \ "${CYCLE_TIME_NS}" \ "${SHIFT_TIME_NS}" \ + "${GATE_DURATION_NS}" \ "${NUM_PKTS}" \ "${STREAM_VID}" \ "${STREAM_PRIO}" \ "" \ "${isochron_dat}" - # Count all received packets by looking at the non-zero RX timestamps - received=$(isochron report \ - --input-file "${isochron_dat}" \ - --printf-format "%u\n" --printf-args "R" | \ - grep -w -v '0' | wc -l) - + received=$(isochron_report_num_received "${isochron_dat}") if [ "${received}" = "${expected}" ]; then RET=0 else diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py index eb83e7b48797..da3623c5e8a9 100755 --- a/tools/testing/selftests/drivers/net/ping.py +++ b/tools/testing/selftests/drivers/net/ping.py @@ -1,49 +1,239 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 +import os +import random, string, time from lib.py import ksft_run, ksft_exit -from lib.py import ksft_eq -from lib.py import NetDrvEpEnv +from lib.py import ksft_eq, KsftSkipEx, KsftFailEx +from lib.py import EthtoolFamily, NetDrvEpEnv from lib.py import bkg, cmd, wait_port_listen, rand_port +from lib.py import defer, ethtool, ip +no_sleep=False -def test_v4(cfg) -> None: - cfg.require_v4() +def _test_v4(cfg) -> None: + if not cfg.addr_v["4"]: + return - cmd(f"ping -c 1 -W0.5 {cfg.remote_v4}") - cmd(f"ping -c 1 -W0.5 {cfg.v4}", host=cfg.remote) + cmd("ping -c 1 -W0.5 " + cfg.remote_addr_v["4"]) + cmd("ping -c 1 -W0.5 " + cfg.addr_v["4"], host=cfg.remote) + cmd("ping -s 65000 -c 1 -W0.5 " + cfg.remote_addr_v["4"]) + cmd("ping -s 65000 -c 1 -W0.5 " + cfg.addr_v["4"], host=cfg.remote) +def _test_v6(cfg) -> None: + if not cfg.addr_v["6"]: + return -def test_v6(cfg) -> None: - cfg.require_v6() + cmd("ping -c 1 -W5 " + cfg.remote_addr_v["6"]) + cmd("ping -c 1 -W5 " + cfg.addr_v["6"], host=cfg.remote) + cmd("ping -s 65000 -c 1 -W0.5 " + cfg.remote_addr_v["6"]) + cmd("ping -s 65000 -c 1 -W0.5 " + cfg.addr_v["6"], host=cfg.remote) - cmd(f"ping -c 1 -W0.5 {cfg.remote_v6}") - cmd(f"ping -c 1 -W0.5 {cfg.v6}", host=cfg.remote) - - -def test_tcp(cfg) -> None: - cfg.require_cmd("socat", remote=True) +def _test_tcp(cfg) -> None: + cfg.require_cmd("socat", local=False, remote=True) port = rand_port() listen_cmd = f"socat -{cfg.addr_ipver} -t 2 -u TCP-LISTEN:{port},reuseport STDOUT" + test_string = ''.join(random.choice(string.ascii_lowercase) for _ in range(65536)) with bkg(listen_cmd, exit_wait=True) as nc: wait_port_listen(port) - cmd(f"echo ping | socat -t 2 -u STDIN TCP:{cfg.baddr}:{port}", + cmd(f"echo {test_string} | socat -t 2 -u STDIN TCP:{cfg.baddr}:{port}", shell=True, host=cfg.remote) - ksft_eq(nc.stdout.strip(), "ping") + ksft_eq(nc.stdout.strip(), test_string) + test_string = ''.join(random.choice(string.ascii_lowercase) for _ in range(65536)) with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as nc: wait_port_listen(port, host=cfg.remote) - cmd(f"echo ping | socat -t 2 -u STDIN TCP:{cfg.remote_baddr}:{port}", shell=True) - ksft_eq(nc.stdout.strip(), "ping") - + cmd(f"echo {test_string} | socat -t 2 -u STDIN TCP:{cfg.remote_baddr}:{port}", shell=True) + ksft_eq(nc.stdout.strip(), test_string) + +def _schedule_checksum_reset(cfg, netnl) -> None: + features = ethtool(f"-k {cfg.ifname}", json=True) + setting = "" + for side in ["tx", "rx"]: + f = features[0][side + "-checksumming"] + if not f["fixed"]: + setting += " " + side + setting += " " + ("on" if f["requested"] or f["active"] else "off") + defer(ethtool, f" -K {cfg.ifname} " + setting) + +def _set_offload_checksum(cfg, netnl, on) -> None: + try: + ethtool(f" -K {cfg.ifname} rx {on} tx {on} ") + except: + return + +def _set_xdp_generic_sb_on(cfg) -> None: + prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" + cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote) + cmd(f"ip link set dev {cfg.ifname} mtu 1500 xdpgeneric obj {prog} sec xdp", shell=True) + defer(cmd, f"ip link set dev {cfg.ifname} xdpgeneric off") + + if no_sleep != True: + time.sleep(10) + +def _set_xdp_generic_mb_on(cfg) -> None: + prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" + cmd(f"ip link set dev {cfg.remote_ifname} mtu 9000", shell=True, host=cfg.remote) + defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote) + ip("link set dev %s mtu 9000 xdpgeneric obj %s sec xdp.frags" % (cfg.ifname, prog)) + defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdpgeneric off") + + if no_sleep != True: + time.sleep(10) + +def _set_xdp_native_sb_on(cfg) -> None: + prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" + cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote) + cmd(f"ip -j link set dev {cfg.ifname} mtu 1500 xdp obj {prog} sec xdp", shell=True) + defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off") + xdp_info = ip("-d link show %s" % (cfg.ifname), json=True)[0] + if xdp_info['xdp']['mode'] != 1: + """ + If the interface doesn't support native-mode, it falls back to generic mode. + The mode value 1 is native and 2 is generic. + So it raises an exception if mode is not 1(native mode). + """ + raise KsftSkipEx('device does not support native-XDP') + + if no_sleep != True: + time.sleep(10) + +def _set_xdp_native_mb_on(cfg) -> None: + prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" + cmd(f"ip link set dev {cfg.remote_ifname} mtu 9000", shell=True, host=cfg.remote) + defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote) + try: + cmd(f"ip link set dev {cfg.ifname} mtu 9000 xdp obj {prog} sec xdp.frags", shell=True) + defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off") + except Exception as e: + raise KsftSkipEx('device does not support native-multi-buffer XDP') + + if no_sleep != True: + time.sleep(10) + +def _set_xdp_offload_on(cfg) -> None: + prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" + cmd(f"ip link set dev {cfg.ifname} mtu 1500", shell=True) + try: + cmd(f"ip link set dev {cfg.ifname} xdpoffload obj {prog} sec xdp", shell=True) + except Exception as e: + raise KsftSkipEx('device does not support offloaded XDP') + defer(ip, f"link set dev {cfg.ifname} xdpoffload off") + cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote) + + if no_sleep != True: + time.sleep(10) + +def get_interface_info(cfg) -> None: + global no_sleep + + if cfg.remote_ifname == "": + raise KsftFailEx('Can not get remote interface') + local_info = ip("-d link show %s" % (cfg.ifname), json=True)[0] + if 'parentbus' in local_info and local_info['parentbus'] == "netdevsim": + no_sleep=True + if 'linkinfo' in local_info and local_info['linkinfo']['info_kind'] == "veth": + no_sleep=True + +def set_interface_init(cfg) -> None: + cmd(f"ip link set dev {cfg.ifname} mtu 1500", shell=True) + cmd(f"ip link set dev {cfg.ifname} xdp off ", shell=True) + cmd(f"ip link set dev {cfg.ifname} xdpgeneric off ", shell=True) + cmd(f"ip link set dev {cfg.ifname} xdpoffload off", shell=True) + cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote) + +def test_default_v4(cfg, netnl) -> None: + cfg.require_ipver("4") + + _schedule_checksum_reset(cfg, netnl) + _set_offload_checksum(cfg, netnl, "off") + _test_v4(cfg) + _test_tcp(cfg) + _set_offload_checksum(cfg, netnl, "on") + _test_v4(cfg) + _test_tcp(cfg) + +def test_default_v6(cfg, netnl) -> None: + cfg.require_ipver("6") + + _schedule_checksum_reset(cfg, netnl) + _set_offload_checksum(cfg, netnl, "off") + _test_v6(cfg) + _test_tcp(cfg) + _set_offload_checksum(cfg, netnl, "on") + _test_v6(cfg) + _test_tcp(cfg) + +def test_xdp_generic_sb(cfg, netnl) -> None: + _schedule_checksum_reset(cfg, netnl) + _set_xdp_generic_sb_on(cfg) + _set_offload_checksum(cfg, netnl, "off") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + _set_offload_checksum(cfg, netnl, "on") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + +def test_xdp_generic_mb(cfg, netnl) -> None: + _schedule_checksum_reset(cfg, netnl) + _set_xdp_generic_mb_on(cfg) + _set_offload_checksum(cfg, netnl, "off") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + _set_offload_checksum(cfg, netnl, "on") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + +def test_xdp_native_sb(cfg, netnl) -> None: + _schedule_checksum_reset(cfg, netnl) + _set_xdp_native_sb_on(cfg) + _set_offload_checksum(cfg, netnl, "off") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + _set_offload_checksum(cfg, netnl, "on") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + +def test_xdp_native_mb(cfg, netnl) -> None: + _schedule_checksum_reset(cfg, netnl) + _set_xdp_native_mb_on(cfg) + _set_offload_checksum(cfg, netnl, "off") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + _set_offload_checksum(cfg, netnl, "on") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + +def test_xdp_offload(cfg, netnl) -> None: + _set_xdp_offload_on(cfg) + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) def main() -> None: with NetDrvEpEnv(__file__) as cfg: - ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, )) + get_interface_info(cfg) + set_interface_init(cfg) + ksft_run([test_default_v4, + test_default_v6, + test_xdp_generic_sb, + test_xdp_generic_mb, + test_xdp_native_sb, + test_xdp_native_mb, + test_xdp_offload], + args=(cfg, EthtoolFamily())) ksft_exit() diff --git a/tools/testing/selftests/drivers/net/psp.py b/tools/testing/selftests/drivers/net/psp.py new file mode 100755 index 000000000000..864d9fce1094 --- /dev/null +++ b/tools/testing/selftests/drivers/net/psp.py @@ -0,0 +1,644 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +"""Test suite for PSP capable drivers.""" + +import errno +import fcntl +import socket +import struct +import termios +import time + +from lib.py import defer +from lib.py import ksft_run, ksft_exit, ksft_pr +from lib.py import ksft_true, ksft_eq, ksft_ne, ksft_gt, ksft_raises +from lib.py import ksft_not_none +from lib.py import KsftSkipEx +from lib.py import NetDrvEpEnv, PSPFamily, NlError +from lib.py import bkg, rand_port, wait_port_listen + + +def _get_outq(s): + one = b'\0' * 4 + outq = fcntl.ioctl(s.fileno(), termios.TIOCOUTQ, one) + return struct.unpack("I", outq)[0] + + +def _send_with_ack(cfg, msg): + cfg.comm_sock.send(msg) + response = cfg.comm_sock.recv(4) + if response != b'ack\0': + raise RuntimeError("Unexpected server response", response) + + +def _remote_read_len(cfg): + cfg.comm_sock.send(b'read len\0') + return int(cfg.comm_sock.recv(1024)[:-1].decode('utf-8')) + + +def _make_clr_conn(cfg, ipver=None): + _send_with_ack(cfg, b'conn clr\0') + remote_addr = cfg.remote_addr_v[ipver] if ipver else cfg.remote_addr + s = socket.create_connection((remote_addr, cfg.comm_port), ) + return s + + +def _make_psp_conn(cfg, version=0, ipver=None): + _send_with_ack(cfg, b'conn psp\0' + struct.pack('BB', version, version)) + remote_addr = cfg.remote_addr_v[ipver] if ipver else cfg.remote_addr + s = socket.create_connection((remote_addr, cfg.comm_port), ) + return s + + +def _close_conn(cfg, s): + _send_with_ack(cfg, b'data close\0') + s.close() + + +def _close_psp_conn(cfg, s): + _close_conn(cfg, s) + + +def _spi_xchg(s, rx): + s.send(struct.pack('I', rx['spi']) + rx['key']) + tx = s.recv(4 + len(rx['key'])) + return { + 'spi': struct.unpack('I', tx[:4])[0], + 'key': tx[4:] + } + + +def _send_careful(cfg, s, rounds): + data = b'0123456789' * 200 + for i in range(rounds): + n = 0 + for _ in range(10): # allow 10 retries + try: + n += s.send(data[n:], socket.MSG_DONTWAIT) + if n == len(data): + break + except BlockingIOError: + time.sleep(0.05) + else: + rlen = _remote_read_len(cfg) + outq = _get_outq(s) + report = f'sent: {i * len(data) + n} remote len: {rlen} outq: {outq}' + raise RuntimeError(report) + + return len(data) * rounds + + +def _check_data_rx(cfg, exp_len): + read_len = -1 + for _ in range(30): + cfg.comm_sock.send(b'read len\0') + read_len = int(cfg.comm_sock.recv(1024)[:-1].decode('utf-8')) + if read_len == exp_len: + break + time.sleep(0.01) + ksft_eq(read_len, exp_len) + + +def _check_data_outq(s, exp_len, force_wait=False): + outq = 0 + for _ in range(10): + outq = _get_outq(s) + if not force_wait and outq == exp_len: + break + time.sleep(0.01) + ksft_eq(outq, exp_len) + + +def _get_stat(cfg, key): + return cfg.pspnl.get_stats({'dev-id': cfg.psp_dev_id})[key] + +# +# Test case boiler plate +# + +def _init_psp_dev(cfg): + if not hasattr(cfg, 'psp_dev_id'): + # Figure out which local device we are testing against + for dev in cfg.pspnl.dev_get({}, dump=True): + if dev['ifindex'] == cfg.ifindex: + cfg.psp_info = dev + cfg.psp_dev_id = cfg.psp_info['id'] + break + else: + raise KsftSkipEx("No PSP devices found") + + # Enable PSP if necessary + cap = cfg.psp_info['psp-versions-cap'] + ena = cfg.psp_info['psp-versions-ena'] + if cap != ena: + cfg.pspnl.dev_set({'id': cfg.psp_dev_id, 'psp-versions-ena': cap}) + defer(cfg.pspnl.dev_set, {'id': cfg.psp_dev_id, + 'psp-versions-ena': ena }) + +# +# Test cases +# + +def dev_list_devices(cfg): + """ Dump all devices """ + _init_psp_dev(cfg) + + devices = cfg.pspnl.dev_get({}, dump=True) + + found = False + for dev in devices: + found |= dev['id'] == cfg.psp_dev_id + ksft_true(found) + + +def dev_get_device(cfg): + """ Get the device we intend to use """ + _init_psp_dev(cfg) + + dev = cfg.pspnl.dev_get({'id': cfg.psp_dev_id}) + ksft_eq(dev['id'], cfg.psp_dev_id) + + +def dev_get_device_bad(cfg): + """ Test getting device which doesn't exist """ + raised = False + try: + cfg.pspnl.dev_get({'id': 1234567}) + except NlError as e: + ksft_eq(e.nl_msg.error, -errno.ENODEV) + raised = True + ksft_true(raised) + + +def dev_rotate(cfg): + """ Test key rotation """ + _init_psp_dev(cfg) + + prev_rotations = _get_stat(cfg, 'key-rotations') + + rot = cfg.pspnl.key_rotate({"id": cfg.psp_dev_id}) + ksft_eq(rot['id'], cfg.psp_dev_id) + rot = cfg.pspnl.key_rotate({"id": cfg.psp_dev_id}) + ksft_eq(rot['id'], cfg.psp_dev_id) + + cur_rotations = _get_stat(cfg, 'key-rotations') + ksft_eq(cur_rotations, prev_rotations + 2) + + +def dev_rotate_spi(cfg): + """ Test key rotation and SPI check """ + _init_psp_dev(cfg) + + top_a = top_b = 0 + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + assoc_a = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + top_a = assoc_a['rx-key']['spi'] >> 31 + s.close() + rot = cfg.pspnl.key_rotate({"id": cfg.psp_dev_id}) + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + ksft_eq(rot['id'], cfg.psp_dev_id) + assoc_b = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + top_b = assoc_b['rx-key']['spi'] >> 31 + s.close() + ksft_ne(top_a, top_b) + + +def assoc_basic(cfg): + """ Test creating associations """ + _init_psp_dev(cfg) + + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + assoc = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + ksft_eq(assoc['dev-id'], cfg.psp_dev_id) + ksft_gt(assoc['rx-key']['spi'], 0) + ksft_eq(len(assoc['rx-key']['key']), 16) + + assoc = cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": 0, + "tx-key": assoc['rx-key'], + "sock-fd": s.fileno()}) + ksft_eq(len(assoc), 0) + s.close() + + +def assoc_bad_dev(cfg): + """ Test creating associations with bad device ID """ + _init_psp_dev(cfg) + + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + with ksft_raises(NlError) as cm: + cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id + 1234567, + "sock-fd": s.fileno()}) + ksft_eq(cm.exception.nl_msg.error, -errno.ENODEV) + + +def assoc_sk_only_conn(cfg): + """ Test creating associations based on socket """ + _init_psp_dev(cfg) + + with _make_clr_conn(cfg) as s: + assoc = cfg.pspnl.rx_assoc({"version": 0, + "sock-fd": s.fileno()}) + ksft_eq(assoc['dev-id'], cfg.psp_dev_id) + cfg.pspnl.tx_assoc({"version": 0, + "tx-key": assoc['rx-key'], + "sock-fd": s.fileno()}) + _close_conn(cfg, s) + + +def assoc_sk_only_mismatch(cfg): + """ Test creating associations based on socket (dev mismatch) """ + _init_psp_dev(cfg) + + with _make_clr_conn(cfg) as s: + with ksft_raises(NlError) as cm: + cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id + 1234567, + "sock-fd": s.fileno()}) + the_exception = cm.exception + ksft_eq(the_exception.nl_msg.extack['bad-attr'], ".dev-id") + ksft_eq(the_exception.nl_msg.error, -errno.EINVAL) + _close_conn(cfg, s) + + +def assoc_sk_only_mismatch_tx(cfg): + """ Test creating associations based on socket (dev mismatch) """ + _init_psp_dev(cfg) + + with _make_clr_conn(cfg) as s: + with ksft_raises(NlError) as cm: + assoc = cfg.pspnl.rx_assoc({"version": 0, + "sock-fd": s.fileno()}) + cfg.pspnl.tx_assoc({"version": 0, + "tx-key": assoc['rx-key'], + "dev-id": cfg.psp_dev_id + 1234567, + "sock-fd": s.fileno()}) + the_exception = cm.exception + ksft_eq(the_exception.nl_msg.extack['bad-attr'], ".dev-id") + ksft_eq(the_exception.nl_msg.error, -errno.EINVAL) + _close_conn(cfg, s) + + +def assoc_sk_only_unconn(cfg): + """ Test creating associations based on socket (unconnected, should fail) """ + _init_psp_dev(cfg) + + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + with ksft_raises(NlError) as cm: + cfg.pspnl.rx_assoc({"version": 0, + "sock-fd": s.fileno()}) + the_exception = cm.exception + ksft_eq(the_exception.nl_msg.extack['miss-type'], "dev-id") + ksft_eq(the_exception.nl_msg.error, -errno.EINVAL) + + +def assoc_version_mismatch(cfg): + """ Test creating associations where Rx and Tx PSP versions do not match """ + _init_psp_dev(cfg) + + versions = list(cfg.psp_info['psp-versions-cap']) + if len(versions) < 2: + raise KsftSkipEx("Not enough PSP versions supported by the device for the test") + + # Translate versions to integers + versions = [cfg.pspnl.consts["version"].entries[v].value for v in versions] + + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + rx = cfg.pspnl.rx_assoc({"version": versions[0], + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + + for version in versions[1:]: + with ksft_raises(NlError) as cm: + cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": version, + "tx-key": rx['rx-key'], + "sock-fd": s.fileno()}) + the_exception = cm.exception + ksft_eq(the_exception.nl_msg.error, -errno.EINVAL) + + +def assoc_twice(cfg): + """ Test reusing Tx assoc for two sockets """ + _init_psp_dev(cfg) + + def rx_assoc_check(s): + assoc = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + ksft_eq(assoc['dev-id'], cfg.psp_dev_id) + ksft_gt(assoc['rx-key']['spi'], 0) + ksft_eq(len(assoc['rx-key']['key']), 16) + + return assoc + + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + assoc = rx_assoc_check(s) + tx = cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": 0, + "tx-key": assoc['rx-key'], + "sock-fd": s.fileno()}) + ksft_eq(len(tx), 0) + + # Use the same Tx assoc second time + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s2: + rx_assoc_check(s2) + tx = cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": 0, + "tx-key": assoc['rx-key'], + "sock-fd": s2.fileno()}) + ksft_eq(len(tx), 0) + + s.close() + + +def _data_basic_send(cfg, version, ipver): + """ Test basic data send """ + _init_psp_dev(cfg) + + # Version 0 is required by spec, don't let it skip + if version: + name = cfg.pspnl.consts["version"].entries_by_val[version].name + if name not in cfg.psp_info['psp-versions-cap']: + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + with ksft_raises(NlError) as cm: + cfg.pspnl.rx_assoc({"version": version, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + ksft_eq(cm.exception.nl_msg.error, -errno.EOPNOTSUPP) + raise KsftSkipEx("PSP version not supported", name) + + s = _make_psp_conn(cfg, version, ipver) + + rx_assoc = cfg.pspnl.rx_assoc({"version": version, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + rx = rx_assoc['rx-key'] + tx = _spi_xchg(s, rx) + + cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": version, + "tx-key": tx, + "sock-fd": s.fileno()}) + + data_len = _send_careful(cfg, s, 100) + _check_data_rx(cfg, data_len) + _close_psp_conn(cfg, s) + + +def __bad_xfer_do(cfg, s, tx, version='hdr0-aes-gcm-128'): + # Make sure we accept the ACK for the SPI before we seal with the bad assoc + _check_data_outq(s, 0) + + cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": version, + "tx-key": tx, + "sock-fd": s.fileno()}) + + data_len = _send_careful(cfg, s, 20) + _check_data_outq(s, data_len, force_wait=True) + _check_data_rx(cfg, 0) + _close_psp_conn(cfg, s) + + +def data_send_bad_key(cfg): + """ Test send data with bad key """ + _init_psp_dev(cfg) + + s = _make_psp_conn(cfg) + + rx_assoc = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + rx = rx_assoc['rx-key'] + tx = _spi_xchg(s, rx) + tx['key'] = (tx['key'][0] ^ 0xff).to_bytes(1, 'little') + tx['key'][1:] + __bad_xfer_do(cfg, s, tx) + + +def data_send_disconnect(cfg): + """ Test socket close after sending data """ + _init_psp_dev(cfg) + + with _make_psp_conn(cfg) as s: + assoc = cfg.pspnl.rx_assoc({"version": 0, + "sock-fd": s.fileno()}) + tx = _spi_xchg(s, assoc['rx-key']) + cfg.pspnl.tx_assoc({"version": 0, + "tx-key": tx, + "sock-fd": s.fileno()}) + + data_len = _send_careful(cfg, s, 100) + _check_data_rx(cfg, data_len) + + s.shutdown(socket.SHUT_RDWR) + s.close() + + +def _data_mss_adjust(cfg, ipver): + _init_psp_dev(cfg) + + # First figure out what the MSS would be without any adjustments + s = _make_clr_conn(cfg, ipver) + s.send(b"0123456789abcdef" * 1024) + _check_data_rx(cfg, 16 * 1024) + mss = s.getsockopt(socket.IPPROTO_TCP, socket.TCP_MAXSEG) + _close_conn(cfg, s) + + s = _make_psp_conn(cfg, 0, ipver) + try: + rx_assoc = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + rx = rx_assoc['rx-key'] + tx = _spi_xchg(s, rx) + + rxmss = s.getsockopt(socket.IPPROTO_TCP, socket.TCP_MAXSEG) + ksft_eq(mss, rxmss) + + cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": 0, + "tx-key": tx, + "sock-fd": s.fileno()}) + + txmss = s.getsockopt(socket.IPPROTO_TCP, socket.TCP_MAXSEG) + ksft_eq(mss, txmss + 40) + + data_len = _send_careful(cfg, s, 100) + _check_data_rx(cfg, data_len) + _check_data_outq(s, 0) + + txmss = s.getsockopt(socket.IPPROTO_TCP, socket.TCP_MAXSEG) + ksft_eq(mss, txmss + 40) + finally: + _close_psp_conn(cfg, s) + + +def data_stale_key(cfg): + """ Test send on a double-rotated key """ + _init_psp_dev(cfg) + + prev_stale = _get_stat(cfg, 'stale-events') + s = _make_psp_conn(cfg) + try: + rx_assoc = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + rx = rx_assoc['rx-key'] + tx = _spi_xchg(s, rx) + + cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": 0, + "tx-key": tx, + "sock-fd": s.fileno()}) + + data_len = _send_careful(cfg, s, 100) + _check_data_rx(cfg, data_len) + _check_data_outq(s, 0) + + cfg.pspnl.key_rotate({"id": cfg.psp_dev_id}) + cfg.pspnl.key_rotate({"id": cfg.psp_dev_id}) + + cur_stale = _get_stat(cfg, 'stale-events') + ksft_gt(cur_stale, prev_stale) + + s.send(b'0123456789' * 200) + _check_data_outq(s, 2000, force_wait=True) + finally: + _close_psp_conn(cfg, s) + + +def __nsim_psp_rereg(cfg): + # The PSP dev ID will change, remember what was there before + before = set([x['id'] for x in cfg.pspnl.dev_get({}, dump=True)]) + + cfg._ns.nsims[0].dfs_write('psp_rereg', '1') + + after = set([x['id'] for x in cfg.pspnl.dev_get({}, dump=True)]) + + new_devs = list(after - before) + ksft_eq(len(new_devs), 1) + cfg.psp_dev_id = list(after - before)[0] + + +def removal_device_rx(cfg): + """ Test removing a netdev / PSD with active Rx assoc """ + + # We could technically devlink reload real devices, too + # but that kills the control socket. So test this on + # netdevsim only for now + cfg.require_nsim() + + s = _make_clr_conn(cfg) + try: + rx_assoc = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + ksft_not_none(rx_assoc) + + __nsim_psp_rereg(cfg) + finally: + _close_conn(cfg, s) + + +def removal_device_bi(cfg): + """ Test removing a netdev / PSD with active Rx/Tx assoc """ + + # We could technically devlink reload real devices, too + # but that kills the control socket. So test this on + # netdevsim only for now + cfg.require_nsim() + + s = _make_clr_conn(cfg) + try: + rx_assoc = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": 0, + "tx-key": rx_assoc['rx-key'], + "sock-fd": s.fileno()}) + __nsim_psp_rereg(cfg) + finally: + _close_conn(cfg, s) + + +def psp_ip_ver_test_builder(name, test_func, psp_ver, ipver): + """Build test cases for each combo of PSP version and IP version""" + def test_case(cfg): + cfg.require_ipver(ipver) + test_func(cfg, psp_ver, ipver) + + test_case.__name__ = f"{name}_v{psp_ver}_ip{ipver}" + return test_case + + +def ipver_test_builder(name, test_func, ipver): + """Build test cases for each IP version""" + def test_case(cfg): + cfg.require_ipver(ipver) + test_func(cfg, ipver) + + test_case.__name__ = f"{name}_ip{ipver}" + return test_case + + +def main() -> None: + """ Ksft boiler plate main """ + + with NetDrvEpEnv(__file__) as cfg: + cfg.pspnl = PSPFamily() + + # Set up responder and communication sock + responder = cfg.remote.deploy("psp_responder") + + cfg.comm_port = rand_port() + srv = None + try: + with bkg(responder + f" -p {cfg.comm_port} -i {cfg.remote_ifindex}", + host=cfg.remote, exit_wait=True) as srv: + wait_port_listen(cfg.comm_port, host=cfg.remote) + + cfg.comm_sock = socket.create_connection((cfg.remote_addr, + cfg.comm_port), + timeout=1) + + cases = [ + psp_ip_ver_test_builder( + "data_basic_send", _data_basic_send, version, ipver + ) + for version in range(0, 4) + for ipver in ("4", "6") + ] + cases += [ + ipver_test_builder("data_mss_adjust", _data_mss_adjust, ipver) + for ipver in ("4", "6") + ] + + ksft_run(cases=cases, globs=globals(), + case_pfx={"dev_", "data_", "assoc_", "removal_"}, + args=(cfg, )) + + cfg.comm_sock.send(b"exit\0") + cfg.comm_sock.close() + finally: + if srv and (srv.stdout or srv.stderr): + ksft_pr("") + ksft_pr(f"Responder logs ({srv.ret}):") + if srv and srv.stdout: + ksft_pr("STDOUT:\n# " + srv.stdout.strip().replace("\n", "\n# ")) + if srv and srv.stderr: + ksft_pr("STDERR:\n# " + srv.stderr.strip().replace("\n", "\n# ")) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/psp_responder.c b/tools/testing/selftests/drivers/net/psp_responder.c new file mode 100644 index 000000000000..a26e7628bbb1 --- /dev/null +++ b/tools/testing/selftests/drivers/net/psp_responder.c @@ -0,0 +1,479 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stdio.h> +#include <string.h> +#include <sys/poll.h> +#include <sys/socket.h> +#include <sys/time.h> +#include <netinet/in.h> +#include <unistd.h> + +#include <ynl.h> + +#include "psp-user.h" + +#define dbg(msg...) \ +do { \ + if (opts->verbose) \ + fprintf(stderr, "DEBUG: " msg); \ +} while (0) + +static bool should_quit; + +struct opts { + int port; + int ifindex; + bool verbose; +}; + +enum accept_cfg { + ACCEPT_CFG_NONE = 0, + ACCEPT_CFG_CLEAR, + ACCEPT_CFG_PSP, +}; + +static struct { + unsigned char tx; + unsigned char rx; +} psp_vers; + +static int conn_setup_psp(struct ynl_sock *ys, struct opts *opts, int data_sock) +{ + struct psp_rx_assoc_rsp *rsp; + struct psp_rx_assoc_req *req; + struct psp_tx_assoc_rsp *tsp; + struct psp_tx_assoc_req *teq; + char info[300]; + int key_len; + ssize_t sz; + __u32 spi; + + dbg("create PSP connection\n"); + + // Rx assoc alloc + req = psp_rx_assoc_req_alloc(); + + psp_rx_assoc_req_set_sock_fd(req, data_sock); + psp_rx_assoc_req_set_version(req, psp_vers.rx); + + rsp = psp_rx_assoc(ys, req); + psp_rx_assoc_req_free(req); + + if (!rsp) { + perror("ERROR: failed to Rx assoc"); + return -1; + } + + // SPI exchange + key_len = rsp->rx_key._len.key; + memcpy(info, &rsp->rx_key.spi, sizeof(spi)); + memcpy(&info[sizeof(spi)], rsp->rx_key.key, key_len); + sz = sizeof(spi) + key_len; + + send(data_sock, info, sz, MSG_WAITALL); + psp_rx_assoc_rsp_free(rsp); + + sz = recv(data_sock, info, sz, MSG_WAITALL); + if (sz < 0) { + perror("ERROR: failed to read PSP key from sock"); + return -1; + } + memcpy(&spi, info, sizeof(spi)); + + // Setup Tx assoc + teq = psp_tx_assoc_req_alloc(); + + psp_tx_assoc_req_set_sock_fd(teq, data_sock); + psp_tx_assoc_req_set_version(teq, psp_vers.tx); + psp_tx_assoc_req_set_tx_key_spi(teq, spi); + psp_tx_assoc_req_set_tx_key_key(teq, &info[sizeof(spi)], key_len); + + tsp = psp_tx_assoc(ys, teq); + psp_tx_assoc_req_free(teq); + if (!tsp) { + perror("ERROR: failed to Tx assoc"); + return -1; + } + psp_tx_assoc_rsp_free(tsp); + + return 0; +} + +static void send_ack(int sock) +{ + send(sock, "ack", 4, MSG_WAITALL); +} + +static void send_err(int sock) +{ + send(sock, "err", 4, MSG_WAITALL); +} + +static void send_str(int sock, int value) +{ + char buf[128]; + int ret; + + ret = snprintf(buf, sizeof(buf), "%d", value); + send(sock, buf, ret + 1, MSG_WAITALL); +} + +static void +run_session(struct ynl_sock *ys, struct opts *opts, + int server_sock, int comm_sock) +{ + enum accept_cfg accept_cfg = ACCEPT_CFG_NONE; + struct pollfd pfds[3]; + size_t data_read = 0; + int data_sock = -1; + + while (true) { + bool race_close = false; + int nfds; + + memset(pfds, 0, sizeof(pfds)); + + pfds[0].fd = server_sock; + pfds[0].events = POLLIN; + + pfds[1].fd = comm_sock; + pfds[1].events = POLLIN; + + nfds = 2; + if (data_sock >= 0) { + pfds[2].fd = data_sock; + pfds[2].events = POLLIN; + nfds++; + } + + dbg(" ...\n"); + if (poll(pfds, nfds, -1) < 0) { + perror("poll"); + break; + } + + /* data sock */ + if (pfds[2].revents & POLLIN) { + char buf[8192]; + ssize_t n; + + n = recv(data_sock, buf, sizeof(buf), 0); + if (n <= 0) { + if (n < 0) + perror("data read"); + close(data_sock); + data_sock = -1; + dbg("data sock closed\n"); + } else { + data_read += n; + dbg("data read %zd\n", data_read); + } + } + + /* comm sock */ + if (pfds[1].revents & POLLIN) { + static char buf[4096]; + static ssize_t off; + bool consumed; + ssize_t n; + + n = recv(comm_sock, &buf[off], sizeof(buf) - off, 0); + if (n <= 0) { + if (n < 0) + perror("comm read"); + return; + } + + off += n; + n = off; + +#define __consume(sz) \ + ({ \ + if (n == (sz)) { \ + off = 0; \ + } else { \ + off -= (sz); \ + memmove(buf, &buf[(sz)], off); \ + } \ + }) + +#define cmd(_name) \ + ({ \ + ssize_t sz = sizeof(_name); \ + bool match = n >= sz && !memcmp(buf, _name, sz); \ + \ + if (match) { \ + dbg("command: " _name "\n"); \ + __consume(sz); \ + } \ + consumed |= match; \ + match; \ + }) + + do { + consumed = false; + + if (cmd("read len")) + send_str(comm_sock, data_read); + + if (cmd("data echo")) { + if (data_sock >= 0) + send(data_sock, "echo", 5, + MSG_WAITALL); + else + fprintf(stderr, "WARN: echo but no data sock\n"); + send_ack(comm_sock); + } + if (cmd("data close")) { + if (data_sock >= 0) { + close(data_sock); + data_sock = -1; + send_ack(comm_sock); + } else { + race_close = true; + } + } + if (cmd("conn psp")) { + if (accept_cfg != ACCEPT_CFG_NONE) + fprintf(stderr, "WARN: old conn config still set!\n"); + accept_cfg = ACCEPT_CFG_PSP; + send_ack(comm_sock); + /* next two bytes are versions */ + if (off >= 2) { + memcpy(&psp_vers, buf, 2); + __consume(2); + } else { + fprintf(stderr, "WARN: short conn psp command!\n"); + } + } + if (cmd("conn clr")) { + if (accept_cfg != ACCEPT_CFG_NONE) + fprintf(stderr, "WARN: old conn config still set!\n"); + accept_cfg = ACCEPT_CFG_CLEAR; + send_ack(comm_sock); + } + if (cmd("exit")) + should_quit = true; +#undef cmd + + if (!consumed) { + fprintf(stderr, "WARN: unknown cmd: [%zd] %s\n", + off, buf); + } + } while (consumed && off); + } + + /* server sock */ + if (pfds[0].revents & POLLIN) { + if (data_sock >= 0) { + fprintf(stderr, "WARN: new data sock but old one still here\n"); + close(data_sock); + data_sock = -1; + } + data_sock = accept(server_sock, NULL, NULL); + if (data_sock < 0) { + perror("accept"); + continue; + } + data_read = 0; + + if (accept_cfg == ACCEPT_CFG_CLEAR) { + dbg("new data sock: clear\n"); + /* nothing to do */ + } else if (accept_cfg == ACCEPT_CFG_PSP) { + dbg("new data sock: psp\n"); + conn_setup_psp(ys, opts, data_sock); + } else { + fprintf(stderr, "WARN: new data sock but no config\n"); + } + accept_cfg = ACCEPT_CFG_NONE; + } + + if (race_close) { + if (data_sock >= 0) { + /* indeed, ordering problem, handle the close */ + close(data_sock); + data_sock = -1; + send_ack(comm_sock); + } else { + fprintf(stderr, "WARN: close but no data sock\n"); + send_err(comm_sock); + } + } + } + dbg("session ending\n"); +} + +static int spawn_server(struct opts *opts) +{ + struct sockaddr_in6 addr; + int fd; + + fd = socket(AF_INET6, SOCK_STREAM, 0); + if (fd < 0) { + perror("can't open socket"); + return -1; + } + + memset(&addr, 0, sizeof(addr)); + + addr.sin6_family = AF_INET6; + addr.sin6_addr = in6addr_any; + addr.sin6_port = htons(opts->port); + + if (bind(fd, (struct sockaddr *)&addr, sizeof(addr))) { + perror("can't bind socket"); + return -1; + } + + if (listen(fd, 5)) { + perror("can't listen"); + return -1; + } + + return fd; +} + +static int run_responder(struct ynl_sock *ys, struct opts *opts) +{ + int server_sock, comm; + + server_sock = spawn_server(opts); + if (server_sock < 0) + return 4; + + while (!should_quit) { + comm = accept(server_sock, NULL, NULL); + if (comm < 0) { + perror("accept failed"); + } else { + run_session(ys, opts, server_sock, comm); + close(comm); + } + } + + return 0; +} + +static void usage(const char *name, const char *miss) +{ + if (miss) + fprintf(stderr, "Missing argument: %s\n", miss); + + fprintf(stderr, "Usage: %s -p port [-v] [-i ifindex]\n", name); + exit(EXIT_FAILURE); +} + +static void parse_cmd_opts(int argc, char **argv, struct opts *opts) +{ + int opt; + + while ((opt = getopt(argc, argv, "vp:i:")) != -1) { + switch (opt) { + case 'v': + opts->verbose = 1; + break; + case 'p': + opts->port = atoi(optarg); + break; + case 'i': + opts->ifindex = atoi(optarg); + break; + default: + usage(argv[0], NULL); + } + } +} + +static int psp_dev_set_ena(struct ynl_sock *ys, __u32 dev_id, __u32 versions) +{ + struct psp_dev_set_req *sreq; + struct psp_dev_set_rsp *srsp; + + fprintf(stderr, "Set PSP enable on device %d to 0x%x\n", + dev_id, versions); + + sreq = psp_dev_set_req_alloc(); + + psp_dev_set_req_set_id(sreq, dev_id); + psp_dev_set_req_set_psp_versions_ena(sreq, versions); + + srsp = psp_dev_set(ys, sreq); + psp_dev_set_req_free(sreq); + if (!srsp) + return 10; + + psp_dev_set_rsp_free(srsp); + return 0; +} + +int main(int argc, char **argv) +{ + struct psp_dev_get_list *dev_list; + __u32 ver_ena, ver_cap; + struct opts opts = {}; + struct ynl_error yerr; + struct ynl_sock *ys; + int devid = -1; + int ret; + + parse_cmd_opts(argc, argv, &opts); + if (!opts.port) + usage(argv[0], "port"); // exits + + ys = ynl_sock_create(&ynl_psp_family, &yerr); + if (!ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return 1; + } + + dev_list = psp_dev_get_dump(ys); + if (ynl_dump_empty(dev_list) && ys->err.code) + goto err_close; + + ynl_dump_foreach(dev_list, d) { + if (opts.ifindex) { + if (d->ifindex != opts.ifindex) + continue; + devid = d->id; + ver_ena = d->psp_versions_ena; + ver_cap = d->psp_versions_cap; + break; + } else if (devid < 0) { + devid = d->id; + ver_ena = d->psp_versions_ena; + ver_cap = d->psp_versions_cap; + } else { + fprintf(stderr, "Multiple PSP devices found\n"); + goto err_close_silent; + } + } + psp_dev_get_list_free(dev_list); + + if (opts.ifindex && devid < 0) + fprintf(stderr, + "WARN: PSP device with ifindex %d requested on cmdline, not found\n", + opts.ifindex); + + if (devid >= 0 && ver_ena != ver_cap) { + ret = psp_dev_set_ena(ys, devid, ver_cap); + if (ret) + goto err_close; + } + + ret = run_responder(ys, &opts); + + if (devid >= 0 && ver_ena != ver_cap && + psp_dev_set_ena(ys, devid, ver_ena)) + fprintf(stderr, "WARN: failed to set the PSP versions back\n"); + + ynl_sock_destroy(ys); + + return ret; + +err_close: + fprintf(stderr, "YNL: %s\n", ys->err.msg); +err_close_silent: + ynl_sock_destroy(ys); + return 2; +} diff --git a/tools/testing/selftests/drivers/net/queues.py b/tools/testing/selftests/drivers/net/queues.py index 8a518905a9f9..236005290a33 100755 --- a/tools/testing/selftests/drivers/net/queues.py +++ b/tools/testing/selftests/drivers/net/queues.py @@ -2,13 +2,15 @@ # SPDX-License-Identifier: GPL-2.0 from lib.py import ksft_disruptive, ksft_exit, ksft_run -from lib.py import ksft_eq, ksft_raises, KsftSkipEx +from lib.py import ksft_eq, ksft_not_in, ksft_raises, KsftSkipEx, KsftFailEx from lib.py import EthtoolFamily, NetdevFamily, NlError from lib.py import NetDrvEnv -from lib.py import cmd, defer, ip +from lib.py import bkg, cmd, defer, ip import errno import glob - +import os +import socket +import struct def sys_get_queues(ifname, qtype='rx') -> int: folders = glob.glob(f'/sys/class/net/{ifname}/queues/{qtype}-*') @@ -22,6 +24,40 @@ def nl_get_queues(cfg, nl, qtype='rx'): return None +def check_xsk(cfg, nl, xdp_queue_id=0) -> None: + # Probe for support + xdp = cmd(f'{cfg.net_lib_dir / "xdp_helper"} - -', fail=False) + if xdp.ret == 255: + raise KsftSkipEx('AF_XDP unsupported') + elif xdp.ret > 0: + raise KsftFailEx('unable to create AF_XDP socket') + + with bkg(f'{cfg.net_lib_dir / "xdp_helper"} {cfg.ifindex} {xdp_queue_id}', + ksft_wait=3): + + rx = tx = False + + queues = nl.queue_get({'ifindex': cfg.ifindex}, dump=True) + if not queues: + raise KsftSkipEx("Netlink reports no queues") + + for q in queues: + if q['id'] == 0: + if q['type'] == 'rx': + rx = True + if q['type'] == 'tx': + tx = True + + ksft_eq(q.get('xsk', None), {}, + comment="xsk attr on queue we configured") + else: + ksft_not_in('xsk', q, + comment="xsk attr on queue we didn't configure") + + ksft_eq(rx, True) + ksft_eq(tx, True) + + def get_queues(cfg, nl) -> None: snl = NetdevFamily(recv_size=4096) @@ -80,7 +116,8 @@ def check_down(cfg, nl) -> None: def main() -> None: with NetDrvEnv(__file__, queue_count=100) as cfg: - ksft_run([get_queues, addremove_queues, check_down], args=(cfg, NetdevFamily())) + ksft_run([get_queues, addremove_queues, check_down, check_xsk], + args=(cfg, NetdevFamily())) ksft_exit() diff --git a/tools/testing/selftests/drivers/net/ring_reconfig.py b/tools/testing/selftests/drivers/net/ring_reconfig.py new file mode 100755 index 000000000000..f9530a8b0856 --- /dev/null +++ b/tools/testing/selftests/drivers/net/ring_reconfig.py @@ -0,0 +1,167 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Test channel and ring size configuration via ethtool (-L / -G). +""" + +from lib.py import ksft_run, ksft_exit, ksft_pr +from lib.py import ksft_eq +from lib.py import NetDrvEpEnv, EthtoolFamily, GenerateTraffic +from lib.py import defer, NlError + + +def channels(cfg) -> None: + """ + Twiddle channel counts in various combinations of parameters. + We're only looking for driver adhering to the requested config + if the config is accepted and crashes. + """ + ehdr = {'header':{'dev-index': cfg.ifindex}} + chans = cfg.eth.channels_get(ehdr) + + all_keys = ["rx", "tx", "combined"] + mixes = [{"combined"}, {"rx", "tx"}, {"rx", "combined"}, {"tx", "combined"}, + {"rx", "tx", "combined"},] + + # Get the set of keys that device actually supports + restore = {} + supported = set() + for key in all_keys: + if key + "-max" in chans: + supported.add(key) + restore |= {key + "-count": chans[key + "-count"]} + + defer(cfg.eth.channels_set, ehdr | restore) + + def test_config(config): + try: + cfg.eth.channels_set(ehdr | config) + get = cfg.eth.channels_get(ehdr) + for k, v in config.items(): + ksft_eq(get.get(k, 0), v) + except NlError as e: + failed.append(mix) + ksft_pr("Can't set", config, e) + else: + ksft_pr("Okay", config) + + failed = [] + for mix in mixes: + if not mix.issubset(supported): + continue + + # Set all the values in the mix to 1, other supported to 0 + config = {} + for key in all_keys: + config[key + "-count"] = 1 if key in mix else 0 + test_config(config) + + for mix in mixes: + if not mix.issubset(supported): + continue + if mix in failed: + continue + + # Set all the values in the mix to max, other supported to 0 + config = {} + for key in all_keys: + config[key + "-count"] = chans[key + '-max'] if key in mix else 0 + test_config(config) + + +def _configure_min_ring_cnt(cfg) -> None: + """ Try to configure a single Rx/Tx ring. """ + ehdr = {'header':{'dev-index': cfg.ifindex}} + chans = cfg.eth.channels_get(ehdr) + + all_keys = ["rx-count", "tx-count", "combined-count"] + restore = {} + config = {} + for key in all_keys: + if key in chans: + restore[key] = chans[key] + config[key] = 0 + + if chans.get('combined-count', 0) > 1: + config['combined-count'] = 1 + elif chans.get('rx-count', 0) > 1 and chans.get('tx-count', 0) > 1: + config['tx-count'] = 1 + config['rx-count'] = 1 + else: + # looks like we're already on 1 channel + return + + cfg.eth.channels_set(ehdr | config) + defer(cfg.eth.channels_set, ehdr | restore) + + +def ringparam(cfg) -> None: + """ + Tweak the ringparam configuration. Try to run some traffic over min + ring size to make sure it actually functions. + """ + ehdr = {'header':{'dev-index': cfg.ifindex}} + rings = cfg.eth.rings_get(ehdr) + + restore = {} + maxes = {} + params = set() + for key in rings.keys(): + if 'max' in key: + param = key[:-4] + maxes[param] = rings[key] + params.add(param) + restore[param] = rings[param] + + defer(cfg.eth.rings_set, ehdr | restore) + + # Speed up the reconfig by configuring just one ring + _configure_min_ring_cnt(cfg) + + # Try to reach min on all settings + for param in params: + val = rings[param] + while True: + try: + cfg.eth.rings_set({'header':{'dev-index': cfg.ifindex}, + param: val // 2}) + if val == 0: + break + val //= 2 + except NlError: + break + + get = cfg.eth.rings_get(ehdr) + ksft_eq(get[param], val) + + ksft_pr(f"Reached min for '{param}' at {val} (max {rings[param]})") + + GenerateTraffic(cfg).wait_pkts_and_stop(10000) + + # Try max across all params, if the driver supports large rings + # this may OOM so we ignore errors + try: + ksft_pr("Applying max settings") + config = {p: maxes[p] for p in params} + cfg.eth.rings_set(ehdr | config) + except NlError as e: + ksft_pr("Can't set max params", config, e) + else: + GenerateTraffic(cfg).wait_pkts_and_stop(10000) + + +def main() -> None: + """ Ksft boiler plate main """ + + with NetDrvEpEnv(__file__) as cfg: + cfg.eth = EthtoolFamily() + + ksft_run([channels, + ringparam], + args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py index efcc1e10575b..b08e4d48b15c 100755 --- a/tools/testing/selftests/drivers/net/stats.py +++ b/tools/testing/selftests/drivers/net/stats.py @@ -1,12 +1,16 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 +""" +Tests related to standard netdevice statistics. +""" + import errno import subprocess import time from lib.py import ksft_run, ksft_exit, ksft_pr from lib.py import ksft_ge, ksft_eq, ksft_is, ksft_in, ksft_lt, ksft_true, ksft_raises -from lib.py import KsftSkipEx, KsftXfailEx +from lib.py import KsftSkipEx, KsftFailEx from lib.py import ksft_disruptive from lib.py import EthtoolFamily, NetdevFamily, RtnlFamily, NlError from lib.py import NetDrvEnv @@ -18,13 +22,16 @@ rtnl = RtnlFamily() def check_pause(cfg) -> None: - global ethnl + """ + Check that drivers which support Pause config also report standard + pause stats. + """ try: ethnl.pause_get({"header": {"dev-index": cfg.ifindex}}) except NlError as e: if e.error == errno.EOPNOTSUPP: - raise KsftXfailEx("pause not supported by the device") + raise KsftSkipEx("pause not supported by the device") from e raise data = ethnl.pause_get({"header": {"dev-index": cfg.ifindex, @@ -33,13 +40,16 @@ def check_pause(cfg) -> None: def check_fec(cfg) -> None: - global ethnl + """ + Check that drivers which support FEC config also report standard + FEC stats. + """ try: ethnl.fec_get({"header": {"dev-index": cfg.ifindex}}) except NlError as e: if e.error == errno.EOPNOTSUPP: - raise KsftXfailEx("FEC not supported by the device") + raise KsftSkipEx("FEC not supported by the device") from e raise data = ethnl.fec_get({"header": {"dev-index": cfg.ifindex, @@ -47,16 +57,48 @@ def check_fec(cfg) -> None: ksft_true(data['stats'], "driver does not report stats") +def check_fec_hist(cfg) -> None: + """ + Check that drivers which support FEC histogram statistics report + reasonable values. + """ + + try: + data = ethnl.fec_get({"header": {"dev-index": cfg.ifindex, + "flags": {'stats'}}}) + except NlError as e: + if e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("FEC not supported by the device") from e + raise + if 'stats' not in data: + raise KsftSkipEx("FEC stats not supported by the device") + if 'hist' not in data['stats']: + raise KsftSkipEx("FEC histogram not supported by the device") + + hist = data['stats']['hist'] + for fec_bin in hist: + for key in ['bin-low', 'bin-high', 'bin-val']: + ksft_in(key, fec_bin, + "Drivers should always report FEC bin range and value") + ksft_ge(fec_bin['bin-high'], fec_bin['bin-low'], + "FEC bin range should be valid") + if 'bin-val-per-lane' in fec_bin: + ksft_eq(sum(fec_bin['bin-val-per-lane']), fec_bin['bin-val'], + "FEC bin value should be equal to sum of per-plane values") + + def pkt_byte_sum(cfg) -> None: - global netfam, rtnl + """ + Check that qstat and interface stats match in value. + """ def get_qstat(test): - global netfam stats = netfam.qstats_get({}, dump=True) if stats: for qs in stats: if qs["ifindex"]== test.ifindex: return qs + return None qstat = get_qstat(cfg) if qstat is None: @@ -77,15 +119,14 @@ def pkt_byte_sum(cfg) -> None: for _ in range(10): rtstat = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64'] if stat_cmp(rtstat, qstat) < 0: - raise Exception("RTNL stats are lower, fetched later") + raise KsftFailEx("RTNL stats are lower, fetched later") qstat = get_qstat(cfg) if stat_cmp(rtstat, qstat) > 0: - raise Exception("Qstats are lower, fetched later") + raise KsftFailEx("Qstats are lower, fetched later") def qstat_by_ifindex(cfg) -> None: - global netfam - global rtnl + """ Qstats Netlink API tests - querying by ifindex. """ # Construct a map ifindex -> [dump, by-index, dump] ifindexes = {} @@ -93,7 +134,7 @@ def qstat_by_ifindex(cfg) -> None: for entry in stats: ifindexes[entry['ifindex']] = [entry, None, None] - for ifindex in ifindexes.keys(): + for ifindex in ifindexes: entry = netfam.qstats_get({"ifindex": ifindex}, dump=True) ksft_eq(len(entry), 1) ifindexes[entry[0]['ifindex']][1] = entry[0] @@ -145,7 +186,7 @@ def qstat_by_ifindex(cfg) -> None: # Try to get stats for lowest unused ifindex but not 0 devs = rtnl.getlink({}, dump=True) - all_ifindexes = set([dev["ifi-index"] for dev in devs]) + all_ifindexes = set(dev["ifi-index"] for dev in devs) lowest = 2 while lowest in all_ifindexes: lowest += 1 @@ -158,18 +199,20 @@ def qstat_by_ifindex(cfg) -> None: @ksft_disruptive def check_down(cfg) -> None: + """ Test statistics (interface and qstat) are not impacted by ifdown """ + try: qstat = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] except NlError as e: if e.error == errno.EOPNOTSUPP: - raise KsftSkipEx("qstats not supported by the device") + raise KsftSkipEx("qstats not supported by the device") from e raise ip(f"link set dev {cfg.dev['ifname']} down") defer(ip, f"link set dev {cfg.dev['ifname']} up") qstat2 = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] - for k, v in qstat.items(): + for k in qstat: ksft_ge(qstat2[k], qstat[k], comment=f"{k} went backwards on device down") # exercise per-queue API to make sure that "device down" state @@ -220,14 +263,15 @@ def procfs_downup_hammer(cfg) -> None: Reading stats via procfs only holds the RCU lock, drivers often try to sleep when reading the stats, or don't protect against races. """ - # Max out the queues, we'll flip between max and 1 + # Set a large number of queues, + # we'll flip between min(max_queues, 64) and 1 channels = ethnl.channels_get({'header': {'dev-index': cfg.ifindex}}) if channels['combined-count'] == 0: rx_type = 'rx' else: rx_type = 'combined' cur_queue_cnt = channels[f'{rx_type}-count'] - max_queue_cnt = channels[f'{rx_type}-max'] + max_queue_cnt = min(channels[f'{rx_type}-max'], 64) cmd(f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}") defer(cmd, f"ethtool -L {cfg.ifname} {rx_type} {cur_queue_cnt}") @@ -263,9 +307,12 @@ def procfs_downup_hammer(cfg) -> None: def main() -> None: + """ Ksft boiler plate main """ + with NetDrvEnv(__file__, queue_count=100) as cfg: - ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex, - check_down, procfs_hammer, procfs_downup_hammer], + ksft_run([check_pause, check_fec, check_fec_hist, pkt_byte_sum, + qstat_by_ifindex, check_down, procfs_hammer, + procfs_downup_hammer], args=(cfg, )) ksft_exit() diff --git a/tools/testing/selftests/drivers/net/team/Makefile b/tools/testing/selftests/drivers/net/team/Makefile index 2d5a76d99181..7c58cf82121e 100644 --- a/tools/testing/selftests/drivers/net/team/Makefile +++ b/tools/testing/selftests/drivers/net/team/Makefile @@ -1,11 +1,24 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for net selftests -TEST_PROGS := dev_addr_lists.sh +TEST_PROGS := \ + decoupled_enablement.sh \ + dev_addr_lists.sh \ + non_ether_header_ops.sh \ + options.sh \ + propagation.sh \ + refleak.sh \ + teamd_activebackup.sh \ + transmit_failover.sh \ +# end of TEST_PROGS TEST_INCLUDES := \ + team_lib.sh \ ../bonding/lag_lib.sh \ ../../../net/forwarding/lib.sh \ - ../../../net/lib.sh + ../../../net/in_netns.sh \ + ../../../net/lib.sh \ + ../../../net/lib/sh/defer.sh \ +# end of TEST_INCLUDES include ../../../lib.mk diff --git a/tools/testing/selftests/drivers/net/team/config b/tools/testing/selftests/drivers/net/team/config index b5e3a3aad4bf..8f04ae419c53 100644 --- a/tools/testing/selftests/drivers/net/team/config +++ b/tools/testing/selftests/drivers/net/team/config @@ -1,5 +1,13 @@ +CONFIG_BONDING=y CONFIG_DUMMY=y CONFIG_IPV6=y CONFIG_MACVLAN=y +CONFIG_NETDEVSIM=m +CONFIG_NET_IPGRE=y CONFIG_NET_TEAM=y +CONFIG_NET_TEAM_MODE_ACTIVEBACKUP=y +CONFIG_NET_TEAM_MODE_BROADCAST=y CONFIG_NET_TEAM_MODE_LOADBALANCE=y +CONFIG_NET_TEAM_MODE_RANDOM=y +CONFIG_NET_TEAM_MODE_ROUNDROBIN=y +CONFIG_VETH=y diff --git a/tools/testing/selftests/drivers/net/team/decoupled_enablement.sh b/tools/testing/selftests/drivers/net/team/decoupled_enablement.sh new file mode 100755 index 000000000000..0d3d9c98e9f5 --- /dev/null +++ b/tools/testing/selftests/drivers/net/team/decoupled_enablement.sh @@ -0,0 +1,249 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# These tests verify the decoupled RX and TX enablement of team driver member +# interfaces. +# +# Topology +# +# +---------------------+ NS1 +# | test_team1 | +# | | | +# | eth0 | +# | | | +# | | | +# +---------------------+ +# | +# +---------------------+ NS2 +# | | | +# | | | +# | eth0 | +# | | | +# | test_team2 | +# +---------------------+ + +export ALL_TESTS=" + team_test_tx_enablement + team_test_rx_enablement +" + +test_dir="$(dirname "$0")" +# shellcheck disable=SC1091 +source "${test_dir}/../../../net/lib.sh" +# shellcheck disable=SC1091 +source "${test_dir}/team_lib.sh" + +NS1="" +NS2="" +export NODAD="nodad" +PREFIX_LENGTH="64" +NS1_IP="fd00::1" +NS2_IP="fd00::2" +NS1_IP4="192.168.0.1" +NS2_IP4="192.168.0.2" +MEMBERS=("eth0") +PING_COUNT=5 +PING_TIMEOUT_S=1 +PING_INTERVAL=0.1 + +while getopts "4" opt; do + case $opt in + 4) + echo "IPv4 mode selected." + export NODAD= + PREFIX_LENGTH="24" + NS1_IP="${NS1_IP4}" + NS2_IP="${NS2_IP4}" + ;; + \?) + echo "Invalid option: -$OPTARG" >&2 + exit 1 + ;; + esac +done + +# This has to be sourced after opts are gathered... +export REQUIRE_MZ=no +export NUM_NETIFS=0 +# shellcheck disable=SC1091 +source "${test_dir}/../../../net/forwarding/lib.sh" + +# Create the network namespaces, veth pair, and team devices in the specified +# mode. +# Globals: +# RET - Used by test infra, set by `check_err` functions. +# Arguments: +# mode - The team driver mode to use for the team devices. +environment_create() +{ + trap cleanup_all_ns EXIT + setup_ns ns1 ns2 + NS1="${NS_LIST[0]}" + NS2="${NS_LIST[1]}" + + # Create the interfaces. + ip -n "${NS1}" link add eth0 type veth peer name eth0 netns "${NS2}" + ip -n "${NS1}" link add test_team1 type team + ip -n "${NS2}" link add test_team2 type team + + # Set up the receiving network namespace's team interface. + setup_team "${NS2}" test_team2 roundrobin "${NS2_IP}" \ + "${PREFIX_LENGTH}" "${MEMBERS[@]}" +} + +# Set a particular option value for team or team port. +# Arguments: +# namespace - The namespace name that has the team. +# option_name - The option name to set. +# option_value - The value to set the option to. +# team_name - The name of team to set the option for. +# member_name - The (optional) optional name of the member port. +set_option_value() +{ + local namespace="$1" + local option_name="$2" + local option_value="$3" + local team_name="$4" + local member_name="$5" + local port_flag="--port=${member_name}" + + ip netns exec "${namespace}" teamnl "${team_name}" setoption \ + "${option_name}" "${option_value}" "${port_flag}" + return $? +} + +# Send some pings and return the ping command return value. +try_ping() +{ + ip netns exec "${NS1}" ping -i "${PING_INTERVAL}" -c "${PING_COUNT}" \ + "${NS2_IP}" -W "${PING_TIMEOUT_S}" +} + +# Checks tcpdump output from net/forwarding lib, and checks if there are any +# ICMP(4 or 6) packets. +# Arguments: +# interface - The interface name to search for. +# ip_address - The destination IP address (4 or 6) to search for. +did_interface_receive_icmp() +{ + local interface="$1" + local ip_address="$2" + local packet_count + + packet_count=$(tcpdump_show "$interface" | grep -c \ + "> ${ip_address}: ICMP") + echo "Packet count for ${interface} was ${packet_count}" + + if [[ "$packet_count" -gt 0 ]]; then + true + else + false + fi +} + +# Test JUST tx enablement with a given mode. +# Globals: +# RET - Used by test infra, set by `check_err` functions. +# Arguments: +# mode - The mode to set the team interfaces to. +team_test_mode_tx_enablement() +{ + local mode="$1" + export RET=0 + + # Set up the sender team with the correct mode. + setup_team "${NS1}" test_team1 "${mode}" "${NS1_IP}" \ + "${PREFIX_LENGTH}" "${MEMBERS[@]}" + check_err $? "Failed to set up sender team" + + ### Scenario 1: Member interface initially enabled. + # Expect ping to pass + try_ping + check_err $? "Ping failed when TX enabled" + + ### Scenario 2: One tx-side interface disabled. + # Expect ping to fail. + set_option_value "${NS1}" tx_enabled false test_team1 eth0 + check_err $? "Failed to disable TX" + tcpdump_start eth0 "${NS2}" + try_ping + check_fail $? "Ping succeeded when TX disabled" + tcpdump_stop eth0 + # Expect no packets to be transmitted, since TX is disabled. + did_interface_receive_icmp eth0 "${NS2_IP}" + check_fail $? "eth0 IS transmitting when TX disabled" + tcpdump_cleanup eth0 + + ### Scenario 3: The interface has tx re-enabled. + # Expect ping to pass. + set_option_value "${NS1}" tx_enabled true test_team1 eth0 + check_err $? "Failed to reenable TX" + try_ping + check_err $? "Ping failed when TX reenabled" + + log_test "TX failover of '${mode}' test" +} + +# Test JUST rx enablement with a given mode. +# Globals: +# RET - Used by test infra, set by `check_err` functions. +# Arguments: +# mode - The mode to set the team interfaces to. +team_test_mode_rx_enablement() +{ + local mode="$1" + export RET=0 + + # Set up the sender team with the correct mode. + setup_team "${NS1}" test_team1 "${mode}" "${NS1_IP}" \ + "${PREFIX_LENGTH}" "${MEMBERS[@]}" + check_err $? "Failed to set up sender team" + + ### Scenario 1: Member interface initially enabled. + # Expect ping to pass + try_ping + check_err $? "Ping failed when RX enabled" + + ### Scenario 2: One rx-side interface disabled. + # Expect ping to fail. + set_option_value "${NS1}" rx_enabled false test_team1 eth0 + check_err $? "Failed to disable RX" + tcpdump_start eth0 "${NS2}" + try_ping + check_fail $? "Ping succeeded when RX disabled" + tcpdump_stop eth0 + # Expect packets to be transmitted, since only RX is disabled. + did_interface_receive_icmp eth0 "${NS2_IP}" + check_err $? "eth0 not transmitting when RX disabled" + tcpdump_cleanup eth0 + + ### Scenario 3: The interface has rx re-enabled. + # Expect ping to pass. + set_option_value "${NS1}" rx_enabled true test_team1 eth0 + check_err $? "Failed to reenable RX" + try_ping + check_err $? "Ping failed when RX reenabled" + + log_test "RX failover of '${mode}' test" +} + +team_test_tx_enablement() +{ + team_test_mode_tx_enablement broadcast + team_test_mode_tx_enablement roundrobin + team_test_mode_tx_enablement random +} + +team_test_rx_enablement() +{ + team_test_mode_rx_enablement broadcast + team_test_mode_rx_enablement roundrobin + team_test_mode_rx_enablement random +} + +require_command teamnl +require_command tcpdump +require_command ping +environment_create +tests_run +exit "${EXIT_STATUS}" diff --git a/tools/testing/selftests/drivers/net/team/non_ether_header_ops.sh b/tools/testing/selftests/drivers/net/team/non_ether_header_ops.sh new file mode 100755 index 000000000000..948a43576bdc --- /dev/null +++ b/tools/testing/selftests/drivers/net/team/non_ether_header_ops.sh @@ -0,0 +1,41 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# shellcheck disable=SC2154 +# +# Reproduce the non-Ethernet header_ops confusion scenario with: +# g0 (gre) -> b0 (bond) -> t0 (team) +# +# Before the fix, direct header_ops inheritance in this stack could call +# callbacks with the wrong net_device context and crash. + +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/lib.sh + +trap cleanup_all_ns EXIT + +setup_ns ns1 + +ip -n "$ns1" link add d0 type dummy +ip -n "$ns1" addr add 10.10.10.1/24 dev d0 +ip -n "$ns1" link set d0 up + +ip -n "$ns1" link add g0 type gre local 10.10.10.1 +ip -n "$ns1" link add b0 type bond mode active-backup +ip -n "$ns1" link add t0 type team + +ip -n "$ns1" link set g0 master b0 +ip -n "$ns1" link set b0 master t0 + +ip -n "$ns1" link set g0 up +ip -n "$ns1" link set b0 up +ip -n "$ns1" link set t0 up + +# IPv6 address assignment triggers MLD join reports that call +# dev_hard_header() on t0, exercising the inherited header_ops path. +ip -n "$ns1" -6 addr add 2001:db8:1::1/64 dev t0 nodad +for i in $(seq 1 20); do + ip netns exec "$ns1" ping -6 -I t0 ff02::1 -c1 -W1 &>/dev/null || true +done + +echo "PASS: non-Ethernet header_ops stacking did not crash" +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/drivers/net/team/options.sh b/tools/testing/selftests/drivers/net/team/options.sh new file mode 100755 index 000000000000..66c0cb896dad --- /dev/null +++ b/tools/testing/selftests/drivers/net/team/options.sh @@ -0,0 +1,285 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# These tests verify basic set and get functionality of the team +# driver options over netlink. + +# Run in private netns. +test_dir="$(dirname "$0")" +if [[ $# -eq 0 ]]; then + "${test_dir}"/../../../net/in_netns.sh "$0" __subprocess + exit $? +fi + +export ALL_TESTS=" + team_test_options + team_test_enabled_implicit_changes + team_test_rx_enabled_implicit_changes + team_test_tx_enabled_implicit_changes +" + +# shellcheck disable=SC1091 +source "${test_dir}/../../../net/lib.sh" + +TEAM_PORT="team0" +MEMBER_PORT="dummy0" + +setup() +{ + ip link add name "${MEMBER_PORT}" type dummy + ip link add name "${TEAM_PORT}" type team +} + +get_and_check_value() +{ + local option_name="$1" + local expected_value="$2" + local port_flag="$3" + + local value_from_get + + if ! value_from_get=$(teamnl "${TEAM_PORT}" getoption "${option_name}" \ + "${port_flag}"); then + echo "Could not get option '${option_name}'" >&2 + return 1 + fi + + if [[ "${value_from_get}" != "${expected_value}" ]]; then + echo "Incorrect value for option '${option_name}'" >&2 + echo "get (${value_from_get}) != set (${expected_value})" >&2 + return 1 + fi +} + +set_and_check_get() +{ + local option_name="$1" + local option_value="$2" + local port_flag="$3" + + local value_from_get + + if ! teamnl "${TEAM_PORT}" setoption "${option_name}" \ + "${option_value}" "${port_flag}"; then + echo "'setoption ${option_name} ${option_value}' failed" >&2 + return 1 + fi + + get_and_check_value "${option_name}" "${option_value}" "${port_flag}" + return $? +} + +# Get a "port flag" to pass to the `teamnl` command. +# E.g. $1="dummy0" -> "port=dummy0", +# $1="" -> "" +get_port_flag() +{ + local port_name="$1" + + if [[ -n "${port_name}" ]]; then + echo "--port=${port_name}" + fi +} + +attach_port_if_specified() +{ + local port_name="$1" + + if [[ -n "${port_name}" ]]; then + ip link set dev "${port_name}" master "${TEAM_PORT}" + return $? + fi +} + +detach_port_if_specified() +{ + local port_name="$1" + + if [[ -n "${port_name}" ]]; then + ip link set dev "${port_name}" nomaster + return $? + fi +} + +# Test that an option's get value matches its set value. +# Globals: +# RET - Used by testing infra like `check_err`. +# EXIT_STATUS - Used by `log_test` for whole script exit value. +# Arguments: +# option_name - The name of the option. +# value_1 - The first value to try setting. +# value_2 - The second value to try setting. +# port_name - The (optional) name of the attached port. +team_test_option() +{ + local option_name="$1" + local value_1="$2" + local value_2="$3" + local possible_values="$2 $3 $2" + local port_name="$4" + local port_flag + + RET=0 + + echo "Setting '${option_name}' to '${value_1}' and '${value_2}'" + + attach_port_if_specified "${port_name}" + check_err $? "Couldn't attach ${port_name} to master" + port_flag=$(get_port_flag "${port_name}") + + # Set and get both possible values. + for value in ${possible_values}; do + set_and_check_get "${option_name}" "${value}" "${port_flag}" + check_err $? "Failed to set '${option_name}' to '${value}'" + done + + detach_port_if_specified "${port_name}" + check_err $? "Couldn't detach ${port_name} from its master" + + log_test "Set + Get '${option_name}' test" +} + +# Test that getting a non-existant option fails. +# Globals: +# RET - Used by testing infra like `check_err`. +# EXIT_STATUS - Used by `log_test` for whole script exit value. +# Arguments: +# option_name - The name of the option. +# port_name - The (optional) name of the attached port. +team_test_get_option_fails() +{ + local option_name="$1" + local port_name="$2" + local port_flag + + RET=0 + + attach_port_if_specified "${port_name}" + check_err $? "Couldn't attach ${port_name} to master" + port_flag=$(get_port_flag "${port_name}") + + # Just confirm that getting the value fails. + teamnl "${TEAM_PORT}" getoption "${option_name}" "${port_flag}" + check_fail $? "Shouldn't be able to get option '${option_name}'" + + detach_port_if_specified "${port_name}" + + log_test "Get '${option_name}' fails" +} + +team_test_options() +{ + # Wrong option name behavior. + team_test_get_option_fails fake_option1 + team_test_get_option_fails fake_option2 "${MEMBER_PORT}" + + # Correct set and get behavior. + team_test_option mode activebackup loadbalance + team_test_option notify_peers_count 0 5 + team_test_option notify_peers_interval 0 5 + team_test_option mcast_rejoin_count 0 5 + team_test_option mcast_rejoin_interval 0 5 + team_test_option enabled true false "${MEMBER_PORT}" + team_test_option rx_enabled true false "${MEMBER_PORT}" + team_test_option tx_enabled true false "${MEMBER_PORT}" + team_test_option user_linkup true false "${MEMBER_PORT}" + team_test_option user_linkup_enabled true false "${MEMBER_PORT}" + team_test_option priority 10 20 "${MEMBER_PORT}" + team_test_option queue_id 0 1 "${MEMBER_PORT}" +} + +team_test_enabled_implicit_changes() +{ + export RET=0 + + attach_port_if_specified "${MEMBER_PORT}" + check_err $? "Couldn't attach ${MEMBER_PORT} to master" + + # Set enabled to true. + set_and_check_get enabled true "--port=${MEMBER_PORT}" + check_err $? "Failed to set 'enabled' to true" + + # Show that both rx enabled and tx enabled are true. + get_and_check_value rx_enabled true "--port=${MEMBER_PORT}" + check_err $? "'Rx_enabled' wasn't implicitly set to true" + get_and_check_value tx_enabled true "--port=${MEMBER_PORT}" + check_err $? "'Tx_enabled' wasn't implicitly set to true" + + # Set enabled to false. + set_and_check_get enabled false "--port=${MEMBER_PORT}" + check_err $? "Failed to set 'enabled' to false" + + # Show that both rx enabled and tx enabled are false. + get_and_check_value rx_enabled false "--port=${MEMBER_PORT}" + check_err $? "'Rx_enabled' wasn't implicitly set to false" + get_and_check_value tx_enabled false "--port=${MEMBER_PORT}" + check_err $? "'Tx_enabled' wasn't implicitly set to false" + + log_test "'Enabled' implicit changes" +} + +team_test_rx_enabled_implicit_changes() +{ + export RET=0 + + attach_port_if_specified "${MEMBER_PORT}" + check_err $? "Couldn't attach ${MEMBER_PORT} to master" + + # Set enabled to true. + set_and_check_get enabled true "--port=${MEMBER_PORT}" + check_err $? "Failed to set 'enabled' to true" + + # Set rx_enabled to false. + set_and_check_get rx_enabled false "--port=${MEMBER_PORT}" + check_err $? "Failed to set 'rx_enabled' to false" + + # Show that enabled is false. + get_and_check_value enabled false "--port=${MEMBER_PORT}" + check_err $? "'enabled' wasn't implicitly set to false" + + # Set rx_enabled to true. + set_and_check_get rx_enabled true "--port=${MEMBER_PORT}" + check_err $? "Failed to set 'rx_enabled' to true" + + # Show that enabled is true. + get_and_check_value enabled true "--port=${MEMBER_PORT}" + check_err $? "'enabled' wasn't implicitly set to true" + + log_test "'Rx_enabled' implicit changes" +} + +team_test_tx_enabled_implicit_changes() +{ + export RET=0 + + attach_port_if_specified "${MEMBER_PORT}" + check_err $? "Couldn't attach ${MEMBER_PORT} to master" + + # Set enabled to true. + set_and_check_get enabled true "--port=${MEMBER_PORT}" + check_err $? "Failed to set 'enabled' to true" + + # Set tx_enabled to false. + set_and_check_get tx_enabled false "--port=${MEMBER_PORT}" + check_err $? "Failed to set 'tx_enabled' to false" + + # Show that enabled is false. + get_and_check_value enabled false "--port=${MEMBER_PORT}" + check_err $? "'enabled' wasn't implicitly set to false" + + # Set tx_enabled to true. + set_and_check_get tx_enabled true "--port=${MEMBER_PORT}" + check_err $? "Failed to set 'tx_enabled' to true" + + # Show that enabled is true. + get_and_check_value enabled true "--port=${MEMBER_PORT}" + check_err $? "'enabled' wasn't implicitly set to true" + + log_test "'Tx_enabled' implicit changes" +} + + +require_command teamnl +setup +tests_run +exit "${EXIT_STATUS}" diff --git a/tools/testing/selftests/drivers/net/team/propagation.sh b/tools/testing/selftests/drivers/net/team/propagation.sh new file mode 100755 index 000000000000..4bea75b79878 --- /dev/null +++ b/tools/testing/selftests/drivers/net/team/propagation.sh @@ -0,0 +1,80 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +set -e + +NSIM_LRO_ID=$((256 + RANDOM % 256)) +NSIM_LRO_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_LRO_ID + +NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device +NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device + +cleanup() +{ + set +e + ip link del dummyteam &>/dev/null + ip link del team0 &>/dev/null + echo $NSIM_LRO_ID > $NSIM_DEV_SYS_DEL + modprobe -r netdevsim +} + +# Trigger LRO propagation to the lower. +# https://lore.kernel.org/netdev/aBvOpkIoxcr9PfDg@mini-arch/ +team_lro() +{ + # using netdevsim because it supports NETIF_F_LRO + NSIM_LRO_NAME=$(find $NSIM_LRO_SYS/net -maxdepth 1 -type d ! \ + -path $NSIM_LRO_SYS/net -exec basename {} \;) + + ip link add name team0 type team + ip link set $NSIM_LRO_NAME down + ip link set dev $NSIM_LRO_NAME master team0 + ip link set team0 up + ethtool -K team0 large-receive-offload off + + ip link del team0 +} + +# Trigger promisc propagation to the lower during IFLA_MASTER. +# https://lore.kernel.org/netdev/20250506032328.3003050-1-sdf@fomichev.me/ +team_promisc() +{ + ip link add name dummyteam type dummy + ip link add name team0 type team + ip link set dummyteam down + ip link set team0 promisc on + ip link set dev dummyteam master team0 + ip link set team0 up + + ip link del team0 + ip link del dummyteam +} + +# Trigger promisc propagation to the lower via netif_change_flags (aka +# ndo_change_rx_flags). +# https://lore.kernel.org/netdev/20250514220319.3505158-1-stfomichev@gmail.com/ +team_change_flags() +{ + ip link add name dummyteam type dummy + ip link add name team0 type team + ip link set dummyteam down + ip link set dev dummyteam master team0 + ip link set team0 up + ip link set team0 promisc on + + # Make sure we can add more L2 addresses without any issues. + ip link add link team0 address 00:00:00:00:00:01 team0.1 type macvlan + ip link set team0.1 up + + ip link del team0.1 + ip link del team0 + ip link del dummyteam +} + +trap cleanup EXIT +modprobe netdevsim || : +echo $NSIM_LRO_ID > $NSIM_DEV_SYS_NEW +udevadm settle +team_lro +team_promisc +team_change_flags diff --git a/tools/testing/selftests/drivers/net/team/refleak.sh b/tools/testing/selftests/drivers/net/team/refleak.sh new file mode 100755 index 000000000000..ef08213ab964 --- /dev/null +++ b/tools/testing/selftests/drivers/net/team/refleak.sh @@ -0,0 +1,17 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# shellcheck disable=SC2154 + +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/lib.sh + +trap cleanup_all_ns EXIT + +# Test that there is no reference count leak and that dummy1 can be deleted. +# https://lore.kernel.org/netdev/4d69abe1-ca8d-4f0b-bcf8-13899b211e57@I-love.SAKURA.ne.jp/ +setup_ns ns1 ns2 +ip -n "$ns1" link add name team1 type team +ip -n "$ns1" link add name dummy1 mtu 1499 type dummy +ip -n "$ns1" link set dev dummy1 master team1 +ip -n "$ns1" link set dev dummy1 netns "$ns2" +ip -n "$ns2" link del dev dummy1 diff --git a/tools/testing/selftests/drivers/net/team/settings b/tools/testing/selftests/drivers/net/team/settings new file mode 100644 index 000000000000..694d70710ff0 --- /dev/null +++ b/tools/testing/selftests/drivers/net/team/settings @@ -0,0 +1 @@ +timeout=300 diff --git a/tools/testing/selftests/drivers/net/team/team_lib.sh b/tools/testing/selftests/drivers/net/team/team_lib.sh new file mode 100644 index 000000000000..02ef0ee02d6a --- /dev/null +++ b/tools/testing/selftests/drivers/net/team/team_lib.sh @@ -0,0 +1,174 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +test_dir="$(dirname "$0")" +export REQUIRE_MZ=no +export NUM_NETIFS=0 +# shellcheck disable=SC1091 +source "${test_dir}/../../../net/forwarding/lib.sh" + +TCP_PORT="43434" + +# Create a team interface inside of a given network namespace with a given +# mode, members, and IP address. +# Arguments: +# namespace - Network namespace to put the team interface into. +# team - The name of the team interface to setup. +# mode - The team mode of the interface. +# ip_address - The IP address to assign to the team interface. +# prefix_length - The prefix length for the IP address subnet. +# $@ - members - The member interfaces of the aggregation. +setup_team() +{ + local namespace=$1 + local team=$2 + local mode=$3 + local ip_address=$4 + local prefix_length=$5 + shift 5 + local members=("$@") + + # Prerequisite: team must have no members + for member in "${members[@]}"; do + ip -n "${namespace}" link set "${member}" nomaster + done + + # Prerequisite: team must have no address in order to set it + # shellcheck disable=SC2086 + ip -n "${namespace}" addr del "${ip_address}/${prefix_length}" \ + ${NODAD} dev "${team}" + + echo "Setting team in ${namespace} to mode ${mode}" + + if ! ip -n "${namespace}" link set "${team}" down; then + echo "Failed to bring team device down" + return 1 + fi + if ! ip netns exec "${namespace}" teamnl "${team}" setoption mode \ + "${mode}"; then + echo "Failed to set ${team} mode to '${mode}'" + return 1 + fi + + # Aggregate the members into teams. + for member in "${members[@]}"; do + ip -n "${namespace}" link set "${member}" master "${team}" + done + + # Bring team devices up and give them addresses. + if ! ip -n "${namespace}" link set "${team}" up; then + echo "Failed to set ${team} up" + return 1 + fi + + # shellcheck disable=SC2086 + if ! ip -n "${namespace}" addr add "${ip_address}/${prefix_length}" \ + ${NODAD} dev "${team}"; then + echo "Failed to give ${team} IP address in ${namespace}" + return 1 + fi +} + +# This is global used to keep track of the sender's iperf3 process, so that it +# can be terminated. +declare sender_pid + +# Start sending and receiving TCP traffic with iperf3. +# Globals: +# sender_pid - The process ID of the iperf3 sender process. Used to kill it +# later. +start_listening_and_sending() +{ + ip netns exec "${NS2}" iperf3 -s -p "${TCP_PORT}" --logfile /dev/null & + # Wait for server to become reachable before starting client. + slowwait 5 ip netns exec "${NS1}" iperf3 -c "${NS2_IP}" -p \ + "${TCP_PORT}" -t 1 --logfile /dev/null + ip netns exec "${NS1}" iperf3 -c "${NS2_IP}" -p "${TCP_PORT}" -b 1M -l \ + 1K -t 0 --logfile /dev/null & + sender_pid=$! +} + +# Stop sending TCP traffic with iperf3. +# Globals: +# sender_pid - The process ID of the iperf3 sender process. +stop_sending_and_listening() +{ + kill "${sender_pid}" && wait "${sender_pid}" 2>/dev/null || true +} + +# Monitor for TCP traffic with Tcpdump, save results to temp files. +# Arguments: +# namespace - The network namespace to run tcpdump inside of. +# $@ - interfaces - The interfaces to listen to. +save_tcpdump_outputs() +{ + local namespace=$1 + shift 1 + local interfaces=("$@") + + for interface in "${interfaces[@]}"; do + tcpdump_start "${interface}" "${namespace}" + done + + sleep 1 + + for interface in "${interfaces[@]}"; do + tcpdump_stop_nosleep "${interface}" + done +} + +clear_tcpdump_outputs() +{ + local interfaces=("$@") + + for interface in "${interfaces[@]}"; do + tcpdump_cleanup "${interface}" + done +} + +# Read Tcpdump output, determine packet counts. +# Arguments: +# interface - The name of the interface to count packets for. +# ip_address - The destination IP address. +did_interface_receive() +{ + local interface="$1" + local ip_address="$2" + local packet_count + + packet_count=$(tcpdump_show "$interface" | grep -c \ + "> ${ip_address}.${TCP_PORT}") + echo "Packet count for ${interface} was ${packet_count}" + + if [[ "${packet_count}" -gt 0 ]]; then + true + else + false + fi +} + +# Return true if the given interface in the given namespace does NOT receive +# traffic over a 1 second period. +# Arguments: +# interface - The name of the interface. +# ip_address - The destination IP address. +# namespace - The name of the namespace that the interface is in. +check_no_traffic() +{ + local interface="$1" + local ip_address="$2" + local namespace="$3" + local rc + + save_tcpdump_outputs "${namespace}" "${interface}" + did_interface_receive "${interface}" "${ip_address}" + rc=$? + + clear_tcpdump_outputs "${interface}" + + if [[ "${rc}" -eq 0 ]]; then + return 1 + else + return 0 + fi +} diff --git a/tools/testing/selftests/drivers/net/team/teamd_activebackup.sh b/tools/testing/selftests/drivers/net/team/teamd_activebackup.sh new file mode 100755 index 000000000000..2b26a697e179 --- /dev/null +++ b/tools/testing/selftests/drivers/net/team/teamd_activebackup.sh @@ -0,0 +1,246 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# These tests verify that teamd is able to enable and disable ports via the +# active backup runner. +# +# Topology: +# +# +-------------------------+ NS1 +# | test_team1 | +# | + | +# | eth0 | eth1 | +# | +---+---+ | +# | | | | +# +-------------------------+ +# | | +# +-------------------------+ NS2 +# | | | | +# | +-------+ | +# | eth0 | eth1 | +# | + | +# | test_team2 | +# +-------------------------+ + +export ALL_TESTS="teamd_test_active_backup" + +test_dir="$(dirname "$0")" +# shellcheck disable=SC1091 +source "${test_dir}/../../../net/lib.sh" +# shellcheck disable=SC1091 +source "${test_dir}/team_lib.sh" + +NS1="" +NS2="" +export NODAD="nodad" +PREFIX_LENGTH="64" +NS1_IP="fd00::1" +NS2_IP="fd00::2" +NS1_IP4="192.168.0.1" +NS2_IP4="192.168.0.2" +NS1_TEAMD_CONF="" +NS2_TEAMD_CONF="" +NS1_TEAMD_PID="" +NS2_TEAMD_PID="" + +while getopts "4" opt; do + case $opt in + 4) + echo "IPv4 mode selected." + export NODAD= + PREFIX_LENGTH="24" + NS1_IP="${NS1_IP4}" + NS2_IP="${NS2_IP4}" + ;; + \?) + echo "Invalid option: -${OPTARG}" >&2 + exit 1 + ;; + esac +done + +teamd_config_create() +{ + local runner=$1 + local dev=$2 + local conf + + conf=$(mktemp) + + cat > "${conf}" <<-EOF + { + "device": "${dev}", + "runner": {"name": "${runner}"}, + "ports": { + "eth0": {}, + "eth1": {} + } + } + EOF + echo "${conf}" +} + +# Create the network namespaces, veth pair, and team devices in the specified +# runner. +# Globals: +# RET - Used by test infra, set by `check_err` functions. +# Arguments: +# runner - The Teamd runner to use for the Team devices. +environment_create() +{ + local runner=$1 + + echo "Setting up two-link aggregation for runner ${runner}" + echo "Teamd version is: $(teamd --version)" + trap environment_destroy EXIT + + setup_ns ns1 ns2 + NS1="${NS_LIST[0]}" + NS2="${NS_LIST[1]}" + + for link in $(seq 0 1); do + ip -n "${NS1}" link add "eth${link}" type veth peer name \ + "eth${link}" netns "${NS2}" + check_err $? "Failed to create veth pair" + done + + NS1_TEAMD_CONF=$(teamd_config_create "${runner}" "test_team1") + NS2_TEAMD_CONF=$(teamd_config_create "${runner}" "test_team2") + echo "Conf files are ${NS1_TEAMD_CONF} and ${NS2_TEAMD_CONF}" + + ip netns exec "${NS1}" teamd -d -f "${NS1_TEAMD_CONF}" + check_err $? "Failed to create team device in ${NS1}" + NS1_TEAMD_PID=$(pgrep -f "teamd -d -f ${NS1_TEAMD_CONF}") + + ip netns exec "${NS2}" teamd -d -f "${NS2_TEAMD_CONF}" + check_err $? "Failed to create team device in ${NS2}" + NS2_TEAMD_PID=$(pgrep -f "teamd -d -f ${NS2_TEAMD_CONF}") + + echo "Created team devices" + echo "Teamd PIDs are ${NS1_TEAMD_PID} and ${NS2_TEAMD_PID}" + + ip -n "${NS1}" link set test_team1 up + check_err $? "Failed to set test_team1 up in ${NS1}" + ip -n "${NS2}" link set test_team2 up + check_err $? "Failed to set test_team2 up in ${NS2}" + + ip -n "${NS1}" addr add "${NS1_IP}/${PREFIX_LENGTH}" "${NODAD}" dev \ + test_team1 + check_err $? "Failed to add address to team device in ${NS1}" + ip -n "${NS2}" addr add "${NS2_IP}/${PREFIX_LENGTH}" "${NODAD}" dev \ + test_team2 + check_err $? "Failed to add address to team device in ${NS2}" + + slowwait 2 timeout 0.5 ip netns exec "${NS1}" ping -W 1 -c 1 "${NS2_IP}" +} + +# Tear down the environment: kill teamd and delete network namespaces. +environment_destroy() +{ + echo "Tearing down two-link aggregation" + + rm "${NS1_TEAMD_CONF}" + rm "${NS2_TEAMD_CONF}" + + # First, try graceful teamd teardown. + ip netns exec "${NS1}" teamd -k -t test_team1 + ip netns exec "${NS2}" teamd -k -t test_team2 + + # If teamd can't be killed gracefully, then sigkill. + if kill -0 "${NS1_TEAMD_PID}" 2>/dev/null; then + echo "Sending sigkill to teamd for test_team1" + kill -9 "${NS1_TEAMD_PID}" + rm -f /var/run/teamd/test_team1.{pid,sock} + fi + if kill -0 "${NS2_TEAMD_PID}" 2>/dev/null; then + echo "Sending sigkill to teamd for test_team2" + kill -9 "${NS2_TEAMD_PID}" + rm -f /var/run/teamd/test_team2.{pid,sock} + fi + cleanup_all_ns +} + +# Change the active port for an active-backup mode team. +# Arguments: +# namespace - The network namespace that the team is in. +# team - The name of the team. +# active_port - The port to make active. +set_active_port() +{ + local namespace=$1 + local team=$2 + local active_port=$3 + + ip netns exec "${namespace}" teamdctl "${team}" state item set \ + runner.active_port "${active_port}" + slowwait 2 bash -c "ip netns exec ${namespace} teamdctl ${team} state \ + item get runner.active_port | grep -q ${active_port}" +} + +# Wait for an interface to stop receiving traffic. If it keeps receiving traffic +# for the duration of the timeout, then return an error. +# Arguments: +# - namespace - The network namespace that the interface is in. +# - interface - The name of the interface. +wait_to_stop_receiving() +{ + local namespace=$1 + local interface=$2 + + echo "Waiting for ${interface} in ${namespace} to stop receiving" + slowwait 10 check_no_traffic "${interface}" "${NS2_IP}" \ + "${namespace}" +} + +# Test that active backup runner can change active ports. +# Globals: +# RET - Used by test infra, set by `check_err` functions. +teamd_test_active_backup() +{ + export RET=0 + + start_listening_and_sending + + ### Scenario 1: Don't manually set active port, just make sure team + # works. + save_tcpdump_outputs "${NS2}" test_team2 + did_interface_receive test_team2 "${NS2_IP}" + check_err $? "Traffic did not reach team interface in NS2." + clear_tcpdump_outputs test_team2 + + ### Scenario 2: Choose active port. + set_active_port "${NS1}" test_team1 eth1 + set_active_port "${NS2}" test_team2 eth1 + + wait_to_stop_receiving "${NS2}" eth0 + save_tcpdump_outputs "${NS2}" eth0 eth1 + did_interface_receive eth0 "${NS2_IP}" + check_fail $? "eth0 IS transmitting when inactive" + did_interface_receive eth1 "${NS2_IP}" + check_err $? "eth1 not transmitting when active" + clear_tcpdump_outputs eth0 eth1 + + ### Scenario 3: Change active port. + set_active_port "${NS1}" test_team1 eth0 + set_active_port "${NS2}" test_team2 eth0 + + wait_to_stop_receiving "${NS2}" eth1 + save_tcpdump_outputs "${NS2}" eth0 eth1 + did_interface_receive eth0 "${NS2_IP}" + check_err $? "eth0 not transmitting when active" + did_interface_receive eth1 "${NS2_IP}" + check_fail $? "eth1 IS transmitting when inactive" + clear_tcpdump_outputs eth0 eth1 + + log_test "teamd active backup runner test" + + stop_sending_and_listening +} + +require_command teamd +require_command teamdctl +require_command iperf3 +require_command tcpdump +environment_create activebackup +tests_run +exit "${EXIT_STATUS}" diff --git a/tools/testing/selftests/drivers/net/team/transmit_failover.sh b/tools/testing/selftests/drivers/net/team/transmit_failover.sh new file mode 100755 index 000000000000..b2bdcd27bc98 --- /dev/null +++ b/tools/testing/selftests/drivers/net/team/transmit_failover.sh @@ -0,0 +1,158 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# These tests verify the basic failover capability of the team driver via the +# `enabled` team driver option across different team driver modes. This does not +# rely on teamd, and instead just uses teamnl to set the `enabled` option +# directly. +# +# Topology: +# +# +-------------------------+ NS1 +# | test_team1 | +# | + | +# | eth0 | eth1 | +# | +---+---+ | +# | | | | +# +-------------------------+ +# | | +# +-------------------------+ NS2 +# | | | | +# | +-------+ | +# | eth0 | eth1 | +# | + | +# | test_team2 | +# +-------------------------+ + +export ALL_TESTS="team_test_failover" + +test_dir="$(dirname "$0")" +# shellcheck disable=SC1091 +source "${test_dir}/../../../net/lib.sh" +# shellcheck disable=SC1091 +source "${test_dir}/team_lib.sh" + +NS1="" +NS2="" +export NODAD="nodad" +PREFIX_LENGTH="64" +NS1_IP="fd00::1" +NS2_IP="fd00::2" +NS1_IP4="192.168.0.1" +NS2_IP4="192.168.0.2" +MEMBERS=("eth0" "eth1") + +while getopts "4" opt; do + case $opt in + 4) + echo "IPv4 mode selected." + export NODAD= + PREFIX_LENGTH="24" + NS1_IP="${NS1_IP4}" + NS2_IP="${NS2_IP4}" + ;; + \?) + echo "Invalid option: -$OPTARG" >&2 + exit 1 + ;; + esac +done + +# Create the network namespaces, veth pair, and team devices in the specified +# mode. +# Globals: +# RET - Used by test infra, set by `check_err` functions. +# Arguments: +# mode - The team driver mode to use for the team devices. +environment_create() +{ + trap cleanup_all_ns EXIT + setup_ns ns1 ns2 + NS1="${NS_LIST[0]}" + NS2="${NS_LIST[1]}" + + # Create the interfaces. + ip -n "${NS1}" link add eth0 type veth peer name eth0 netns "${NS2}" + ip -n "${NS1}" link add eth1 type veth peer name eth1 netns "${NS2}" + ip -n "${NS1}" link add test_team1 type team + ip -n "${NS2}" link add test_team2 type team + + # Set up the receiving network namespace's team interface. + setup_team "${NS2}" test_team2 roundrobin "${NS2_IP}" \ + "${PREFIX_LENGTH}" "${MEMBERS[@]}" +} + + +# Check that failover works for a specific team driver mode. +# Globals: +# RET - Used by test infra, set by `check_err` functions. +# Arguments: +# mode - The mode to set the team interfaces to. +team_test_mode_failover() +{ + local mode="$1" + export RET=0 + + # Set up the sender team with the correct mode. + setup_team "${NS1}" test_team1 "${mode}" "${NS1_IP}" \ + "${PREFIX_LENGTH}" "${MEMBERS[@]}" + check_err $? "Failed to set up sender team" + + start_listening_and_sending + + ### Scenario 1: All interfaces initially enabled. + save_tcpdump_outputs "${NS2}" "${MEMBERS[@]}" + did_interface_receive eth0 "${NS2_IP}" + check_err $? "eth0 not transmitting when both links enabled" + did_interface_receive eth1 "${NS2_IP}" + check_err $? "eth1 not transmitting when both links enabled" + clear_tcpdump_outputs "${MEMBERS[@]}" + + ### Scenario 2: One tx-side interface disabled. + ip netns exec "${NS1}" teamnl test_team1 setoption enabled false \ + --port=eth1 + slowwait 2 bash -c "ip netns exec ${NS1} teamnl test_team1 getoption \ + enabled --port=eth1 | grep -q false" + + save_tcpdump_outputs "${NS2}" "${MEMBERS[@]}" + did_interface_receive eth0 "${NS2_IP}" + check_err $? "eth0 not transmitting when enabled" + did_interface_receive eth1 "${NS2_IP}" + check_fail $? "eth1 IS transmitting when disabled" + clear_tcpdump_outputs "${MEMBERS[@]}" + + ### Scenario 3: The interface is re-enabled. + ip netns exec "${NS1}" teamnl test_team1 setoption enabled true \ + --port=eth1 + slowwait 2 bash -c "ip netns exec ${NS1} teamnl test_team1 getoption \ + enabled --port=eth1 | grep -q true" + + save_tcpdump_outputs "${NS2}" "${MEMBERS[@]}" + did_interface_receive eth0 "${NS2_IP}" + check_err $? "eth0 not transmitting when both links enabled" + did_interface_receive eth1 "${NS2_IP}" + check_err $? "eth1 not transmitting when both links enabled" + clear_tcpdump_outputs "${MEMBERS[@]}" + + log_test "Failover of '${mode}' test" + + # Clean up + stop_sending_and_listening +} + +team_test_failover() +{ + team_test_mode_failover broadcast + team_test_mode_failover roundrobin + team_test_mode_failover random + # Don't test `activebackup` or `loadbalance` modes, since they are too + # complicated for just setting `enabled` to work. They use more than + # the `enabled` option for transmit. +} + +require_command teamnl +require_command iperf3 +require_command tcpdump +environment_create +tests_run +exit "${EXIT_STATUS}" diff --git a/tools/testing/selftests/drivers/net/virtio_net/Makefile b/tools/testing/selftests/drivers/net/virtio_net/Makefile index 7ec7cd3ab2cc..868ece3fea1f 100644 --- a/tools/testing/selftests/drivers/net/virtio_net/Makefile +++ b/tools/testing/selftests/drivers/net/virtio_net/Makefile @@ -1,15 +1,12 @@ # SPDX-License-Identifier: GPL-2.0+ OR MIT -TEST_PROGS = basic_features.sh \ - # +TEST_PROGS = basic_features.sh -TEST_FILES = \ - virtio_net_common.sh \ - # +TEST_FILES = virtio_net_common.sh TEST_INCLUDES = \ - ../../../net/forwarding/lib.sh \ - ../../../net/lib.sh \ - # + ../../../net/forwarding/lib.sh \ + ../../../net/lib.sh \ +# end of TEST_INCLUDES include ../../../lib.mk diff --git a/tools/testing/selftests/drivers/net/xdp.py b/tools/testing/selftests/drivers/net/xdp.py new file mode 100755 index 000000000000..2ad5932299e8 --- /dev/null +++ b/tools/testing/selftests/drivers/net/xdp.py @@ -0,0 +1,756 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +This file contains tests to verify native XDP support in network drivers. +The tests utilize the BPF program `xdp_native.bpf.o` from the `selftests.net.lib` +directory, with each test focusing on a specific aspect of XDP functionality. +""" +import random +import string +from dataclasses import dataclass +from enum import Enum + +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, ksft_ne, ksft_pr +from lib.py import KsftNamedVariant, ksft_variants +from lib.py import KsftFailEx, KsftSkipEx, NetDrvEpEnv +from lib.py import EthtoolFamily, NetdevFamily, NlError +from lib.py import bkg, cmd, rand_port, wait_port_listen +from lib.py import ip, defer +from lib.py import bpf_map_set, bpf_map_dump, bpf_prog_map_ids + + +class TestConfig(Enum): + """Enum for XDP configuration options.""" + MODE = 0 # Configures the BPF program for a specific test + PORT = 1 # Port configuration to communicate with the remote host + ADJST_OFFSET = 2 # Tail/Head adjustment offset for extension/shrinking + ADJST_TAG = 3 # Adjustment tag to annotate the start and end of extension + + +class XDPAction(Enum): + """Enum for XDP actions.""" + PASS = 0 # Pass the packet up to the stack + DROP = 1 # Drop the packet + TX = 2 # Route the packet to the remote host + TAIL_ADJST = 3 # Adjust the tail of the packet + HEAD_ADJST = 4 # Adjust the head of the packet + + +class XDPStats(Enum): + """Enum for XDP statistics.""" + RX = 0 # Count of valid packets received for testing + PASS = 1 # Count of packets passed up to the stack + DROP = 2 # Count of packets dropped + TX = 3 # Count of incoming packets routed to the remote host + ABORT = 4 # Count of packets that were aborted + + +@dataclass +class BPFProgInfo: + """Data class to store information about a BPF program.""" + name: str # Name of the BPF program + file: str # BPF program object file + xdp_sec: str = "xdp" # XDP section name (e.g., "xdp" or "xdp.frags") + mtu: int = 1500 # Maximum Transmission Unit, default is 1500 + + +def _exchg_udp(cfg, port, test_string): + """ + Exchanges UDP packets between a local and remote host using the socat tool. + + Args: + cfg: Configuration object containing network settings. + port: Port number to use for the UDP communication. + test_string: String that the remote host will send. + + Returns: + The string received by the test host. + """ + cfg.require_cmd("socat", remote=True) + + rx_udp_cmd = f"socat -{cfg.addr_ipver} -T 2 -u UDP-RECV:{port},reuseport STDOUT" + tx_udp_cmd = f"echo -n {test_string} | socat -t 2 -u STDIN UDP:{cfg.baddr}:{port},shut-none" + + with bkg(rx_udp_cmd, exit_wait=True) as nc: + wait_port_listen(port, proto="udp") + cmd(tx_udp_cmd, host=cfg.remote, shell=True) + + return nc.stdout.strip() + + +def _test_udp(cfg, port, size=256): + """ + Tests UDP packet exchange between a local and remote host. + + Args: + cfg: Configuration object containing network settings. + port: Port number to use for the UDP communication. + size: The length of the test string to be exchanged, default is 256 characters. + + Returns: + bool: True if the received string matches the sent string, False otherwise. + """ + test_str = "".join(random.choice(string.ascii_lowercase) for _ in range(size)) + recvd_str = _exchg_udp(cfg, port, test_str) + + return recvd_str == test_str + + +def _load_xdp_prog(cfg, bpf_info): + """ + Loads an XDP program onto a network interface. + + Args: + cfg: Configuration object containing network settings. + bpf_info: BPFProgInfo object containing information about the BPF program. + + Returns: + dict: A dictionary containing the XDP program ID, name, and associated map IDs. + """ + abs_path = cfg.net_lib_dir / bpf_info.file + prog_info = {} + + cmd(f"ip link set dev {cfg.remote_ifname} mtu {bpf_info.mtu}", shell=True, host=cfg.remote) + defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote) + + cmd( + f"ip link set dev {cfg.ifname} mtu {bpf_info.mtu} xdpdrv obj {abs_path} sec {bpf_info.xdp_sec}", + shell=True + ) + defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdpdrv off") + + xdp_info = ip(f"-d link show dev {cfg.ifname}", json=True)[0] + prog_info["id"] = xdp_info["xdp"]["prog"]["id"] + prog_info["name"] = xdp_info["xdp"]["prog"]["name"] + prog_info["maps"] = bpf_prog_map_ids(prog_info["id"]) + + return prog_info + + +def _get_stats(xdp_map_id): + """ + Retrieves and formats statistics from an XDP map. + + Args: + xdp_map_id: The ID of the XDP map from which to retrieve statistics. + + Returns: + A dictionary containing formatted packet statistics for various XDP actions. + The keys are based on the XDPStats Enum values. + + Raises: + KsftFailEx: If the stats retrieval fails. + """ + stats = bpf_map_dump(xdp_map_id) + if not stats: + raise KsftFailEx(f"Failed to get stats for map {xdp_map_id}") + + return stats + + +def _test_pass(cfg, bpf_info, msg_sz): + """ + Tests the XDP_PASS action by exchanging UDP packets. + + Args: + cfg: Configuration object containing network settings. + bpf_info: BPFProgInfo object containing information about the BPF program. + msg_sz: Size of the test message to send. + """ + + prog_info = _load_xdp_prog(cfg, bpf_info) + port = rand_port() + + bpf_map_set("map_xdp_setup", TestConfig.MODE.value, XDPAction.PASS.value) + bpf_map_set("map_xdp_setup", TestConfig.PORT.value, port) + + ksft_eq(_test_udp(cfg, port, msg_sz), True, "UDP packet exchange failed") + stats = _get_stats(prog_info["maps"]["map_xdp_stats"]) + + ksft_ne(stats[XDPStats.RX.value], 0, "RX stats should not be zero") + ksft_eq(stats[XDPStats.RX.value], stats[XDPStats.PASS.value], "RX and PASS stats mismatch") + + +def test_xdp_native_pass_sb(cfg): + """ + Tests the XDP_PASS action for single buffer case. + + Args: + cfg: Configuration object containing network settings. + """ + bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500) + + _test_pass(cfg, bpf_info, 256) + + +def test_xdp_native_pass_mb(cfg): + """ + Tests the XDP_PASS action for a multi-buff size. + + Args: + cfg: Configuration object containing network settings. + """ + bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000) + + _test_pass(cfg, bpf_info, 8000) + + +def _test_drop(cfg, bpf_info, msg_sz): + """ + Tests the XDP_DROP action by exchanging UDP packets. + + Args: + cfg: Configuration object containing network settings. + bpf_info: BPFProgInfo object containing information about the BPF program. + msg_sz: Size of the test message to send. + """ + + prog_info = _load_xdp_prog(cfg, bpf_info) + port = rand_port() + + bpf_map_set("map_xdp_setup", TestConfig.MODE.value, XDPAction.DROP.value) + bpf_map_set("map_xdp_setup", TestConfig.PORT.value, port) + + ksft_eq(_test_udp(cfg, port, msg_sz), False, "UDP packet exchange should fail") + stats = _get_stats(prog_info["maps"]["map_xdp_stats"]) + + ksft_ne(stats[XDPStats.RX.value], 0, "RX stats should be zero") + ksft_eq(stats[XDPStats.RX.value], stats[XDPStats.DROP.value], "RX and DROP stats mismatch") + + +def test_xdp_native_drop_sb(cfg): + """ + Tests the XDP_DROP action for a signle-buff case. + + Args: + cfg: Configuration object containing network settings. + """ + bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500) + + _test_drop(cfg, bpf_info, 256) + + +def test_xdp_native_drop_mb(cfg): + """ + Tests the XDP_DROP action for a multi-buff case. + + Args: + cfg: Configuration object containing network settings. + """ + bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000) + + _test_drop(cfg, bpf_info, 8000) + + +def _test_xdp_native_tx(cfg, bpf_info, payload_lens): + """ + Tests the XDP_TX action. + + Args: + cfg: Configuration object containing network settings. + bpf_info: BPFProgInfo object containing the BPF program metadata. + payload_lens: Array of packet lengths to send. + """ + cfg.require_cmd("socat", remote=True) + prog_info = _load_xdp_prog(cfg, bpf_info) + port = rand_port() + + bpf_map_set("map_xdp_setup", TestConfig.MODE.value, XDPAction.TX.value) + bpf_map_set("map_xdp_setup", TestConfig.PORT.value, port) + + expected_pkts = 0 + for payload_len in payload_lens: + test_string = "".join( + random.choice(string.ascii_lowercase) for _ in range(payload_len) + ) + + rx_udp = f"socat -{cfg.addr_ipver} -T 2 " + \ + f"-u UDP-RECV:{port},reuseport STDOUT" + + # Writing zero bytes to stdin gets ignored by socat, + # but with the shut-null flag socat generates a zero sized packet + # when the socket is closed. + tx_cmd_suffix = ",shut-null" if payload_len == 0 else ",shut-none" + tx_udp = f"echo -n {test_string} | socat -t 2 " + \ + f"-u STDIN UDP:{cfg.baddr}:{port}{tx_cmd_suffix}" + + with bkg(rx_udp, host=cfg.remote, exit_wait=True) as rnc: + wait_port_listen(port, proto="udp", host=cfg.remote) + cmd(tx_udp, host=cfg.remote, shell=True) + + ksft_eq(rnc.stdout.strip(), test_string, "UDP packet exchange failed") + + expected_pkts += 1 + stats = _get_stats(prog_info["maps"]["map_xdp_stats"]) + ksft_eq(stats[XDPStats.RX.value], expected_pkts, "RX stats mismatch") + ksft_eq(stats[XDPStats.TX.value], expected_pkts, "TX stats mismatch") + + +def test_xdp_native_tx_sb(cfg): + """ + Tests the XDP_TX action for a single-buff case. + + Args: + cfg: Configuration object containing network settings. + """ + bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500) + + # Ensure there's enough room for an ETH / IP / UDP header + pkt_hdr_len = 42 if cfg.addr_ipver == "4" else 62 + + _test_xdp_native_tx(cfg, bpf_info, [0, 1500 // 2, 1500 - pkt_hdr_len]) + + +def test_xdp_native_tx_mb(cfg): + """ + Tests the XDP_TX action for a multi-buff case. + + Args: + cfg: Configuration object containing network settings. + """ + bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", + "xdp.frags", 9000) + # The first packet ensures we exercise the fragmented code path. + # And the subsequent 0-sized packet ensures the driver + # reinitializes xdp_buff correctly. + _test_xdp_native_tx(cfg, bpf_info, [8000, 0]) + + +def _validate_res(res, offset_lst, pkt_sz_lst): + """ + Validates the result of a test. + + Args: + res: The result of the test, which should be a dictionary with a "status" key. + + Raises: + KsftFailEx: If the test fails to pass any combination of offset and packet size. + """ + if "status" not in res: + raise KsftFailEx("Missing 'status' key in result dictionary") + + # Validate that not a single case was successful + if res["status"] == "fail": + if res["offset"] == offset_lst[0] and res["pkt_sz"] == pkt_sz_lst[0]: + raise KsftFailEx(f"{res['reason']}") + + # Get the previous offset and packet size to report the successful run + tmp_idx = offset_lst.index(res["offset"]) + prev_offset = offset_lst[tmp_idx - 1] + if tmp_idx == 0: + tmp_idx = pkt_sz_lst.index(res["pkt_sz"]) + prev_pkt_sz = pkt_sz_lst[tmp_idx - 1] + else: + prev_pkt_sz = res["pkt_sz"] + + # Use these values for error reporting + ksft_pr( + f"Failed run: pkt_sz {res['pkt_sz']}, offset {res['offset']}. " + f"Last successful run: pkt_sz {prev_pkt_sz}, offset {prev_offset}. " + f"Reason: {res['reason']}" + ) + + +def _check_for_failures(recvd_str, stats): + """ + Checks for common failures while adjusting headroom or tailroom. + + Args: + recvd_str: The string received from the remote host after sending a test string. + stats: A dictionary containing formatted packet statistics for various XDP actions. + + Returns: + str: A string describing the failure reason if a failure is detected, otherwise None. + """ + + # Any adjustment failure result in an abort hence, we track this counter + if stats[XDPStats.ABORT.value] != 0: + return "Adjustment failed" + + # Since we are using aggregate stats for a single test across all offsets and packet sizes + # we can't use RX stats only to track data exchange failure without taking a previous + # snapshot. An easier way is to simply check for non-zero length of received string. + if len(recvd_str) == 0: + return "Data exchange failed" + + # Check for RX and PASS stats mismatch. Ideally, they should be equal for a successful run + if stats[XDPStats.RX.value] != stats[XDPStats.PASS.value]: + return "RX stats mismatch" + + return None + + +def _test_xdp_native_tail_adjst(cfg, pkt_sz_lst, offset_lst): + """ + Tests the XDP tail adjustment functionality. + + This function loads the appropriate XDP program based on the provided + program name and configures the XDP map for tail adjustment. It then + validates the tail adjustment by sending and receiving UDP packets + with specified packet sizes and offsets. + + Args: + cfg: Configuration object containing network settings. + prog: Name of the XDP program to load. + pkt_sz_lst: List of packet sizes to test. + offset_lst: List of offsets to validate support for tail adjustment. + + Returns: + dict: A dictionary with test status and failure details if applicable. + """ + port = rand_port() + bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000) + + prog_info = _load_xdp_prog(cfg, bpf_info) + + # Configure the XDP map for tail adjustment + bpf_map_set("map_xdp_setup", TestConfig.MODE.value, XDPAction.TAIL_ADJST.value) + bpf_map_set("map_xdp_setup", TestConfig.PORT.value, port) + + for offset in offset_lst: + tag = format(random.randint(65, 90), "02x") + + bpf_map_set("map_xdp_setup", TestConfig.ADJST_OFFSET.value, offset) + if offset > 0: + bpf_map_set("map_xdp_setup", TestConfig.ADJST_TAG.value, int(tag, 16)) + + for pkt_sz in pkt_sz_lst: + test_str = "".join(random.choice(string.ascii_lowercase) for _ in range(pkt_sz)) + recvd_str = _exchg_udp(cfg, port, test_str) + stats = _get_stats(prog_info["maps"]["map_xdp_stats"]) + + failure = _check_for_failures(recvd_str, stats) + if failure is not None: + return { + "status": "fail", + "reason": failure, + "offset": offset, + "pkt_sz": pkt_sz, + } + + # Validate data content based on offset direction + expected_data = None + if offset > 0: + expected_data = test_str + (offset * chr(int(tag, 16))) + else: + expected_data = test_str[0:pkt_sz + offset] + + if recvd_str != expected_data: + return { + "status": "fail", + "reason": "Data mismatch", + "offset": offset, + "pkt_sz": pkt_sz, + } + + return {"status": "pass"} + + +def test_xdp_native_adjst_tail_grow_data(cfg): + """ + Tests the XDP tail adjustment by growing packet data. + + Args: + cfg: Configuration object containing network settings. + """ + pkt_sz_lst = [512, 1024, 2048] + offset_lst = [1, 16, 32, 64, 128, 256] + res = _test_xdp_native_tail_adjst( + cfg, + pkt_sz_lst, + offset_lst, + ) + + _validate_res(res, offset_lst, pkt_sz_lst) + + +def test_xdp_native_adjst_tail_shrnk_data(cfg): + """ + Tests the XDP tail adjustment by shrinking packet data. + + Args: + cfg: Configuration object containing network settings. + """ + pkt_sz_lst = [512, 1024, 2048] + offset_lst = [-16, -32, -64, -128, -256] + res = _test_xdp_native_tail_adjst( + cfg, + pkt_sz_lst, + offset_lst, + ) + + _validate_res(res, offset_lst, pkt_sz_lst) + + +def get_hds_thresh(cfg): + """ + Retrieves the header data split (HDS) threshold for a network interface. + + Args: + cfg: Configuration object containing network settings. + + Returns: + The HDS threshold value. If the threshold is not supported or an error occurs, + a default value of 1500 is returned. + """ + ethnl = cfg.ethnl + hds_thresh = 1500 + + try: + rings = ethnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + if 'hds-thresh' not in rings: + ksft_pr(f'hds-thresh not supported. Using default: {hds_thresh}') + return hds_thresh + hds_thresh = rings['hds-thresh'] + except NlError as e: + ksft_pr(f"Failed to get rings: {e}. Using default: {hds_thresh}") + + return hds_thresh + + +def _test_xdp_native_head_adjst(cfg, prog, pkt_sz_lst, offset_lst): + """ + Tests the XDP head adjustment action for a multi-buffer case. + + Args: + cfg: Configuration object containing network settings. + ethnl: Network namespace or link object (not used in this function). + + This function sets up the packet size and offset lists, then performs + the head adjustment test by sending and receiving UDP packets. + """ + cfg.require_cmd("socat", remote=True) + + prog_info = _load_xdp_prog(cfg, BPFProgInfo(prog, "xdp_native.bpf.o", "xdp.frags", 9000)) + port = rand_port() + + bpf_map_set("map_xdp_setup", TestConfig.MODE.value, XDPAction.HEAD_ADJST.value) + bpf_map_set("map_xdp_setup", TestConfig.PORT.value, port) + + hds_thresh = get_hds_thresh(cfg) + for offset in offset_lst: + for pkt_sz in pkt_sz_lst: + # The "head" buffer must contain at least the Ethernet header + # after we eat into it. We send large-enough packets, but if HDS + # is enabled head will only contain headers. Don't try to eat + # more than 28 bytes (UDPv4 + eth hdr left: (14 + 20 + 8) - 14) + l2_cut_off = 28 if cfg.addr_ipver == 4 else 48 + if pkt_sz > hds_thresh and offset > l2_cut_off: + ksft_pr( + f"Failed run: pkt_sz ({pkt_sz}) > HDS threshold ({hds_thresh}) and " + f"offset {offset} > {l2_cut_off}" + ) + return {"status": "pass"} + + test_str = ''.join(random.choice(string.ascii_lowercase) for _ in range(pkt_sz)) + tag = format(random.randint(65, 90), '02x') + + bpf_map_set("map_xdp_setup", TestConfig.ADJST_OFFSET.value, offset) + bpf_map_set("map_xdp_setup", TestConfig.ADJST_TAG.value, int(tag, 16)) + + recvd_str = _exchg_udp(cfg, port, test_str) + + # Check for failures around adjustment and data exchange + failure = _check_for_failures(recvd_str, _get_stats(prog_info['maps']['map_xdp_stats'])) + if failure is not None: + return { + "status": "fail", + "reason": failure, + "offset": offset, + "pkt_sz": pkt_sz + } + + # Validate data content based on offset direction + expected_data = None + if offset < 0: + expected_data = chr(int(tag, 16)) * (0 - offset) + test_str + else: + expected_data = test_str[offset:] + + if recvd_str != expected_data: + return { + "status": "fail", + "reason": "Data mismatch", + "offset": offset, + "pkt_sz": pkt_sz + } + + return {"status": "pass"} + + +def test_xdp_native_adjst_head_grow_data(cfg): + """ + Tests the XDP headroom growth support. + + Args: + cfg: Configuration object containing network settings. + + This function sets up the packet size and offset lists, then calls the + _test_xdp_native_head_adjst_mb function to perform the actual test. The + test is passed if the headroom is successfully extended for given packet + sizes and offsets. + """ + pkt_sz_lst = [512, 1024, 2048] + + # Negative values result in headroom shrinking, resulting in growing of payload + offset_lst = [-16, -32, -64, -128, -256] + res = _test_xdp_native_head_adjst(cfg, "xdp_prog_frags", pkt_sz_lst, offset_lst) + + _validate_res(res, offset_lst, pkt_sz_lst) + + +def test_xdp_native_adjst_head_shrnk_data(cfg): + """ + Tests the XDP headroom shrinking support. + + Args: + cfg: Configuration object containing network settings. + + This function sets up the packet size and offset lists, then calls the + _test_xdp_native_head_adjst_mb function to perform the actual test. The + test is passed if the headroom is successfully shrunk for given packet + sizes and offsets. + """ + pkt_sz_lst = [512, 1024, 2048] + + # Positive values result in headroom growing, resulting in shrinking of payload + offset_lst = [16, 32, 64, 128, 256] + res = _test_xdp_native_head_adjst(cfg, "xdp_prog_frags", pkt_sz_lst, offset_lst) + + _validate_res(res, offset_lst, pkt_sz_lst) + + +@ksft_variants([ + KsftNamedVariant("pass", XDPAction.PASS), + KsftNamedVariant("drop", XDPAction.DROP), + KsftNamedVariant("tx", XDPAction.TX), +]) +def test_xdp_native_qstats(cfg, act): + """ + Send 1000 messages. Expect XDP action specified in @act. + Make sure the packets were counted to interface level qstats + (Rx, and Tx if act is TX). + """ + + cfg.require_cmd("socat") + + bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500) + prog_info = _load_xdp_prog(cfg, bpf_info) + port = rand_port() + + bpf_map_set("map_xdp_setup", TestConfig.MODE.value, act.value) + bpf_map_set("map_xdp_setup", TestConfig.PORT.value, port) + + # Discard the input, but we need a listener to avoid ICMP errors + rx_udp = f"socat -{cfg.addr_ipver} -T 2 -u UDP-RECV:{port},reuseport " + \ + "/dev/null" + # Listener runs on "remote" in case of XDP_TX + rx_host = cfg.remote if act == XDPAction.TX else None + # We want to spew 1000 packets quickly, bash seems to do a good enough job + # Each reopening of the socket gives us a differenot local port (for RSS) + tx_udp = "for _ in `seq 20`; do " \ + f"exec 5<>/dev/udp/{cfg.addr}/{port}; " \ + "for i in `seq 50`; do echo a >&5; done; " \ + "exec 5>&-; done" + + cfg.wait_hw_stats_settle() + # Qstats have more clearly defined semantics than rtnetlink. + # XDP is the "first layer of the stack" so XDP packets should be counted + # as received and sent as if the decision was made in the routing layer. + before = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] + + with bkg(rx_udp, host=rx_host, exit_wait=True): + wait_port_listen(port, proto="udp", host=rx_host) + cmd(tx_udp, host=cfg.remote, shell=True) + + cfg.wait_hw_stats_settle() + after = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] + + expected_pkts = 1000 + ksft_ge(after['rx-packets'] - before['rx-packets'], expected_pkts) + if act == XDPAction.TX: + ksft_ge(after['tx-packets'] - before['tx-packets'], expected_pkts) + + stats = _get_stats(prog_info["maps"]["map_xdp_stats"]) + ksft_eq(stats[XDPStats.RX.value], expected_pkts, "XDP RX stats mismatch") + if act == XDPAction.TX: + ksft_eq(stats[XDPStats.TX.value], expected_pkts, "XDP TX stats mismatch") + + # Flip the ring count back and forth to make sure the stats from XDP rings + # don't get lost. + chans = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}}) + if chans.get('combined-count', 0) > 1: + cfg.ethnl.channels_set({'header': {'dev-index': cfg.ifindex}, + 'combined-count': 1}) + cfg.ethnl.channels_set({'header': {'dev-index': cfg.ifindex}, + 'combined-count': chans['combined-count']}) + before = after + after = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] + + ksft_ge(after['rx-packets'], before['rx-packets']) + if act == XDPAction.TX: + ksft_ge(after['tx-packets'], before['tx-packets']) + + +def test_xdp_native_update_mb_to_sb(cfg): + """ + Test multi-buf to single-buf replacement with jumbo MTU. + """ + obj = cfg.net_lib_dir / "xdp_dummy.bpf.o" + mtu = 9000 + + ip(f"link set dev {cfg.ifname} mtu {mtu}") + defer(ip, f"link set dev {cfg.ifname} mtu {cfg.dev['mtu']} xdpdrv off") + + attach = cmd(f"ip link set dev {cfg.ifname} xdpdrv obj {obj} sec xdp", fail=False) + if attach.ret == 0: + raise KsftSkipEx(f"device supports single-buffer XDP with mtu {mtu}") + + attach = cmd(f"ip link set dev {cfg.ifname} xdpdrv obj {obj} sec xdp.frags", fail=False) + if attach.ret != 0: + ksft_pr(attach) + raise KsftSkipEx("device does not support multi-buffer XDP") + + # Verify updating mb -> mb program works. + cmd(f"ip -force link set dev {cfg.ifname} xdpdrv obj {obj} sec xdp.frags") + + # Verify updating mb -> sb program does not work. + update = cmd(f"ip -force link set dev {cfg.ifname} xdpdrv obj {obj} sec xdp", fail=False) + if update.ret == 0: + raise KsftFailEx("device unexpectedly updates non-multi-buffer XDP") + + +def main(): + """ + Main function to execute the XDP tests. + + This function runs a series of tests to validate the XDP support for + both the single and multi-buffer. It uses the NetDrvEpEnv context + manager to manage the network driver environment and the ksft_run + function to execute the tests. + """ + with NetDrvEpEnv(__file__) as cfg: + cfg.ethnl = EthtoolFamily() + cfg.netnl = NetdevFamily() + ksft_run( + [ + test_xdp_native_pass_sb, + test_xdp_native_pass_mb, + test_xdp_native_drop_sb, + test_xdp_native_drop_mb, + test_xdp_native_tx_sb, + test_xdp_native_tx_mb, + test_xdp_native_adjst_tail_grow_data, + test_xdp_native_adjst_tail_shrnk_data, + test_xdp_native_adjst_head_grow_data, + test_xdp_native_adjst_head_shrnk_data, + test_xdp_native_qstats, + test_xdp_native_update_mb_to_sb, + ], + args=(cfg,)) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/ntsync/ntsync.c b/tools/testing/selftests/drivers/ntsync/ntsync.c index 3aad311574c4..e6a37214aa46 100644 --- a/tools/testing/selftests/drivers/ntsync/ntsync.c +++ b/tools/testing/selftests/drivers/ntsync/ntsync.c @@ -12,7 +12,7 @@ #include <time.h> #include <pthread.h> #include <linux/ntsync.h> -#include "../../kselftest_harness.h" +#include "kselftest_harness.h" static int read_sem_state(int sem, __u32 *count, __u32 *max) { diff --git a/tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c b/tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c index 7ee7492138c6..14df9aa07308 100644 --- a/tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c +++ b/tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c @@ -14,7 +14,7 @@ #include <asm/uvdevice.h> -#include "../../../kselftest_harness.h" +#include "kselftest_harness.h" #define UV_PATH "/dev/uv" #define BUFFER_SIZE 0x200 |
