summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
l---------tools/arch/arm64/vdso1
l---------tools/arch/loongarch/vdso1
l---------tools/arch/powerpc/vdso1
l---------tools/arch/s390/vdso1
-rw-r--r--tools/arch/x86/kcpuid/cpuid.csv1430
-rw-r--r--tools/arch/x86/kcpuid/kcpuid.c109
l---------tools/arch/x86/vdso1
-rw-r--r--tools/crypto/ccp/dbc.c1
-rw-r--r--tools/gpio/Makefile2
-rw-r--r--tools/gpio/gpio-hammer.c4
-rw-r--r--tools/hv/Makefile2
-rwxr-xr-x[-rw-r--r--]tools/hv/lsvmbus2
-rw-r--r--tools/include/asm/alternative.h10
-rw-r--r--tools/include/generated/asm-offsets.h0
-rw-r--r--tools/include/generated/asm/cpucap-defs.h0
-rw-r--r--tools/include/generated/asm/sysreg-defs.h0
-rw-r--r--tools/include/linux/compiler.h4
-rw-r--r--tools/include/linux/linkage.h4
-rw-r--r--tools/include/nolibc/Makefile1
-rw-r--r--tools/include/nolibc/arch-aarch64.h4
-rw-r--r--tools/include/nolibc/arch-arm.h8
-rw-r--r--tools/include/nolibc/arch-i386.h4
-rw-r--r--tools/include/nolibc/arch-loongarch.h4
-rw-r--r--tools/include/nolibc/arch-mips.h8
-rw-r--r--tools/include/nolibc/arch-powerpc.h6
-rw-r--r--tools/include/nolibc/arch-riscv.h4
-rw-r--r--tools/include/nolibc/arch-s390.h4
-rw-r--r--tools/include/nolibc/arch-x86_64.h8
-rw-r--r--tools/include/nolibc/compiler.h24
-rw-r--r--tools/include/nolibc/crt.h25
-rw-r--r--tools/include/nolibc/nolibc.h3
-rw-r--r--tools/include/nolibc/stackprotector.h4
-rw-r--r--tools/include/nolibc/stdbool.h16
-rw-r--r--tools/include/nolibc/string.h1
-rw-r--r--tools/include/uapi/linux/bpf.h3
-rw-r--r--tools/include/uapi/linux/netdev.h13
-rw-r--r--tools/memory-model/Documentation/README24
-rw-r--r--tools/memory-model/Documentation/herd-representation.txt110
-rw-r--r--tools/memory-model/Documentation/simple.txt2
-rw-r--r--tools/net/ynl/lib/.gitignore1
-rw-r--r--tools/net/ynl/lib/ynl.c4
-rw-r--r--tools/net/ynl/lib/ynl.py7
-rw-r--r--tools/net/ynl/samples/netdev.c6
-rwxr-xr-xtools/net/ynl/ynl-gen-c.py6
-rw-r--r--tools/perf/builtin-daemon.c8
-rw-r--r--tools/perf/tests/pmu.c4
-rw-r--r--tools/perf/util/bpf_lock_contention.c3
-rw-r--r--tools/perf/util/python.c1
-rw-r--r--tools/power/cpupower/bindings/python/.gitignore8
-rw-r--r--tools/power/cpupower/bindings/python/Makefile33
-rw-r--r--tools/power/cpupower/bindings/python/README59
-rw-r--r--tools/power/cpupower/bindings/python/raw_pylibcpupower.i247
-rwxr-xr-xtools/power/cpupower/bindings/python/test_raw_pylibcpupower.py42
-rw-r--r--tools/power/cpupower/lib/cpuidle.c8
-rw-r--r--tools/power/cpupower/lib/cpuidle.h2
-rw-r--r--tools/power/cpupower/lib/powercap.c8
-rw-r--r--tools/power/cpupower/utils/cpuidle-info.c4
-rw-r--r--tools/power/pm-graph/.gitignore3
-rw-r--r--tools/power/pm-graph/Makefile111
-rwxr-xr-xtools/rcu/rcu-updaters.sh2
-rwxr-xr-xtools/sound/dapm-graph44
-rw-r--r--tools/spi/spidev_fdx.c2
-rw-r--r--tools/testing/cxl/Kbuild1
-rw-r--r--tools/testing/cxl/test/mock.c12
-rw-r--r--tools/testing/kunit/kunit_kernel.py3
-rw-r--r--tools/testing/selftests/Makefile6
-rw-r--r--tools/testing/selftests/acct/.gitignore3
-rw-r--r--tools/testing/selftests/acct/Makefile5
-rw-r--r--tools/testing/selftests/acct/acct_syscall.c78
-rw-r--r--tools/testing/selftests/alsa/Makefile4
-rw-r--r--tools/testing/selftests/alsa/global-timer.c87
-rw-r--r--tools/testing/selftests/alsa/utimer-test.c164
-rw-r--r--tools/testing/selftests/arm64/abi/hwcap.c14
-rw-r--r--tools/testing/selftests/arm64/abi/ptrace.c4
-rw-r--r--tools/testing/selftests/arm64/signal/.gitignore1
-rw-r--r--tools/testing/selftests/arm64/signal/Makefile2
-rw-r--r--tools/testing/selftests/arm64/signal/sve_helpers.c56
-rw-r--r--tools/testing/selftests/arm64/signal/sve_helpers.h21
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c46
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c30
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/poe_siginfo.c86
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/ssve_regs.c36
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/ssve_za_regs.c36
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/sve_regs.c32
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/testcases.c27
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/testcases.h28
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/za_no_regs.c32
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/za_regs.c36
-rw-r--r--tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h6
-rw-r--r--tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c29
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf.c34
-rw-r--r--tools/testing/selftests/bpf/prog_tests/dynptr.c37
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector.c12
-rw-r--r--tools/testing/selftests/bpf/prog_tests/setget_sockopt.c47
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_basic.c28
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h149
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_listen.c117
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tp_btf_nullable.c14
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_fail.c25
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_success.c23
-rw-r--r--tools/testing/selftests/bpf/progs/setget_sockopt.c26
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c11
-rw-r--r--tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c24
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.c43
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.h1
-rwxr-xr-xtools/testing/selftests/cgroup/test_cpuset_prs.sh56
-rwxr-xr-xtools/testing/selftests/cgroup/test_cpuset_v1_base.sh77
-rw-r--r--tools/testing/selftests/core/Makefile2
-rw-r--r--tools/testing/selftests/core/close_range_test.c39
-rw-r--r--tools/testing/selftests/core/unshare_test.c94
-rwxr-xr-xtools/testing/selftests/cpufreq/cpufreq.sh15
-rwxr-xr-xtools/testing/selftests/cpufreq/main.sh13
-rw-r--r--tools/testing/selftests/drivers/net/Makefile5
-rw-r--r--tools/testing/selftests/drivers/net/config4
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/pp_alloc_fail.py3
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_ctx.py80
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/env.py5
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh3
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_basic.sh234
-rwxr-xr-xtools/testing/selftests/drivers/net/stats.py33
-rw-r--r--tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c6
-rw-r--r--tools/testing/selftests/exec/execveat.c2
-rw-r--r--tools/testing/selftests/filesystems/statmount/statmount_test_ns.c7
-rw-r--r--tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc46
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/add_remove_uprobe.tc26
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc9
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc2
-rw-r--r--tools/testing/selftests/hid/hid_bpf.c6
-rw-r--r--tools/testing/selftests/iommu/iommufd.c6
-rw-r--r--tools/testing/selftests/kselftest.h10
-rw-r--r--tools/testing/selftests/kselftest/runner.sh7
-rw-r--r--tools/testing/selftests/kselftest_harness.h18
-rw-r--r--tools/testing/selftests/kvm/Makefile2
-rw-r--r--tools/testing/selftests/kvm/aarch64/arch_timer_edge_cases.c1062
-rw-r--r--tools/testing/selftests/kvm/aarch64/get-reg-list.c14
-rw-r--r--tools/testing/selftests/kvm/aarch64/no-vgic-v3.c175
-rw-r--r--tools/testing/selftests/kvm/aarch64/set_id_regs.c1
-rw-r--r--tools/testing/selftests/kvm/aarch64/vgic_irq.c11
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/arch_timer.h18
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/processor.h3
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/processor.c6
-rw-r--r--tools/testing/selftests/lib/Makefile3
-rw-r--r--tools/testing/selftests/lib/config1
-rwxr-xr-xtools/testing/selftests/lib/strscpy.sh3
-rwxr-xr-xtools/testing/selftests/livepatch/test-livepatch.sh7
-rw-r--r--tools/testing/selftests/lsm/lsm_list_modules_test.c3
-rw-r--r--tools/testing/selftests/mm/Makefile3
-rw-r--r--tools/testing/selftests/mm/mseal_test.c37
-rw-r--r--tools/testing/selftests/mm/pkey-arm64.h139
-rw-r--r--tools/testing/selftests/mm/pkey-helpers.h21
-rw-r--r--tools/testing/selftests/mm/pkey-powerpc.h3
-rw-r--r--tools/testing/selftests/mm/pkey-x86.h4
-rw-r--r--tools/testing/selftests/mm/pkey_sighandler_tests.c481
-rw-r--r--tools/testing/selftests/mm/protection_keys.c119
-rw-r--r--tools/testing/selftests/mm/seal_elf.c13
-rw-r--r--tools/testing/selftests/net/.gitignore2
-rw-r--r--tools/testing/selftests/net/Makefile15
-rw-r--r--tools/testing/selftests/net/af_unix/msg_oob.c23
-rwxr-xr-xtools/testing/selftests/net/fcnal-test.sh9
-rwxr-xr-xtools/testing/selftests/net/fib_nexthops.sh55
-rwxr-xr-xtools/testing/selftests/net/fib_rule_tests.sh304
-rw-r--r--tools/testing/selftests/net/forwarding/README2
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_vlan_aware.sh54
-rwxr-xr-xtools/testing/selftests/net/forwarding/custom_multipath_hash.sh8
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh8
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh8
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh64
-rwxr-xr-xtools/testing/selftests/net/forwarding/local_termination.sh435
-rwxr-xr-xtools/testing/selftests/net/forwarding/no_forwarding.sh3
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_mpath_nh.sh40
-rw-r--r--tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh13
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_mpath_nh_res.sh58
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_multipath.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_actions.sh46
-rw-r--r--tools/testing/selftests/net/lib.sh15
-rw-r--r--tools/testing/selftests/net/lib/csum.c16
-rw-r--r--tools/testing/selftests/net/lib/py/ksft.py60
-rw-r--r--tools/testing/selftests/net/mptcp/Makefile2
-rwxr-xr-xtools/testing/selftests/net/mptcp/diag.sh2
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh17
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh565
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_lib.sh21
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_sockopt.sh1
-rwxr-xr-xtools/testing/selftests/net/mptcp/pm_netlink.sh2
-rw-r--r--tools/testing/selftests/net/mptcp/pm_nl_ctl.c10
-rwxr-xr-xtools/testing/selftests/net/mptcp/simult_flows.sh1
-rwxr-xr-xtools/testing/selftests/net/mptcp/userspace_pm.sh1
-rw-r--r--tools/testing/selftests/net/ncdevmem.c570
-rwxr-xr-xtools/testing/selftests/net/netdevice.sh60
-rw-r--r--tools/testing/selftests/net/netfilter/config2
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_queue.sh129
-rw-r--r--tools/testing/selftests/net/packetdrill/Makefile10
-rw-r--r--tools/testing/selftests/net/packetdrill/config11
-rwxr-xr-xtools/testing/selftests/net/packetdrill/defaults.sh63
-rwxr-xr-xtools/testing/selftests/net/packetdrill/ksft_runner.sh41
-rwxr-xr-xtools/testing/selftests/net/packetdrill/set_sysctls.py38
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt51
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt51
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_md5_md5-only-on-client-ack.pkt28
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-1pkt.pkt56
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-5pkt.pkt33
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-6pkt.pkt34
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt.pkt42
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-4pkt.pkt35
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-idle.pkt39
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-win-update.pkt50
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited-9-packets-out.pkt38
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited.pkt36
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-fq-ack-per-2pkt.pkt63
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt55
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt41
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt30
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt44
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt61
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt63
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt66
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt56
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt44
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt118
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt57
-rwxr-xr-xtools/testing/selftests/net/pmtu.sh10
-rw-r--r--tools/testing/selftests/net/psock_fanout.c6
-rw-r--r--tools/testing/selftests/net/rds/Makefile12
-rw-r--r--tools/testing/selftests/net/rds/README.txt41
-rwxr-xr-xtools/testing/selftests/net/rds/config.sh53
-rwxr-xr-xtools/testing/selftests/net/rds/run.sh224
-rw-r--r--tools/testing/selftests/net/rds/test.py262
-rw-r--r--tools/testing/selftests/net/rxtimestamp.c18
-rw-r--r--tools/testing/selftests/net/sk_so_peek_off.c202
-rw-r--r--tools/testing/selftests/net/tcp_ao/Makefile3
-rw-r--r--tools/testing/selftests/net/tcp_ao/bench-lookups.c2
-rw-r--r--tools/testing/selftests/net/tcp_ao/config1
-rw-r--r--tools/testing/selftests/net/tcp_ao/connect-deny.c25
-rw-r--r--tools/testing/selftests/net/tcp_ao/connect.c6
-rw-r--r--tools/testing/selftests/net/tcp_ao/icmps-discard.c2
-rw-r--r--tools/testing/selftests/net/tcp_ao/key-management.c18
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/aolib.h180
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c559
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/ftrace.c543
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/kconfig.c31
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/setup.c17
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/sock.c1
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/utils.c26
-rw-r--r--tools/testing/selftests/net/tcp_ao/restore.c30
-rw-r--r--tools/testing/selftests/net/tcp_ao/rst.c2
-rw-r--r--tools/testing/selftests/net/tcp_ao/self-connect.c19
-rw-r--r--tools/testing/selftests/net/tcp_ao/seq-ext.c28
-rw-r--r--tools/testing/selftests/net/tcp_ao/setsockopt-closed.c6
-rw-r--r--tools/testing/selftests/net/tcp_ao/unsigned-md5.c35
-rw-r--r--tools/testing/selftests/net/txtimestamp.c6
-rwxr-xr-xtools/testing/selftests/net/udpgro.sh53
-rwxr-xr-xtools/testing/selftests/net/unicast_extensions.sh9
-rwxr-xr-xtools/testing/selftests/net/vrf_route_leaking.sh3
-rwxr-xr-xtools/testing/selftests/net/xfrm_policy_add_speed.sh83
-rw-r--r--tools/testing/selftests/nolibc/Makefile41
-rw-r--r--tools/testing/selftests/nolibc/nolibc-test.c9
-rwxr-xr-xtools/testing/selftests/nolibc/run-tests.sh16
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/Makefile2
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/exec_target.c16
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/torture.sh38
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/CFcommon2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/CFcommon.i6862
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/CFcommon.ppc64le1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/CFcommon.x86_642
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/refscale/TINY20
-rw-r--r--tools/testing/selftests/resctrl/cat_test.c7
-rw-r--r--tools/testing/selftests/riscv/mm/mmap_bottomup.c2
-rw-r--r--tools/testing/selftests/riscv/mm/mmap_default.c2
-rw-r--r--tools/testing/selftests/riscv/mm/mmap_test.h67
-rw-r--r--tools/testing/selftests/rtc/rtctest.c7
-rw-r--r--tools/testing/selftests/rust/config3
-rwxr-xr-xtools/testing/selftests/tc-testing/tdc.py1
-rw-r--r--tools/testing/selftests/timers/change_skew.c3
-rw-r--r--tools/testing/selftests/timers/posix_timers.c550
-rw-r--r--tools/testing/selftests/timers/skew_consistency.c2
-rw-r--r--tools/testing/selftests/timers/threadtest.c4
-rwxr-xr-xtools/testing/selftests/tpm2/test_async.sh2
-rwxr-xr-xtools/testing/selftests/tpm2/test_smoke.sh2
-rwxr-xr-xtools/testing/selftests/tpm2/test_space.sh2
-rw-r--r--tools/testing/selftests/user/Makefile9
-rw-r--r--tools/testing/selftests/user/config1
-rwxr-xr-xtools/testing/selftests/user/test_user_copy.sh18
-rw-r--r--tools/testing/selftests/vDSO/Makefile21
-rw-r--r--tools/testing/selftests/vDSO/parse_vdso.c17
-rw-r--r--tools/testing/selftests/vDSO/vdso_call.h70
-rw-r--r--tools/testing/selftests/vDSO/vdso_config.h18
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_abi.c14
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_chacha.c103
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_correctness.c21
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_getcpu.c3
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_getrandom.c128
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_gettimeofday.c3
-rw-r--r--tools/testing/selftests/x86/Makefile2
-rw-r--r--tools/testing/selftests/x86/srso.c70
-rw-r--r--tools/testing/vsock/util.c6
-rw-r--r--tools/testing/vsock/util.h3
-rw-r--r--tools/testing/vsock/vsock_test.c85
300 files changed, 12722 insertions, 2046 deletions
diff --git a/tools/arch/arm64/vdso b/tools/arch/arm64/vdso
new file mode 120000
index 000000000000..233c7a26f6e5
--- /dev/null
+++ b/tools/arch/arm64/vdso
@@ -0,0 +1 @@
+../../../arch/arm64/kernel/vdso \ No newline at end of file
diff --git a/tools/arch/loongarch/vdso b/tools/arch/loongarch/vdso
new file mode 120000
index 000000000000..ebda43a82db7
--- /dev/null
+++ b/tools/arch/loongarch/vdso
@@ -0,0 +1 @@
+../../../arch/loongarch/vdso \ No newline at end of file
diff --git a/tools/arch/powerpc/vdso b/tools/arch/powerpc/vdso
new file mode 120000
index 000000000000..4e676d1d1cb4
--- /dev/null
+++ b/tools/arch/powerpc/vdso
@@ -0,0 +1 @@
+../../../arch/powerpc/kernel/vdso \ No newline at end of file
diff --git a/tools/arch/s390/vdso b/tools/arch/s390/vdso
new file mode 120000
index 000000000000..6cf4c1cebdcd
--- /dev/null
+++ b/tools/arch/s390/vdso
@@ -0,0 +1 @@
+../../../arch/s390/kernel/vdso64 \ No newline at end of file
diff --git a/tools/arch/x86/kcpuid/cpuid.csv b/tools/arch/x86/kcpuid/cpuid.csv
index e0c25b75327e..d751eb8585d0 100644
--- a/tools/arch/x86/kcpuid/cpuid.csv
+++ b/tools/arch/x86/kcpuid/cpuid.csv
@@ -1,451 +1,1053 @@
-# The basic row format is:
-# LEAF, SUBLEAF, register_name, bits, short_name, long_description
-
-# Leaf 00H
- 0, 0, EAX, 31:0, max_basic_leafs, Max input value for supported subleafs
-
-# Leaf 01H
- 1, 0, EAX, 3:0, stepping, Stepping ID
- 1, 0, EAX, 7:4, model, Model
- 1, 0, EAX, 11:8, family, Family ID
- 1, 0, EAX, 13:12, processor, Processor Type
- 1, 0, EAX, 19:16, model_ext, Extended Model ID
- 1, 0, EAX, 27:20, family_ext, Extended Family ID
-
- 1, 0, EBX, 7:0, brand, Brand Index
- 1, 0, EBX, 15:8, clflush_size, CLFLUSH line size (value * 8) in bytes
- 1, 0, EBX, 23:16, max_cpu_id, Maxim number of addressable logic cpu in this package
- 1, 0, EBX, 31:24, apic_id, Initial APIC ID
-
- 1, 0, ECX, 0, sse3, Streaming SIMD Extensions 3(SSE3)
- 1, 0, ECX, 1, pclmulqdq, PCLMULQDQ instruction supported
- 1, 0, ECX, 2, dtes64, DS area uses 64-bit layout
- 1, 0, ECX, 3, mwait, MONITOR/MWAIT supported
- 1, 0, ECX, 4, ds_cpl, CPL Qualified Debug Store which allows for branch message storage qualified by CPL
- 1, 0, ECX, 5, vmx, Virtual Machine Extensions supported
- 1, 0, ECX, 6, smx, Safer Mode Extension supported
- 1, 0, ECX, 7, eist, Enhanced Intel SpeedStep Technology
- 1, 0, ECX, 8, tm2, Thermal Monitor 2
- 1, 0, ECX, 9, ssse3, Supplemental Streaming SIMD Extensions 3 (SSSE3)
- 1, 0, ECX, 10, l1_ctx_id, L1 data cache could be set to either adaptive mode or shared mode (check IA32_MISC_ENABLE bit 24 definition)
- 1, 0, ECX, 11, sdbg, IA32_DEBUG_INTERFACE MSR for silicon debug supported
- 1, 0, ECX, 12, fma, FMA extensions using YMM state supported
- 1, 0, ECX, 13, cmpxchg16b, 'CMPXCHG16B - Compare and Exchange Bytes' supported
- 1, 0, ECX, 14, xtpr_update, xTPR Update Control supported
- 1, 0, ECX, 15, pdcm, Perfmon and Debug Capability present
- 1, 0, ECX, 17, pcid, Process-Context Identifiers feature present
- 1, 0, ECX, 18, dca, Prefetching data from a memory mapped device supported
- 1, 0, ECX, 19, sse4_1, SSE4.1 feature present
- 1, 0, ECX, 20, sse4_2, SSE4.2 feature present
- 1, 0, ECX, 21, x2apic, x2APIC supported
- 1, 0, ECX, 22, movbe, MOVBE instruction supported
- 1, 0, ECX, 23, popcnt, POPCNT instruction supported
- 1, 0, ECX, 24, tsc_deadline_timer, LAPIC supports one-shot operation using a TSC deadline value
- 1, 0, ECX, 25, aesni, AESNI instruction supported
- 1, 0, ECX, 26, xsave, XSAVE/XRSTOR processor extended states (XSETBV/XGETBV/XCR0)
- 1, 0, ECX, 27, osxsave, OS has set CR4.OSXSAVE bit to enable XSETBV/XGETBV/XCR0
- 1, 0, ECX, 28, avx, AVX instruction supported
- 1, 0, ECX, 29, f16c, 16-bit floating-point conversion instruction supported
- 1, 0, ECX, 30, rdrand, RDRAND instruction supported
-
- 1, 0, EDX, 0, fpu, x87 FPU on chip
- 1, 0, EDX, 1, vme, Virtual-8086 Mode Enhancement
- 1, 0, EDX, 2, de, Debugging Extensions
- 1, 0, EDX, 3, pse, Page Size Extensions
- 1, 0, EDX, 4, tsc, Time Stamp Counter
- 1, 0, EDX, 5, msr, RDMSR and WRMSR Support
- 1, 0, EDX, 6, pae, Physical Address Extensions
- 1, 0, EDX, 7, mce, Machine Check Exception
- 1, 0, EDX, 8, cx8, CMPXCHG8B instr
- 1, 0, EDX, 9, apic, APIC on Chip
- 1, 0, EDX, 11, sep, SYSENTER and SYSEXIT instrs
- 1, 0, EDX, 12, mtrr, Memory Type Range Registers
- 1, 0, EDX, 13, pge, Page Global Bit
- 1, 0, EDX, 14, mca, Machine Check Architecture
- 1, 0, EDX, 15, cmov, Conditional Move Instrs
- 1, 0, EDX, 16, pat, Page Attribute Table
- 1, 0, EDX, 17, pse36, 36-Bit Page Size Extension
- 1, 0, EDX, 18, psn, Processor Serial Number
- 1, 0, EDX, 19, clflush, CLFLUSH instr
-# 1, 0, EDX, 20,
- 1, 0, EDX, 21, ds, Debug Store
- 1, 0, EDX, 22, acpi, Thermal Monitor and Software Controlled Clock Facilities
- 1, 0, EDX, 23, mmx, Intel MMX Technology
- 1, 0, EDX, 24, fxsr, XSAVE and FXRSTOR Instrs
- 1, 0, EDX, 25, sse, SSE
- 1, 0, EDX, 26, sse2, SSE2
- 1, 0, EDX, 27, ss, Self Snoop
- 1, 0, EDX, 28, hit, Max APIC IDs
- 1, 0, EDX, 29, tm, Thermal Monitor
-# 1, 0, EDX, 30,
- 1, 0, EDX, 31, pbe, Pending Break Enable
-
-# Leaf 02H
-# cache and TLB descriptor info
-
-# Leaf 03H
-# Precessor Serial Number, introduced on Pentium III, not valid for
-# latest models
-
-# Leaf 04H
-# thread/core and cache topology
- 4, 0, EAX, 4:0, cache_type, Cache type like instr/data or unified
- 4, 0, EAX, 7:5, cache_level, Cache Level (starts at 1)
- 4, 0, EAX, 8, cache_self_init, Cache Self Initialization
- 4, 0, EAX, 9, fully_associate, Fully Associative cache
-# 4, 0, EAX, 13:10, resvd, resvd
- 4, 0, EAX, 25:14, max_logical_id, Max number of addressable IDs for logical processors sharing the cache
- 4, 0, EAX, 31:26, max_phy_id, Max number of addressable IDs for processors in phy package
-
- 4, 0, EBX, 11:0, cache_linesize, Size of a cache line in bytes
- 4, 0, EBX, 21:12, cache_partition, Physical Line partitions
- 4, 0, EBX, 31:22, cache_ways, Ways of associativity
- 4, 0, ECX, 31:0, cache_sets, Number of Sets - 1
- 4, 0, EDX, 0, c_wbinvd, 1 means WBINVD/INVD is not ganranteed to act upon lower level caches of non-originating threads sharing this cache
- 4, 0, EDX, 1, c_incl, Whether cache is inclusive of lower cache level
- 4, 0, EDX, 2, c_comp_index, Complex Cache Indexing
-
-# Leaf 05H
-# MONITOR/MWAIT
- 5, 0, EAX, 15:0, min_mon_size, Smallest monitor line size in bytes
- 5, 0, EBX, 15:0, max_mon_size, Largest monitor line size in bytes
- 5, 0, ECX, 0, mwait_ext, Enum of Monitor-Mwait extensions supported
- 5, 0, ECX, 1, mwait_irq_break, Largest monitor line size in bytes
- 5, 0, EDX, 3:0, c0_sub_stats, Number of C0* sub C-states supported using MWAIT
- 5, 0, EDX, 7:4, c1_sub_stats, Number of C1* sub C-states supported using MWAIT
- 5, 0, EDX, 11:8, c2_sub_stats, Number of C2* sub C-states supported using MWAIT
- 5, 0, EDX, 15:12, c3_sub_stats, Number of C3* sub C-states supported using MWAIT
- 5, 0, EDX, 19:16, c4_sub_stats, Number of C4* sub C-states supported using MWAIT
- 5, 0, EDX, 23:20, c5_sub_stats, Number of C5* sub C-states supported using MWAIT
- 5, 0, EDX, 27:24, c6_sub_stats, Number of C6* sub C-states supported using MWAIT
- 5, 0, EDX, 31:28, c7_sub_stats, Number of C7* sub C-states supported using MWAIT
-
-# Leaf 06H
-# Thermal & Power Management
-
- 6, 0, EAX, 0, dig_temp, Digital temperature sensor supported
- 6, 0, EAX, 1, turbo, Intel Turbo Boost
- 6, 0, EAX, 2, arat, Always running APIC timer
-# 6, 0, EAX, 3, resv, Reserved
- 6, 0, EAX, 4, pln, Power limit notifications supported
- 6, 0, EAX, 5, ecmd, Clock modulation duty cycle extension supported
- 6, 0, EAX, 6, ptm, Package thermal management supported
- 6, 0, EAX, 7, hwp, HWP base register
- 6, 0, EAX, 8, hwp_notify, HWP notification
- 6, 0, EAX, 9, hwp_act_window, HWP activity window
- 6, 0, EAX, 10, hwp_energy, HWP energy performance preference
- 6, 0, EAX, 11, hwp_pkg_req, HWP package level request
-# 6, 0, EAX, 12, resv, Reserved
- 6, 0, EAX, 13, hdc, HDC base registers supported
- 6, 0, EAX, 14, turbo3, Turbo Boost Max 3.0
- 6, 0, EAX, 15, hwp_cap, Highest Performance change supported
- 6, 0, EAX, 16, hwp_peci, HWP PECI override is supported
- 6, 0, EAX, 17, hwp_flex, Flexible HWP is supported
- 6, 0, EAX, 18, hwp_fast, Fast access mode for the IA32_HWP_REQUEST MSR is supported
-# 6, 0, EAX, 19, resv, Reserved
- 6, 0, EAX, 20, hwp_ignr, Ignoring Idle Logical Processor HWP request is supported
-
- 6, 0, EBX, 3:0, therm_irq_thresh, Number of Interrupt Thresholds in Digital Thermal Sensor
- 6, 0, ECX, 0, aperfmperf, Presence of IA32_MPERF and IA32_APERF
- 6, 0, ECX, 3, energ_bias, Performance-energy bias preference supported
-
-# Leaf 07H
-# ECX == 0
-# AVX512 refers to https://en.wikipedia.org/wiki/AVX-512
-# XXX: Do we really need to enumerate each and every AVX512 sub features
-
- 7, 0, EBX, 0, fsgsbase, RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE supported
- 7, 0, EBX, 1, tsc_adjust, TSC_ADJUST MSR supported
- 7, 0, EBX, 2, sgx, Software Guard Extensions
- 7, 0, EBX, 3, bmi1, BMI1
- 7, 0, EBX, 4, hle, Hardware Lock Elision
- 7, 0, EBX, 5, avx2, AVX2
-# 7, 0, EBX, 6, fdp_excp_only, x87 FPU Data Pointer updated only on x87 exceptions
- 7, 0, EBX, 7, smep, Supervisor-Mode Execution Prevention
- 7, 0, EBX, 8, bmi2, BMI2
- 7, 0, EBX, 9, rep_movsb, Enhanced REP MOVSB/STOSB
- 7, 0, EBX, 10, invpcid, INVPCID instruction
- 7, 0, EBX, 11, rtm, Restricted Transactional Memory
- 7, 0, EBX, 12, rdt_m, Intel RDT Monitoring capability
- 7, 0, EBX, 13, depc_fpu_cs_ds, Deprecates FPU CS and FPU DS
- 7, 0, EBX, 14, mpx, Memory Protection Extensions
- 7, 0, EBX, 15, rdt_a, Intel RDT Allocation capability
- 7, 0, EBX, 16, avx512f, AVX512 Foundation instr
- 7, 0, EBX, 17, avx512dq, AVX512 Double and Quadword AVX512 instr
- 7, 0, EBX, 18, rdseed, RDSEED instr
- 7, 0, EBX, 19, adx, ADX instr
- 7, 0, EBX, 20, smap, Supervisor Mode Access Prevention
- 7, 0, EBX, 21, avx512ifma, AVX512 Integer Fused Multiply Add
-# 7, 0, EBX, 22, resvd, resvd
- 7, 0, EBX, 23, clflushopt, CLFLUSHOPT instr
- 7, 0, EBX, 24, clwb, CLWB instr
- 7, 0, EBX, 25, intel_pt, Intel Processor Trace instr
- 7, 0, EBX, 26, avx512pf, Prefetch
- 7, 0, EBX, 27, avx512er, AVX512 Exponent Reciproca instr
- 7, 0, EBX, 28, avx512cd, AVX512 Conflict Detection instr
- 7, 0, EBX, 29, sha, Intel Secure Hash Algorithm Extensions instr
- 7, 0, EBX, 30, avx512bw, AVX512 Byte & Word instr
- 7, 0, EBX, 31, avx512vl, AVX512 Vector Length Extentions (VL)
- 7, 0, ECX, 0, prefetchwt1, X
- 7, 0, ECX, 1, avx512vbmi, AVX512 Vector Byte Manipulation Instructions
- 7, 0, ECX, 2, umip, User-mode Instruction Prevention
-
- 7, 0, ECX, 3, pku, Protection Keys for User-mode pages
- 7, 0, ECX, 4, ospke, CR4 PKE set to enable protection keys
-# 7, 0, ECX, 16:5, resvd, resvd
- 7, 0, ECX, 21:17, mawau, The value of MAWAU used by the BNDLDX and BNDSTX instructions in 64-bit mode
- 7, 0, ECX, 22, rdpid, RDPID and IA32_TSC_AUX
-# 7, 0, ECX, 29:23, resvd, resvd
- 7, 0, ECX, 30, sgx_lc, SGX Launch Configuration
-# 7, 0, ECX, 31, resvd, resvd
-
-# Leaf 08H
-#
-
-
-# Leaf 09H
-# Direct Cache Access (DCA) information
- 9, 0, ECX, 31:0, dca_cap, The value of IA32_PLATFORM_DCA_CAP
+# SPDX-License-Identifier: CC0-1.0
+# Generator: x86-cpuid-db v1.0
-# Leaf 0AH
-# Architectural Performance Monitoring
#
-# Do we really need to print out the PMU related stuff?
-# Does normal user really care about it?
+# Auto-generated file.
+# Please submit all updates and bugfixes to https://x86-cpuid.org
#
- 0xA, 0, EAX, 7:0, pmu_ver, Performance Monitoring Unit version
- 0xA, 0, EAX, 15:8, pmu_gp_cnt_num, Numer of general-purose PMU counters per logical CPU
- 0xA, 0, EAX, 23:16, pmu_cnt_bits, Bit wideth of PMU counter
- 0xA, 0, EAX, 31:24, pmu_ebx_bits, Length of EBX bit vector to enumerate PMU events
-
- 0xA, 0, EBX, 0, pmu_no_core_cycle_evt, Core cycle event not available
- 0xA, 0, EBX, 1, pmu_no_instr_ret_evt, Instruction retired event not available
- 0xA, 0, EBX, 2, pmu_no_ref_cycle_evt, Reference cycles event not available
- 0xA, 0, EBX, 3, pmu_no_llc_ref_evt, Last-level cache reference event not available
- 0xA, 0, EBX, 4, pmu_no_llc_mis_evt, Last-level cache misses event not available
- 0xA, 0, EBX, 5, pmu_no_br_instr_ret_evt, Branch instruction retired event not available
- 0xA, 0, EBX, 6, pmu_no_br_mispredict_evt, Branch mispredict retired event not available
-
- 0xA, 0, ECX, 4:0, pmu_fixed_cnt_num, Performance Monitoring Unit version
- 0xA, 0, ECX, 12:5, pmu_fixed_cnt_bits, Numer of PMU counters per logical CPU
-
-# Leaf 0BH
-# Extended Topology Enumeration Leaf
-#
-
- 0xB, 0, EAX, 4:0, id_shift, Number of bits to shift right on x2APIC ID to get a unique topology ID of the next level type
- 0xB, 0, EBX, 15:0, cpu_nr, Number of logical processors at this level type
- 0xB, 0, ECX, 15:8, lvl_type, 0-Invalid 1-SMT 2-Core
- 0xB, 0, EDX, 31:0, x2apic_id, x2APIC ID the current logical processor
-
-
-# Leaf 0DH
-# Processor Extended State
- 0xD, 0, EAX, 0, x87, X87 state
- 0xD, 0, EAX, 1, sse, SSE state
- 0xD, 0, EAX, 2, avx, AVX state
- 0xD, 0, EAX, 4:3, mpx, MPX state
- 0xD, 0, EAX, 7:5, avx512, AVX-512 state
- 0xD, 0, EAX, 9, pkru, PKRU state
-
- 0xD, 0, EBX, 31:0, max_sz_xcr0, Maximum size (bytes) required by enabled features in XCR0
- 0xD, 0, ECX, 31:0, max_sz_xsave, Maximum size (bytes) of the XSAVE/XRSTOR save area
-
- 0xD, 1, EAX, 0, xsaveopt, XSAVEOPT available
- 0xD, 1, EAX, 1, xsavec, XSAVEC and compacted form supported
- 0xD, 1, EAX, 2, xgetbv, XGETBV supported
- 0xD, 1, EAX, 3, xsaves, XSAVES/XRSTORS and IA32_XSS supported
-
- 0xD, 1, EBX, 31:0, max_sz_xcr0, Maximum size (bytes) required by enabled features in XCR0
- 0xD, 1, ECX, 8, pt, PT state
- 0xD, 1, ECX, 11, cet_usr, CET user state
- 0xD, 1, ECX, 12, cet_supv, CET supervisor state
- 0xD, 1, ECX, 13, hdc, HDC state
- 0xD, 1, ECX, 16, hwp, HWP state
-
-# Leaf 0FH
-# Intel RDT Monitoring
-
- 0xF, 0, EBX, 31:0, rmid_range, Maximum range (zero-based) of RMID within this physical processor of all types
- 0xF, 0, EDX, 1, l3c_rdt_mon, L3 Cache RDT Monitoring supported
-
- 0xF, 1, ECX, 31:0, rmid_range, Maximum range (zero-based) of RMID of this types
- 0xF, 1, EDX, 0, l3c_ocp_mon, L3 Cache occupancy Monitoring supported
- 0xF, 1, EDX, 1, l3c_tbw_mon, L3 Cache Total Bandwidth Monitoring supported
- 0xF, 1, EDX, 2, l3c_lbw_mon, L3 Cache Local Bandwidth Monitoring supported
+# The basic row format is:
+# LEAF, SUBLEAVES, reg, bits, short_name , long_description
+
+# Leaf 0H
+# Maximum standard leaf number + CPU vendor string
+
+ 0, 0, eax, 31:0, max_std_leaf , Highest cpuid standard leaf supported
+ 0, 0, ebx, 31:0, cpu_vendorid_0 , CPU vendor ID string bytes 0 - 3
+ 0, 0, ecx, 31:0, cpu_vendorid_2 , CPU vendor ID string bytes 8 - 11
+ 0, 0, edx, 31:0, cpu_vendorid_1 , CPU vendor ID string bytes 4 - 7
+
+# Leaf 1H
+# CPU FMS (Family/Model/Stepping) + standard feature flags
+
+ 1, 0, eax, 3:0, stepping , Stepping ID
+ 1, 0, eax, 7:4, base_model , Base CPU model ID
+ 1, 0, eax, 11:8, base_family_id , Base CPU family ID
+ 1, 0, eax, 13:12, cpu_type , CPU type
+ 1, 0, eax, 19:16, ext_model , Extended CPU model ID
+ 1, 0, eax, 27:20, ext_family , Extended CPU family ID
+ 1, 0, ebx, 7:0, brand_id , Brand index
+ 1, 0, ebx, 15:8, clflush_size , CLFLUSH instruction cache line size
+ 1, 0, ebx, 23:16, n_logical_cpu , Logical CPU (HW threads) count
+ 1, 0, ebx, 31:24, local_apic_id , Initial local APIC physical ID
+ 1, 0, ecx, 0, pni , Streaming SIMD Extensions 3 (SSE3)
+ 1, 0, ecx, 1, pclmulqdq , PCLMULQDQ instruction support
+ 1, 0, ecx, 2, dtes64 , 64-bit DS save area
+ 1, 0, ecx, 3, monitor , MONITOR/MWAIT support
+ 1, 0, ecx, 4, ds_cpl , CPL Qualified Debug Store
+ 1, 0, ecx, 5, vmx , Virtual Machine Extensions
+ 1, 0, ecx, 6, smx , Safer Mode Extensions
+ 1, 0, ecx, 7, est , Enhanced Intel SpeedStep
+ 1, 0, ecx, 8, tm2 , Thermal Monitor 2
+ 1, 0, ecx, 9, ssse3 , Supplemental SSE3
+ 1, 0, ecx, 10, cid , L1 Context ID
+ 1, 0, ecx, 11, sdbg , Silicon Debug
+ 1, 0, ecx, 12, fma , FMA extensions using YMM state
+ 1, 0, ecx, 13, cx16 , CMPXCHG16B instruction support
+ 1, 0, ecx, 14, xtpr , xTPR Update Control
+ 1, 0, ecx, 15, pdcm , Perfmon and Debug Capability
+ 1, 0, ecx, 17, pcid , Process-context identifiers
+ 1, 0, ecx, 18, dca , Direct Cache Access
+ 1, 0, ecx, 19, sse4_1 , SSE4.1
+ 1, 0, ecx, 20, sse4_2 , SSE4.2
+ 1, 0, ecx, 21, x2apic , X2APIC support
+ 1, 0, ecx, 22, movbe , MOVBE instruction support
+ 1, 0, ecx, 23, popcnt , POPCNT instruction support
+ 1, 0, ecx, 24, tsc_deadline_timer , APIC timer one-shot operation
+ 1, 0, ecx, 25, aes , AES instructions
+ 1, 0, ecx, 26, xsave , XSAVE (and related instructions) support
+ 1, 0, ecx, 27, osxsave , XSAVE (and related instructions) are enabled by OS
+ 1, 0, ecx, 28, avx , AVX instructions support
+ 1, 0, ecx, 29, f16c , Half-precision floating-point conversion support
+ 1, 0, ecx, 30, rdrand , RDRAND instruction support
+ 1, 0, ecx, 31, guest_status , System is running as guest; (para-)virtualized system
+ 1, 0, edx, 0, fpu , Floating-Point Unit on-chip (x87)
+ 1, 0, edx, 1, vme , Virtual-8086 Mode Extensions
+ 1, 0, edx, 2, de , Debugging Extensions
+ 1, 0, edx, 3, pse , Page Size Extension
+ 1, 0, edx, 4, tsc , Time Stamp Counter
+ 1, 0, edx, 5, msr , Model-Specific Registers (RDMSR and WRMSR support)
+ 1, 0, edx, 6, pae , Physical Address Extensions
+ 1, 0, edx, 7, mce , Machine Check Exception
+ 1, 0, edx, 8, cx8 , CMPXCHG8B instruction
+ 1, 0, edx, 9, apic , APIC on-chip
+ 1, 0, edx, 11, sep , SYSENTER, SYSEXIT, and associated MSRs
+ 1, 0, edx, 12, mtrr , Memory Type Range Registers
+ 1, 0, edx, 13, pge , Page Global Extensions
+ 1, 0, edx, 14, mca , Machine Check Architecture
+ 1, 0, edx, 15, cmov , Conditional Move Instruction
+ 1, 0, edx, 16, pat , Page Attribute Table
+ 1, 0, edx, 17, pse36 , Page Size Extension (36-bit)
+ 1, 0, edx, 18, pn , Processor Serial Number
+ 1, 0, edx, 19, clflush , CLFLUSH instruction
+ 1, 0, edx, 21, dts , Debug Store
+ 1, 0, edx, 22, acpi , Thermal monitor and clock control
+ 1, 0, edx, 23, mmx , MMX instructions
+ 1, 0, edx, 24, fxsr , FXSAVE and FXRSTOR instructions
+ 1, 0, edx, 25, sse , SSE instructions
+ 1, 0, edx, 26, sse2 , SSE2 instructions
+ 1, 0, edx, 27, ss , Self Snoop
+ 1, 0, edx, 28, ht , Hyper-threading
+ 1, 0, edx, 29, tm , Thermal Monitor
+ 1, 0, edx, 30, ia64 , Legacy IA-64 (Itanium) support bit, now reserved
+ 1, 0, edx, 31, pbe , Pending Break Enable
+
+# Leaf 2H
+# Intel cache and TLB information one-byte descriptors
+
+ 2, 0, eax, 7:0, iteration_count , Number of times this CPUID leaf must be queried
+ 2, 0, eax, 15:8, desc1 , Descriptor #1
+ 2, 0, eax, 23:16, desc2 , Descriptor #2
+ 2, 0, eax, 30:24, desc3 , Descriptor #3
+ 2, 0, eax, 31, eax_invalid , Descriptors 1-3 are invalid if set
+ 2, 0, ebx, 7:0, desc4 , Descriptor #4
+ 2, 0, ebx, 15:8, desc5 , Descriptor #5
+ 2, 0, ebx, 23:16, desc6 , Descriptor #6
+ 2, 0, ebx, 30:24, desc7 , Descriptor #7
+ 2, 0, ebx, 31, ebx_invalid , Descriptors 4-7 are invalid if set
+ 2, 0, ecx, 7:0, desc8 , Descriptor #8
+ 2, 0, ecx, 15:8, desc9 , Descriptor #9
+ 2, 0, ecx, 23:16, desc10 , Descriptor #10
+ 2, 0, ecx, 30:24, desc11 , Descriptor #11
+ 2, 0, ecx, 31, ecx_invalid , Descriptors 8-11 are invalid if set
+ 2, 0, edx, 7:0, desc12 , Descriptor #12
+ 2, 0, edx, 15:8, desc13 , Descriptor #13
+ 2, 0, edx, 23:16, desc14 , Descriptor #14
+ 2, 0, edx, 30:24, desc15 , Descriptor #15
+ 2, 0, edx, 31, edx_invalid , Descriptors 12-15 are invalid if set
+
+# Leaf 4H
+# Intel deterministic cache parameters
+
+ 4, 31:0, eax, 4:0, cache_type , Cache type field
+ 4, 31:0, eax, 7:5, cache_level , Cache level (1-based)
+ 4, 31:0, eax, 8, cache_self_init , Self-initializing cache level
+ 4, 31:0, eax, 9, fully_associative , Fully-associative cache
+ 4, 31:0, eax, 25:14, num_threads_sharing , Number of logical CPUs sharing this cache
+ 4, 31:0, eax, 31:26, num_cores_on_die , Number of cores in the physical package
+ 4, 31:0, ebx, 11:0, cache_linesize , System coherency line size (0-based)
+ 4, 31:0, ebx, 21:12, cache_npartitions , Physical line partitions (0-based)
+ 4, 31:0, ebx, 31:22, cache_nways , Ways of associativity (0-based)
+ 4, 31:0, ecx, 30:0, cache_nsets , Cache number of sets (0-based)
+ 4, 31:0, edx, 0, wbinvd_rll_no_guarantee, WBINVD/INVD not guaranteed for Remote Lower-Level caches
+ 4, 31:0, edx, 1, ll_inclusive , Cache is inclusive of Lower-Level caches
+ 4, 31:0, edx, 2, complex_indexing , Not a direct-mapped cache (complex function)
+
+# Leaf 5H
+# MONITOR/MWAIT instructions enumeration
+
+ 5, 0, eax, 15:0, min_mon_size , Smallest monitor-line size, in bytes
+ 5, 0, ebx, 15:0, max_mon_size , Largest monitor-line size, in bytes
+ 5, 0, ecx, 0, mwait_ext , Enumeration of MONITOR/MWAIT extensions is supported
+ 5, 0, ecx, 1, mwait_irq_break , Interrupts as a break-event for MWAIT is supported
+ 5, 0, edx, 3:0, n_c0_substates , Number of C0 sub C-states supported using MWAIT
+ 5, 0, edx, 7:4, n_c1_substates , Number of C1 sub C-states supported using MWAIT
+ 5, 0, edx, 11:8, n_c2_substates , Number of C2 sub C-states supported using MWAIT
+ 5, 0, edx, 15:12, n_c3_substates , Number of C3 sub C-states supported using MWAIT
+ 5, 0, edx, 19:16, n_c4_substates , Number of C4 sub C-states supported using MWAIT
+ 5, 0, edx, 23:20, n_c5_substates , Number of C5 sub C-states supported using MWAIT
+ 5, 0, edx, 27:24, n_c6_substates , Number of C6 sub C-states supported using MWAIT
+ 5, 0, edx, 31:28, n_c7_substates , Number of C7 sub C-states supported using MWAIT
+
+# Leaf 6H
+# Thermal and Power Management enumeration
+
+ 6, 0, eax, 0, dtherm , Digital temperature sensor
+ 6, 0, eax, 1, turbo_boost , Intel Turbo Boost
+ 6, 0, eax, 2, arat , Always-Running APIC Timer (not affected by p-state)
+ 6, 0, eax, 4, pln , Power Limit Notification (PLN) event
+ 6, 0, eax, 5, ecmd , Clock modulation duty cycle extension
+ 6, 0, eax, 6, pts , Package thermal management
+ 6, 0, eax, 7, hwp , HWP (Hardware P-states) base registers are supported
+ 6, 0, eax, 8, hwp_notify , HWP notification (IA32_HWP_INTERRUPT MSR)
+ 6, 0, eax, 9, hwp_act_window , HWP activity window (IA32_HWP_REQUEST[bits 41:32]) supported
+ 6, 0, eax, 10, hwp_epp , HWP Energy Performance Preference
+ 6, 0, eax, 11, hwp_pkg_req , HWP Package Level Request
+ 6, 0, eax, 13, hdc_base_regs , HDC base registers are supported
+ 6, 0, eax, 14, turbo_boost_3_0 , Intel Turbo Boost Max 3.0
+ 6, 0, eax, 15, hwp_capabilities , HWP Highest Performance change
+ 6, 0, eax, 16, hwp_peci_override , HWP PECI override
+ 6, 0, eax, 17, hwp_flexible , Flexible HWP
+ 6, 0, eax, 18, hwp_fast , IA32_HWP_REQUEST MSR fast access mode
+ 6, 0, eax, 19, hfi , HW_FEEDBACK MSRs supported
+ 6, 0, eax, 20, hwp_ignore_idle , Ignoring idle logical CPU HWP req is supported
+ 6, 0, eax, 23, thread_director , Intel thread director support
+ 6, 0, eax, 24, therm_interrupt_bit25 , IA32_THERM_INTERRUPT MSR bit 25 is supported
+ 6, 0, ebx, 3:0, n_therm_thresholds , Digital thermometer thresholds
+ 6, 0, ecx, 0, aperfmperf , MPERF/APERF MSRs (effective frequency interface)
+ 6, 0, ecx, 3, epb , IA32_ENERGY_PERF_BIAS MSR support
+ 6, 0, ecx, 15:8, thrd_director_nclasses , Number of classes, Intel thread director
+ 6, 0, edx, 0, perfcap_reporting , Performance capability reporting
+ 6, 0, edx, 1, encap_reporting , Energy efficiency capability reporting
+ 6, 0, edx, 11:8, feedback_sz , HW feedback interface struct size, in 4K pages
+ 6, 0, edx, 31:16, this_lcpu_hwfdbk_idx , This logical CPU index @ HW feedback struct, 0-based
+
+# Leaf 7H
+# Extended CPU features enumeration
+
+ 7, 0, eax, 31:0, leaf7_n_subleaves , Number of cpuid 0x7 subleaves
+ 7, 0, ebx, 0, fsgsbase , FSBASE/GSBASE read/write support
+ 7, 0, ebx, 1, tsc_adjust , IA32_TSC_ADJUST MSR supported
+ 7, 0, ebx, 2, sgx , Intel SGX (Software Guard Extensions)
+ 7, 0, ebx, 3, bmi1 , Bit manipulation extensions group 1
+ 7, 0, ebx, 4, hle , Hardware Lock Elision
+ 7, 0, ebx, 5, avx2 , AVX2 instruction set
+ 7, 0, ebx, 6, fdp_excptn_only , FPU Data Pointer updated only on x87 exceptions
+ 7, 0, ebx, 7, smep , Supervisor Mode Execution Protection
+ 7, 0, ebx, 8, bmi2 , Bit manipulation extensions group 2
+ 7, 0, ebx, 9, erms , Enhanced REP MOVSB/STOSB
+ 7, 0, ebx, 10, invpcid , INVPCID instruction (Invalidate Processor Context ID)
+ 7, 0, ebx, 11, rtm , Intel restricted transactional memory
+ 7, 0, ebx, 12, cqm , Intel RDT-CMT / AMD Platform-QoS cache monitoring
+ 7, 0, ebx, 13, zero_fcs_fds , Deprecated FPU CS/DS (stored as zero)
+ 7, 0, ebx, 14, mpx , Intel memory protection extensions
+ 7, 0, ebx, 15, rdt_a , Intel RDT / AMD Platform-QoS Enforcement
+ 7, 0, ebx, 16, avx512f , AVX-512 foundation instructions
+ 7, 0, ebx, 17, avx512dq , AVX-512 double/quadword instructions
+ 7, 0, ebx, 18, rdseed , RDSEED instruction
+ 7, 0, ebx, 19, adx , ADCX/ADOX instructions
+ 7, 0, ebx, 20, smap , Supervisor mode access prevention
+ 7, 0, ebx, 21, avx512ifma , AVX-512 integer fused multiply add
+ 7, 0, ebx, 23, clflushopt , CLFLUSHOPT instruction
+ 7, 0, ebx, 24, clwb , CLWB instruction
+ 7, 0, ebx, 25, intel_pt , Intel processor trace
+ 7, 0, ebx, 26, avx512pf , AVX-512 prefetch instructions
+ 7, 0, ebx, 27, avx512er , AVX-512 exponent/reciprocal instrs
+ 7, 0, ebx, 28, avx512cd , AVX-512 conflict detection instrs
+ 7, 0, ebx, 29, sha_ni , SHA/SHA256 instructions
+ 7, 0, ebx, 30, avx512bw , AVX-512 BW (byte/word granular) instructions
+ 7, 0, ebx, 31, avx512vl , AVX-512 VL (128/256 vector length) extensions
+ 7, 0, ecx, 0, prefetchwt1 , PREFETCHWT1 (Intel Xeon Phi only)
+ 7, 0, ecx, 1, avx512vbmi , AVX-512 Vector byte manipulation instrs
+ 7, 0, ecx, 2, umip , User mode instruction protection
+ 7, 0, ecx, 3, pku , Protection keys for user-space
+ 7, 0, ecx, 4, ospke , OS protection keys enable
+ 7, 0, ecx, 5, waitpkg , WAITPKG instructions
+ 7, 0, ecx, 6, avx512_vbmi2 , AVX-512 vector byte manipulation instrs group 2
+ 7, 0, ecx, 7, cet_ss , CET shadow stack features
+ 7, 0, ecx, 8, gfni , Galois field new instructions
+ 7, 0, ecx, 9, vaes , Vector AES instrs
+ 7, 0, ecx, 10, vpclmulqdq , VPCLMULQDQ 256-bit instruction support
+ 7, 0, ecx, 11, avx512_vnni , Vector neural network instructions
+ 7, 0, ecx, 12, avx512_bitalg , AVX-512 bit count/shuffle
+ 7, 0, ecx, 13, tme , Intel total memory encryption
+ 7, 0, ecx, 14, avx512_vpopcntdq , AVX-512: POPCNT for vectors of DW/QW
+ 7, 0, ecx, 16, la57 , 57-bit linear addresses (five-level paging)
+ 7, 0, ecx, 21:17, mawau_val_lm , BNDLDX/BNDSTX MAWAU value in 64-bit mode
+ 7, 0, ecx, 22, rdpid , RDPID instruction
+ 7, 0, ecx, 23, key_locker , Intel key locker support
+ 7, 0, ecx, 24, bus_lock_detect , OS bus-lock detection
+ 7, 0, ecx, 25, cldemote , CLDEMOTE instruction
+ 7, 0, ecx, 27, movdiri , MOVDIRI instruction
+ 7, 0, ecx, 28, movdir64b , MOVDIR64B instruction
+ 7, 0, ecx, 29, enqcmd , Enqueue stores supported (ENQCMD{,S})
+ 7, 0, ecx, 30, sgx_lc , Intel SGX launch configuration
+ 7, 0, ecx, 31, pks , Protection keys for supervisor-mode pages
+ 7, 0, edx, 1, sgx_keys , Intel SGX attestation services
+ 7, 0, edx, 2, avx512_4vnniw , AVX-512 neural network instructions
+ 7, 0, edx, 3, avx512_4fmaps , AVX-512 multiply accumulation single precision
+ 7, 0, edx, 4, fsrm , Fast short REP MOV
+ 7, 0, edx, 5, uintr , CPU supports user interrupts
+ 7, 0, edx, 8, avx512_vp2intersect , VP2INTERSECT{D,Q} instructions
+ 7, 0, edx, 9, srdbs_ctrl , SRBDS mitigation MSR available
+ 7, 0, edx, 10, md_clear , VERW MD_CLEAR microcode support
+ 7, 0, edx, 11, rtm_always_abort , XBEGIN (RTM transaction) always aborts
+ 7, 0, edx, 13, tsx_force_abort , MSR TSX_FORCE_ABORT, RTM_ABORT bit, supported
+ 7, 0, edx, 14, serialize , SERIALIZE instruction
+ 7, 0, edx, 15, hybrid_cpu , The CPU is identified as a 'hybrid part'
+ 7, 0, edx, 16, tsxldtrk , TSX suspend/resume load address tracking
+ 7, 0, edx, 18, pconfig , PCONFIG instruction
+ 7, 0, edx, 19, arch_lbr , Intel architectural LBRs
+ 7, 0, edx, 20, ibt , CET indirect branch tracking
+ 7, 0, edx, 22, amx_bf16 , AMX-BF16: tile bfloat16 support
+ 7, 0, edx, 23, avx512_fp16 , AVX-512 FP16 instructions
+ 7, 0, edx, 24, amx_tile , AMX-TILE: tile architecture support
+ 7, 0, edx, 25, amx_int8 , AMX-INT8: tile 8-bit integer support
+ 7, 0, edx, 26, spec_ctrl , Speculation Control (IBRS/IBPB: indirect branch restrictions)
+ 7, 0, edx, 27, intel_stibp , Single thread indirect branch predictors
+ 7, 0, edx, 28, flush_l1d , FLUSH L1D cache: IA32_FLUSH_CMD MSR
+ 7, 0, edx, 29, arch_capabilities , Intel IA32_ARCH_CAPABILITIES MSR
+ 7, 0, edx, 30, core_capabilities , IA32_CORE_CAPABILITIES MSR
+ 7, 0, edx, 31, spec_ctrl_ssbd , Speculative store bypass disable
+ 7, 1, eax, 4, avx_vnni , AVX-VNNI instructions
+ 7, 1, eax, 5, avx512_bf16 , AVX-512 bFloat16 instructions
+ 7, 1, eax, 6, lass , Linear address space separation
+ 7, 1, eax, 7, cmpccxadd , CMPccXADD instructions
+ 7, 1, eax, 8, arch_perfmon_ext , ArchPerfmonExt: CPUID leaf 0x23 is supported
+ 7, 1, eax, 10, fzrm , Fast zero-length REP MOVSB
+ 7, 1, eax, 11, fsrs , Fast short REP STOSB
+ 7, 1, eax, 12, fsrc , Fast Short REP CMPSB/SCASB
+ 7, 1, eax, 17, fred , FRED: Flexible return and event delivery transitions
+ 7, 1, eax, 18, lkgs , LKGS: Load 'kernel' (userspace) GS
+ 7, 1, eax, 19, wrmsrns , WRMSRNS instr (WRMSR-non-serializing)
+ 7, 1, eax, 21, amx_fp16 , AMX-FP16: FP16 tile operations
+ 7, 1, eax, 22, hreset , History reset support
+ 7, 1, eax, 23, avx_ifma , Integer fused multiply add
+ 7, 1, eax, 26, lam , Linear address masking
+ 7, 1, eax, 27, rd_wr_msrlist , RDMSRLIST/WRMSRLIST instructions
+ 7, 1, ebx, 0, intel_ppin , Protected processor inventory number (PPIN{,_CTL} MSRs)
+ 7, 1, edx, 4, avx_vnni_int8 , AVX-VNNI-INT8 instructions
+ 7, 1, edx, 5, avx_ne_convert , AVX-NE-CONVERT instructions
+ 7, 1, edx, 8, amx_complex , AMX-COMPLEX instructions (starting from Granite Rapids)
+ 7, 1, edx, 14, prefetchit_0_1 , PREFETCHIT0/1 instructions
+ 7, 1, edx, 18, cet_sss , CET supervisor shadow stacks safe to use
+ 7, 2, edx, 0, intel_psfd , Intel predictive store forward disable
+ 7, 2, edx, 1, ipred_ctrl , MSR bits IA32_SPEC_CTRL.IPRED_DIS_{U,S}
+ 7, 2, edx, 2, rrsba_ctrl , MSR bits IA32_SPEC_CTRL.RRSBA_DIS_{U,S}
+ 7, 2, edx, 3, ddp_ctrl , MSR bit IA32_SPEC_CTRL.DDPD_U
+ 7, 2, edx, 4, bhi_ctrl , MSR bit IA32_SPEC_CTRL.BHI_DIS_S
+ 7, 2, edx, 5, mcdt_no , MCDT mitigation not needed
+ 7, 2, edx, 6, uclock_disable , UC-lock disable is supported
+
+# Leaf 9H
+# Intel DCA (Direct Cache Access) enumeration
+
+ 9, 0, eax, 0, dca_enabled_in_bios , DCA is enabled in BIOS
+
+# Leaf AH
+# Intel PMU (Performance Monitoring Unit) enumeration
+
+ 0xa, 0, eax, 7:0, pmu_version , Performance monitoring unit version ID
+ 0xa, 0, eax, 15:8, pmu_n_gcounters , Number of general PMU counters per logical CPU
+ 0xa, 0, eax, 23:16, pmu_gcounters_nbits , Bitwidth of PMU general counters
+ 0xa, 0, eax, 31:24, pmu_cpuid_ebx_bits , Length of cpuid leaf 0xa EBX bit vector
+ 0xa, 0, ebx, 0, no_core_cycle_evt , Core cycle event not available
+ 0xa, 0, ebx, 1, no_insn_retired_evt , Instruction retired event not available
+ 0xa, 0, ebx, 2, no_refcycle_evt , Reference cycles event not available
+ 0xa, 0, ebx, 3, no_llc_ref_evt , LLC-reference event not available
+ 0xa, 0, ebx, 4, no_llc_miss_evt , LLC-misses event not available
+ 0xa, 0, ebx, 5, no_br_insn_ret_evt , Branch instruction retired event not available
+ 0xa, 0, ebx, 6, no_br_mispredict_evt , Branch mispredict retired event not available
+ 0xa, 0, ebx, 7, no_td_slots_evt , Topdown slots event not available
+ 0xa, 0, ecx, 31:0, pmu_fcounters_bitmap , Fixed-function PMU counters support bitmap
+ 0xa, 0, edx, 4:0, pmu_n_fcounters , Number of fixed PMU counters
+ 0xa, 0, edx, 12:5, pmu_fcounters_nbits , Bitwidth of PMU fixed counters
+ 0xa, 0, edx, 15, anythread_depr , AnyThread deprecation
+
+# Leaf BH
+# CPUs v1 extended topology enumeration
+
+ 0xb, 1:0, eax, 4:0, x2apic_id_shift , Bit width of this level (previous levels inclusive)
+ 0xb, 1:0, ebx, 15:0, domain_lcpus_count , Logical CPUs count across all instances of this domain
+ 0xb, 1:0, ecx, 7:0, domain_nr , This domain level (subleaf ID)
+ 0xb, 1:0, ecx, 15:8, domain_type , This domain type
+ 0xb, 1:0, edx, 31:0, x2apic_id , x2APIC ID of current logical CPU
+
+# Leaf DH
+# Processor extended state enumeration
+
+ 0xd, 0, eax, 0, xcr0_x87 , XCR0.X87 (bit 0) supported
+ 0xd, 0, eax, 1, xcr0_sse , XCR0.SSE (bit 1) supported
+ 0xd, 0, eax, 2, xcr0_avx , XCR0.AVX (bit 2) supported
+ 0xd, 0, eax, 3, xcr0_mpx_bndregs , XCR0.BNDREGS (bit 3) supported (MPX BND0-BND3 regs)
+ 0xd, 0, eax, 4, xcr0_mpx_bndcsr , XCR0.BNDCSR (bit 4) supported (MPX BNDCFGU/BNDSTATUS regs)
+ 0xd, 0, eax, 5, xcr0_avx512_opmask , XCR0.OPMASK (bit 5) supported (AVX-512 k0-k7 regs)
+ 0xd, 0, eax, 6, xcr0_avx512_zmm_hi256 , XCR0.ZMM_Hi256 (bit 6) supported (AVX-512 ZMM0->ZMM7/15 regs)
+ 0xd, 0, eax, 7, xcr0_avx512_hi16_zmm , XCR0.HI16_ZMM (bit 7) supported (AVX-512 ZMM16->ZMM31 regs)
+ 0xd, 0, eax, 9, xcr0_pkru , XCR0.PKRU (bit 9) supported (XSAVE PKRU reg)
+ 0xd, 0, eax, 11, xcr0_cet_u , AMD XCR0.CET_U (bit 11) supported (CET user state)
+ 0xd, 0, eax, 12, xcr0_cet_s , AMD XCR0.CET_S (bit 12) supported (CET supervisor state)
+ 0xd, 0, eax, 17, xcr0_tileconfig , XCR0.TILECONFIG (bit 17) supported (AMX can manage TILECONFIG)
+ 0xd, 0, eax, 18, xcr0_tiledata , XCR0.TILEDATA (bit 18) supported (AMX can manage TILEDATA)
+ 0xd, 0, ebx, 31:0, xsave_sz_xcr0_enabled , XSAVE/XRSTOR area byte size, for XCR0 enabled features
+ 0xd, 0, ecx, 31:0, xsave_sz_max , XSAVE/XRSTOR area max byte size, all CPU features
+ 0xd, 0, edx, 30, xcr0_lwp , AMD XCR0.LWP (bit 62) supported (Light-weight Profiling)
+ 0xd, 1, eax, 0, xsaveopt , XSAVEOPT instruction
+ 0xd, 1, eax, 1, xsavec , XSAVEC instruction
+ 0xd, 1, eax, 2, xgetbv1 , XGETBV instruction with ECX = 1
+ 0xd, 1, eax, 3, xsaves , XSAVES/XRSTORS instructions (and XSS MSR)
+ 0xd, 1, eax, 4, xfd , Extended feature disable support
+ 0xd, 1, ebx, 31:0, xsave_sz_xcr0_xmms_enabled, XSAVE area size, all XCR0 and XSS features enabled
+ 0xd, 1, ecx, 8, xss_pt , PT state, supported
+ 0xd, 1, ecx, 10, xss_pasid , PASID state, supported
+ 0xd, 1, ecx, 11, xss_cet_u , CET user state, supported
+ 0xd, 1, ecx, 12, xss_cet_p , CET supervisor state, supported
+ 0xd, 1, ecx, 13, xss_hdc , HDC state, supported
+ 0xd, 1, ecx, 14, xss_uintr , UINTR state, supported
+ 0xd, 1, ecx, 15, xss_lbr , LBR state, supported
+ 0xd, 1, ecx, 16, xss_hwp , HWP state, supported
+ 0xd, 63:2, eax, 31:0, xsave_sz , Size of save area for subleaf-N feature, in bytes
+ 0xd, 63:2, ebx, 31:0, xsave_offset , Offset of save area for subleaf-N feature, in bytes
+ 0xd, 63:2, ecx, 0, is_xss_bit , Subleaf N describes an XSS bit, otherwise XCR0 bit
+ 0xd, 63:2, ecx, 1, compacted_xsave_64byte_aligned, When compacted, subleaf-N feature xsave area is 64-byte aligned
+
+# Leaf FH
+# Intel RDT / AMD PQoS resource monitoring
+
+ 0xf, 0, ebx, 31:0, core_rmid_max , RMID max, within this core, all types (0-based)
+ 0xf, 0, edx, 1, cqm_llc , LLC QoS-monitoring supported
+ 0xf, 1, eax, 7:0, l3c_qm_bitwidth , L3 QoS-monitoring counter bitwidth (24-based)
+ 0xf, 1, eax, 8, l3c_qm_overflow_bit , QM_CTR MSR bit 61 is an overflow bit
+ 0xf, 1, ebx, 31:0, l3c_qm_conver_factor , QM_CTR MSR conversion factor to bytes
+ 0xf, 1, ecx, 31:0, l3c_qm_rmid_max , L3 QoS-monitoring max RMID
+ 0xf, 1, edx, 0, cqm_occup_llc , L3 QoS occupancy monitoring supported
+ 0xf, 1, edx, 1, cqm_mbm_total , L3 QoS total bandwidth monitoring supported
+ 0xf, 1, edx, 2, cqm_mbm_local , L3 QoS local bandwidth monitoring supported
# Leaf 10H
-# Intel RDT Allocation
-
- 0x10, 0, EBX, 1, l3c_rdt_alloc, L3 Cache Allocation supported
- 0x10, 0, EBX, 2, l2c_rdt_alloc, L2 Cache Allocation supported
- 0x10, 0, EBX, 3, mem_bw_alloc, Memory Bandwidth Allocation supported
-
+# Intel RDT / AMD PQoS allocation enumeration
+
+ 0x10, 0, ebx, 1, cat_l3 , L3 Cache Allocation Technology supported
+ 0x10, 0, ebx, 2, cat_l2 , L2 Cache Allocation Technology supported
+ 0x10, 0, ebx, 3, mba , Memory Bandwidth Allocation supported
+ 0x10, 2:1, eax, 4:0, cat_cbm_len , L3/L2_CAT capacity bitmask length, minus-one notation
+ 0x10, 2:1, ebx, 31:0, cat_units_bitmap , L3/L2_CAT bitmap of allocation units
+ 0x10, 2:1, ecx, 1, l3_cat_cos_infreq_updates, L3_CAT COS updates should be infrequent
+ 0x10, 2:1, ecx, 2, cdp_l3 , L3/L2_CAT CDP (Code and Data Prioritization)
+ 0x10, 2:1, ecx, 3, cat_sparse_1s , L3/L2_CAT non-contiguous 1s value supported
+ 0x10, 2:1, edx, 15:0, cat_cos_max , L3/L2_CAT max COS (Class of Service) supported
+ 0x10, 3, eax, 11:0, mba_max_delay , Max MBA throttling value; minus-one notation
+ 0x10, 3, ecx, 0, per_thread_mba , Per-thread MBA controls are supported
+ 0x10, 3, ecx, 2, mba_delay_linear , Delay values are linear
+ 0x10, 3, edx, 15:0, mba_cos_max , MBA max Class of Service supported
# Leaf 12H
-# SGX Capability
-#
-# Some detailed SGX features not added yet
-
- 0x12, 0, EAX, 0, sgx1, L3 Cache Allocation supported
- 0x12, 1, EAX, 0, sgx2, L3 Cache Allocation supported
-
+# Intel Software Guard Extensions (SGX) enumeration
+
+ 0x12, 0, eax, 0, sgx1 , SGX1 leaf functions supported
+ 0x12, 0, eax, 1, sgx2 , SGX2 leaf functions supported
+ 0x12, 0, eax, 5, enclv_leaves , ENCLV leaves (E{INC,DEC}VIRTCHILD, ESETCONTEXT) supported
+ 0x12, 0, eax, 6, encls_leaves , ENCLS leaves (ENCLS ETRACKC, ERDINFO, ELDBC, ELDUC) supported
+ 0x12, 0, eax, 7, enclu_everifyreport2 , ENCLU leaf EVERIFYREPORT2 supported
+ 0x12, 0, eax, 10, encls_eupdatesvn , ENCLS leaf EUPDATESVN supported
+ 0x12, 0, eax, 11, sgx_edeccssa , ENCLU leaf EDECCSSA supported
+ 0x12, 0, ebx, 0, miscselect_exinfo , SSA.MISC frame: reporting #PF and #GP exceptions inside enclave supported
+ 0x12, 0, ebx, 1, miscselect_cpinfo , SSA.MISC frame: reporting #CP exceptions inside enclave supported
+ 0x12, 0, edx, 7:0, max_enclave_sz_not64 , Maximum enclave size in non-64-bit mode (log2)
+ 0x12, 0, edx, 15:8, max_enclave_sz_64 , Maximum enclave size in 64-bit mode (log2)
+ 0x12, 1, eax, 0, secs_attr_init , ATTRIBUTES.INIT supported (enclave initialized by EINIT)
+ 0x12, 1, eax, 1, secs_attr_debug , ATTRIBUTES.DEBUG supported (enclave permits debugger read/write)
+ 0x12, 1, eax, 2, secs_attr_mode64bit , ATTRIBUTES.MODE64BIT supported (enclave runs in 64-bit mode)
+ 0x12, 1, eax, 4, secs_attr_provisionkey , ATTRIBUTES.PROVISIONKEY supported (provisioning key available)
+ 0x12, 1, eax, 5, secs_attr_einittoken_key, ATTRIBUTES.EINITTOKEN_KEY supported (EINIT token key available)
+ 0x12, 1, eax, 6, secs_attr_cet , ATTRIBUTES.CET supported (enable CET attributes)
+ 0x12, 1, eax, 7, secs_attr_kss , ATTRIBUTES.KSS supported (Key Separation and Sharing enabled)
+ 0x12, 1, eax, 10, secs_attr_aexnotify , ATTRIBUTES.AEXNOTIFY supported (enclave threads may get AEX notifications)
+ 0x12, 1, ecx, 0, xfrm_x87 , Enclave XFRM.X87 (bit 0) supported
+ 0x12, 1, ecx, 1, xfrm_sse , Enclave XFRM.SSE (bit 1) supported
+ 0x12, 1, ecx, 2, xfrm_avx , Enclave XFRM.AVX (bit 2) supported
+ 0x12, 1, ecx, 3, xfrm_mpx_bndregs , Enclave XFRM.BNDREGS (bit 3) supported (MPX BND0-BND3 regs)
+ 0x12, 1, ecx, 4, xfrm_mpx_bndcsr , Enclave XFRM.BNDCSR (bit 4) supported (MPX BNDCFGU/BNDSTATUS regs)
+ 0x12, 1, ecx, 5, xfrm_avx512_opmask , Enclave XFRM.OPMASK (bit 5) supported (AVX-512 k0-k7 regs)
+ 0x12, 1, ecx, 6, xfrm_avx512_zmm_hi256 , Enclave XFRM.ZMM_Hi256 (bit 6) supported (AVX-512 ZMM0->ZMM7/15 regs)
+ 0x12, 1, ecx, 7, xfrm_avx512_hi16_zmm , Enclave XFRM.HI16_ZMM (bit 7) supported (AVX-512 ZMM16->ZMM31 regs)
+ 0x12, 1, ecx, 9, xfrm_pkru , Enclave XFRM.PKRU (bit 9) supported (XSAVE PKRU reg)
+ 0x12, 1, ecx, 17, xfrm_tileconfig , Enclave XFRM.TILECONFIG (bit 17) supported (AMX can manage TILECONFIG)
+ 0x12, 1, ecx, 18, xfrm_tiledata , Enclave XFRM.TILEDATA (bit 18) supported (AMX can manage TILEDATA)
+ 0x12, 31:2, eax, 3:0, subleaf_type , Subleaf type (dictates output layout)
+ 0x12, 31:2, eax, 31:12, epc_sec_base_addr_0 , EPC section base addr, bits[12:31]
+ 0x12, 31:2, ebx, 19:0, epc_sec_base_addr_1 , EPC section base addr, bits[32:51]
+ 0x12, 31:2, ecx, 3:0, epc_sec_type , EPC section type / property encoding
+ 0x12, 31:2, ecx, 31:12, epc_sec_size_0 , EPC section size, bits[12:31]
+ 0x12, 31:2, edx, 19:0, epc_sec_size_1 , EPC section size, bits[32:51]
# Leaf 14H
-# Intel Processor Tracer
-#
+# Intel Processor Trace enumeration
+
+ 0x14, 0, eax, 31:0, pt_max_subleaf , Max cpuid 0x14 subleaf
+ 0x14, 0, ebx, 0, cr3_filtering , IA32_RTIT_CR3_MATCH is accessible
+ 0x14, 0, ebx, 1, psb_cyc , Configurable PSB and cycle-accurate mode
+ 0x14, 0, ebx, 2, ip_filtering , IP/TraceStop filtering; Warm-reset PT MSRs preservation
+ 0x14, 0, ebx, 3, mtc_timing , MTC timing packet; COFI-based packets suppression
+ 0x14, 0, ebx, 4, ptwrite , PTWRITE support
+ 0x14, 0, ebx, 5, power_event_trace , Power Event Trace support
+ 0x14, 0, ebx, 6, psb_pmi_preserve , PSB and PMI preservation support
+ 0x14, 0, ebx, 7, event_trace , Event Trace packet generation through IA32_RTIT_CTL.EventEn
+ 0x14, 0, ebx, 8, tnt_disable , TNT packet generation disable through IA32_RTIT_CTL.DisTNT
+ 0x14, 0, ecx, 0, topa_output , ToPA output scheme support
+ 0x14, 0, ecx, 1, topa_multiple_entries , ToPA tables can hold multiple entries
+ 0x14, 0, ecx, 2, single_range_output , Single-range output scheme supported
+ 0x14, 0, ecx, 3, trance_transport_output, Trace Transport subsystem output support
+ 0x14, 0, ecx, 31, ip_payloads_lip , IP payloads have LIP values (CS base included)
+ 0x14, 1, eax, 2:0, num_address_ranges , Filtering number of configurable Address Ranges
+ 0x14, 1, eax, 31:16, mtc_periods_bmp , Bitmap of supported MTC period encodings
+ 0x14, 1, ebx, 15:0, cycle_thresholds_bmp , Bitmap of supported Cycle Threshold encodings
+ 0x14, 1, ebx, 31:16, psb_periods_bmp , Bitmap of supported Configurable PSB frequency encodings
# Leaf 15H
-# Time Stamp Counter and Nominal Core Crystal Clock Information
+# Intel TSC (Time Stamp Counter) enumeration
- 0x15, 0, EAX, 31:0, tsc_denominator, The denominator of the TSC/”core crystal clock” ratio
- 0x15, 0, EBX, 31:0, tsc_numerator, The numerator of the TSC/”core crystal clock” ratio
- 0x15, 0, ECX, 31:0, nom_freq, Nominal frequency of the core crystal clock in Hz
+ 0x15, 0, eax, 31:0, tsc_denominator , Denominator of the TSC/'core crystal clock' ratio
+ 0x15, 0, ebx, 31:0, tsc_numerator , Numerator of the TSC/'core crystal clock' ratio
+ 0x15, 0, ecx, 31:0, cpu_crystal_hz , Core crystal clock nominal frequency, in Hz
# Leaf 16H
-# Processor Frequency Information
+# Intel processor frequency enumeration
- 0x16, 0, EAX, 15:0, cpu_base_freq, Processor Base Frequency in MHz
- 0x16, 0, EBX, 15:0, cpu_max_freq, Maximum Frequency in MHz
- 0x16, 0, ECX, 15:0, bus_freq, Bus (Reference) Frequency in MHz
+ 0x16, 0, eax, 15:0, cpu_base_mhz , Processor base frequency, in MHz
+ 0x16, 0, ebx, 15:0, cpu_max_mhz , Processor max frequency, in MHz
+ 0x16, 0, ecx, 15:0, bus_mhz , Bus reference frequency, in MHz
# Leaf 17H
-# System-On-Chip Vendor Attribute
-
- 0x17, 0, EAX, 31:0, max_socid, Maximum input value of supported sub-leaf
- 0x17, 0, EBX, 15:0, soc_vid, SOC Vendor ID
- 0x17, 0, EBX, 16, std_vid, SOC Vendor ID is assigned via an industry standard scheme
- 0x17, 0, ECX, 31:0, soc_pid, SOC Project ID assigned by vendor
- 0x17, 0, EDX, 31:0, soc_sid, SOC Stepping ID
+# Intel SoC vendor attributes enumeration
+
+ 0x17, 0, eax, 31:0, soc_max_subleaf , Max cpuid leaf 0x17 subleaf
+ 0x17, 0, ebx, 15:0, soc_vendor_id , SoC vendor ID
+ 0x17, 0, ebx, 16, is_vendor_scheme , Assigned by industry enumeration scheme (not Intel)
+ 0x17, 0, ecx, 31:0, soc_proj_id , SoC project ID, assigned by vendor
+ 0x17, 0, edx, 31:0, soc_stepping_id , SoC project stepping ID, assigned by vendor
+ 0x17, 3:1, eax, 31:0, vendor_brand_a , Vendor Brand ID string, bytes subleaf_nr * (0 -> 3)
+ 0x17, 3:1, ebx, 31:0, vendor_brand_b , Vendor Brand ID string, bytes subleaf_nr * (4 -> 7)
+ 0x17, 3:1, ecx, 31:0, vendor_brand_c , Vendor Brand ID string, bytes subleaf_nr * (8 -> 11)
+ 0x17, 3:1, edx, 31:0, vendor_brand_d , Vendor Brand ID string, bytes subleaf_nr * (12 -> 15)
# Leaf 18H
-# Deterministic Address Translation Parameters
-
+# Intel deterministic address translation (TLB) parameters
+
+ 0x18, 31:0, eax, 31:0, tlb_max_subleaf , Max cpuid 0x18 subleaf
+ 0x18, 31:0, ebx, 0, tlb_4k_page , TLB 4KB-page entries supported
+ 0x18, 31:0, ebx, 1, tlb_2m_page , TLB 2MB-page entries supported
+ 0x18, 31:0, ebx, 2, tlb_4m_page , TLB 4MB-page entries supported
+ 0x18, 31:0, ebx, 3, tlb_1g_page , TLB 1GB-page entries supported
+ 0x18, 31:0, ebx, 10:8, hard_partitioning , (Hard/Soft) partitioning between logical CPUs sharing this struct
+ 0x18, 31:0, ebx, 31:16, n_way_associative , Ways of associativity
+ 0x18, 31:0, ecx, 31:0, n_sets , Number of sets
+ 0x18, 31:0, edx, 4:0, tlb_type , Translation cache type (TLB type)
+ 0x18, 31:0, edx, 7:5, tlb_cache_level , Translation cache level (1-based)
+ 0x18, 31:0, edx, 8, is_fully_associative , Fully-associative structure
+ 0x18, 31:0, edx, 25:14, tlb_max_addressible_ids, Max num of addressable IDs for logical CPUs sharing this TLB - 1
# Leaf 19H
-# Key Locker Leaf
+# Intel Key Locker enumeration
+ 0x19, 0, eax, 0, kl_cpl0_only , CPL0-only key locker restriction supported
+ 0x19, 0, eax, 1, kl_no_encrypt , No-encrypt key locker restriction supported
+ 0x19, 0, eax, 2, kl_no_decrypt , No-decrypt key locker restriction supported
+ 0x19, 0, ebx, 0, aes_keylocker , AES key locker instructions supported
+ 0x19, 0, ebx, 2, aes_keylocker_wide , AES wide key locker instructions supported
+ 0x19, 0, ebx, 4, kl_msr_iwkey , Key locker MSRs and IWKEY backups supported
+ 0x19, 0, ecx, 0, loadiwkey_no_backup , LOADIWKEY NoBackup parameter supported
+ 0x19, 0, ecx, 1, iwkey_rand , IWKEY randomization (KeySource encoding 1) supported
# Leaf 1AH
-# Hybrid Information
-
- 0x1A, 0, EAX, 31:24, core_type, 20H-Intel_Atom 40H-Intel_Core
-
+# Intel hybrid CPUs identification (e.g. Atom, Core)
+
+ 0x1a, 0, eax, 23:0, core_native_model , This core's native model ID
+ 0x1a, 0, eax, 31:24, core_type , This core's type
+
+# Leaf 1BH
+# Intel PCONFIG (Platform configuration) enumeration
+
+ 0x1b, 31:0, eax, 11:0, pconfig_subleaf_type , CPUID 0x1b subleaf type
+ 0x1b, 31:0, ebx, 31:0, pconfig_target_id_x , A supported PCONFIG target ID
+ 0x1b, 31:0, ecx, 31:0, pconfig_target_id_y , A supported PCONFIG target ID
+ 0x1b, 31:0, edx, 31:0, pconfig_target_id_z , A supported PCONFIG target ID
+
+# Leaf 1CH
+# Intel LBR (Last Branch Record) enumeration
+
+ 0x1c, 0, eax, 0, lbr_depth_8 , Max stack depth (number of LBR entries) = 8
+ 0x1c, 0, eax, 1, lbr_depth_16 , Max stack depth (number of LBR entries) = 16
+ 0x1c, 0, eax, 2, lbr_depth_24 , Max stack depth (number of LBR entries) = 24
+ 0x1c, 0, eax, 3, lbr_depth_32 , Max stack depth (number of LBR entries) = 32
+ 0x1c, 0, eax, 4, lbr_depth_40 , Max stack depth (number of LBR entries) = 40
+ 0x1c, 0, eax, 5, lbr_depth_48 , Max stack depth (number of LBR entries) = 48
+ 0x1c, 0, eax, 6, lbr_depth_56 , Max stack depth (number of LBR entries) = 56
+ 0x1c, 0, eax, 7, lbr_depth_64 , Max stack depth (number of LBR entries) = 64
+ 0x1c, 0, eax, 30, lbr_deep_c_reset , LBRs may be cleared on MWAIT C-state > C1
+ 0x1c, 0, eax, 31, lbr_ip_is_lip , LBR IP contains Last IP, otherwise effective IP
+ 0x1c, 0, ebx, 0, lbr_cpl , CPL filtering (non-zero IA32_LBR_CTL[2:1]) supported
+ 0x1c, 0, ebx, 1, lbr_branch_filter , Branch filtering (non-zero IA32_LBR_CTL[22:16]) supported
+ 0x1c, 0, ebx, 2, lbr_call_stack , Call-stack mode (IA32_LBR_CTL[3] = 1) supported
+ 0x1c, 0, ecx, 0, lbr_mispredict , Branch misprediction bit supported (IA32_LBR_x_INFO[63])
+ 0x1c, 0, ecx, 1, lbr_timed_lbr , Timed LBRs (CPU cycles since last LBR entry) supported
+ 0x1c, 0, ecx, 2, lbr_branch_type , Branch type field (IA32_LBR_x_INFO[59:56]) supported
+ 0x1c, 0, ecx, 19:16, lbr_events_gpc_bmp , LBR PMU-events logging support; bitmap for first 4 GP (general-purpose) Counters
+
+# Leaf 1DH
+# Intel AMX (Advanced Matrix Extensions) tile information
+
+ 0x1d, 0, eax, 31:0, amx_max_palette , Highest palette ID / subleaf ID
+ 0x1d, 1, eax, 15:0, amx_palette_size , AMX palette total tiles size, in bytes
+ 0x1d, 1, eax, 31:16, amx_tile_size , AMX single tile's size, in bytes
+ 0x1d, 1, ebx, 15:0, amx_tile_row_size , AMX tile single row's size, in bytes
+ 0x1d, 1, ebx, 31:16, amx_palette_nr_tiles , AMX palette number of tiles
+ 0x1d, 1, ecx, 15:0, amx_tile_nr_rows , AMX tile max number of rows
+
+# Leaf 1EH
+# Intel AMX, TMUL (Tile-matrix MULtiply) accelerator unit enumeration
+
+ 0x1e, 0, ebx, 7:0, tmul_maxk , TMUL unit maximum height, K (rows or columns)
+ 0x1e, 0, ebx, 23:8, tmul_maxn , TMUL unit maximum SIMD dimension, N (column bytes)
# Leaf 1FH
-# V2 Extended Topology - A preferred superset to leaf 0BH
-
-
-# According to SDM
-# 40000000H - 4FFFFFFFH is invalid range
+# Intel extended topology enumeration v2
+
+ 0x1f, 5:0, eax, 4:0, x2apic_id_shift , Bit width of this level (previous levels inclusive)
+ 0x1f, 5:0, ebx, 15:0, domain_lcpus_count , Logical CPUs count across all instances of this domain
+ 0x1f, 5:0, ecx, 7:0, domain_level , This domain level (subleaf ID)
+ 0x1f, 5:0, ecx, 15:8, domain_type , This domain type
+ 0x1f, 5:0, edx, 31:0, x2apic_id , x2APIC ID of current logical CPU
+
+# Leaf 20H
+# Intel HRESET (History Reset) enumeration
+
+ 0x20, 0, eax, 31:0, hreset_nr_subleaves , CPUID 0x20 max subleaf + 1
+ 0x20, 0, ebx, 0, hreset_thread_director , HRESET of Intel thread director is supported
+
+# Leaf 21H
+# Intel TD (Trust Domain) guest execution environment enumeration
+
+ 0x21, 0, ebx, 31:0, tdx_vendorid_0 , TDX vendor ID string bytes 0 - 3
+ 0x21, 0, ecx, 31:0, tdx_vendorid_2 , TDX vendor ID string bytes 8 - 11
+ 0x21, 0, edx, 31:0, tdx_vendorid_1 , TDX vendor ID string bytes 4 - 7
+
+# Leaf 23H
+# Intel Architectural Performance Monitoring Extended (ArchPerfmonExt)
+
+ 0x23, 0, eax, 1, subleaf_1_counters , Subleaf 1, PMU counters bitmaps, is valid
+ 0x23, 0, eax, 3, subleaf_3_events , Subleaf 3, PMU events bitmaps, is valid
+ 0x23, 0, ebx, 0, unitmask2 , IA32_PERFEVTSELx MSRs UnitMask2 is supported
+ 0x23, 0, ebx, 1, zbit , IA32_PERFEVTSELx MSRs Z-bit is supported
+ 0x23, 1, eax, 31:0, pmu_gp_counters_bitmap , General-purpose PMU counters bitmap
+ 0x23, 1, ebx, 31:0, pmu_f_counters_bitmap , Fixed PMU counters bitmap
+ 0x23, 3, eax, 0, core_cycles_evt , Core cycles event supported
+ 0x23, 3, eax, 1, insn_retired_evt , Instructions retired event supported
+ 0x23, 3, eax, 2, ref_cycles_evt , Reference cycles event supported
+ 0x23, 3, eax, 3, llc_refs_evt , Last-level cache references event supported
+ 0x23, 3, eax, 4, llc_misses_evt , Last-level cache misses event supported
+ 0x23, 3, eax, 5, br_insn_ret_evt , Branch instruction retired event supported
+ 0x23, 3, eax, 6, br_mispr_evt , Branch mispredict retired event supported
+ 0x23, 3, eax, 7, td_slots_evt , Topdown slots event supported
+ 0x23, 3, eax, 8, td_backend_bound_evt , Topdown backend bound event supported
+ 0x23, 3, eax, 9, td_bad_spec_evt , Topdown bad speculation event supported
+ 0x23, 3, eax, 10, td_frontend_bound_evt , Topdown frontend bound event supported
+ 0x23, 3, eax, 11, td_retiring_evt , Topdown retiring event supported
+
+# Leaf 40000000H
+# Maximum hypervisor standard leaf + hypervisor vendor string
+
+0x40000000, 0, eax, 31:0, max_hyp_leaf , Maximum hypervisor standard leaf number
+0x40000000, 0, ebx, 31:0, hypervisor_id_0 , Hypervisor ID string bytes 0 - 3
+0x40000000, 0, ecx, 31:0, hypervisor_id_1 , Hypervisor ID string bytes 4 - 7
+0x40000000, 0, edx, 31:0, hypervisor_id_2 , Hypervisor ID string bytes 8 - 11
+
+# Leaf 80000000H
+# Maximum extended leaf number + CPU vendor string (AMD)
+
+0x80000000, 0, eax, 31:0, max_ext_leaf , Maximum extended cpuid leaf supported
+0x80000000, 0, ebx, 31:0, cpu_vendorid_0 , Vendor ID string bytes 0 - 3
+0x80000000, 0, ecx, 31:0, cpu_vendorid_2 , Vendor ID string bytes 8 - 11
+0x80000000, 0, edx, 31:0, cpu_vendorid_1 , Vendor ID string bytes 4 - 7
# Leaf 80000001H
-# Extended Processor Signature and Feature Bits
-
-0x80000001, 0, EAX, 27:20, extfamily, Extended family
-0x80000001, 0, EAX, 19:16, extmodel, Extended model
-0x80000001, 0, EAX, 11:8, basefamily, Description of Family
-0x80000001, 0, EAX, 11:8, basemodel, Model numbers vary with product
-0x80000001, 0, EAX, 3:0, stepping, Processor stepping (revision) for a specific model
-
-0x80000001, 0, EBX, 31:28, pkgtype, Specifies the package type
-
-0x80000001, 0, ECX, 0, lahf_lm, LAHF/SAHF available in 64-bit mode
-0x80000001, 0, ECX, 1, cmplegacy, Core multi-processing legacy mode
-0x80000001, 0, ECX, 2, svm, Indicates support for: VMRUN, VMLOAD, VMSAVE, CLGI, VMMCALL, and INVLPGA
-0x80000001, 0, ECX, 3, extapicspace, Extended APIC register space
-0x80000001, 0, ECX, 4, altmovecr8, Indicates support for LOCK MOV CR0 means MOV CR8
-0x80000001, 0, ECX, 5, lzcnt, LZCNT
-0x80000001, 0, ECX, 6, sse4a, EXTRQ, INSERTQ, MOVNTSS, and MOVNTSD instruction support
-0x80000001, 0, ECX, 7, misalignsse, Misaligned SSE Mode
-0x80000001, 0, ECX, 8, prefetchw, PREFETCHW
-0x80000001, 0, ECX, 9, osvw, OS Visible Work-around support
-0x80000001, 0, ECX, 10, ibs, Instruction Based Sampling
-0x80000001, 0, ECX, 11, xop, Extended operation support
-0x80000001, 0, ECX, 12, skinit, SKINIT and STGI support
-0x80000001, 0, ECX, 13, wdt, Watchdog timer support
-0x80000001, 0, ECX, 15, lwp, Lightweight profiling support
-0x80000001, 0, ECX, 16, fma4, Four-operand FMA instruction support
-0x80000001, 0, ECX, 17, tce, Translation cache extension
-0x80000001, 0, ECX, 22, TopologyExtensions, Indicates support for Core::X86::Cpuid::CachePropEax0 and Core::X86::Cpuid::ExtApicId
-0x80000001, 0, ECX, 23, perfctrextcore, Indicates support for Core::X86::Msr::PERF_CTL0 - 5 and Core::X86::Msr::PERF_CTR
-0x80000001, 0, ECX, 24, perfctrextdf, Indicates support for Core::X86::Msr::DF_PERF_CTL and Core::X86::Msr::DF_PERF_CTR
-0x80000001, 0, ECX, 26, databreakpointextension, Indicates data breakpoint support for Core::X86::Msr::DR0_ADDR_MASK, Core::X86::Msr::DR1_ADDR_MASK, Core::X86::Msr::DR2_ADDR_MASK and Core::X86::Msr::DR3_ADDR_MASK
-0x80000001, 0, ECX, 27, perftsc, Performance time-stamp counter supported
-0x80000001, 0, ECX, 28, perfctrextllc, Indicates support for L3 performance counter extensions
-0x80000001, 0, ECX, 29, mwaitextended, MWAITX and MONITORX capability is supported
-0x80000001, 0, ECX, 30, admskextn, Indicates support for address mask extension (to 32 bits and to all 4 DRs) for instruction breakpoints
-
-0x80000001, 0, EDX, 0, fpu, x87 floating point unit on-chip
-0x80000001, 0, EDX, 1, vme, Virtual-mode enhancements
-0x80000001, 0, EDX, 2, de, Debugging extensions, IO breakpoints, CR4.DE
-0x80000001, 0, EDX, 3, pse, Page-size extensions (4 MB pages)
-0x80000001, 0, EDX, 4, tsc, Time stamp counter, RDTSC/RDTSCP instructions, CR4.TSD
-0x80000001, 0, EDX, 5, msr, Model-specific registers (MSRs), with RDMSR and WRMSR instructions
-0x80000001, 0, EDX, 6, pae, Physical-address extensions (PAE)
-0x80000001, 0, EDX, 7, mce, Machine Check Exception, CR4.MCE
-0x80000001, 0, EDX, 8, cmpxchg8b, CMPXCHG8B instruction
-0x80000001, 0, EDX, 9, apic, advanced programmable interrupt controller (APIC) exists and is enabled
-0x80000001, 0, EDX, 11, sysret, SYSCALL/SYSRET supported
-0x80000001, 0, EDX, 12, mtrr, Memory-type range registers
-0x80000001, 0, EDX, 13, pge, Page global extension, CR4.PGE
-0x80000001, 0, EDX, 14, mca, Machine check architecture, MCG_CAP
-0x80000001, 0, EDX, 15, cmov, Conditional move instructions, CMOV, FCOMI, FCMOV
-0x80000001, 0, EDX, 16, pat, Page attribute table
-0x80000001, 0, EDX, 17, pse36, Page-size extensions
-0x80000001, 0, EDX, 20, exec_dis, Execute Disable Bit available
-0x80000001, 0, EDX, 22, mmxext, AMD extensions to MMX instructions
-0x80000001, 0, EDX, 23, mmx, MMX instructions
-0x80000001, 0, EDX, 24, fxsr, FXSAVE and FXRSTOR instructions
-0x80000001, 0, EDX, 25, ffxsr, FXSAVE and FXRSTOR instruction optimizations
-0x80000001, 0, EDX, 26, 1gb_page, 1GB page supported
-0x80000001, 0, EDX, 27, rdtscp, RDTSCP and IA32_TSC_AUX are available
-0x80000001, 0, EDX, 29, lm, 64b Architecture supported
-0x80000001, 0, EDX, 30, threednowext, AMD extensions to 3DNow! instructions
-0x80000001, 0, EDX, 31, threednow, 3DNow! instructions
-
-# Leaf 80000002H/80000003H/80000004H
-# Processor Brand String
+# Extended CPU feature identifiers
+
+0x80000001, 0, eax, 3:0, e_stepping_id , Stepping ID
+0x80000001, 0, eax, 7:4, e_base_model , Base processor model
+0x80000001, 0, eax, 11:8, e_base_family , Base processor family
+0x80000001, 0, eax, 19:16, e_ext_model , Extended processor model
+0x80000001, 0, eax, 27:20, e_ext_family , Extended processor family
+0x80000001, 0, ebx, 15:0, brand_id , Brand ID
+0x80000001, 0, ebx, 31:28, pkg_type , Package type
+0x80000001, 0, ecx, 0, lahf_lm , LAHF and SAHF in 64-bit mode
+0x80000001, 0, ecx, 1, cmp_legacy , Multi-processing legacy mode (No HT)
+0x80000001, 0, ecx, 2, svm , Secure Virtual Machine
+0x80000001, 0, ecx, 3, extapic , Extended APIC space
+0x80000001, 0, ecx, 4, cr8_legacy , LOCK MOV CR0 means MOV CR8
+0x80000001, 0, ecx, 5, abm , LZCNT advanced bit manipulation
+0x80000001, 0, ecx, 6, sse4a , SSE4A support
+0x80000001, 0, ecx, 7, misalignsse , Misaligned SSE mode
+0x80000001, 0, ecx, 8, 3dnowprefetch , 3DNow PREFETCH/PREFETCHW support
+0x80000001, 0, ecx, 9, osvw , OS visible workaround
+0x80000001, 0, ecx, 10, ibs , Instruction based sampling
+0x80000001, 0, ecx, 11, xop , XOP: extended operation (AVX instructions)
+0x80000001, 0, ecx, 12, skinit , SKINIT/STGI support
+0x80000001, 0, ecx, 13, wdt , Watchdog timer support
+0x80000001, 0, ecx, 15, lwp , Lightweight profiling
+0x80000001, 0, ecx, 16, fma4 , 4-operand FMA instruction
+0x80000001, 0, ecx, 17, tce , Translation cache extension
+0x80000001, 0, ecx, 19, nodeid_msr , NodeId MSR (0xc001100c)
+0x80000001, 0, ecx, 21, tbm , Trailing bit manipulations
+0x80000001, 0, ecx, 22, topoext , Topology Extensions (cpuid leaf 0x8000001d)
+0x80000001, 0, ecx, 23, perfctr_core , Core performance counter extensions
+0x80000001, 0, ecx, 24, perfctr_nb , NB/DF performance counter extensions
+0x80000001, 0, ecx, 26, bpext , Data access breakpoint extension
+0x80000001, 0, ecx, 27, ptsc , Performance time-stamp counter
+0x80000001, 0, ecx, 28, perfctr_llc , LLC (L3) performance counter extensions
+0x80000001, 0, ecx, 29, mwaitx , MWAITX/MONITORX support
+0x80000001, 0, ecx, 30, addr_mask_ext , Breakpoint address mask extension (to bit 31)
+0x80000001, 0, edx, 0, e_fpu , Floating-Point Unit on-chip (x87)
+0x80000001, 0, edx, 1, e_vme , Virtual-8086 Mode Extensions
+0x80000001, 0, edx, 2, e_de , Debugging Extensions
+0x80000001, 0, edx, 3, e_pse , Page Size Extension
+0x80000001, 0, edx, 4, e_tsc , Time Stamp Counter
+0x80000001, 0, edx, 5, e_msr , Model-Specific Registers (RDMSR and WRMSR support)
+0x80000001, 0, edx, 6, pae , Physical Address Extensions
+0x80000001, 0, edx, 7, mce , Machine Check Exception
+0x80000001, 0, edx, 8, cx8 , CMPXCHG8B instruction
+0x80000001, 0, edx, 9, apic , APIC on-chip
+0x80000001, 0, edx, 11, syscall , SYSCALL and SYSRET instructions
+0x80000001, 0, edx, 12, mtrr , Memory Type Range Registers
+0x80000001, 0, edx, 13, pge , Page Global Extensions
+0x80000001, 0, edx, 14, mca , Machine Check Architecture
+0x80000001, 0, edx, 15, cmov , Conditional Move Instruction
+0x80000001, 0, edx, 16, pat , Page Attribute Table
+0x80000001, 0, edx, 17, pse36 , Page Size Extension (36-bit)
+0x80000001, 0, edx, 19, mp , Out-of-spec AMD Multiprocessing bit
+0x80000001, 0, edx, 20, nx , No-execute page protection
+0x80000001, 0, edx, 22, mmxext , AMD MMX extensions
+0x80000001, 0, edx, 24, e_fxsr , FXSAVE and FXRSTOR instructions
+0x80000001, 0, edx, 25, fxsr_opt , FXSAVE and FXRSTOR optimizations
+0x80000001, 0, edx, 26, pdpe1gb , 1-GB large page support
+0x80000001, 0, edx, 27, rdtscp , RDTSCP instruction
+0x80000001, 0, edx, 29, lm , Long mode (x86-64, 64-bit support)
+0x80000001, 0, edx, 30, 3dnowext , AMD 3DNow extensions
+0x80000001, 0, edx, 31, 3dnow , 3DNow instructions
+
+# Leaf 80000002H
+# CPU brand ID string, bytes 0 - 15
+
+0x80000002, 0, eax, 31:0, cpu_brandid_0 , CPU brand ID string, bytes 0 - 3
+0x80000002, 0, ebx, 31:0, cpu_brandid_1 , CPU brand ID string, bytes 4 - 7
+0x80000002, 0, ecx, 31:0, cpu_brandid_2 , CPU brand ID string, bytes 8 - 11
+0x80000002, 0, edx, 31:0, cpu_brandid_3 , CPU brand ID string, bytes 12 - 15
+
+# Leaf 80000003H
+# CPU brand ID string, bytes 16 - 31
+
+0x80000003, 0, eax, 31:0, cpu_brandid_4 , CPU brand ID string, bytes 16 - 19
+0x80000003, 0, ebx, 31:0, cpu_brandid_5 , CPU brand ID string, bytes 20 - 23
+0x80000003, 0, ecx, 31:0, cpu_brandid_6 , CPU brand ID string, bytes 24 - 27
+0x80000003, 0, edx, 31:0, cpu_brandid_7 , CPU brand ID string, bytes 28 - 31
+
+# Leaf 80000004H
+# CPU brand ID string, bytes 32 - 47
+
+0x80000004, 0, eax, 31:0, cpu_brandid_8 , CPU brand ID string, bytes 32 - 35
+0x80000004, 0, ebx, 31:0, cpu_brandid_9 , CPU brand ID string, bytes 36 - 39
+0x80000004, 0, ecx, 31:0, cpu_brandid_10 , CPU brand ID string, bytes 40 - 43
+0x80000004, 0, edx, 31:0, cpu_brandid_11 , CPU brand ID string, bytes 44 - 47
# Leaf 80000005H
-# Reserved
+# AMD L1 cache and L1 TLB enumeration
+
+0x80000005, 0, eax, 7:0, l1_itlb_2m_4m_nentries , L1 ITLB #entries, 2M and 4M pages
+0x80000005, 0, eax, 15:8, l1_itlb_2m_4m_assoc , L1 ITLB associativity, 2M and 4M pages
+0x80000005, 0, eax, 23:16, l1_dtlb_2m_4m_nentries , L1 DTLB #entries, 2M and 4M pages
+0x80000005, 0, eax, 31:24, l1_dtlb_2m_4m_assoc , L1 DTLB associativity, 2M and 4M pages
+0x80000005, 0, ebx, 7:0, l1_itlb_4k_nentries , L1 ITLB #entries, 4K pages
+0x80000005, 0, ebx, 15:8, l1_itlb_4k_assoc , L1 ITLB associativity, 4K pages
+0x80000005, 0, ebx, 23:16, l1_dtlb_4k_nentries , L1 DTLB #entries, 4K pages
+0x80000005, 0, ebx, 31:24, l1_dtlb_4k_assoc , L1 DTLB associativity, 4K pages
+0x80000005, 0, ecx, 7:0, l1_dcache_line_size , L1 dcache line size, in bytes
+0x80000005, 0, ecx, 15:8, l1_dcache_nlines , L1 dcache lines per tag
+0x80000005, 0, ecx, 23:16, l1_dcache_assoc , L1 dcache associativity
+0x80000005, 0, ecx, 31:24, l1_dcache_size_kb , L1 dcache size, in KB
+0x80000005, 0, edx, 7:0, l1_icache_line_size , L1 icache line size, in bytes
+0x80000005, 0, edx, 15:8, l1_icache_nlines , L1 icache lines per tag
+0x80000005, 0, edx, 23:16, l1_icache_assoc , L1 icache associativity
+0x80000005, 0, edx, 31:24, l1_icache_size_kb , L1 icache size, in KB
# Leaf 80000006H
-# Extended L2 Cache Features
-
-0x80000006, 0, ECX, 7:0, clsize, Cache Line size in bytes
-0x80000006, 0, ECX, 15:12, l2c_assoc, L2 Associativity
-0x80000006, 0, ECX, 31:16, csize, Cache size in 1K units
-
+# (Mostly AMD) L2 TLB, L2 cache, and L3 cache enumeration
+
+0x80000006, 0, eax, 11:0, l2_itlb_2m_4m_nentries , L2 iTLB #entries, 2M and 4M pages
+0x80000006, 0, eax, 15:12, l2_itlb_2m_4m_assoc , L2 iTLB associativity, 2M and 4M pages
+0x80000006, 0, eax, 27:16, l2_dtlb_2m_4m_nentries , L2 dTLB #entries, 2M and 4M pages
+0x80000006, 0, eax, 31:28, l2_dtlb_2m_4m_assoc , L2 dTLB associativity, 2M and 4M pages
+0x80000006, 0, ebx, 11:0, l2_itlb_4k_nentries , L2 iTLB #entries, 4K pages
+0x80000006, 0, ebx, 15:12, l2_itlb_4k_assoc , L2 iTLB associativity, 4K pages
+0x80000006, 0, ebx, 27:16, l2_dtlb_4k_nentries , L2 dTLB #entries, 4K pages
+0x80000006, 0, ebx, 31:28, l2_dtlb_4k_assoc , L2 dTLB associativity, 4K pages
+0x80000006, 0, ecx, 7:0, l2_line_size , L2 cache line size, in bytes
+0x80000006, 0, ecx, 11:8, l2_nlines , L2 cache number of lines per tag
+0x80000006, 0, ecx, 15:12, l2_assoc , L2 cache associativity
+0x80000006, 0, ecx, 31:16, l2_size_kb , L2 cache size, in KB
+0x80000006, 0, edx, 7:0, l3_line_size , L3 cache line size, in bytes
+0x80000006, 0, edx, 11:8, l3_nlines , L3 cache number of lines per tag
+0x80000006, 0, edx, 15:12, l3_assoc , L3 cache associativity
+0x80000006, 0, edx, 31:18, l3_size_range , L3 cache size range
# Leaf 80000007H
-
-0x80000007, 0, EDX, 8, nonstop_tsc, Invariant TSC available
-
+# CPU power management (mostly AMD) and AMD RAS enumeration
+
+0x80000007, 0, ebx, 0, overflow_recov , MCA overflow conditions not fatal
+0x80000007, 0, ebx, 1, succor , Software containment of UnCORRectable errors
+0x80000007, 0, ebx, 2, hw_assert , Hardware assert MSRs
+0x80000007, 0, ebx, 3, smca , Scalable MCA (MCAX MSRs)
+0x80000007, 0, ecx, 31:0, cpu_pwr_sample_ratio , CPU power sample time ratio
+0x80000007, 0, edx, 0, digital_temp , Digital temperature sensor
+0x80000007, 0, edx, 1, powernow_freq_id , PowerNOW! frequency scaling
+0x80000007, 0, edx, 2, powernow_volt_id , PowerNOW! voltage scaling
+0x80000007, 0, edx, 3, thermal_trip , THERMTRIP (Thermal Trip)
+0x80000007, 0, edx, 4, hw_thermal_control , Hardware thermal control
+0x80000007, 0, edx, 5, sw_thermal_control , Software thermal control
+0x80000007, 0, edx, 6, 100mhz_steps , 100 MHz multiplier control
+0x80000007, 0, edx, 7, hw_pstate , Hardware P-state control
+0x80000007, 0, edx, 8, constant_tsc , TSC ticks at constant rate across all P and C states
+0x80000007, 0, edx, 9, cpb , Core performance boost
+0x80000007, 0, edx, 10, eff_freq_ro , Read-only effective frequency interface
+0x80000007, 0, edx, 11, proc_feedback , Processor feedback interface (deprecated)
+0x80000007, 0, edx, 12, acc_power , Processor power reporting interface
+0x80000007, 0, edx, 13, connected_standby , CPU Connected Standby support
+0x80000007, 0, edx, 14, rapl , Runtime Average Power Limit interface
# Leaf 80000008H
-
-0x80000008, 0, EAX, 7:0, phy_adr_bits, Physical Address Bits
-0x80000008, 0, EAX, 15:8, lnr_adr_bits, Linear Address Bits
-0x80000007, 0, EBX, 9, wbnoinvd, WBNOINVD
-
-# 0x8000001E
-# EAX: Extended APIC ID
-0x8000001E, 0, EAX, 31:0, extended_apic_id, Extended APIC ID
-# EBX: Core Identifiers
-0x8000001E, 0, EBX, 7:0, core_id, Identifies the logical core ID
-0x8000001E, 0, EBX, 15:8, threads_per_core, The number of threads per core is threads_per_core + 1
-# ECX: Node Identifiers
-0x8000001E, 0, ECX, 7:0, node_id, Node ID
-0x8000001E, 0, ECX, 10:8, nodes_per_processor, Nodes per processor { 0: 1 node, else reserved }
-
-# 8000001F: AMD Secure Encryption
-0x8000001F, 0, EAX, 0, sme, Secure Memory Encryption
-0x8000001F, 0, EAX, 1, sev, Secure Encrypted Virtualization
-0x8000001F, 0, EAX, 2, vmpgflush, VM Page Flush MSR
-0x8000001F, 0, EAX, 3, seves, SEV Encrypted State
-0x8000001F, 0, EBX, 5:0, c-bit, Page table bit number used to enable memory encryption
-0x8000001F, 0, EBX, 11:6, mem_encrypt_physaddr_width, Reduction of physical address space in bits with SME enabled
-0x8000001F, 0, ECX, 31:0, num_encrypted_guests, Maximum ASID value that may be used for an SEV-enabled guest
-0x8000001F, 0, EDX, 31:0, minimum_sev_asid, Minimum ASID value that must be used for an SEV-enabled, SEV-ES-disabled guest
+# CPU capacity parameters and extended feature flags (mostly AMD)
+
+0x80000008, 0, eax, 7:0, phys_addr_bits , Max physical address bits
+0x80000008, 0, eax, 15:8, virt_addr_bits , Max virtual address bits
+0x80000008, 0, eax, 23:16, guest_phys_addr_bits , Max nested-paging guest physical address bits
+0x80000008, 0, ebx, 0, clzero , CLZERO supported
+0x80000008, 0, ebx, 1, irperf , Instruction retired counter MSR
+0x80000008, 0, ebx, 2, xsaveerptr , XSAVE/XRSTOR always saves/restores FPU error pointers
+0x80000008, 0, ebx, 3, invlpgb , INVLPGB broadcasts a TLB invalidate to all threads
+0x80000008, 0, ebx, 4, rdpru , RDPRU (Read Processor Register at User level) supported
+0x80000008, 0, ebx, 6, mba , Memory Bandwidth Allocation (AMD bit)
+0x80000008, 0, ebx, 8, mcommit , MCOMMIT (Memory commit) supported
+0x80000008, 0, ebx, 9, wbnoinvd , WBNOINVD supported
+0x80000008, 0, ebx, 12, amd_ibpb , Indirect Branch Prediction Barrier
+0x80000008, 0, ebx, 13, wbinvd_int , Interruptible WBINVD/WBNOINVD
+0x80000008, 0, ebx, 14, amd_ibrs , Indirect Branch Restricted Speculation
+0x80000008, 0, ebx, 15, amd_stibp , Single Thread Indirect Branch Prediction mode
+0x80000008, 0, ebx, 16, ibrs_always_on , IBRS always-on preferred
+0x80000008, 0, ebx, 17, amd_stibp_always_on , STIBP always-on preferred
+0x80000008, 0, ebx, 18, ibrs_fast , IBRS is preferred over software solution
+0x80000008, 0, ebx, 19, ibrs_same_mode , IBRS provides same mode protection
+0x80000008, 0, ebx, 20, no_efer_lmsle , EFER[LMSLE] bit (Long-Mode Segment Limit Enable) unsupported
+0x80000008, 0, ebx, 21, tlb_flush_nested , INVLPGB RAX[5] bit can be set (nested translations)
+0x80000008, 0, ebx, 23, amd_ppin , Protected Processor Inventory Number
+0x80000008, 0, ebx, 24, amd_ssbd , Speculative Store Bypass Disable
+0x80000008, 0, ebx, 25, virt_ssbd , Virtualized SSBD (Speculative Store Bypass Disable)
+0x80000008, 0, ebx, 26, amd_ssb_no , SSBD not needed (fixed in HW)
+0x80000008, 0, ebx, 27, cppc , Collaborative Processor Performance Control
+0x80000008, 0, ebx, 28, amd_psfd , Predictive Store Forward Disable
+0x80000008, 0, ebx, 29, btc_no , CPU not affected by Branch Type Confusion
+0x80000008, 0, ebx, 30, ibpb_ret , IBPB clears RSB/RAS too
+0x80000008, 0, ebx, 31, brs , Branch Sampling supported
+0x80000008, 0, ecx, 7:0, cpu_nthreads , Number of physical threads - 1
+0x80000008, 0, ecx, 15:12, apicid_coreid_len , Number of thread core ID bits (shift) in APIC ID
+0x80000008, 0, ecx, 17:16, perf_tsc_len , Performance time-stamp counter size
+0x80000008, 0, edx, 15:0, invlpgb_max_pages , INVLPGB maximum page count
+0x80000008, 0, edx, 31:16, rdpru_max_reg_id , RDPRU max register ID (ECX input)
+
+# Leaf 8000000AH
+# AMD SVM (Secure Virtual Machine) enumeration
+
+0x8000000a, 0, eax, 7:0, svm_version , SVM revision number
+0x8000000a, 0, ebx, 31:0, svm_nasid , Number of address space identifiers (ASID)
+0x8000000a, 0, edx, 0, npt , Nested paging
+0x8000000a, 0, edx, 1, lbrv , LBR virtualization
+0x8000000a, 0, edx, 2, svm_lock , SVM lock
+0x8000000a, 0, edx, 3, nrip_save , NRIP save support on #VMEXIT
+0x8000000a, 0, edx, 4, tsc_scale , MSR based TSC rate control
+0x8000000a, 0, edx, 5, vmcb_clean , VMCB clean bits support
+0x8000000a, 0, edx, 6, flushbyasid , Flush by ASID + Extended VMCB TLB_Control
+0x8000000a, 0, edx, 7, decodeassists , Decode Assists support
+0x8000000a, 0, edx, 10, pausefilter , Pause intercept filter
+0x8000000a, 0, edx, 12, pfthreshold , Pause filter threshold
+0x8000000a, 0, edx, 13, avic , Advanced virtual interrupt controller
+0x8000000a, 0, edx, 15, v_vmsave_vmload , Virtual VMSAVE/VMLOAD (nested virt)
+0x8000000a, 0, edx, 16, vgif , Virtualize the Global Interrupt Flag
+0x8000000a, 0, edx, 17, gmet , Guest mode execution trap
+0x8000000a, 0, edx, 18, x2avic , Virtual x2APIC
+0x8000000a, 0, edx, 19, sss_check , Supervisor Shadow Stack restrictions
+0x8000000a, 0, edx, 20, v_spec_ctrl , Virtual SPEC_CTRL
+0x8000000a, 0, edx, 21, ro_gpt , Read-Only guest page table support
+0x8000000a, 0, edx, 23, h_mce_override , Host MCE override
+0x8000000a, 0, edx, 24, tlbsync_int , TLBSYNC intercept + INVLPGB/TLBSYNC in VMCB
+0x8000000a, 0, edx, 25, vnmi , NMI virtualization
+0x8000000a, 0, edx, 26, ibs_virt , IBS Virtualization
+0x8000000a, 0, edx, 27, ext_lvt_off_chg , Extended LVT offset fault change
+0x8000000a, 0, edx, 28, svme_addr_chk , Guest SVME addr check
+
+# Leaf 80000019H
+# AMD TLB 1G-pages enumeration
+
+0x80000019, 0, eax, 11:0, l1_itlb_1g_nentries , L1 iTLB #entries, 1G pages
+0x80000019, 0, eax, 15:12, l1_itlb_1g_assoc , L1 iTLB associativity, 1G pages
+0x80000019, 0, eax, 27:16, l1_dtlb_1g_nentries , L1 dTLB #entries, 1G pages
+0x80000019, 0, eax, 31:28, l1_dtlb_1g_assoc , L1 dTLB associativity, 1G pages
+0x80000019, 0, ebx, 11:0, l2_itlb_1g_nentries , L2 iTLB #entries, 1G pages
+0x80000019, 0, ebx, 15:12, l2_itlb_1g_assoc , L2 iTLB associativity, 1G pages
+0x80000019, 0, ebx, 27:16, l2_dtlb_1g_nentries , L2 dTLB #entries, 1G pages
+0x80000019, 0, ebx, 31:28, l2_dtlb_1g_assoc , L2 dTLB associativity, 1G pages
+
+# Leaf 8000001AH
+# AMD instruction optimizations enumeration
+
+0x8000001a, 0, eax, 0, fp_128 , Internal FP/SIMD exec data path is 128-bits wide
+0x8000001a, 0, eax, 1, movu_preferred , SSE: MOVU* better than MOVL*/MOVH*
+0x8000001a, 0, eax, 2, fp_256 , Internal FP/SSE exec data path is 256-bits wide
+
+# Leaf 8000001BH
+# AMD IBS (Instruction-Based Sampling) enumeration
+
+0x8000001b, 0, eax, 0, ibs_flags_valid , IBS feature flags valid
+0x8000001b, 0, eax, 1, ibs_fetch_sampling , IBS fetch sampling supported
+0x8000001b, 0, eax, 2, ibs_op_sampling , IBS execution sampling supported
+0x8000001b, 0, eax, 3, ibs_rdwr_op_counter , IBS read/write of op counter supported
+0x8000001b, 0, eax, 4, ibs_op_count , IBS OP counting mode supported
+0x8000001b, 0, eax, 5, ibs_branch_target , IBS branch target address reporting supported
+0x8000001b, 0, eax, 6, ibs_op_counters_ext , IBS IbsOpCurCnt/IbsOpMaxCnt extend by 7 bits
+0x8000001b, 0, eax, 7, ibs_rip_invalid_chk , IBS invalid RIP indication supported
+0x8000001b, 0, eax, 8, ibs_op_branch_fuse , IBS fused branch micro-op indication supported
+0x8000001b, 0, eax, 9, ibs_fetch_ctl_ext , IBS Fetch Control Extended MSR (0xc001103c) supported
+0x8000001b, 0, eax, 10, ibs_op_data_4 , IBS op data 4 MSR supported
+0x8000001b, 0, eax, 11, ibs_l3_miss_filter , IBS L3-miss filtering supported (Zen4+)
+
+# Leaf 8000001CH
+# AMD LWP (Lightweight Profiling)
+
+0x8000001c, 0, eax, 0, os_lwp_avail , LWP is available to application programs (supported by OS)
+0x8000001c, 0, eax, 1, os_lpwval , LWPVAL instruction (EventId=1) is supported by OS
+0x8000001c, 0, eax, 2, os_lwp_ire , Instructions Retired Event (EventId=2) is supported by OS
+0x8000001c, 0, eax, 3, os_lwp_bre , Branch Retired Event (EventId=3) is supported by OS
+0x8000001c, 0, eax, 4, os_lwp_dme , DCache Miss Event (EventId=4) is supported by OS
+0x8000001c, 0, eax, 5, os_lwp_cnh , CPU Clocks Not Halted event (EventId=5) is supported by OS
+0x8000001c, 0, eax, 6, os_lwp_rnh , CPU Reference clocks Not Halted event (EventId=6) is supported by OS
+0x8000001c, 0, eax, 29, os_lwp_cont , LWP sampling in continuous mode is supported by OS
+0x8000001c, 0, eax, 30, os_lwp_ptsc , Performance Time Stamp Counter in event records is supported by OS
+0x8000001c, 0, eax, 31, os_lwp_int , Interrupt on threshold overflow is supported by OS
+0x8000001c, 0, ebx, 7:0, lwp_lwpcb_sz , LWP Control Block size, in quadwords
+0x8000001c, 0, ebx, 15:8, lwp_event_sz , LWP event record size, in bytes
+0x8000001c, 0, ebx, 23:16, lwp_max_events , LWP max supported EventId value (EventID 255 not included)
+0x8000001c, 0, ebx, 31:24, lwp_event_offset , LWP events area offset in the LWP Control Block
+0x8000001c, 0, ecx, 4:0, lwp_latency_max , Num of bits in cache latency counters (10 to 31)
+0x8000001c, 0, ecx, 5, lwp_data_adddr , Cache miss events report the data address of the reference
+0x8000001c, 0, ecx, 8:6, lwp_latency_rnd , Amount by which cache latency is rounded
+0x8000001c, 0, ecx, 15:9, lwp_version , LWP implementation version
+0x8000001c, 0, ecx, 23:16, lwp_buf_min_sz , LWP event ring buffer min size, in units of 32 event records
+0x8000001c, 0, ecx, 28, lwp_branch_predict , Branches Retired events can be filtered
+0x8000001c, 0, ecx, 29, lwp_ip_filtering , IP filtering (IPI, IPF, BaseIP, and LimitIP @ LWPCB) supported
+0x8000001c, 0, ecx, 30, lwp_cache_levels , Cache-related events can be filtered by cache level
+0x8000001c, 0, ecx, 31, lwp_cache_latency , Cache-related events can be filtered by latency
+0x8000001c, 0, edx, 0, hw_lwp_avail , LWP is available in Hardware
+0x8000001c, 0, edx, 1, hw_lpwval , LWPVAL instruction (EventId=1) is available in HW
+0x8000001c, 0, edx, 2, hw_lwp_ire , Instructions Retired Event (EventId=2) is available in HW
+0x8000001c, 0, edx, 3, hw_lwp_bre , Branch Retired Event (EventId=3) is available in HW
+0x8000001c, 0, edx, 4, hw_lwp_dme , DCache Miss Event (EventId=4) is available in HW
+0x8000001c, 0, edx, 5, hw_lwp_cnh , CPU Clocks Not Halted event (EventId=5) is available in HW
+0x8000001c, 0, edx, 6, hw_lwp_rnh , CPU Reference clocks Not Halted event (EventId=6) is available in HW
+0x8000001c, 0, edx, 29, hw_lwp_cont , LWP sampling in continuous mode is available in HW
+0x8000001c, 0, edx, 30, hw_lwp_ptsc , Performance Time Stamp Counter in event records is available in HW
+0x8000001c, 0, edx, 31, hw_lwp_int , Interrupt on threshold overflow is available in HW
+
+# Leaf 8000001DH
+# AMD deterministic cache parameters
+
+0x8000001d, 31:0, eax, 4:0, cache_type , Cache type field
+0x8000001d, 31:0, eax, 7:5, cache_level , Cache level (1-based)
+0x8000001d, 31:0, eax, 8, cache_self_init , Self-initializing cache level
+0x8000001d, 31:0, eax, 9, fully_associative , Fully-associative cache
+0x8000001d, 31:0, eax, 25:14, num_threads_sharing , Number of logical CPUs sharing cache
+0x8000001d, 31:0, ebx, 11:0, cache_linesize , System coherency line size (0-based)
+0x8000001d, 31:0, ebx, 21:12, cache_npartitions , Physical line partitions (0-based)
+0x8000001d, 31:0, ebx, 31:22, cache_nways , Ways of associativity (0-based)
+0x8000001d, 31:0, ecx, 30:0, cache_nsets , Cache number of sets (0-based)
+0x8000001d, 31:0, edx, 0, wbinvd_rll_no_guarantee, WBINVD/INVD not guaranteed for Remote Lower-Level caches
+0x8000001d, 31:0, edx, 1, ll_inclusive , Cache is inclusive of Lower-Level caches
+
+# Leaf 8000001EH
+# AMD CPU topology enumeration
+
+0x8000001e, 0, eax, 31:0, ext_apic_id , Extended APIC ID
+0x8000001e, 0, ebx, 7:0, core_id , Unique per-socket logical core unit ID
+0x8000001e, 0, ebx, 15:8, core_nthreas , #Threads per core (zero-based)
+0x8000001e, 0, ecx, 7:0, node_id , Node (die) ID of invoking logical CPU
+0x8000001e, 0, ecx, 10:8, nnodes_per_socket , #nodes in invoking logical CPU's package/socket
+
+# Leaf 8000001FH
+# AMD encrypted memory capabilities enumeration (SME/SEV)
+
+0x8000001f, 0, eax, 0, sme , Secure Memory Encryption supported
+0x8000001f, 0, eax, 1, sev , Secure Encrypted Virtualization supported
+0x8000001f, 0, eax, 2, vm_page_flush , VM Page Flush MSR (0xc001011e) available
+0x8000001f, 0, eax, 3, sev_es , SEV Encrypted State supported
+0x8000001f, 0, eax, 4, sev_nested_paging , SEV secure nested paging supported
+0x8000001f, 0, eax, 5, vm_permission_levels , VMPL supported
+0x8000001f, 0, eax, 6, rpmquery , RPMQUERY instruction supported
+0x8000001f, 0, eax, 7, vmpl_sss , VMPL supervisor shadow stack supported
+0x8000001f, 0, eax, 8, secure_tsc , Secure TSC supported
+0x8000001f, 0, eax, 9, v_tsc_aux , Hardware virtualizes TSC_AUX
+0x8000001f, 0, eax, 10, sme_coherent , HW enforces cache coherency across encryption domains
+0x8000001f, 0, eax, 11, req_64bit_hypervisor , SEV guest mandates 64-bit hypervisor
+0x8000001f, 0, eax, 12, restricted_injection , Restricted Injection supported
+0x8000001f, 0, eax, 13, alternate_injection , Alternate Injection supported
+0x8000001f, 0, eax, 14, debug_swap , SEV-ES: full debug state swap is supported
+0x8000001f, 0, eax, 15, disallow_host_ibs , SEV-ES: Disallowing IBS use by the host is supported
+0x8000001f, 0, eax, 16, virt_transparent_enc , Virtual Transparent Encryption
+0x8000001f, 0, eax, 17, vmgexit_paremeter , VmgexitParameter is supported in SEV_FEATURES
+0x8000001f, 0, eax, 18, virt_tom_msr , Virtual TOM MSR is supported
+0x8000001f, 0, eax, 19, virt_ibs , IBS state virtualization is supported for SEV-ES guests
+0x8000001f, 0, eax, 24, vmsa_reg_protection , VMSA register protection is supported
+0x8000001f, 0, eax, 25, smt_protection , SMT protection is supported
+0x8000001f, 0, eax, 28, svsm_page_msr , SVSM communication page MSR (0xc001f000) is supported
+0x8000001f, 0, eax, 29, nested_virt_snp_msr , VIRT_RMPUPDATE/VIRT_PSMASH MSRs are supported
+0x8000001f, 0, ebx, 5:0, pte_cbit_pos , PTE bit number used to enable memory encryption
+0x8000001f, 0, ebx, 11:6, phys_addr_reduction_nbits, Reduction of phys address space when encryption is enabled, in bits
+0x8000001f, 0, ebx, 15:12, vmpl_count , Number of VM permission levels (VMPL) supported
+0x8000001f, 0, ecx, 31:0, enc_guests_max , Max supported number of simultaneous encrypted guests
+0x8000001f, 0, edx, 31:0, min_sev_asid_no_sev_es , Minimum ASID for SEV-enabled SEV-ES-disabled guest
+
+# Leaf 80000020H
+# AMD Platform QoS extended feature IDs
+
+0x80000020, 0, ebx, 1, mba , Memory Bandwidth Allocation support
+0x80000020, 0, ebx, 2, smba , Slow Memory Bandwidth Allocation support
+0x80000020, 0, ebx, 3, bmec , Bandwidth Monitoring Event Configuration support
+0x80000020, 0, ebx, 4, l3rr , L3 Range Reservation support
+0x80000020, 1, eax, 31:0, mba_limit_len , MBA enforcement limit size
+0x80000020, 1, edx, 31:0, mba_cos_max , MBA max Class of Service number (zero-based)
+0x80000020, 2, eax, 31:0, smba_limit_len , SMBA enforcement limit size
+0x80000020, 2, edx, 31:0, smba_cos_max , SMBA max Class of Service number (zero-based)
+0x80000020, 3, ebx, 7:0, bmec_num_events , BMEC number of bandwidth events available
+0x80000020, 3, ecx, 0, bmec_local_reads , Local NUMA reads can be tracked
+0x80000020, 3, ecx, 1, bmec_remote_reads , Remote NUMA reads can be tracked
+0x80000020, 3, ecx, 2, bmec_local_nontemp_wr , Local NUMA non-temporal writes can be tracked
+0x80000020, 3, ecx, 3, bmec_remote_nontemp_wr , Remote NUMA non-temporal writes can be tracked
+0x80000020, 3, ecx, 4, bmec_local_slow_mem_rd , Local NUMA slow-memory reads can be tracked
+0x80000020, 3, ecx, 5, bmec_remote_slow_mem_rd, Remote NUMA slow-memory reads can be tracked
+0x80000020, 3, ecx, 6, bmec_all_dirty_victims , Dirty QoS victims to all types of memory can be tracked
+
+# Leaf 80000021H
+# AMD extended features enumeration 2
+
+0x80000021, 0, eax, 0, no_nested_data_bp , No nested data breakpoints
+0x80000021, 0, eax, 1, fsgs_non_serializing , WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing
+0x80000021, 0, eax, 2, lfence_rdtsc , LFENCE always serializing / synchronizes RDTSC
+0x80000021, 0, eax, 3, smm_page_cfg_lock , SMM paging configuration lock is supported
+0x80000021, 0, eax, 6, null_sel_clr_base , Null selector clears base
+0x80000021, 0, eax, 7, upper_addr_ignore , EFER MSR Upper Address Ignore Enable bit supported
+0x80000021, 0, eax, 8, autoibrs , EFER MSR Automatic IBRS enable bit supported
+0x80000021, 0, eax, 9, no_smm_ctl_msr , SMM_CTL MSR (0xc0010116) is not present
+0x80000021, 0, eax, 10, fsrs_supported , Fast Short Rep Stosb (FSRS) is supported
+0x80000021, 0, eax, 11, fsrc_supported , Fast Short Repe Cmpsb (FSRC) is supported
+0x80000021, 0, eax, 13, prefetch_ctl_msr , Prefetch control MSR is supported
+0x80000021, 0, eax, 17, user_cpuid_disable , #GP when executing CPUID at CPL > 0 is supported
+0x80000021, 0, eax, 18, epsf_supported , Enhanced Predictive Store Forwarding (EPSF) is supported
+0x80000021, 0, ebx, 11:0, microcode_patch_size , Size of microcode patch, in 16-byte units
+
+# Leaf 80000022H
+# AMD Performance Monitoring v2 enumeration
+
+0x80000022, 0, eax, 0, perfmon_v2 , Performance monitoring v2 supported
+0x80000022, 0, eax, 1, lbr_v2 , Last Branch Record v2 extensions (LBR Stack)
+0x80000022, 0, eax, 2, lbr_pmc_freeze , Freezing core performance counters / LBR Stack supported
+0x80000022, 0, ebx, 3:0, n_pmc_core , Number of core performance counters
+0x80000022, 0, ebx, 9:4, lbr_v2_stack_size , Number of available LBR stack entries
+0x80000022, 0, ebx, 15:10, n_pmc_northbridge , Number of available northbridge (data fabric) performance counters
+0x80000022, 0, ebx, 21:16, n_pmc_umc , Number of available UMC performance counters
+0x80000022, 0, ecx, 31:0, active_umc_bitmask , Active UMCs bitmask
+
+# Leaf 80000023H
+# AMD Secure Multi-key Encryption enumeration
+
+0x80000023, 0, eax, 0, mem_hmk_mode , MEM-HMK encryption mode is supported
+0x80000023, 0, ebx, 15:0, mem_hmk_avail_keys , MEM-HMK mode: total num of available encryption keys
+
+# Leaf 80000026H
+# AMD extended topology enumeration v2
+
+0x80000026, 3:0, eax, 4:0, x2apic_id_shift , Bit width of this level (previous levels inclusive)
+0x80000026, 3:0, eax, 29, core_has_pwreff_ranking, This core has a power efficiency ranking
+0x80000026, 3:0, eax, 30, domain_has_hybrid_cores, This domain level has hybrid (E, P) cores
+0x80000026, 3:0, eax, 31, domain_core_count_asymm, The 'Core' domain has asymmetric cores count
+0x80000026, 3:0, ebx, 15:0, domain_lcpus_count , Number of logical CPUs at this domain instance
+0x80000026, 3:0, ebx, 23:16, core_pwreff_ranking , This core's static power efficiency ranking
+0x80000026, 3:0, ebx, 27:24, core_native_model_id , This core's native model ID
+0x80000026, 3:0, ebx, 31:28, core_type , This core's type
+0x80000026, 3:0, ecx, 7:0, domain_level , This domain level (subleaf ID)
+0x80000026, 3:0, ecx, 15:8, domain_type , This domain type
+0x80000026, 3:0, edx, 31:0, x2apic_id , x2APIC ID of current logical CPU
diff --git a/tools/arch/x86/kcpuid/kcpuid.c b/tools/arch/x86/kcpuid/kcpuid.c
index 24b7d017ec2c..1b25c0a95d3f 100644
--- a/tools/arch/x86/kcpuid/kcpuid.c
+++ b/tools/arch/x86/kcpuid/kcpuid.c
@@ -7,7 +7,8 @@
#include <string.h>
#include <getopt.h>
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#define min(a, b) (((a) < (b)) ? (a) : (b))
typedef unsigned int u32;
typedef unsigned long long u64;
@@ -76,7 +77,6 @@ struct cpuid_range {
*/
struct cpuid_range *leafs_basic, *leafs_ext;
-static int num_leafs;
static bool is_amd;
static bool show_details;
static bool show_raw;
@@ -98,27 +98,17 @@ static inline void cpuid(u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
static inline bool has_subleafs(u32 f)
{
- if (f == 0x7 || f == 0xd)
- return true;
-
- if (is_amd) {
- if (f == 0x8000001d)
+ u32 with_subleaves[] = {
+ 0x4, 0x7, 0xb, 0xd, 0xf, 0x10, 0x12,
+ 0x14, 0x17, 0x18, 0x1b, 0x1d, 0x1f, 0x23,
+ 0x8000001d, 0x80000020, 0x80000026,
+ };
+
+ for (unsigned i = 0; i < ARRAY_SIZE(with_subleaves); i++)
+ if (f == with_subleaves[i])
return true;
- return false;
- }
- switch (f) {
- case 0x4:
- case 0xb:
- case 0xf:
- case 0x10:
- case 0x14:
- case 0x18:
- case 0x1f:
- return true;
- default:
- return false;
- }
+ return false;
}
static void leaf_print_raw(struct subleaf *leaf)
@@ -204,15 +194,12 @@ static void raw_dump_range(struct cpuid_range *range)
}
}
-#define MAX_SUBLEAF_NUM 32
+#define MAX_SUBLEAF_NUM 64
struct cpuid_range *setup_cpuid_range(u32 input_eax)
{
- u32 max_func, idx_func;
- int subleaf;
+ u32 max_func, idx_func, subleaf, max_subleaf;
+ u32 eax, ebx, ecx, edx, f = input_eax;
struct cpuid_range *range;
- u32 eax, ebx, ecx, edx;
- u32 f = input_eax;
- int max_subleaf;
bool allzero;
eax = input_eax;
@@ -246,7 +233,6 @@ struct cpuid_range *setup_cpuid_range(u32 input_eax)
allzero = cpuid_store(range, f, subleaf, eax, ebx, ecx, edx);
if (allzero)
continue;
- num_leafs++;
if (!has_subleafs(f))
continue;
@@ -257,11 +243,18 @@ struct cpuid_range *setup_cpuid_range(u32 input_eax)
* Some can provide the exact number of subleafs,
* others have to be tried (0xf)
*/
- if (f == 0x7 || f == 0x14 || f == 0x17 || f == 0x18)
- max_subleaf = (eax & 0xff) + 1;
-
+ if (f == 0x7 || f == 0x14 || f == 0x17 || f == 0x18 || f == 0x1d)
+ max_subleaf = min((eax & 0xff) + 1, max_subleaf);
if (f == 0xb)
max_subleaf = 2;
+ if (f == 0x1f)
+ max_subleaf = 6;
+ if (f == 0x23)
+ max_subleaf = 4;
+ if (f == 0x80000020)
+ max_subleaf = 4;
+ if (f == 0x80000026)
+ max_subleaf = 5;
for (subleaf = 1; subleaf < max_subleaf; subleaf++) {
eax = f;
@@ -272,7 +265,6 @@ struct cpuid_range *setup_cpuid_range(u32 input_eax)
eax, ebx, ecx, edx);
if (allzero)
continue;
- num_leafs++;
}
}
@@ -313,6 +305,8 @@ static int parse_line(char *line)
struct bits_desc *bdesc;
int reg_index;
char *start, *end;
+ u32 subleaf_start, subleaf_end;
+ unsigned bit_start, bit_end;
/* Skip comments and NULL line */
if (line[0] == '#' || line[0] == '\n')
@@ -351,13 +345,25 @@ static int parse_line(char *line)
return 0;
/* subleaf */
- sub = strtoul(tokens[1], NULL, 0);
- if ((int)sub > func->nr)
- return -1;
+ buf = tokens[1];
+ end = strtok(buf, ":");
+ start = strtok(NULL, ":");
+ subleaf_end = strtoul(end, NULL, 0);
+
+ /* A subleaf range is given? */
+ if (start) {
+ subleaf_start = strtoul(start, NULL, 0);
+ subleaf_end = min(subleaf_end, (u32)(func->nr - 1));
+ if (subleaf_start > subleaf_end)
+ return 0;
+ } else {
+ subleaf_start = subleaf_end;
+ if (subleaf_start > (u32)(func->nr - 1))
+ return 0;
+ }
- leaf = &func->leafs[sub];
+ /* register */
buf = tokens[2];
-
if (strcasestr(buf, "EAX"))
reg_index = R_EAX;
else if (strcasestr(buf, "EBX"))
@@ -369,23 +375,23 @@ static int parse_line(char *line)
else
goto err_exit;
- reg = &leaf->info[reg_index];
- bdesc = &reg->descs[reg->nr++];
-
/* bit flag or bits field */
buf = tokens[3];
-
end = strtok(buf, ":");
- bdesc->end = strtoul(end, NULL, 0);
- bdesc->start = bdesc->end;
-
- /* start != NULL means it is bit fields */
start = strtok(NULL, ":");
- if (start)
- bdesc->start = strtoul(start, NULL, 0);
-
- strcpy(bdesc->simp, tokens[4]);
- strcpy(bdesc->detail, tokens[5]);
+ bit_end = strtoul(end, NULL, 0);
+ bit_start = (start) ? strtoul(start, NULL, 0) : bit_end;
+
+ for (sub = subleaf_start; sub <= subleaf_end; sub++) {
+ leaf = &func->leafs[sub];
+ reg = &leaf->info[reg_index];
+ bdesc = &reg->descs[reg->nr++];
+
+ bdesc->end = bit_end;
+ bdesc->start = bit_start;
+ strcpy(bdesc->simp, strtok(tokens[4], " \t"));
+ strcpy(bdesc->detail, tokens[5]);
+ }
return 0;
err_exit:
@@ -452,8 +458,9 @@ static void decode_bits(u32 value, struct reg_desc *rdesc, enum cpuid_reg reg)
if (start == end) {
/* single bit flag */
if (value & (1 << start))
- printf("\t%-20s %s%s\n",
+ printf("\t%-20s %s%s%s\n",
bdesc->simp,
+ show_flags_only ? "" : "\t\t\t",
show_details ? "-" : "",
show_details ? bdesc->detail : ""
);
diff --git a/tools/arch/x86/vdso b/tools/arch/x86/vdso
new file mode 120000
index 000000000000..7eb962fd3454
--- /dev/null
+++ b/tools/arch/x86/vdso
@@ -0,0 +1 @@
+../../../arch/x86/entry/vdso/ \ No newline at end of file
diff --git a/tools/crypto/ccp/dbc.c b/tools/crypto/ccp/dbc.c
index a807df0f0597..80248d3d3a5a 100644
--- a/tools/crypto/ccp/dbc.c
+++ b/tools/crypto/ccp/dbc.c
@@ -57,7 +57,6 @@ int process_param(int fd, int msg_index, __u8 *signature, int *data)
.msg_index = msg_index,
.param = *data,
};
- int ret;
assert(signature);
assert(data);
diff --git a/tools/gpio/Makefile b/tools/gpio/Makefile
index d29c9c49e251..ed565eb52275 100644
--- a/tools/gpio/Makefile
+++ b/tools/gpio/Makefile
@@ -78,7 +78,7 @@ $(OUTPUT)gpio-watch: $(GPIO_WATCH_IN)
clean:
rm -f $(ALL_PROGRAMS)
rm -f $(OUTPUT)include/linux/gpio.h
- find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete
+ find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete -o -name '\.*.cmd' -delete
install: $(ALL_PROGRAMS)
install -d -m 755 $(DESTDIR)$(bindir); \
diff --git a/tools/gpio/gpio-hammer.c b/tools/gpio/gpio-hammer.c
index 54fdf59dd320..ba0866eb3581 100644
--- a/tools/gpio/gpio-hammer.c
+++ b/tools/gpio/gpio-hammer.c
@@ -54,7 +54,7 @@ int hammer_device(const char *device_name, unsigned int *lines, int num_lines,
fprintf(stdout, "Hammer lines [");
for (i = 0; i < num_lines; i++) {
- fprintf(stdout, "%d", lines[i]);
+ fprintf(stdout, "%u", lines[i]);
if (i != (num_lines - 1))
fprintf(stdout, ", ");
}
@@ -89,7 +89,7 @@ int hammer_device(const char *device_name, unsigned int *lines, int num_lines,
fprintf(stdout, "[");
for (i = 0; i < num_lines; i++) {
- fprintf(stdout, "%d: %d", lines[i],
+ fprintf(stdout, "%u: %d", lines[i],
gpiotools_test_bit(values.bits, i));
if (i != (num_lines - 1))
fprintf(stdout, ", ");
diff --git a/tools/hv/Makefile b/tools/hv/Makefile
index 2e60e2c212cd..34ffcec264ab 100644
--- a/tools/hv/Makefile
+++ b/tools/hv/Makefile
@@ -52,7 +52,7 @@ $(OUTPUT)hv_fcopy_uio_daemon: $(HV_FCOPY_UIO_DAEMON_IN)
clean:
rm -f $(ALL_PROGRAMS)
- find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete
+ find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete -o -name '\.*.cmd' -delete
install: $(ALL_PROGRAMS)
install -d -m 755 $(DESTDIR)$(sbindir); \
diff --git a/tools/hv/lsvmbus b/tools/hv/lsvmbus
index 099f2c44dbed..f83698f14da2 100644..100755
--- a/tools/hv/lsvmbus
+++ b/tools/hv/lsvmbus
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
import os
diff --git a/tools/include/asm/alternative.h b/tools/include/asm/alternative.h
index 7ce02a223732..8e548ac8f740 100644
--- a/tools/include/asm/alternative.h
+++ b/tools/include/asm/alternative.h
@@ -2,8 +2,18 @@
#ifndef _TOOLS_ASM_ALTERNATIVE_ASM_H
#define _TOOLS_ASM_ALTERNATIVE_ASM_H
+#if defined(__s390x__)
+#ifdef __ASSEMBLY__
+.macro ALTERNATIVE oldinstr, newinstr, feature
+ \oldinstr
+.endm
+#endif
+#else
+
/* Just disable it so we can build arch/x86/lib/memcpy_64.S for perf bench: */
#define ALTERNATIVE #
#endif
+
+#endif
diff --git a/tools/include/generated/asm-offsets.h b/tools/include/generated/asm-offsets.h
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/tools/include/generated/asm-offsets.h
diff --git a/tools/include/generated/asm/cpucap-defs.h b/tools/include/generated/asm/cpucap-defs.h
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/tools/include/generated/asm/cpucap-defs.h
diff --git a/tools/include/generated/asm/sysreg-defs.h b/tools/include/generated/asm/sysreg-defs.h
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/tools/include/generated/asm/sysreg-defs.h
diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h
index 6f7f22ac9da5..4366da278033 100644
--- a/tools/include/linux/compiler.h
+++ b/tools/include/linux/compiler.h
@@ -2,6 +2,8 @@
#ifndef _TOOLS_LINUX_COMPILER_H_
#define _TOOLS_LINUX_COMPILER_H_
+#ifndef __ASSEMBLY__
+
#include <linux/compiler_types.h>
#ifndef __compiletime_error
@@ -224,4 +226,6 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
__asm__ ("" : "=r" (var) : "0" (var))
#endif
+#endif /* __ASSEMBLY__ */
+
#endif /* _TOOLS_LINUX_COMPILER_H */
diff --git a/tools/include/linux/linkage.h b/tools/include/linux/linkage.h
index bc763d500262..a48ff086899c 100644
--- a/tools/include/linux/linkage.h
+++ b/tools/include/linux/linkage.h
@@ -1,4 +1,8 @@
#ifndef _TOOLS_INCLUDE_LINUX_LINKAGE_H
#define _TOOLS_INCLUDE_LINUX_LINKAGE_H
+#define SYM_FUNC_START(x) .globl x; x:
+
+#define SYM_FUNC_END(x)
+
#endif /* _TOOLS_INCLUDE_LINUX_LINKAGE_H */
diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile
index e69c26abe1ea..a1f55fb24bb3 100644
--- a/tools/include/nolibc/Makefile
+++ b/tools/include/nolibc/Makefile
@@ -35,6 +35,7 @@ all_files := \
stackprotector.h \
std.h \
stdarg.h \
+ stdbool.h \
stdint.h \
stdlib.h \
string.h \
diff --git a/tools/include/nolibc/arch-aarch64.h b/tools/include/nolibc/arch-aarch64.h
index b23ac1f04035..06fdef7b291a 100644
--- a/tools/include/nolibc/arch-aarch64.h
+++ b/tools/include/nolibc/arch-aarch64.h
@@ -142,13 +142,13 @@
})
/* startup code */
-void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void)
+void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
{
__asm__ volatile (
"mov x0, sp\n" /* save stack pointer to x0, as arg1 of _start_c */
"and sp, x0, -16\n" /* sp must be 16-byte aligned in the callee */
"bl _start_c\n" /* transfer to c runtime */
);
- __builtin_unreachable();
+ __nolibc_entrypoint_epilogue();
}
#endif /* _NOLIBC_ARCH_AARCH64_H */
diff --git a/tools/include/nolibc/arch-arm.h b/tools/include/nolibc/arch-arm.h
index cae4afa7c1c7..6180ff99ab43 100644
--- a/tools/include/nolibc/arch-arm.h
+++ b/tools/include/nolibc/arch-arm.h
@@ -185,15 +185,15 @@
})
/* startup code */
-void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void)
+void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
{
__asm__ volatile (
- "mov %r0, sp\n" /* save stack pointer to %r0, as arg1 of _start_c */
- "and ip, %r0, #-8\n" /* sp must be 8-byte aligned in the callee */
+ "mov r0, sp\n" /* save stack pointer to %r0, as arg1 of _start_c */
+ "and ip, r0, #-8\n" /* sp must be 8-byte aligned in the callee */
"mov sp, ip\n"
"bl _start_c\n" /* transfer to c runtime */
);
- __builtin_unreachable();
+ __nolibc_entrypoint_epilogue();
}
#endif /* _NOLIBC_ARCH_ARM_H */
diff --git a/tools/include/nolibc/arch-i386.h b/tools/include/nolibc/arch-i386.h
index 28c26a00a762..ff5afc35bbd8 100644
--- a/tools/include/nolibc/arch-i386.h
+++ b/tools/include/nolibc/arch-i386.h
@@ -162,7 +162,7 @@
* 2) The deepest stack frame should be set to zero
*
*/
-void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void)
+void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
{
__asm__ volatile (
"xor %ebp, %ebp\n" /* zero the stack frame */
@@ -174,7 +174,7 @@ void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_
"call _start_c\n" /* transfer to c runtime */
"hlt\n" /* ensure it does not return */
);
- __builtin_unreachable();
+ __nolibc_entrypoint_epilogue();
}
#endif /* _NOLIBC_ARCH_I386_H */
diff --git a/tools/include/nolibc/arch-loongarch.h b/tools/include/nolibc/arch-loongarch.h
index 3f8ef8f86c0f..fb519545959e 100644
--- a/tools/include/nolibc/arch-loongarch.h
+++ b/tools/include/nolibc/arch-loongarch.h
@@ -149,14 +149,14 @@
#endif
/* startup code */
-void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void)
+void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
{
__asm__ volatile (
"move $a0, $sp\n" /* save stack pointer to $a0, as arg1 of _start_c */
LONG_BSTRINS " $sp, $zero, 3, 0\n" /* $sp must be 16-byte aligned */
"bl _start_c\n" /* transfer to c runtime */
);
- __builtin_unreachable();
+ __nolibc_entrypoint_epilogue();
}
#endif /* _NOLIBC_ARCH_LOONGARCH_H */
diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h
index 62cc50ef3288..1791a8ce58da 100644
--- a/tools/include/nolibc/arch-mips.h
+++ b/tools/include/nolibc/arch-mips.h
@@ -179,7 +179,7 @@
})
/* startup code, note that it's called __start on MIPS */
-void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector __start(void)
+void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector __start(void)
{
__asm__ volatile (
".set push\n"
@@ -194,11 +194,13 @@ void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_
"li $t0, -8\n"
"and $sp, $sp, $t0\n" /* $sp must be 8-byte aligned */
"addiu $sp, $sp, -16\n" /* the callee expects to save a0..a3 there */
- "jal _start_c\n" /* transfer to c runtime */
+ "lui $t9, %hi(_start_c)\n" /* ABI requires current function address in $t9 */
+ "ori $t9, %lo(_start_c)\n"
+ "jalr $t9\n" /* transfer to c runtime */
" nop\n" /* delayed slot */
".set pop\n"
);
- __builtin_unreachable();
+ __nolibc_entrypoint_epilogue();
}
#endif /* _NOLIBC_ARCH_MIPS_H */
diff --git a/tools/include/nolibc/arch-powerpc.h b/tools/include/nolibc/arch-powerpc.h
index ac212e6185b2..ee2fdb8d601d 100644
--- a/tools/include/nolibc/arch-powerpc.h
+++ b/tools/include/nolibc/arch-powerpc.h
@@ -172,7 +172,7 @@
_ret; \
})
-#ifndef __powerpc64__
+#if !defined(__powerpc64__) && !defined(__clang__)
/* FIXME: For 32-bit PowerPC, with newer gcc compilers (e.g. gcc 13.1.0),
* "omit-frame-pointer" fails with __attribute__((no_stack_protector)) but
* works with __attribute__((__optimize__("-fno-stack-protector")))
@@ -184,7 +184,7 @@
#endif /* !__powerpc64__ */
/* startup code */
-void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void)
+void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
{
#ifdef __powerpc64__
#if _CALL_ELF == 2
@@ -215,7 +215,7 @@ void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_
"bl _start_c\n" /* transfer to c runtime */
);
#endif
- __builtin_unreachable();
+ __nolibc_entrypoint_epilogue();
}
#endif /* _NOLIBC_ARCH_POWERPC_H */
diff --git a/tools/include/nolibc/arch-riscv.h b/tools/include/nolibc/arch-riscv.h
index 1927c643c739..8827bf936212 100644
--- a/tools/include/nolibc/arch-riscv.h
+++ b/tools/include/nolibc/arch-riscv.h
@@ -140,7 +140,7 @@
})
/* startup code */
-void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void)
+void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
{
__asm__ volatile (
".option push\n"
@@ -151,7 +151,7 @@ void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_
"andi sp, a0, -16\n" /* sp must be 16-byte aligned */
"call _start_c\n" /* transfer to c runtime */
);
- __builtin_unreachable();
+ __nolibc_entrypoint_epilogue();
}
#endif /* _NOLIBC_ARCH_RISCV_H */
diff --git a/tools/include/nolibc/arch-s390.h b/tools/include/nolibc/arch-s390.h
index 5d60fd43f883..2ec13d8b9a2d 100644
--- a/tools/include/nolibc/arch-s390.h
+++ b/tools/include/nolibc/arch-s390.h
@@ -139,7 +139,7 @@
})
/* startup code */
-void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void)
+void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
{
__asm__ volatile (
"lgr %r2, %r15\n" /* save stack pointer to %r2, as arg1 of _start_c */
@@ -147,7 +147,7 @@ void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_
"xc 0(8,%r15), 0(%r15)\n" /* clear backchain */
"brasl %r14, _start_c\n" /* transfer to c runtime */
);
- __builtin_unreachable();
+ __nolibc_entrypoint_epilogue();
}
struct s390_mmap_arg_struct {
diff --git a/tools/include/nolibc/arch-x86_64.h b/tools/include/nolibc/arch-x86_64.h
index 68609f421934..1e40620a2b33 100644
--- a/tools/include/nolibc/arch-x86_64.h
+++ b/tools/include/nolibc/arch-x86_64.h
@@ -161,7 +161,7 @@
* 2) The deepest stack frame should be zero (the %rbp).
*
*/
-void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void)
+void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
{
__asm__ volatile (
"xor %ebp, %ebp\n" /* zero the stack frame */
@@ -170,7 +170,7 @@ void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_
"call _start_c\n" /* transfer to c runtime */
"hlt\n" /* ensure it does not return */
);
- __builtin_unreachable();
+ __nolibc_entrypoint_epilogue();
}
#define NOLIBC_ARCH_HAS_MEMMOVE
@@ -193,10 +193,10 @@ __asm__ (
"movq %rdi, %rdx\n\t"
"subq %rsi, %rdx\n\t"
"cmpq %rcx, %rdx\n\t"
- "jb .Lbackward_copy\n\t"
+ "jb 1f\n\t"
"rep movsb\n\t"
"retq\n"
-".Lbackward_copy:"
+"1:" /* backward copy */
"leaq -1(%rdi, %rcx, 1), %rdi\n\t"
"leaq -1(%rsi, %rcx, 1), %rsi\n\t"
"std\n\t"
diff --git a/tools/include/nolibc/compiler.h b/tools/include/nolibc/compiler.h
index beddc3665d69..9bc6a706a332 100644
--- a/tools/include/nolibc/compiler.h
+++ b/tools/include/nolibc/compiler.h
@@ -6,20 +6,30 @@
#ifndef _NOLIBC_COMPILER_H
#define _NOLIBC_COMPILER_H
+#if defined(__has_attribute)
+# define __nolibc_has_attribute(attr) __has_attribute(attr)
+#else
+# define __nolibc_has_attribute(attr) 0
+#endif
+
+#if __nolibc_has_attribute(naked)
+# define __nolibc_entrypoint __attribute__((naked))
+# define __nolibc_entrypoint_epilogue()
+#else
+# define __nolibc_entrypoint __attribute__((optimize("Os", "omit-frame-pointer")))
+# define __nolibc_entrypoint_epilogue() __builtin_unreachable()
+#endif /* __nolibc_has_attribute(naked) */
+
#if defined(__SSP__) || defined(__SSP_STRONG__) || defined(__SSP_ALL__) || defined(__SSP_EXPLICIT__)
#define _NOLIBC_STACKPROTECTOR
#endif /* defined(__SSP__) ... */
-#if defined(__has_attribute)
-# if __has_attribute(no_stack_protector)
-# define __no_stack_protector __attribute__((no_stack_protector))
-# else
-# define __no_stack_protector __attribute__((__optimize__("-fno-stack-protector")))
-# endif
+#if __nolibc_has_attribute(no_stack_protector)
+# define __no_stack_protector __attribute__((no_stack_protector))
#else
# define __no_stack_protector __attribute__((__optimize__("-fno-stack-protector")))
-#endif /* defined(__has_attribute) */
+#endif /* __nolibc_has_attribute(no_stack_protector) */
#endif /* _NOLIBC_COMPILER_H */
diff --git a/tools/include/nolibc/crt.h b/tools/include/nolibc/crt.h
index 43b551468c2a..bbcd5fd09806 100644
--- a/tools/include/nolibc/crt.h
+++ b/tools/include/nolibc/crt.h
@@ -13,23 +13,24 @@ const unsigned long *_auxv __attribute__((weak));
static void __stack_chk_init(void);
static void exit(int);
-extern void (*const __preinit_array_start[])(void) __attribute__((weak));
-extern void (*const __preinit_array_end[])(void) __attribute__((weak));
+extern void (*const __preinit_array_start[])(int, char **, char**) __attribute__((weak));
+extern void (*const __preinit_array_end[])(int, char **, char**) __attribute__((weak));
-extern void (*const __init_array_start[])(void) __attribute__((weak));
-extern void (*const __init_array_end[])(void) __attribute__((weak));
+extern void (*const __init_array_start[])(int, char **, char**) __attribute__((weak));
+extern void (*const __init_array_end[])(int, char **, char**) __attribute__((weak));
extern void (*const __fini_array_start[])(void) __attribute__((weak));
extern void (*const __fini_array_end[])(void) __attribute__((weak));
-__attribute__((weak))
+__attribute__((weak,used))
void _start_c(long *sp)
{
long argc;
char **argv;
char **envp;
int exitcode;
- void (* const *func)(void);
+ void (* const *ctor_func)(int, char **, char **);
+ void (* const *dtor_func)(void);
const unsigned long *auxv;
/* silence potential warning: conflicting types for 'main' */
int _nolibc_main(int, char **, char **) __asm__ ("main");
@@ -66,16 +67,16 @@ void _start_c(long *sp)
;
_auxv = auxv;
- for (func = __preinit_array_start; func < __preinit_array_end; func++)
- (*func)();
- for (func = __init_array_start; func < __init_array_end; func++)
- (*func)();
+ for (ctor_func = __preinit_array_start; ctor_func < __preinit_array_end; ctor_func++)
+ (*ctor_func)(argc, argv, envp);
+ for (ctor_func = __init_array_start; ctor_func < __init_array_end; ctor_func++)
+ (*ctor_func)(argc, argv, envp);
/* go to application */
exitcode = _nolibc_main(argc, argv, envp);
- for (func = __fini_array_end; func > __fini_array_start;)
- (*--func)();
+ for (dtor_func = __fini_array_end; dtor_func > __fini_array_start;)
+ (*--dtor_func)();
exit(exitcode);
}
diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h
index 989e707263a4..92436b1e4441 100644
--- a/tools/include/nolibc/nolibc.h
+++ b/tools/include/nolibc/nolibc.h
@@ -74,7 +74,8 @@
* -I../nolibc -o hello hello.c -lgcc
*
* The available standard (but limited) include files are:
- * ctype.h, errno.h, signal.h, stdarg.h, stdio.h, stdlib.h, string.h, time.h
+ * ctype.h, errno.h, signal.h, stdarg.h, stdbool.h, stdio.h, stdlib.h,
+ * string.h, time.h
*
* In addition, the following ones are expected to be provided by the compiler:
* float.h, stddef.h
diff --git a/tools/include/nolibc/stackprotector.h b/tools/include/nolibc/stackprotector.h
index 13f1d0e60387..1d0d5259ec41 100644
--- a/tools/include/nolibc/stackprotector.h
+++ b/tools/include/nolibc/stackprotector.h
@@ -18,7 +18,7 @@
* triggering stack protector errors themselves
*/
-__attribute__((weak,noreturn,section(".text.nolibc_stack_chk")))
+__attribute__((weak,used,noreturn,section(".text.nolibc_stack_chk")))
void __stack_chk_fail(void)
{
pid_t pid;
@@ -34,7 +34,7 @@ void __stack_chk_fail_local(void)
__stack_chk_fail();
}
-__attribute__((weak,section(".data.nolibc_stack_chk")))
+__attribute__((weak,used,section(".data.nolibc_stack_chk")))
uintptr_t __stack_chk_guard;
static __no_stack_protector void __stack_chk_init(void)
diff --git a/tools/include/nolibc/stdbool.h b/tools/include/nolibc/stdbool.h
new file mode 100644
index 000000000000..60feece22f17
--- /dev/null
+++ b/tools/include/nolibc/stdbool.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Boolean types support for NOLIBC
+ * Copyright (C) 2024 Thomas Weißschuh <linux@weissschuh.net>
+ */
+
+#ifndef _NOLIBC_STDBOOL_H
+#define _NOLIBC_STDBOOL_H
+
+#define bool _Bool
+#define true 1
+#define false 0
+
+#define __bool_true_false_are_defined 1
+
+#endif /* _NOLIBC_STDBOOL_H */
diff --git a/tools/include/nolibc/string.h b/tools/include/nolibc/string.h
index f9ab28421e6d..9ec9c24f38c0 100644
--- a/tools/include/nolibc/string.h
+++ b/tools/include/nolibc/string.h
@@ -7,6 +7,7 @@
#ifndef _NOLIBC_STRING_H
#define _NOLIBC_STRING_H
+#include "arch.h"
#include "std.h"
static void *malloc(size_t len);
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 35bcf52dbc65..e05b39e39c3f 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2851,7 +2851,7 @@ union bpf_attr {
* **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**,
* **TCP_NODELAY**, **TCP_MAXSEG**, **TCP_WINDOW_CLAMP**,
* **TCP_THIN_LINEAR_TIMEOUTS**, **TCP_BPF_DELACK_MAX**,
- * **TCP_BPF_RTO_MIN**.
+ * **TCP_BPF_RTO_MIN**, **TCP_BPF_SOCK_OPS_CB_FLAGS**.
* * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
* * **IPPROTO_IPV6**, which supports the following *optname*\ s:
* **IPV6_TCLASS**, **IPV6_AUTOFLOWLABEL**.
@@ -7080,6 +7080,7 @@ enum {
TCP_BPF_SYN = 1005, /* Copy the TCP header */
TCP_BPF_SYN_IP = 1006, /* Copy the IP[46] and TCP header */
TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */
+ TCP_BPF_SOCK_OPS_CB_FLAGS = 1008, /* Get or Set TCP sock ops flags */
};
enum {
diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h
index 43742ac5b00d..7c308f04e7a0 100644
--- a/tools/include/uapi/linux/netdev.h
+++ b/tools/include/uapi/linux/netdev.h
@@ -93,6 +93,7 @@ enum {
NETDEV_A_PAGE_POOL_INFLIGHT,
NETDEV_A_PAGE_POOL_INFLIGHT_MEM,
NETDEV_A_PAGE_POOL_DETACH_TIME,
+ NETDEV_A_PAGE_POOL_DMABUF,
__NETDEV_A_PAGE_POOL_MAX,
NETDEV_A_PAGE_POOL_MAX = (__NETDEV_A_PAGE_POOL_MAX - 1)
@@ -131,6 +132,7 @@ enum {
NETDEV_A_QUEUE_IFINDEX,
NETDEV_A_QUEUE_TYPE,
NETDEV_A_QUEUE_NAPI_ID,
+ NETDEV_A_QUEUE_DMABUF,
__NETDEV_A_QUEUE_MAX,
NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1)
@@ -174,6 +176,16 @@ enum {
};
enum {
+ NETDEV_A_DMABUF_IFINDEX = 1,
+ NETDEV_A_DMABUF_QUEUES,
+ NETDEV_A_DMABUF_FD,
+ NETDEV_A_DMABUF_ID,
+
+ __NETDEV_A_DMABUF_MAX,
+ NETDEV_A_DMABUF_MAX = (__NETDEV_A_DMABUF_MAX - 1)
+};
+
+enum {
NETDEV_CMD_DEV_GET = 1,
NETDEV_CMD_DEV_ADD_NTF,
NETDEV_CMD_DEV_DEL_NTF,
@@ -186,6 +198,7 @@ enum {
NETDEV_CMD_QUEUE_GET,
NETDEV_CMD_NAPI_GET,
NETDEV_CMD_QSTATS_GET,
+ NETDEV_CMD_BIND_RX,
__NETDEV_CMD_MAX,
NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1)
diff --git a/tools/memory-model/Documentation/README b/tools/memory-model/Documentation/README
index 304162743a5b..9999c1effdb6 100644
--- a/tools/memory-model/Documentation/README
+++ b/tools/memory-model/Documentation/README
@@ -9,6 +9,8 @@ depending on what you know and what you would like to learn. Please note
that the documents later in this list assume that the reader understands
the material provided by documents earlier in this list.
+If LKMM-specific terms lost you, glossary.txt might help you.
+
o You are new to Linux-kernel concurrency: simple.txt
o You have some background in Linux-kernel concurrency, and would
@@ -21,6 +23,9 @@ o You are familiar with the Linux-kernel concurrency primitives
that you need, and just want to get started with LKMM litmus
tests: litmus-tests.txt
+o You would like to access lock-protected shared variables without
+ having their corresponding locks held: locking.txt
+
o You are familiar with Linux-kernel concurrency, and would
like a detailed intuitive understanding of LKMM, including
situations involving more than two threads: recipes.txt
@@ -28,12 +33,18 @@ o You are familiar with Linux-kernel concurrency, and would
o You would like a detailed understanding of what your compiler can
and cannot do to control dependencies: control-dependencies.txt
+o You would like to mark concurrent normal accesses to shared
+ variables so that intentional "racy" accesses can be properly
+ documented, especially when you are responding to complaints
+ from KCSAN: access-marking.txt
+
o You are familiar with Linux-kernel concurrency and the use of
LKMM, and would like a quick reference: cheatsheet.txt
o You are familiar with Linux-kernel concurrency and the use
of LKMM, and would like to learn about LKMM's requirements,
- rationale, and implementation: explanation.txt
+ rationale, and implementation: explanation.txt and
+ herd-representation.txt
o You are interested in the publications related to LKMM, including
hardware manuals, academic literature, standards-committee
@@ -61,10 +72,21 @@ control-dependencies.txt
explanation.txt
Detailed description of the memory model.
+glossary.txt
+ Brief definitions of LKMM-related terms.
+
+herd-representation.txt
+ The (abstract) representation of the Linux-kernel concurrency
+ primitives in terms of events.
+
litmus-tests.txt
The format, features, capabilities, and limitations of the litmus
tests that LKMM can evaluate.
+locking.txt
+ Rules for accessing lock-protected shared variables outside of
+ their corresponding critical sections.
+
ordering.txt
Overview of the Linux kernel's low-level memory-ordering
primitives by category.
diff --git a/tools/memory-model/Documentation/herd-representation.txt b/tools/memory-model/Documentation/herd-representation.txt
new file mode 100644
index 000000000000..ed988906f2b7
--- /dev/null
+++ b/tools/memory-model/Documentation/herd-representation.txt
@@ -0,0 +1,110 @@
+#
+# Legend:
+# R, a Load event
+# W, a Store event
+# F, a Fence event
+# LKR, a Lock-Read event
+# LKW, a Lock-Write event
+# UL, an Unlock event
+# LF, a Lock-Fail event
+# RL, a Read-Locked event
+# RU, a Read-Unlocked event
+# R*, a Load event included in RMW
+# W*, a Store event included in RMW
+# SRCU, a Sleepable-Read-Copy-Update event
+#
+# po, a Program-Order link
+# rmw, a Read-Modify-Write link - every rmw link is a po link
+#
+# By convention, a blank line in a cell means "same as the preceding line".
+#
+# Disclaimer. The table includes representations of "add" and "and" operations;
+# corresponding/identical representations of "sub", "inc", "dec" and "or", "xor",
+# "andnot" operations are omitted.
+#
+ ------------------------------------------------------------------------------
+ | C macro | Events |
+ ------------------------------------------------------------------------------
+ | Non-RMW ops | |
+ ------------------------------------------------------------------------------
+ | READ_ONCE | R[once] |
+ | atomic_read | |
+ | WRITE_ONCE | W[once] |
+ | atomic_set | |
+ | smp_load_acquire | R[acquire] |
+ | atomic_read_acquire | |
+ | smp_store_release | W[release] |
+ | atomic_set_release | |
+ | smp_store_mb | W[once] ->po F[mb] |
+ | smp_mb | F[mb] |
+ | smp_rmb | F[rmb] |
+ | smp_wmb | F[wmb] |
+ | smp_mb__before_atomic | F[before-atomic] |
+ | smp_mb__after_atomic | F[after-atomic] |
+ | spin_unlock | UL |
+ | spin_is_locked | On success: RL |
+ | | On failure: RU |
+ | smp_mb__after_spinlock | F[after-spinlock] |
+ | smp_mb__after_unlock_lock | F[after-unlock-lock] |
+ | rcu_read_lock | F[rcu-lock] |
+ | rcu_read_unlock | F[rcu-unlock] |
+ | synchronize_rcu | F[sync-rcu] |
+ | rcu_dereference | R[once] |
+ | rcu_assign_pointer | W[release] |
+ | srcu_read_lock | R[srcu-lock] |
+ | srcu_down_read | |
+ | srcu_read_unlock | W[srcu-unlock] |
+ | srcu_up_read | |
+ | synchronize_srcu | SRCU[sync-srcu] |
+ | smp_mb__after_srcu_read_unlock | F[after-srcu-read-unlock] |
+ ------------------------------------------------------------------------------
+ | RMW ops w/o return value | |
+ ------------------------------------------------------------------------------
+ | atomic_add | R*[noreturn] ->rmw W*[once] |
+ | atomic_and | |
+ | spin_lock | LKR ->po LKW |
+ ------------------------------------------------------------------------------
+ | RMW ops w/ return value | |
+ ------------------------------------------------------------------------------
+ | atomic_add_return | F[mb] ->po R*[once] |
+ | | ->rmw W*[once] ->po F[mb] |
+ | atomic_fetch_add | |
+ | atomic_fetch_and | |
+ | atomic_xchg | |
+ | xchg | |
+ | atomic_add_negative | |
+ | atomic_add_return_relaxed | R*[once] ->rmw W*[once] |
+ | atomic_fetch_add_relaxed | |
+ | atomic_fetch_and_relaxed | |
+ | atomic_xchg_relaxed | |
+ | xchg_relaxed | |
+ | atomic_add_negative_relaxed | |
+ | atomic_add_return_acquire | R*[acquire] ->rmw W*[once] |
+ | atomic_fetch_add_acquire | |
+ | atomic_fetch_and_acquire | |
+ | atomic_xchg_acquire | |
+ | xchg_acquire | |
+ | atomic_add_negative_acquire | |
+ | atomic_add_return_release | R*[once] ->rmw W*[release] |
+ | atomic_fetch_add_release | |
+ | atomic_fetch_and_release | |
+ | atomic_xchg_release | |
+ | xchg_release | |
+ | atomic_add_negative_release | |
+ ------------------------------------------------------------------------------
+ | Conditional RMW ops | |
+ ------------------------------------------------------------------------------
+ | atomic_cmpxchg | On success: F[mb] ->po R*[once] |
+ | | ->rmw W*[once] ->po F[mb] |
+ | | On failure: R*[once] |
+ | cmpxchg | |
+ | atomic_add_unless | |
+ | atomic_cmpxchg_relaxed | On success: R*[once] ->rmw W*[once] |
+ | | On failure: R*[once] |
+ | atomic_cmpxchg_acquire | On success: R*[acquire] ->rmw W*[once] |
+ | | On failure: R*[once] |
+ | atomic_cmpxchg_release | On success: R*[once] ->rmw W*[release] |
+ | | On failure: R*[once] |
+ | spin_trylock | On success: LKR ->po LKW |
+ | | On failure: LF |
+ ------------------------------------------------------------------------------
diff --git a/tools/memory-model/Documentation/simple.txt b/tools/memory-model/Documentation/simple.txt
index 4c789ec8334f..21f06c1d1b70 100644
--- a/tools/memory-model/Documentation/simple.txt
+++ b/tools/memory-model/Documentation/simple.txt
@@ -266,5 +266,5 @@ More complex use cases
======================
If the alternatives above do not do what you need, please look at the
-recipes-pairs.txt file to peel off the next layer of the memory-ordering
+recipes.txt file to peel off the next layer of the memory-ordering
onion.
diff --git a/tools/net/ynl/lib/.gitignore b/tools/net/ynl/lib/.gitignore
index c18dd8d83cee..296c4035dbf2 100644
--- a/tools/net/ynl/lib/.gitignore
+++ b/tools/net/ynl/lib/.gitignore
@@ -1 +1,2 @@
__pycache__/
+*.d
diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c
index fcb18a5a6d70..e16cef160bc2 100644
--- a/tools/net/ynl/lib/ynl.c
+++ b/tools/net/ynl/lib/ynl.c
@@ -696,14 +696,14 @@ ynl_sock_create(const struct ynl_family *yf, struct ynl_error *yse)
addr.nl_family = AF_NETLINK;
if (bind(ys->socket, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
__perr(yse, "unable to bind to a socket address");
- goto err_close_sock;;
+ goto err_close_sock;
}
memset(&addr, 0, sizeof(addr));
addrlen = sizeof(addr);
if (getsockname(ys->socket, (struct sockaddr *)&addr, &addrlen) < 0) {
__perr(yse, "unable to read socket address");
- goto err_close_sock;;
+ goto err_close_sock;
}
ys->portid = addr.nl_pid;
ys->seq = random();
diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py
index d42c1d605969..c22c22bf2cb7 100644
--- a/tools/net/ynl/lib/ynl.py
+++ b/tools/net/ynl/lib/ynl.py
@@ -388,6 +388,8 @@ class NetlinkProtocol:
def decode(self, ynl, nl_msg, op):
msg = self._decode(nl_msg)
+ if op is None:
+ op = ynl.rsp_by_value[msg.cmd()]
fixed_header_size = ynl._struct_size(op.fixed_header)
msg.raw_attrs = NlAttrs(msg.raw, fixed_header_size)
return msg
@@ -921,8 +923,7 @@ class YnlFamily(SpecFamily):
print("Netlink done while checking for ntf!?")
continue
- op = self.rsp_by_value[nl_msg.cmd()]
- decoded = self.nlproto.decode(self, nl_msg, op)
+ decoded = self.nlproto.decode(self, nl_msg, None)
if decoded.cmd() not in self.async_msg_ids:
print("Unexpected msg id done while checking for ntf", decoded)
continue
@@ -980,7 +981,7 @@ class YnlFamily(SpecFamily):
if nl_msg.extack:
self._decode_extack(req_msg, op, nl_msg.extack)
else:
- op = self.rsp_by_value[nl_msg.cmd()]
+ op = None
req_flags = []
if nl_msg.error:
diff --git a/tools/net/ynl/samples/netdev.c b/tools/net/ynl/samples/netdev.c
index 3e7b29bd55d5..22609d44c89a 100644
--- a/tools/net/ynl/samples/netdev.c
+++ b/tools/net/ynl/samples/netdev.c
@@ -79,7 +79,10 @@ int main(int argc, char **argv)
goto err_close;
printf("Select ifc ($ifindex; or 0 = dump; or -2 ntf check): ");
- scanf("%d", &ifindex);
+ if (scanf("%d", &ifindex) != 1) {
+ fprintf(stderr, "Error: unable to parse input\n");
+ goto err_destroy;
+ }
if (ifindex > 0) {
struct netdev_dev_get_req *req;
@@ -119,6 +122,7 @@ int main(int argc, char **argv)
err_close:
fprintf(stderr, "YNL: %s\n", ys->err.msg);
+err_destroy:
ynl_sock_destroy(ys);
return 2;
}
diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py
index 51529fabd517..717530bc9c52 100755
--- a/tools/net/ynl/ynl-gen-c.py
+++ b/tools/net/ynl/ynl-gen-c.py
@@ -2668,13 +2668,15 @@ def main():
cw.p('#define ' + hdr_prot)
cw.nl()
+ hdr_file=os.path.basename(args.out_file[:-2]) + ".h"
+
if args.mode == 'kernel':
cw.p('#include <net/netlink.h>')
cw.p('#include <net/genetlink.h>')
cw.nl()
if not args.header:
if args.out_file:
- cw.p(f'#include "{os.path.basename(args.out_file[:-2])}.h"')
+ cw.p(f'#include "{hdr_file}"')
cw.nl()
headers = ['uapi/' + parsed.uapi_header]
headers += parsed.kernel_family.get('headers', [])
@@ -2686,7 +2688,7 @@ def main():
if family_contains_bitfield32(parsed):
cw.p('#include <linux/netlink.h>')
else:
- cw.p(f'#include "{parsed.name}-user.h"')
+ cw.p(f'#include "{hdr_file}"')
cw.p('#include "ynl.h"')
headers = [parsed.uapi_header]
for definition in parsed['definitions']:
diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c
index 5c9335fff2d3..9a95871afc95 100644
--- a/tools/perf/builtin-daemon.c
+++ b/tools/perf/builtin-daemon.c
@@ -691,7 +691,7 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out)
fprintf(out, "%c%" PRIu64,
/* session up time */
- csv_sep, (curr - daemon->start) / 60);
+ csv_sep, (uint64_t)((curr - daemon->start) / 60));
fprintf(out, "\n");
} else {
@@ -702,7 +702,7 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out)
fprintf(out, " lock: %s/lock\n",
daemon->base);
fprintf(out, " up: %" PRIu64 " minutes\n",
- (curr - daemon->start) / 60);
+ (uint64_t)((curr - daemon->start) / 60));
}
}
@@ -730,7 +730,7 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out)
fprintf(out, "%c%" PRIu64,
/* session up time */
- csv_sep, (curr - session->start) / 60);
+ csv_sep, (uint64_t)((curr - session->start) / 60));
fprintf(out, "\n");
} else {
@@ -747,7 +747,7 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out)
fprintf(out, " ack: %s/%s\n",
session->base, SESSION_ACK);
fprintf(out, " up: %" PRIu64 " minutes\n",
- (curr - session->start) / 60);
+ (uint64_t)((curr - session->start) / 60));
}
}
diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c
index 40132655ccd1..c76f53a90a7b 100644
--- a/tools/perf/tests/pmu.c
+++ b/tools/perf/tests/pmu.c
@@ -456,11 +456,13 @@ static int test__name_cmp(struct test_suite *test __maybe_unused, int subtest __
/**
* Test perf_pmu__match() that's used to search for a PMU given a name passed
* on the command line. The name that's passed may also be a filename type glob
- * match.
+ * match. If the name does not match, perf_pmu__match() attempts to match the
+ * alias of the PMU, if provided.
*/
static int test__pmu_match(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
{
struct perf_pmu test_pmu;
+ test_pmu.alias_name = NULL;
test_pmu.name = "pmuname";
TEST_ASSERT_EQUAL("Exact match", perf_pmu__match(&test_pmu, "pmuname"), true);
diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c
index b4cb3fe5cc25..bc4e92c0c08b 100644
--- a/tools/perf/util/bpf_lock_contention.c
+++ b/tools/perf/util/bpf_lock_contention.c
@@ -286,6 +286,9 @@ static void account_end_timestamp(struct lock_contention *con)
goto next;
for (int i = 0; i < total_cpus; i++) {
+ if (cpu_data[i].lock == 0)
+ continue;
+
update_lock_stat(stat_fd, -1, end_ts, aggr_mode,
&cpu_data[i]);
}
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 3be882b2e845..31a223eaf8e6 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -20,6 +20,7 @@
#include "util/env.h"
#include "util/kvm-stat.h"
#include "util/kwork.h"
+#include "util/sample.h"
#include "util/lock-contention.h"
#include <internal/lib.h>
#include "../builtin.h"
diff --git a/tools/power/cpupower/bindings/python/.gitignore b/tools/power/cpupower/bindings/python/.gitignore
new file mode 100644
index 000000000000..5c9a1f0212dd
--- /dev/null
+++ b/tools/power/cpupower/bindings/python/.gitignore
@@ -0,0 +1,8 @@
+__pycache__/
+raw_pylibcpupower_wrap.c
+*.o
+*.so
+*.py
+!test_raw_pylibcpupower.py
+# git keeps ignoring this file, use git add -f raw_pylibcpupower.i
+!raw_pylibcpupower.i
diff --git a/tools/power/cpupower/bindings/python/Makefile b/tools/power/cpupower/bindings/python/Makefile
new file mode 100644
index 000000000000..dc09c5b66ead
--- /dev/null
+++ b/tools/power/cpupower/bindings/python/Makefile
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: GPL-2.0-only
+# Makefile for libcpupower's Python bindings
+#
+# This Makefile expects you have already run the makefile for cpupower to build
+# the .o files in the lib directory for the bindings to be created.
+
+CC := gcc
+HAVE_SWIG := $(shell if which swig >/dev/null 2>&1; then echo 1; else echo 0; fi)
+HAVE_PYCONFIG := $(shell if which python-config >/dev/null 2>&1; then echo 1; else echo 0; fi)
+
+LIB_DIR := ../../lib
+PY_INCLUDE = $(firstword $(shell python-config --includes))
+OBJECTS_LIB = $(wildcard $(LIB_DIR)/*.o)
+
+all: _raw_pylibcpupower.so
+
+_raw_pylibcpupower.so: raw_pylibcpupower_wrap.o
+ $(CC) -shared $(OBJECTS_LIB) raw_pylibcpupower_wrap.o -o _raw_pylibcpupower.so
+
+raw_pylibcpupower_wrap.o: raw_pylibcpupower_wrap.c
+ $(CC) -fPIC -c raw_pylibcpupower_wrap.c $(PY_INCLUDE)
+
+raw_pylibcpupower_wrap.c: raw_pylibcpupower.i
+ifeq ($(HAVE_SWIG),0)
+ $(error "swig was not found. Make sure you have it installed and in the PATH to generate the bindings.")
+else ifeq ($(HAVE_PYCONFIG),0)
+ $(error "python-config was not found. Make sure you have it installed and in the PATH to generate the bindings.")
+endif
+ swig -python raw_pylibcpupower.i
+
+# Will only clean the bindings folder; will not clean the actual cpupower folder
+clean:
+ rm -f raw_pylibcpupower.py raw_pylibcpupower_wrap.c raw_pylibcpupower_wrap.o _raw_pylibcpupower.so
diff --git a/tools/power/cpupower/bindings/python/README b/tools/power/cpupower/bindings/python/README
new file mode 100644
index 000000000000..0a4bb2581e8a
--- /dev/null
+++ b/tools/power/cpupower/bindings/python/README
@@ -0,0 +1,59 @@
+This folder contains the necessary files to build the Python bindings for
+libcpupower (aside from the libcpupower object files).
+
+
+requirements
+------------
+
+* You need the object files in the libcpupower directory compiled by
+cpupower's makefile.
+* The SWIG program must be installed.
+* Python's development libraries must be installed.
+
+Please check that your version of SWIG is compatible with the version of Python
+installed on your machine by checking the SWIG changelog on their website.
+https://swig.org/
+
+Note that while SWIG itself is GPL v3+ licensed, the resulting output (the
+bindings code) is permissively licensed plus the license of libcpupower's .o
+files. For these bindings that means GPL v2.
+
+Please see https://swig.org/legal.html and the discussion [1] for more details.
+
+[1]
+https://lore.kernel.org/linux-pm/Zqv9BOjxLAgyNP5B@hatbackup/
+
+
+build
+-----
+
+Install SWIG and the Python development files provided by your distribution.
+
+Build the object files for libcpupower by running make in the cpupower
+directory.
+
+Return to the directory this README is in to run:
+
+$ make
+
+
+testing
+-------
+
+Please verify the _raw_pylibcpupower.so and raw_pylibcpupower.py files have
+been created.
+
+To run the test script:
+
+$ python test_raw_pylibcpupower.py
+
+
+credits
+-------
+
+Original Bindings Author:
+John B. Wyatt IV
+jwyatt@redhat.com
+sageofredondo@gmail.com
+
+Copyright (C) 2024 Red Hat
diff --git a/tools/power/cpupower/bindings/python/raw_pylibcpupower.i b/tools/power/cpupower/bindings/python/raw_pylibcpupower.i
new file mode 100644
index 000000000000..96556d87a745
--- /dev/null
+++ b/tools/power/cpupower/bindings/python/raw_pylibcpupower.i
@@ -0,0 +1,247 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+%module raw_pylibcpupower
+%{
+#include "../../lib/cpupower_intern.h"
+#include "../../lib/acpi_cppc.h"
+#include "../../lib/cpufreq.h"
+#include "../../lib/cpuidle.h"
+#include "../../lib/cpupower.h"
+#include "../../lib/powercap.h"
+%}
+
+/*
+ * cpupower_intern.h
+ */
+
+#define PATH_TO_CPU "/sys/devices/system/cpu/"
+#define MAX_LINE_LEN 4096
+#define SYSFS_PATH_MAX 255
+
+int is_valid_path(const char *path);
+
+unsigned int cpupower_read_sysfs(const char *path, char *buf, size_t buflen);
+
+unsigned int cpupower_write_sysfs(const char *path, char *buf, size_t buflen);
+
+/*
+ * acpi_cppc.h
+ */
+
+enum acpi_cppc_value {
+ HIGHEST_PERF,
+ LOWEST_PERF,
+ NOMINAL_PERF,
+ LOWEST_NONLINEAR_PERF,
+ LOWEST_FREQ,
+ NOMINAL_FREQ,
+ REFERENCE_PERF,
+ WRAPAROUND_TIME,
+ MAX_CPPC_VALUE_FILES
+};
+
+unsigned long acpi_cppc_get_data(unsigned int cpu,
+ enum acpi_cppc_value which);
+
+/*
+ * cpufreq.h
+ */
+
+struct cpufreq_policy {
+ unsigned long min;
+ unsigned long max;
+ char *governor;
+};
+
+struct cpufreq_available_governors {
+ char *governor;
+ struct cpufreq_available_governors *next;
+ struct cpufreq_available_governors *first;
+};
+
+struct cpufreq_available_frequencies {
+ unsigned long frequency;
+ struct cpufreq_available_frequencies *next;
+ struct cpufreq_available_frequencies *first;
+};
+
+
+struct cpufreq_affected_cpus {
+ unsigned int cpu;
+ struct cpufreq_affected_cpus *next;
+ struct cpufreq_affected_cpus *first;
+};
+
+struct cpufreq_stats {
+ unsigned long frequency;
+ unsigned long long time_in_state;
+ struct cpufreq_stats *next;
+ struct cpufreq_stats *first;
+};
+
+unsigned long cpufreq_get_freq_kernel(unsigned int cpu);
+
+unsigned long cpufreq_get_freq_hardware(unsigned int cpu);
+
+#define cpufreq_get(cpu) cpufreq_get_freq_kernel(cpu) /* no trailing ';' so it can be used inside expressions */
+
+unsigned long cpufreq_get_transition_latency(unsigned int cpu);
+
+int cpufreq_get_hardware_limits(unsigned int cpu,
+ unsigned long *min,
+ unsigned long *max);
+
+char *cpufreq_get_driver(unsigned int cpu);
+
+void cpufreq_put_driver(char *ptr);
+
+struct cpufreq_policy *cpufreq_get_policy(unsigned int cpu);
+
+void cpufreq_put_policy(struct cpufreq_policy *policy);
+
+struct cpufreq_available_governors
+*cpufreq_get_available_governors(unsigned int cpu);
+
+void cpufreq_put_available_governors(
+ struct cpufreq_available_governors *first);
+
+struct cpufreq_available_frequencies
+*cpufreq_get_available_frequencies(unsigned int cpu);
+
+void cpufreq_put_available_frequencies(
+ struct cpufreq_available_frequencies *first);
+
+struct cpufreq_available_frequencies
+*cpufreq_get_boost_frequencies(unsigned int cpu);
+
+void cpufreq_put_boost_frequencies(
+ struct cpufreq_available_frequencies *first);
+
+struct cpufreq_affected_cpus *cpufreq_get_affected_cpus(unsigned
+ int cpu);
+
+void cpufreq_put_affected_cpus(struct cpufreq_affected_cpus *first);
+
+struct cpufreq_affected_cpus *cpufreq_get_related_cpus(unsigned
+ int cpu);
+
+void cpufreq_put_related_cpus(struct cpufreq_affected_cpus *first);
+
+struct cpufreq_stats *cpufreq_get_stats(unsigned int cpu,
+ unsigned long long *total_time);
+
+void cpufreq_put_stats(struct cpufreq_stats *stats);
+
+unsigned long cpufreq_get_transitions(unsigned int cpu);
+
+int cpufreq_set_policy(unsigned int cpu, struct cpufreq_policy *policy);
+
+int cpufreq_modify_policy_min(unsigned int cpu, unsigned long min_freq);
+
+int cpufreq_modify_policy_max(unsigned int cpu, unsigned long max_freq);
+
+int cpufreq_modify_policy_governor(unsigned int cpu, char *governor);
+
+int cpufreq_set_frequency(unsigned int cpu,
+ unsigned long target_frequency);
+
+unsigned long cpufreq_get_sysfs_value_from_table(unsigned int cpu,
+ const char **table,
+ unsigned int index,
+ unsigned int size);
+
+/*
+ * cpuidle.h
+ */
+
+int cpuidle_is_state_disabled(unsigned int cpu,
+ unsigned int idlestate);
+int cpuidle_state_disable(unsigned int cpu, unsigned int idlestate,
+ unsigned int disable);
+unsigned long cpuidle_state_latency(unsigned int cpu,
+ unsigned int idlestate);
+unsigned long cpuidle_state_usage(unsigned int cpu,
+ unsigned int idlestate);
+unsigned long long cpuidle_state_time(unsigned int cpu,
+ unsigned int idlestate);
+char *cpuidle_state_name(unsigned int cpu,
+ unsigned int idlestate);
+char *cpuidle_state_desc(unsigned int cpu,
+ unsigned int idlestate);
+unsigned int cpuidle_state_count(unsigned int cpu);
+
+char *cpuidle_get_governor(void);
+
+char *cpuidle_get_driver(void);
+
+/*
+ * cpupower.h
+ */
+
+struct cpupower_topology {
+ /* Amount of CPU cores, packages and threads per core in the system */
+ unsigned int cores;
+ unsigned int pkgs;
+ unsigned int threads; /* per core */
+
+ /* Array gets mallocated with cores entries, holding per core info */
+ struct cpuid_core_info *core_info;
+};
+
+struct cpuid_core_info {
+ int pkg;
+ int core;
+ int cpu;
+
+ /* flags */
+ unsigned int is_online:1;
+};
+
+int get_cpu_topology(struct cpupower_topology *cpu_top);
+
+void cpu_topology_release(struct cpupower_topology cpu_top);
+
+int cpupower_is_cpu_online(unsigned int cpu);
+
+/*
+ * powercap.h
+ */
+
+struct powercap_zone {
+ char name[MAX_LINE_LEN];
+ /*
+ * sys_name relative to PATH_TO_POWERCAP,
+ * do not forget the / in between
+ */
+ char sys_name[SYSFS_PATH_MAX];
+ int tree_depth;
+ struct powercap_zone *parent;
+ struct powercap_zone *children[POWERCAP_MAX_CHILD_ZONES];
+ /* More possible caps or attributes to be added? */
+ uint32_t has_power_uw:1,
+ has_energy_uj:1;
+
+};
+
+int powercap_walk_zones(struct powercap_zone *zone,
+ int (*f)(struct powercap_zone *zone));
+
+struct powercap_zone *powercap_init_zones(void);
+
+int powercap_get_enabled(int *mode);
+
+int powercap_set_enabled(int mode);
+
+int powercap_get_driver(char *driver, int buflen);
+
+int powercap_get_max_energy_range_uj(struct powercap_zone *zone, uint64_t *val);
+
+int powercap_get_energy_uj(struct powercap_zone *zone, uint64_t *val);
+
+int powercap_get_max_power_range_uw(struct powercap_zone *zone, uint64_t *val);
+
+int powercap_get_power_uw(struct powercap_zone *zone, uint64_t *val);
+
+int powercap_zone_get_enabled(struct powercap_zone *zone, int *mode);
+
+int powercap_zone_set_enabled(struct powercap_zone *zone, int mode);
diff --git a/tools/power/cpupower/bindings/python/test_raw_pylibcpupower.py b/tools/power/cpupower/bindings/python/test_raw_pylibcpupower.py
new file mode 100755
index 000000000000..3d6f62b9556a
--- /dev/null
+++ b/tools/power/cpupower/bindings/python/test_raw_pylibcpupower.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0-only
+
+import raw_pylibcpupower as p
+
+# Simple function call
+
+"""
+Get cstate count
+"""
+cpu_cstates_count = p.cpuidle_state_count(0)
+if cpu_cstates_count > -1:
+ print(f"CPU 0 has {cpu_cstates_count} c-states")
+else:
+ print(f"cstate count error: return code: {cpu_cstates_count}")
+
+"""
+Disable cstate (will fail if the above is 0, ex: a virtual machine)
+"""
+cstate_disabled = p.cpuidle_state_disable(0, 0, 1)
+if cpu_cstates_count == 0:
+ print(f"CPU 0 has {cpu_cstates_count} c-states")
+else:
+ print(f"cstate count error: return code: {cpu_cstates_count}")
+
+match cstate_disabled:
+ case 0:
+ print(f"CPU state disabled")
+ case -1:
+ print(f"Idlestate not available")
+ case _:
+ print(f"Not documented")
+
+
+# Pointer example
+
+topo = p.cpupower_topology()
+total_cpus = p.get_cpu_topology(topo)
+if total_cpus > 0:
+ print(f"Number of total cpus: {total_cpus} and number of cores: {topo.cores}")
+else:
+ print(f"Error: could not get cpu topology")
diff --git a/tools/power/cpupower/lib/cpuidle.c b/tools/power/cpupower/lib/cpuidle.c
index 479c5971aa6d..0ecac009273c 100644
--- a/tools/power/cpupower/lib/cpuidle.c
+++ b/tools/power/cpupower/lib/cpuidle.c
@@ -116,6 +116,7 @@ enum idlestate_value {
IDLESTATE_USAGE,
IDLESTATE_POWER,
IDLESTATE_LATENCY,
+ IDLESTATE_RESIDENCY,
IDLESTATE_TIME,
IDLESTATE_DISABLE,
MAX_IDLESTATE_VALUE_FILES
@@ -125,6 +126,7 @@ static const char *idlestate_value_files[MAX_IDLESTATE_VALUE_FILES] = {
[IDLESTATE_USAGE] = "usage",
[IDLESTATE_POWER] = "power",
[IDLESTATE_LATENCY] = "latency",
+ [IDLESTATE_RESIDENCY] = "residency",
[IDLESTATE_TIME] = "time",
[IDLESTATE_DISABLE] = "disable",
};
@@ -254,6 +256,12 @@ unsigned long cpuidle_state_latency(unsigned int cpu,
return cpuidle_state_get_one_value(cpu, idlestate, IDLESTATE_LATENCY);
}
+unsigned long cpuidle_state_residency(unsigned int cpu,
+ unsigned int idlestate)
+{
+ return cpuidle_state_get_one_value(cpu, idlestate, IDLESTATE_RESIDENCY);
+}
+
unsigned long cpuidle_state_usage(unsigned int cpu,
unsigned int idlestate)
{
diff --git a/tools/power/cpupower/lib/cpuidle.h b/tools/power/cpupower/lib/cpuidle.h
index 2e10fead2e1e..2ab404d40259 100644
--- a/tools/power/cpupower/lib/cpuidle.h
+++ b/tools/power/cpupower/lib/cpuidle.h
@@ -8,6 +8,8 @@ int cpuidle_state_disable(unsigned int cpu, unsigned int idlestate,
unsigned int disable);
unsigned long cpuidle_state_latency(unsigned int cpu,
unsigned int idlestate);
+unsigned long cpuidle_state_residency(unsigned int cpu,
+ unsigned int idlestate);
unsigned long cpuidle_state_usage(unsigned int cpu,
unsigned int idlestate);
unsigned long long cpuidle_state_time(unsigned int cpu,
diff --git a/tools/power/cpupower/lib/powercap.c b/tools/power/cpupower/lib/powercap.c
index a7a59c6bacda..94a0c69e55ef 100644
--- a/tools/power/cpupower/lib/powercap.c
+++ b/tools/power/cpupower/lib/powercap.c
@@ -78,6 +78,14 @@ int powercap_get_enabled(int *mode)
}
/*
+ * TODO: implement function. Returns dummy 0 for now.
+ */
+int powercap_set_enabled(int mode)
+{
+ return 0;
+}
+
+/*
* Hardcoded, because rapl is the only powercap implementation
- * this needs to get more generic if more powercap implementations
* should show up
diff --git a/tools/power/cpupower/utils/cpuidle-info.c b/tools/power/cpupower/utils/cpuidle-info.c
index 44126a87fa7a..e0d17f0de3fe 100644
--- a/tools/power/cpupower/utils/cpuidle-info.c
+++ b/tools/power/cpupower/utils/cpuidle-info.c
@@ -64,6 +64,8 @@ static void cpuidle_cpu_output(unsigned int cpu, int verbose)
printf(_("Latency: %lu\n"),
cpuidle_state_latency(cpu, idlestate));
+ printf(_("Residency: %lu\n"),
+ cpuidle_state_residency(cpu, idlestate));
printf(_("Usage: %lu\n"),
cpuidle_state_usage(cpu, idlestate));
printf(_("Duration: %llu\n"),
@@ -115,6 +117,8 @@ static void proc_cpuidle_cpu_output(unsigned int cpu)
printf(_("promotion[--] demotion[--] "));
printf(_("latency[%03lu] "),
cpuidle_state_latency(cpu, cstate));
+ printf(_("residency[%05lu] "),
+ cpuidle_state_residency(cpu, cstate));
printf(_("usage[%08lu] "),
cpuidle_state_usage(cpu, cstate));
printf(_("duration[%020Lu] \n"),
diff --git a/tools/power/pm-graph/.gitignore b/tools/power/pm-graph/.gitignore
new file mode 100644
index 000000000000..37762a8a06d6
--- /dev/null
+++ b/tools/power/pm-graph/.gitignore
@@ -0,0 +1,3 @@
+# sleepgraph.py artifacts
+suspend-[0-9]*-[0-9]*
+suspend-[0-9]*-[0-9]*-x[0-9]*
diff --git a/tools/power/pm-graph/Makefile b/tools/power/pm-graph/Makefile
index b5310832c19c..aeddbaf2d4c4 100644
--- a/tools/power/pm-graph/Makefile
+++ b/tools/power/pm-graph/Makefile
@@ -1,51 +1,86 @@
# SPDX-License-Identifier: GPL-2.0
-PREFIX ?= /usr
-DESTDIR ?=
+#
+# Copyright (c) 2013, Intel Corporation.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# Authors:
+# Todd Brandt <todd.e.brandt@linux.intel.com>
+
+# Prefix to the directories we're installing to
+DESTDIR ?=
+
+# Directory definitions. These are default and most probably
+# do not need to be changed. Please note that DESTDIR is
+# added in front of any of them
+
+BINDIR ?= /usr/bin
+MANDIR ?= /usr/share/man
+LIBDIR ?= /usr/lib
+
+# Toolchain: what tools do we use, and what options do they need:
+INSTALL = /usr/bin/install
+INSTALL_DATA = ${INSTALL} -m 644
all:
@echo "Nothing to build"
install : uninstall
- install -d $(DESTDIR)$(PREFIX)/lib/pm-graph
- install sleepgraph.py $(DESTDIR)$(PREFIX)/lib/pm-graph
- install bootgraph.py $(DESTDIR)$(PREFIX)/lib/pm-graph
- install -d $(DESTDIR)$(PREFIX)/lib/pm-graph/config
- install -m 644 config/cgskip.txt $(DESTDIR)$(PREFIX)/lib/pm-graph/config
- install -m 644 config/freeze-callgraph.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
- install -m 644 config/freeze.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
- install -m 644 config/freeze-dev.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
- install -m 644 config/standby-callgraph.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
- install -m 644 config/standby.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
- install -m 644 config/standby-dev.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
- install -m 644 config/suspend-callgraph.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
- install -m 644 config/suspend.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
- install -m 644 config/suspend-dev.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
- install -m 644 config/suspend-x2-proc.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
-
- install -d $(DESTDIR)$(PREFIX)/bin
- ln -s ../lib/pm-graph/bootgraph.py $(DESTDIR)$(PREFIX)/bin/bootgraph
- ln -s ../lib/pm-graph/sleepgraph.py $(DESTDIR)$(PREFIX)/bin/sleepgraph
-
- install -d $(DESTDIR)$(PREFIX)/share/man/man8
- install bootgraph.8 $(DESTDIR)$(PREFIX)/share/man/man8
- install sleepgraph.8 $(DESTDIR)$(PREFIX)/share/man/man8
+ $(INSTALL) -d $(DESTDIR)$(LIBDIR)/pm-graph
+ $(INSTALL) sleepgraph.py $(DESTDIR)$(LIBDIR)/pm-graph
+ $(INSTALL) bootgraph.py $(DESTDIR)$(LIBDIR)/pm-graph
+ $(INSTALL) -d $(DESTDIR)$(LIBDIR)/pm-graph/config
+ $(INSTALL_DATA) config/cgskip.txt $(DESTDIR)$(LIBDIR)/pm-graph/config
+ $(INSTALL_DATA) config/freeze-callgraph.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config
+ $(INSTALL_DATA) config/freeze.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config
+ $(INSTALL_DATA) config/freeze-dev.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config
+ $(INSTALL_DATA) config/standby-callgraph.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config
+ $(INSTALL_DATA) config/standby.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config
+ $(INSTALL_DATA) config/standby-dev.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config
+ $(INSTALL_DATA) config/suspend-callgraph.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config
+ $(INSTALL_DATA) config/suspend.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config
+ $(INSTALL_DATA) config/suspend-dev.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config
+ $(INSTALL_DATA) config/suspend-x2-proc.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config
+
+ $(INSTALL) -d $(DESTDIR)$(BINDIR)
+ ln -s ../lib/pm-graph/bootgraph.py $(DESTDIR)$(BINDIR)/bootgraph
+ ln -s ../lib/pm-graph/sleepgraph.py $(DESTDIR)$(BINDIR)/sleepgraph
+
+ $(INSTALL) -d $(DESTDIR)$(MANDIR)/man8
+ $(INSTALL) bootgraph.8 $(DESTDIR)$(MANDIR)/man8
+ $(INSTALL) sleepgraph.8 $(DESTDIR)$(MANDIR)/man8
uninstall :
- rm -f $(DESTDIR)$(PREFIX)/share/man/man8/bootgraph.8
- rm -f $(DESTDIR)$(PREFIX)/share/man/man8/sleepgraph.8
+ rm -f $(DESTDIR)$(MANDIR)/man8/bootgraph.8
+ rm -f $(DESTDIR)$(MANDIR)/man8/sleepgraph.8
- rm -f $(DESTDIR)$(PREFIX)/bin/bootgraph
- rm -f $(DESTDIR)$(PREFIX)/bin/sleepgraph
+ rm -f $(DESTDIR)$(BINDIR)/bootgraph
+ rm -f $(DESTDIR)$(BINDIR)/sleepgraph
- rm -f $(DESTDIR)$(PREFIX)/lib/pm-graph/config/*
- if [ -d $(DESTDIR)$(PREFIX)/lib/pm-graph/config ] ; then \
- rmdir $(DESTDIR)$(PREFIX)/lib/pm-graph/config; \
+ rm -f $(DESTDIR)$(LIBDIR)/pm-graph/config/*
+ if [ -d $(DESTDIR)$(LIBDIR)/pm-graph/config ] ; then \
+ rmdir $(DESTDIR)$(LIBDIR)/pm-graph/config; \
fi;
- rm -f $(DESTDIR)$(PREFIX)/lib/pm-graph/__pycache__/*
- if [ -d $(DESTDIR)$(PREFIX)/lib/pm-graph/__pycache__ ] ; then \
- rmdir $(DESTDIR)$(PREFIX)/lib/pm-graph/__pycache__; \
+ rm -f $(DESTDIR)$(LIBDIR)/pm-graph/__pycache__/*
+ if [ -d $(DESTDIR)$(LIBDIR)/pm-graph/__pycache__ ] ; then \
+ rmdir $(DESTDIR)$(LIBDIR)/pm-graph/__pycache__; \
fi;
- rm -f $(DESTDIR)$(PREFIX)/lib/pm-graph/*
- if [ -d $(DESTDIR)$(PREFIX)/lib/pm-graph ] ; then \
- rmdir $(DESTDIR)$(PREFIX)/lib/pm-graph; \
+ rm -f $(DESTDIR)$(LIBDIR)/pm-graph/*
+ if [ -d $(DESTDIR)$(LIBDIR)/pm-graph ] ; then \
+ rmdir $(DESTDIR)$(LIBDIR)/pm-graph; \
fi;
+
+help:
+ @echo 'Building targets:'
+ @echo ' all - Nothing to build'
+ @echo ' install - Install the program and create necessary directories'
+ @echo ' uninstall - Remove installed files and directories'
+
+.PHONY: all install uninstall help
diff --git a/tools/rcu/rcu-updaters.sh b/tools/rcu/rcu-updaters.sh
index 4ef1397927bb..8a5df3f22550 100755
--- a/tools/rcu/rcu-updaters.sh
+++ b/tools/rcu/rcu-updaters.sh
@@ -21,12 +21,10 @@ fi
bpftrace -e 'kprobe:kvfree_call_rcu,
kprobe:call_rcu,
kprobe:call_rcu_tasks,
- kprobe:call_rcu_tasks_rude,
kprobe:call_rcu_tasks_trace,
kprobe:call_srcu,
kprobe:rcu_barrier,
kprobe:rcu_barrier_tasks,
- kprobe:rcu_barrier_tasks_rude,
kprobe:rcu_barrier_tasks_trace,
kprobe:srcu_barrier,
kprobe:synchronize_rcu,
diff --git a/tools/sound/dapm-graph b/tools/sound/dapm-graph
index 57d78f6df041..f14bdfedee8f 100755
--- a/tools/sound/dapm-graph
+++ b/tools/sound/dapm-graph
@@ -8,6 +8,8 @@
set -eu
+STYLE_COMPONENT_ON="color=dodgerblue;style=bold"
+STYLE_COMPONENT_OFF="color=gray40;style=filled;fillcolor=gray90"
STYLE_NODE_ON="shape=box,style=bold,color=green4"
STYLE_NODE_OFF="shape=box,style=filled,color=gray30,fillcolor=gray95"
@@ -132,11 +134,17 @@ process_dapm_widget()
# Collect any links. We could use "in" links or "out" links,
# let's use "in" links
if echo "${line}" | grep -q '^in '; then
+ local w_route=$(echo "$line" | awk -F\" '{print $2}')
local w_src=$(echo "$line" |
awk -F\" '{print $6 "_" $4}' |
sed 's/^(null)_/ROOT_/')
dbg_echo " - Input route from: ${w_src}"
- echo " \"${w_src}\" -> \"$w_tag\"" >> "${links_file}"
+ dbg_echo " - Route: ${w_route}"
+ local w_edge_attrs=""
+ if [ "${w_route}" != "static" ]; then
+ w_edge_attrs=" [label=\"${w_route}\"]"
+ fi
+ echo " \"${w_src}\" -> \"$w_tag\"${w_edge_attrs}" >> "${links_file}"
fi
done
@@ -150,16 +158,20 @@ process_dapm_widget()
#
# $1 = temporary work dir
# $2 = component directory
-# $3 = forced component name (extracted for path if empty)
+# $3 = "ROOT" for the root card directory, empty otherwise
process_dapm_component()
{
local tmp_dir="${1}"
local c_dir="${2}"
local c_name="${3}"
+ local is_component=0
local dot_file="${tmp_dir}/main.dot"
local links_file="${tmp_dir}/links.dot"
+ local c_attribs=""
if [ -z "${c_name}" ]; then
+ is_component=1
+
# Extract directory name into component name:
# "./cs42l51.0-004a/dapm" -> "cs42l51.0-004a"
c_name="$(basename $(dirname "${c_dir}"))"
@@ -167,11 +179,23 @@ process_dapm_component()
dbg_echo " * Component: ${c_name}"
- echo "" >> "${dot_file}"
- echo " subgraph \"${c_name}\" {" >> "${dot_file}"
- echo " cluster = true" >> "${dot_file}"
- echo " label = \"${c_name}\"" >> "${dot_file}"
- echo " color=dodgerblue" >> "${dot_file}"
+ if [ ${is_component} = 1 ]; then
+ if [ -f "${c_dir}/bias_level" ]; then
+ c_onoff=$(sed -n -e 1p "${c_dir}/bias_level" | awk '{print $1}')
+ dbg_echo " - bias_level: ${c_onoff}"
+ if [ "$c_onoff" = "On" ]; then
+ c_attribs="${STYLE_COMPONENT_ON}"
+ elif [ "$c_onoff" = "Off" ]; then
+ c_attribs="${STYLE_COMPONENT_OFF}"
+ fi
+ fi
+
+ echo "" >> "${dot_file}"
+ echo " subgraph \"${c_name}\" {" >> "${dot_file}"
+ echo " cluster = true" >> "${dot_file}"
+ echo " label = \"${c_name}\"" >> "${dot_file}"
+ echo " ${c_attribs}" >> "${dot_file}"
+ fi
# Create empty file to ensure it will exist in all cases
>"${links_file}"
@@ -181,7 +205,9 @@ process_dapm_component()
process_dapm_widget "${tmp_dir}" "${c_name}" "${w_file}"
done
- echo " }" >> "${dot_file}"
+ if [ ${is_component} = 1 ]; then
+ echo " }" >> "${dot_file}"
+ fi
cat "${links_file}" >> "${dot_file}"
}
@@ -200,7 +226,7 @@ process_dapm_tree()
echo "digraph G {" > "${dot_file}"
echo " fontname=\"sans-serif\"" >> "${dot_file}"
echo " node [fontname=\"sans-serif\"]" >> "${dot_file}"
-
+ echo " edge [fontname=\"sans-serif\"]" >> "${dot_file}"
# Process root directory (no component)
process_dapm_component "${tmp_dir}" "${dapm_dir}/dapm" "ROOT"
diff --git a/tools/spi/spidev_fdx.c b/tools/spi/spidev_fdx.c
index 7d2a867cd4ae..bc9c4f6c3ba8 100644
--- a/tools/spi/spidev_fdx.c
+++ b/tools/spi/spidev_fdx.c
@@ -99,7 +99,7 @@ static void dumpstat(const char *name, int fd)
return;
}
- printf("%s: spi mode 0x%x, %d bits %sper word, %d Hz max\n",
+ printf("%s: spi mode 0x%x, %d bits %sper word, %u Hz max\n",
name, mode, bits, lsb ? "(lsb first) " : "", speed);
}
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
index 030b388800f0..3d1ca9e38b1f 100644
--- a/tools/testing/cxl/Kbuild
+++ b/tools/testing/cxl/Kbuild
@@ -14,6 +14,7 @@ ldflags-y += --wrap=cxl_dvsec_rr_decode
ldflags-y += --wrap=devm_cxl_add_rch_dport
ldflags-y += --wrap=cxl_rcd_component_reg_phys
ldflags-y += --wrap=cxl_endpoint_parse_cdat
+ldflags-y += --wrap=cxl_setup_parent_dport
DRIVERS := ../../../drivers
CXL_SRC := $(DRIVERS)/cxl
diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c
index 6f737941dc0e..d619672faa49 100644
--- a/tools/testing/cxl/test/mock.c
+++ b/tools/testing/cxl/test/mock.c
@@ -299,6 +299,18 @@ void __wrap_cxl_endpoint_parse_cdat(struct cxl_port *port)
}
EXPORT_SYMBOL_NS_GPL(__wrap_cxl_endpoint_parse_cdat, CXL);
+void __wrap_cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport)
+{
+ int index;
+ struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
+ /* Defer to the real implementation unless mock ops exist and claim this dport's device */
+ if (!ops || !ops->is_mock_port(dport->dport_dev))
+ cxl_setup_parent_dport(host, dport);
+ /* Hand the index obtained from get_cxl_mock_ops() back */
+ put_cxl_mock_ops(index);
+}
+EXPORT_SYMBOL_NS_GPL(__wrap_cxl_setup_parent_dport, CXL);
+
MODULE_LICENSE("GPL v2");
MODULE_IMPORT_NS(ACPI);
MODULE_IMPORT_NS(CXL);
diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
index 7254c110ff23..61931c4926fd 100644
--- a/tools/testing/kunit/kunit_kernel.py
+++ b/tools/testing/kunit/kunit_kernel.py
@@ -72,7 +72,8 @@ class LinuxSourceTreeOperations:
raise ConfigError(e.output.decode())
def make(self, jobs: int, build_dir: str, make_options: Optional[List[str]]) -> None:
- command = ['make', 'ARCH=' + self._linux_arch, 'O=' + build_dir, '--jobs=' + str(jobs)]
+ command = ['make', 'all', 'compile_commands.json', 'ARCH=' + self._linux_arch,
+ 'O=' + build_dir, '--jobs=' + str(jobs)]
if make_options:
command.extend(make_options)
if self._cross_compile:
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index bc8fe9e8f7f2..b38199965f99 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -1,4 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
+TARGETS += acct
TARGETS += alsa
TARGETS += amd-pstate
TARGETS += arm64
@@ -65,9 +66,11 @@ TARGETS += net/af_unix
TARGETS += net/forwarding
TARGETS += net/hsr
TARGETS += net/mptcp
+TARGETS += net/netfilter
TARGETS += net/openvswitch
+TARGETS += net/packetdrill
+TARGETS += net/rds
TARGETS += net/tcp_ao
-TARGETS += net/netfilter
TARGETS += nsfs
TARGETS += perf_events
TARGETS += pidfd
@@ -107,7 +110,6 @@ TARGETS += tmpfs
TARGETS += tpm2
TARGETS += tty
TARGETS += uevent
-TARGETS += user
TARGETS += user_events
TARGETS += vDSO
TARGETS += mm
diff --git a/tools/testing/selftests/acct/.gitignore b/tools/testing/selftests/acct/.gitignore
new file mode 100644
index 000000000000..7e78aac19038
--- /dev/null
+++ b/tools/testing/selftests/acct/.gitignore
@@ -0,0 +1,3 @@
+acct_syscall
+config
+process_log \ No newline at end of file
diff --git a/tools/testing/selftests/acct/Makefile b/tools/testing/selftests/acct/Makefile
new file mode 100644
index 000000000000..7e025099cf65
--- /dev/null
+++ b/tools/testing/selftests/acct/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_GEN_PROGS := acct_syscall
+CFLAGS += -Wall
+
+include ../lib.mk \ No newline at end of file
diff --git a/tools/testing/selftests/acct/acct_syscall.c b/tools/testing/selftests/acct/acct_syscall.c
new file mode 100644
index 000000000000..e44e8fe1f4a3
--- /dev/null
+++ b/tools/testing/selftests/acct/acct_syscall.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* kselftest for acct() system call
+ * The acct() system call enables or disables process accounting.
+ */
+
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include "../kselftest.h"
+
+/*
+ * Turn on process accounting into a scratch log file, let one child
+ * process terminate and check that the log is no longer empty.
+ *
+ * Must run as root; the test is reported as skipped otherwise.
+ */
+int main(void)
+{
+	char filename[] = "process_log";
+	FILE *fp;
+	pid_t child_pid;
+	long sz;
+
+	// Setting up kselftest framework
+	ksft_print_header();
+
+	// Check if test is run as root. Skip before a plan is announced so
+	// that the harness sees a skip instead of a failing exit status.
+	if (geteuid())
+		ksft_exit_skip("This test needs root to run!\n");
+
+	ksft_set_plan(1);
+
+	// Create file to log closed processes
+	fp = fopen(filename, "w");
+
+	if (!fp) {
+		ksft_test_result_error("%s.\n", strerror(errno));
+		ksft_finished();
+		return 1;
+	}
+
+	// acct() reports failure through its return value; errno is only
+	// meaningful after a failed call and may be stale otherwise.
+	if (acct(filename)) {
+		ksft_test_result_error("%s.\n", strerror(errno));
+		fclose(fp);
+		ksft_finished();
+		return 1;
+	}
+
+	// Create child process and wait for it to terminate.
+
+	child_pid = fork();
+
+	if (child_pid < 0) {
+		ksft_test_result_error("Creating a child process to log failed\n");
+		acct(NULL);
+		fclose(fp);
+		ksft_finished();
+		return 1;
+	} else if (child_pid > 0) {
+		wait(NULL);
+		fseek(fp, 0L, SEEK_END);
+		sz = ftell(fp);
+
+		acct(NULL);
+		fclose(fp);
+
+		if (sz <= 0) {
+			ksft_test_result_fail("Terminated child process not logged\n");
+			ksft_exit_fail();
+			return 1;
+		}
+
+		ksft_test_result_pass("Successfully logged terminated process.\n");
+		ksft_exit_pass();
+		return 0;
+	}
+
+	// Child: nothing to do, just terminate so the parent can inspect the log.
+	return 1;
+}
diff --git a/tools/testing/selftests/alsa/Makefile b/tools/testing/selftests/alsa/Makefile
index c1ce39874e2b..25be68025290 100644
--- a/tools/testing/selftests/alsa/Makefile
+++ b/tools/testing/selftests/alsa/Makefile
@@ -12,9 +12,9 @@ LDLIBS+=-lpthread
OVERRIDE_TARGETS = 1
-TEST_GEN_PROGS := mixer-test pcm-test test-pcmtest-driver
+TEST_GEN_PROGS := mixer-test pcm-test test-pcmtest-driver utimer-test
-TEST_GEN_PROGS_EXTENDED := libatest.so
+TEST_GEN_PROGS_EXTENDED := libatest.so global-timer
TEST_FILES := conf.d pcm-test.conf
diff --git a/tools/testing/selftests/alsa/global-timer.c b/tools/testing/selftests/alsa/global-timer.c
new file mode 100644
index 000000000000..c15ec0ba851a
--- /dev/null
+++ b/tools/testing/selftests/alsa/global-timer.c
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This tool is used by the utimer test, and it allows us to
+ * count the ticks of a global timer in a certain time frame
+ * (which is set by `timeout` parameter).
+ *
+ * Author: Ivan Orlov <ivan.orlov0322@gmail.com>
+ */
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <alsa/asoundlib.h>
+#include <time.h>
+
+/*
+ * Number of ticks seen so far. It is updated from the async handler, which
+ * is signal driven (see the SIGIO note in bind_to_timer()), so it uses the
+ * type that is safe to access from a signal handler.
+ */
+static volatile sig_atomic_t ticked;
+
+/* Async handler of the timer: count one more tick. */
+static void async_callback(snd_async_handler_t *ahandler)
+{
+	(void)ahandler;
+	ticked++;
+}
+
+static char timer_name[64];
+
+/*
+ * Open the global timer <device>/<subdevice>, count its ticks for `timeout`
+ * seconds and then stop and close it. Every setup failure ends the process
+ * with EXIT_FAILURE.
+ */
+static void bind_to_timer(int device, int subdevice, int timeout)
+{
+	snd_timer_t *handle;
+	snd_timer_params_t *params;
+	snd_async_handler_t *ahandler;
+
+	time_t end;
+
+	snprintf(timer_name, sizeof(timer_name),
+		 "hw:CLASS=%d,SCLASS=%d,DEV=%d,SUBDEV=%d",
+		 SND_TIMER_CLASS_GLOBAL, SND_TIMER_SCLASS_NONE,
+		 device, subdevice);
+
+	snd_timer_params_alloca(&params);
+
+	if (snd_timer_open(&handle, timer_name, SND_TIMER_OPEN_NONBLOCK) < 0) {
+		perror("Can't open the timer");
+		exit(EXIT_FAILURE);
+	}
+
+	snd_timer_params_set_auto_start(params, 1);
+	snd_timer_params_set_ticks(params, 1);
+	if (snd_timer_params(handle, params) < 0) {
+		perror("Can't set timer params");
+		exit(EXIT_FAILURE);
+	}
+
+	if (snd_async_add_timer_handler(&ahandler, handle, async_callback, NULL) < 0) {
+		perror("Can't create a handler");
+		exit(EXIT_FAILURE);
+	}
+	end = time(NULL) + timeout;
+	if (snd_timer_start(handle) < 0) {
+		perror("Failed to start the timer");
+		exit(EXIT_FAILURE);
+	}
+	printf("Timer has started\n");
+	while (time(NULL) <= end) {
+		/*
+		 * Waiting for the timeout to elapse. Can't use sleep here, as it gets
+		 * constantly interrupted by the signal from the timer (SIGIO)
+		 */
+	}
+	snd_timer_stop(handle);
+	snd_timer_close(handle);
+}
+
+/*
+ * Usage: global-timer <device> <subdevice> <timeout>
+ * Prints "Timer has started" once the timer runs and the total tick count
+ * when the timeout has elapsed.
+ */
+int main(int argc, char *argv[])
+{
+	int device, subdevice, timeout;
+
+	if (argc < 4) {
+		/* perror() does not format and appends strerror(errno): use fprintf() */
+		fprintf(stderr, "Usage: %s <device> <subdevice> <timeout>\n", argv[0]);
+		return EXIT_FAILURE;
+	}
+
+	setlinebuf(stdout);
+
+	device = atoi(argv[1]);
+	subdevice = atoi(argv[2]);
+	timeout = atoi(argv[3]);
+
+	bind_to_timer(device, subdevice, timeout);
+
+	printf("Total ticks count: %d\n", (int)ticked);
+
+	return EXIT_SUCCESS;
+}
diff --git a/tools/testing/selftests/alsa/utimer-test.c b/tools/testing/selftests/alsa/utimer-test.c
new file mode 100644
index 000000000000..32ee3ce57721
--- /dev/null
+++ b/tools/testing/selftests/alsa/utimer-test.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This test covers the functionality of userspace-driven ALSA timers. Such timers
+ * are purely virtual (so they don't directly depend on the hardware), and they could be
+ * created and triggered by userspace applications.
+ *
+ * Author: Ivan Orlov <ivan.orlov0322@gmail.com>
+ */
+#include "../kselftest_harness.h"
+#include <sound/asound.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sys/ioctl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <string.h>
+
+#define FRAME_RATE 8000
+#define PERIOD_SIZE 4410
+#define UTIMER_DEFAULT_ID -1
+#define UTIMER_DEFAULT_FD -1
+#define NANO 1000000000ULL
+#define TICKS_COUNT 10
+#define TICKS_RECORDING_DELTA 5
+#define TIMER_OUTPUT_BUF_LEN 1024
+#define TIMER_FREQ_SEC 1
+#define RESULT_PREFIX_LEN strlen("Total ticks count: ")
+
+enum timer_app_event {
+	TIMER_APP_STARTED,
+	TIMER_APP_RESULT,
+	TIMER_NO_EVENT,
+};
+
+FIXTURE(timer_f) {
+	struct snd_timer_uinfo *utimer_info;
+};
+
+FIXTURE_SETUP(timer_f) {
+	int timer_dev_fd;
+
+	if (geteuid())
+		SKIP(return, "This test needs root to run!");
+
+	self->utimer_info = calloc(1, sizeof(*self->utimer_info));
+	ASSERT_NE(NULL, self->utimer_info);
+
+	/* Resolution is the time the period of frames takes in nanoseconds */
+	self->utimer_info->resolution = (NANO / FRAME_RATE * PERIOD_SIZE);
+
+	timer_dev_fd = open("/dev/snd/timer", O_RDONLY);
+	ASSERT_GE(timer_dev_fd, 0);
+
+	ASSERT_EQ(ioctl(timer_dev_fd, SNDRV_TIMER_IOCTL_CREATE, self->utimer_info), 0);
+	ASSERT_GE(self->utimer_info->fd, 0);
+
+	close(timer_dev_fd);
+}
+
+FIXTURE_TEARDOWN(timer_f) {
+	close(self->utimer_info->fd);
+	free(self->utimer_info);
+}
+
+/* Thread body: trigger the timer behind *data TICKS_COUNT times, TIMER_FREQ_SEC apart. */
+static void *ticking_func(void *data)
+{
+	int i;
+	int *fd = (int *)data;
+
+	for (i = 0; i < TICKS_COUNT; i++) {
+		/* Well, trigger the timer! */
+		ioctl(*fd, SNDRV_TIMER_IOCTL_TRIGGER, NULL);
+		sleep(TIMER_FREQ_SEC);
+	}
+
+	return NULL;
+}
+
+/* Classify one line of global-timer output. */
+static enum timer_app_event parse_timer_output(const char *s)
+{
+	if (strstr(s, "Timer has started"))
+		return TIMER_APP_STARTED;
+	if (strstr(s, "Total ticks count"))
+		return TIMER_APP_RESULT;
+
+	return TIMER_NO_EVENT;
+}
+
+/*
+ * Extract the tick count from a result line, which is expected to start
+ * with the "Total ticks count: " prefix. Returns -1 if no number follows.
+ */
+static int parse_timer_result(const char *s)
+{
+	char *end;
+	long d;
+
+	d = strtol(s + RESULT_PREFIX_LEN, &end, 10);
+	if (end == s + RESULT_PREFIX_LEN)
+		return -1;
+
+	return d;
+}
+
+/*
+ * This test triggers the timer and counts ticks at the same time. The amount
+ * of the timer trigger calls should be equal to the amount of ticks received.
+ */
+TEST_F(timer_f, utimer) {
+	char command[64];
+	pthread_t ticking_thread;
+	int thread_started = 0;
+	int total_ticks = 0;
+	FILE *rfp;
+	char *buf = malloc(TIMER_OUTPUT_BUF_LEN);
+
+	ASSERT_NE(buf, NULL);
+
+	/* The timeout should be the ticks interval * count of ticks + some delta */
+	snprintf(command, sizeof(command), "./global-timer %d %d %d",
+		 SNDRV_TIMER_GLOBAL_UDRIVEN, self->utimer_info->id,
+		 TICKS_COUNT * TIMER_FREQ_SEC + TICKS_RECORDING_DELTA);
+
+	rfp = popen(command, "r");
+	/* fgets()/pclose() on a NULL stream would crash instead of failing the test */
+	ASSERT_NE(rfp, NULL);
+	while (fgets(buf, TIMER_OUTPUT_BUF_LEN, rfp)) {
+		switch (parse_timer_output(buf)) {
+		case TIMER_APP_STARTED:
+			/* global-timer waits for timer to trigger, so start the ticking thread */
+			if (!thread_started &&
+			    !pthread_create(&ticking_thread, NULL, ticking_func,
+					    &self->utimer_info->fd))
+				thread_started = 1;
+			break;
+		case TIMER_APP_RESULT:
+			total_ticks = parse_timer_result(buf);
+			break;
+		case TIMER_NO_EVENT:
+			break;
+		}
+	}
+	/* Joining a thread that was never created is undefined behaviour */
+	if (thread_started)
+		pthread_join(ticking_thread, NULL);
+	pclose(rfp);
+	free(buf);
+	ASSERT_EQ(total_ticks, TICKS_COUNT);
+}
+
+TEST(wrong_timers_test) {
+	int timer_dev_fd;
+	int utimer_fd;
+	struct snd_timer_uinfo wrong_timer = {
+		.resolution = 0,
+		.id = UTIMER_DEFAULT_ID,
+		.fd = UTIMER_DEFAULT_FD,
+	};
+
+	timer_dev_fd = open("/dev/snd/timer", O_RDONLY);
+	ASSERT_GE(timer_dev_fd, 0);
+
+	utimer_fd = ioctl(timer_dev_fd, SNDRV_TIMER_IOCTL_CREATE, &wrong_timer);
+	ASSERT_LT(utimer_fd, 0);
+	/* Check that id was not updated */
+	ASSERT_EQ(wrong_timer.id, UTIMER_DEFAULT_ID);
+
+	/* Test the NULL as an argument is processed correctly */
+	ASSERT_LT(ioctl(timer_dev_fd, SNDRV_TIMER_IOCTL_CREATE, NULL), 0);
+
+	close(timer_dev_fd);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c
index d8909b2b535a..f2d6007a2b98 100644
--- a/tools/testing/selftests/arm64/abi/hwcap.c
+++ b/tools/testing/selftests/arm64/abi/hwcap.c
@@ -156,6 +156,12 @@ static void pmull_sigill(void)
asm volatile(".inst 0x0ee0e000" : : : );
}
+static void poe_sigill(void)
+{
+ /* mrs x0, POR_EL0 (spelled below by its encoding, S3_3_C10_C2_4) */
+ asm volatile("mrs x0, S3_3_C10_C2_4" : : : "x0");
+}
+
static void rng_sigill(void)
{
asm volatile("mrs x0, S3_3_C2_C4_0" : : : "x0");
@@ -602,6 +608,14 @@ static const struct hwcap_data {
.sigill_fn = pmull_sigill,
},
{
+ .name = "POE",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_POE,
+ .cpuinfo = "poe",
+ .sigill_fn = poe_sigill,
+ .sigill_reliable = true,
+ },
+ {
.name = "RNG",
.at_hwcap = AT_HWCAP2,
.hwcap_bit = HWCAP2_RNG,
diff --git a/tools/testing/selftests/arm64/abi/ptrace.c b/tools/testing/selftests/arm64/abi/ptrace.c
index e4fa507cbdd0..b51d21f78cf9 100644
--- a/tools/testing/selftests/arm64/abi/ptrace.c
+++ b/tools/testing/selftests/arm64/abi/ptrace.c
@@ -163,10 +163,10 @@ static void test_hw_debug(pid_t child, int type, const char *type_name)
static int do_child(void)
{
if (ptrace(PTRACE_TRACEME, -1, NULL, NULL))
- ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno));
+ ksft_exit_fail_perror("PTRACE_TRACEME");
if (raise(SIGSTOP))
- ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno));
+ ksft_exit_fail_perror("raise(SIGSTOP)");
return EXIT_SUCCESS;
}
diff --git a/tools/testing/selftests/arm64/signal/.gitignore b/tools/testing/selftests/arm64/signal/.gitignore
index 1ce5b5eac386..b2f2bfd5c6aa 100644
--- a/tools/testing/selftests/arm64/signal/.gitignore
+++ b/tools/testing/selftests/arm64/signal/.gitignore
@@ -2,6 +2,7 @@
mangle_*
fake_sigreturn_*
fpmr_*
+poe_*
sme_*
ssve_*
sve_*
diff --git a/tools/testing/selftests/arm64/signal/Makefile b/tools/testing/selftests/arm64/signal/Makefile
index 8f5febaf1a9a..edb3613513b8 100644
--- a/tools/testing/selftests/arm64/signal/Makefile
+++ b/tools/testing/selftests/arm64/signal/Makefile
@@ -23,7 +23,7 @@ $(TEST_GEN_PROGS): $(PROGS)
# Common test-unit targets to build common-layout test-cases executables
# Needs secondary expansion to properly include the testcase c-file in pre-reqs
COMMON_SOURCES := test_signals.c test_signals_utils.c testcases/testcases.c \
- signals.S
+ signals.S sve_helpers.c
COMMON_HEADERS := test_signals.h test_signals_utils.h testcases/testcases.h
.SECONDEXPANSION:
diff --git a/tools/testing/selftests/arm64/signal/sve_helpers.c b/tools/testing/selftests/arm64/signal/sve_helpers.c
new file mode 100644
index 000000000000..0acc121af306
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/sve_helpers.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024 ARM Limited
+ *
+ * Common helper functions for SVE and SME functionality.
+ */
+
+#include <stdbool.h>
+#include <kselftest.h>
+#include <asm/sigcontext.h>
+#include <sys/prctl.h>
+
+unsigned int vls[SVE_VQ_MAX];
+unsigned int nvls;
+
+/*
+ * Record in vls[] every vector length the prctl() accepts, largest first,
+ * and leave their number in nvls.
+ *
+ * use_sme: probe SME (PR_SME_SET_VL) rather than SVE (PR_SVE_SET_VL)
+ * min_vls: how many vector lengths the caller needs
+ *
+ * Returns KSFT_PASS when at least min_vls lengths were found, KSFT_SKIP
+ * when there are fewer, and KSFT_FAIL when the prctl() fails.
+ */
+int sve_fill_vls(bool use_sme, int min_vls)
+{
+	int vq, vl;
+	int pr_set_vl = use_sme ? PR_SME_SET_VL : PR_SVE_SET_VL;
+	int len_mask = use_sme ? PR_SME_VL_LEN_MASK : PR_SVE_VL_LEN_MASK;
+
+	/* Start from an empty table so that a repeated call cannot overrun vls[] */
+	nvls = 0;
+
+	/*
+	 * Enumerate up to SVE_VQ_MAX vector lengths
+	 */
+	for (vq = SVE_VQ_MAX; vq > 0; --vq) {
+		vl = prctl(pr_set_vl, vq * 16);
+		if (vl == -1)
+			return KSFT_FAIL;
+
+		vl &= len_mask;
+
+		/*
+		 * Unlike SVE, SME does not require the minimum vector length
+		 * to be implemented, or the VLs to be consecutive, so any call
+		 * to the prctl might return the single implemented VL, which
+		 * might be larger than 16. So to avoid this loop never
+		 * terminating, bail out here when we find a higher VL than
+		 * we asked for.
+		 * See the ARM ARM, DDI 0487K.a, B1.4.2: I_QQRNR and I_NWYBP.
+		 */
+		if (vq < sve_vq_from_vl(vl))
+			break;
+
+		/* Skip missing VLs */
+		vq = sve_vq_from_vl(vl);
+
+		vls[nvls++] = vl;
+	}
+
+	if (nvls < min_vls) {
+		/* nvls is unsigned, so print it with %u */
+		fprintf(stderr, "Only %u VL supported\n", nvls);
+		return KSFT_SKIP;
+	}
+
+	return KSFT_PASS;
+}
diff --git a/tools/testing/selftests/arm64/signal/sve_helpers.h b/tools/testing/selftests/arm64/signal/sve_helpers.h
new file mode 100644
index 000000000000..50948ce471cc
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/sve_helpers.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2024 ARM Limited
+ *
+ * Common helper functions for SVE and SME functionality.
+ */
+
+#ifndef __SVE_HELPERS_H__
+#define __SVE_HELPERS_H__
+
+#include <stdbool.h>
+/* Values for the use_sme argument of sve_fill_vls() */
+#define VLS_USE_SVE false
+#define VLS_USE_SME true
+/* Vector lengths recorded by sve_fill_vls() and the number of valid entries */
+extern unsigned int vls[];
+extern unsigned int nvls;
+/* Returns KSFT_PASS, KSFT_SKIP (fewer than min_vls lengths found) or KSFT_FAIL */
+int sve_fill_vls(bool use_sme, int min_vls);
+
+#endif
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c
index ebd5815b54bb..dfd6a2badf9f 100644
--- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c
@@ -6,44 +6,28 @@
* handler, this is not supported and is expected to segfault.
*/
+#include <kselftest.h>
#include <signal.h>
#include <ucontext.h>
#include <sys/prctl.h>
#include "test_signals_utils.h"
+#include "sve_helpers.h"
#include "testcases.h"
struct fake_sigframe sf;
-static unsigned int vls[SVE_VQ_MAX];
-unsigned int nvls = 0;
static bool sme_get_vls(struct tdescr *td)
{
- int vq, vl;
+ int res = sve_fill_vls(VLS_USE_SME, 2);
- /*
- * Enumerate up to SVE_VQ_MAX vector lengths
- */
- for (vq = SVE_VQ_MAX; vq > 0; --vq) {
- vl = prctl(PR_SVE_SET_VL, vq * 16);
- if (vl == -1)
- return false;
+ if (!res)
+ return true;
- vl &= PR_SME_VL_LEN_MASK;
+ if (res == KSFT_SKIP)
+ td->result = KSFT_SKIP;
- /* Skip missing VLs */
- vq = sve_vq_from_vl(vl);
-
- vls[nvls++] = vl;
- }
-
- /* We need at least two VLs */
- if (nvls < 2) {
- fprintf(stderr, "Only %d VL supported\n", nvls);
- return false;
- }
-
- return true;
+ return false;
}
static int fake_sigreturn_ssve_change_vl(struct tdescr *td,
@@ -51,30 +35,30 @@ static int fake_sigreturn_ssve_change_vl(struct tdescr *td,
{
size_t resv_sz, offset;
struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf);
- struct sve_context *sve;
+ struct za_context *za;
/* Get a signal context with a SME ZA frame in it */
if (!get_current_context(td, &sf.uc, sizeof(sf.uc)))
return 1;
resv_sz = GET_SF_RESV_SIZE(sf);
- head = get_header(head, SVE_MAGIC, resv_sz, &offset);
+ head = get_header(head, ZA_MAGIC, resv_sz, &offset);
if (!head) {
- fprintf(stderr, "No SVE context\n");
+ fprintf(stderr, "No ZA context\n");
return 1;
}
- if (head->size != sizeof(struct sve_context)) {
+ if (head->size != sizeof(struct za_context)) {
fprintf(stderr, "Register data present, aborting\n");
return 1;
}
- sve = (struct sve_context *)head;
+ za = (struct za_context *)head;
/* No changes are supported; init left us at minimum VL so go to max */
fprintf(stderr, "Attempting to change VL from %d to %d\n",
- sve->vl, vls[0]);
- sve->vl = vls[0];
+ za->vl, vls[0]);
+ za->vl = vls[0];
fake_sigreturn(&sf, sizeof(sf), 0);
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c
index e2a452190511..e1ccf8f85a70 100644
--- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c
@@ -12,40 +12,22 @@
#include <sys/prctl.h>
#include "test_signals_utils.h"
+#include "sve_helpers.h"
#include "testcases.h"
struct fake_sigframe sf;
-static unsigned int vls[SVE_VQ_MAX];
-unsigned int nvls = 0;
static bool sve_get_vls(struct tdescr *td)
{
- int vq, vl;
+ int res = sve_fill_vls(VLS_USE_SVE, 2);
- /*
- * Enumerate up to SVE_VQ_MAX vector lengths
- */
- for (vq = SVE_VQ_MAX; vq > 0; --vq) {
- vl = prctl(PR_SVE_SET_VL, vq * 16);
- if (vl == -1)
- return false;
+ if (!res)
+ return true;
- vl &= PR_SVE_VL_LEN_MASK;
-
- /* Skip missing VLs */
- vq = sve_vq_from_vl(vl);
-
- vls[nvls++] = vl;
- }
-
- /* We need at least two VLs */
- if (nvls < 2) {
- fprintf(stderr, "Only %d VL supported\n", nvls);
+ if (res == KSFT_SKIP)
td->result = KSFT_SKIP;
- return false;
- }
- return true;
+ return false;
}
static int fake_sigreturn_sve_change_vl(struct tdescr *td,
diff --git a/tools/testing/selftests/arm64/signal/testcases/poe_siginfo.c b/tools/testing/selftests/arm64/signal/testcases/poe_siginfo.c
new file mode 100644
index 000000000000..36bd9940ee05
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/poe_siginfo.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 Arm Limited
+ *
+ * Verify that the POR_EL0 register context in signal frames is set up as
+ * expected.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+#include <asm/sigcontext.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+static union {
+ ucontext_t uc;
+ char buf[1024 * 128];
+} context;
+
+#define SYS_POR_EL0 "S3_3_C10_C2_4"
+
+static uint64_t get_por_el0(void)
+{
+ uint64_t val;
+ /* Read the system register named by SYS_POR_EL0 into val */
+ asm volatile(
+ "mrs %0, " SYS_POR_EL0 "\n"
+ : "=r"(val)
+ :
+ : );
+
+ return val;
+}
+
+int poe_present(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context);
+ struct poe_context *poe_ctx;
+ size_t offset;
+ bool in_sigframe;
+ bool have_poe;
+ __u64 orig_poe;
+ /* Sample POR_EL0 before capturing the context, and only when the hwcap advertises it */
+ have_poe = getauxval(AT_HWCAP2) & HWCAP2_POE;
+ if (have_poe)
+ orig_poe = get_por_el0();
+
+ if (!get_current_context(td, &context.uc, sizeof(context)))
+ return 1;
+ /* Look for a POE_MAGIC record in the captured frame */
+ poe_ctx = (struct poe_context *)
+ get_header(head, POE_MAGIC, td->live_sz, &offset);
+
+ in_sigframe = poe_ctx != NULL;
+
+ fprintf(stderr, "POR_EL0 sigframe %s on system %s POE\n",
+ in_sigframe ? "present" : "absent",
+ have_poe ? "with" : "without");
+ /* The record must be present exactly when the hwcap is set */
+ td->pass = (in_sigframe == have_poe);
+
+ /*
+ * Check that the value we read back was the one present at
+ * the time that the signal was triggered.
+ */
+ if (have_poe && poe_ctx) {
+ if (poe_ctx->por_el0 != orig_poe) {
+ fprintf(stderr, "POR_EL0 in frame is %llx, was %llx\n",
+ poe_ctx->por_el0, orig_poe);
+ td->pass = false;
+ }
+ }
+
+ return 0;
+}
+
+struct tdescr tde = {
+ .name = "POR_EL0",
+ .descr = "Validate that POR_EL0 is present as expected",
+ .timeout = 3,
+ .run = poe_present,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c b/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c
index 3d37daafcff5..6dbe48cf8b09 100644
--- a/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c
+++ b/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c
@@ -6,51 +6,31 @@
* set up as expected.
*/
+#include <kselftest.h>
#include <signal.h>
#include <ucontext.h>
#include <sys/prctl.h>
#include "test_signals_utils.h"
+#include "sve_helpers.h"
#include "testcases.h"
static union {
ucontext_t uc;
char buf[1024 * 64];
} context;
-static unsigned int vls[SVE_VQ_MAX];
-unsigned int nvls = 0;
static bool sme_get_vls(struct tdescr *td)
{
- int vq, vl;
+ int res = sve_fill_vls(VLS_USE_SME, 1);
- /*
- * Enumerate up to SVE_VQ_MAX vector lengths
- */
- for (vq = SVE_VQ_MAX; vq > 0; --vq) {
- vl = prctl(PR_SME_SET_VL, vq * 16);
- if (vl == -1)
- return false;
-
- vl &= PR_SME_VL_LEN_MASK;
-
- /* Did we find the lowest supported VL? */
- if (vq < sve_vq_from_vl(vl))
- break;
+ if (!res)
+ return true;
- /* Skip missing VLs */
- vq = sve_vq_from_vl(vl);
-
- vls[nvls++] = vl;
- }
-
- /* We need at least one VL */
- if (nvls < 1) {
- fprintf(stderr, "Only %d VL supported\n", nvls);
- return false;
- }
+ if (res == KSFT_SKIP)
+ td->result = KSFT_SKIP;
- return true;
+ return false;
}
static void setup_ssve_regs(void)
diff --git a/tools/testing/selftests/arm64/signal/testcases/ssve_za_regs.c b/tools/testing/selftests/arm64/signal/testcases/ssve_za_regs.c
index 9dc5f128bbc0..5557e116e973 100644
--- a/tools/testing/selftests/arm64/signal/testcases/ssve_za_regs.c
+++ b/tools/testing/selftests/arm64/signal/testcases/ssve_za_regs.c
@@ -6,51 +6,31 @@
* signal frames is set up as expected when enabled simultaneously.
*/
+#include <kselftest.h>
#include <signal.h>
#include <ucontext.h>
#include <sys/prctl.h>
#include "test_signals_utils.h"
+#include "sve_helpers.h"
#include "testcases.h"
static union {
ucontext_t uc;
char buf[1024 * 128];
} context;
-static unsigned int vls[SVE_VQ_MAX];
-unsigned int nvls = 0;
static bool sme_get_vls(struct tdescr *td)
{
- int vq, vl;
+ int res = sve_fill_vls(VLS_USE_SME, 1);
- /*
- * Enumerate up to SVE_VQ_MAX vector lengths
- */
- for (vq = SVE_VQ_MAX; vq > 0; --vq) {
- vl = prctl(PR_SME_SET_VL, vq * 16);
- if (vl == -1)
- return false;
-
- vl &= PR_SME_VL_LEN_MASK;
-
- /* Did we find the lowest supported VL? */
- if (vq < sve_vq_from_vl(vl))
- break;
+ if (!res)
+ return true;
- /* Skip missing VLs */
- vq = sve_vq_from_vl(vl);
-
- vls[nvls++] = vl;
- }
-
- /* We need at least one VL */
- if (nvls < 1) {
- fprintf(stderr, "Only %d VL supported\n", nvls);
- return false;
- }
+ if (res == KSFT_SKIP)
+ td->result = KSFT_SKIP;
- return true;
+ return false;
}
static void setup_regs(void)
diff --git a/tools/testing/selftests/arm64/signal/testcases/sve_regs.c b/tools/testing/selftests/arm64/signal/testcases/sve_regs.c
index 8b16eabbb769..8143eb1c58c1 100644
--- a/tools/testing/selftests/arm64/signal/testcases/sve_regs.c
+++ b/tools/testing/selftests/arm64/signal/testcases/sve_regs.c
@@ -6,47 +6,31 @@
* expected.
*/
+#include <kselftest.h>
#include <signal.h>
#include <ucontext.h>
#include <sys/prctl.h>
#include "test_signals_utils.h"
+#include "sve_helpers.h"
#include "testcases.h"
static union {
ucontext_t uc;
char buf[1024 * 64];
} context;
-static unsigned int vls[SVE_VQ_MAX];
-unsigned int nvls = 0;
static bool sve_get_vls(struct tdescr *td)
{
- int vq, vl;
+ int res = sve_fill_vls(VLS_USE_SVE, 1);
- /*
- * Enumerate up to SVE_VQ_MAX vector lengths
- */
- for (vq = SVE_VQ_MAX; vq > 0; --vq) {
- vl = prctl(PR_SVE_SET_VL, vq * 16);
- if (vl == -1)
- return false;
-
- vl &= PR_SVE_VL_LEN_MASK;
-
- /* Skip missing VLs */
- vq = sve_vq_from_vl(vl);
+ if (!res)
+ return true;
- vls[nvls++] = vl;
- }
-
- /* We need at least one VL */
- if (nvls < 1) {
- fprintf(stderr, "Only %d VL supported\n", nvls);
- return false;
- }
+ if (res == KSFT_SKIP)
+ td->result = KSFT_SKIP;
- return true;
+ return false;
}
static void setup_sve_regs(void)
diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.c b/tools/testing/selftests/arm64/signal/testcases/testcases.c
index 674b88cc8c39..e6daa94fcd2e 100644
--- a/tools/testing/selftests/arm64/signal/testcases/testcases.c
+++ b/tools/testing/selftests/arm64/signal/testcases/testcases.c
@@ -6,29 +6,6 @@
#include "testcases.h"
-struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic,
- size_t resv_sz, size_t *offset)
-{
- size_t offs = 0;
- struct _aarch64_ctx *found = NULL;
-
- if (!head || resv_sz < HDR_SZ)
- return found;
-
- while (offs <= resv_sz - HDR_SZ &&
- head->magic != magic && head->magic) {
- offs += head->size;
- head = GET_RESV_NEXT_HEAD(head);
- }
- if (head->magic == magic) {
- found = head;
- if (offset)
- *offset = offs;
- }
-
- return found;
-}
-
bool validate_extra_context(struct extra_context *extra, char **err,
void **extra_data, size_t *extra_size)
{
@@ -184,6 +161,10 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
if (head->size != sizeof(struct esr_context))
*err = "Bad size for esr_context";
break;
+ case POE_MAGIC:
+ if (head->size != sizeof(struct poe_context))
+ *err = "Bad size for poe_context";
+ break;
case TPIDR2_MAGIC:
if (head->size != sizeof(struct tpidr2_context))
*err = "Bad size for tpidr2_context";
diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.h b/tools/testing/selftests/arm64/signal/testcases/testcases.h
index 7727126347e0..9872b8912714 100644
--- a/tools/testing/selftests/arm64/signal/testcases/testcases.h
+++ b/tools/testing/selftests/arm64/signal/testcases/testcases.h
@@ -26,6 +26,9 @@
#define HDR_SZ \
sizeof(struct _aarch64_ctx)
+/*
+ * First record header in the __reserved area of the context that @uc points to.
+ * @uc is parenthesized so that any pointer expression (e.g. &ctx, p + 1) can be
+ * passed without the -> operator binding to only part of it.
+ */
+#define GET_UC_RESV_HEAD(uc) \
+	(struct _aarch64_ctx *)(&((uc)->uc_mcontext.__reserved))
+
#define GET_SF_RESV_HEAD(sf) \
(struct _aarch64_ctx *)(&(sf).uc.uc_mcontext.__reserved)
@@ -88,8 +91,29 @@ struct fake_sigframe {
bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err);
-struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic,
- size_t resv_sz, size_t *offset);
+/*
+ * Walk the chain of records starting at @head and return the first one whose
+ * magic equals @magic, or NULL when there is none.
+ *
+ * @head:    first record header to examine (NULL is tolerated)
+ * @magic:   magic value to look for; 0 matches the terminating record
+ * @resv_sz: size in bytes of the area the records live in
+ * @offset:  optional; on success receives the byte offset of the match from
+ *           the initial @head
+ *
+ * A header is only dereferenced while it lies entirely inside the @resv_sz
+ * bytes, so a chain that runs past the end of the area yields NULL instead of
+ * reading (and possibly matching) memory beyond it.
+ */
+static inline struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic,
+					      size_t resv_sz, size_t *offset)
+{
+	size_t offs = 0;
+
+	if (!head || resv_sz < HDR_SZ)
+		return NULL;
+
+	while (offs <= resv_sz - HDR_SZ) {
+		if (head->magic == magic) {
+			if (offset)
+				*offset = offs;
+			return head;
+		}
+
+		/* A zero magic ends the chain; nothing follows it. */
+		if (!head->magic)
+			break;
+
+		offs += head->size;
+		head = GET_RESV_NEXT_HEAD(head);
+	}
+
+	return NULL;
+}
+
static inline struct _aarch64_ctx *get_terminator(struct _aarch64_ctx *head,
size_t resv_sz,
diff --git a/tools/testing/selftests/arm64/signal/testcases/za_no_regs.c b/tools/testing/selftests/arm64/signal/testcases/za_no_regs.c
index 4d6f94b6178f..ce26e9c2fa5e 100644
--- a/tools/testing/selftests/arm64/signal/testcases/za_no_regs.c
+++ b/tools/testing/selftests/arm64/signal/testcases/za_no_regs.c
@@ -6,47 +6,31 @@
* expected.
*/
+#include <kselftest.h>
#include <signal.h>
#include <ucontext.h>
#include <sys/prctl.h>
#include "test_signals_utils.h"
+#include "sve_helpers.h"
#include "testcases.h"
static union {
ucontext_t uc;
char buf[1024 * 128];
} context;
-static unsigned int vls[SVE_VQ_MAX];
-unsigned int nvls = 0;
static bool sme_get_vls(struct tdescr *td)
{
- int vq, vl;
+ int res = sve_fill_vls(VLS_USE_SME, 1);
- /*
- * Enumerate up to SME_VQ_MAX vector lengths
- */
- for (vq = SVE_VQ_MAX; vq > 0; --vq) {
- vl = prctl(PR_SME_SET_VL, vq * 16);
- if (vl == -1)
- return false;
-
- vl &= PR_SME_VL_LEN_MASK;
-
- /* Skip missing VLs */
- vq = sve_vq_from_vl(vl);
+ if (!res)
+ return true;
- vls[nvls++] = vl;
- }
-
- /* We need at least one VL */
- if (nvls < 1) {
- fprintf(stderr, "Only %d VL supported\n", nvls);
- return false;
- }
+ if (res == KSFT_SKIP)
+ td->result = KSFT_SKIP;
- return true;
+ return false;
}
static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc,
diff --git a/tools/testing/selftests/arm64/signal/testcases/za_regs.c b/tools/testing/selftests/arm64/signal/testcases/za_regs.c
index 174ad6656696..b9e13f27f1f9 100644
--- a/tools/testing/selftests/arm64/signal/testcases/za_regs.c
+++ b/tools/testing/selftests/arm64/signal/testcases/za_regs.c
@@ -6,51 +6,31 @@
* expected.
*/
+#include <kselftest.h>
#include <signal.h>
#include <ucontext.h>
#include <sys/prctl.h>
#include "test_signals_utils.h"
+#include "sve_helpers.h"
#include "testcases.h"
static union {
ucontext_t uc;
char buf[1024 * 128];
} context;
-static unsigned int vls[SVE_VQ_MAX];
-unsigned int nvls = 0;
static bool sme_get_vls(struct tdescr *td)
{
- int vq, vl;
+ int res = sve_fill_vls(VLS_USE_SME, 1);
- /*
- * Enumerate up to SME_VQ_MAX vector lengths
- */
- for (vq = SVE_VQ_MAX; vq > 0; --vq) {
- vl = prctl(PR_SME_SET_VL, vq * 16);
- if (vl == -1)
- return false;
-
- vl &= PR_SME_VL_LEN_MASK;
-
- /* Did we find the lowest supported VL? */
- if (vq < sve_vq_from_vl(vl))
- break;
+ if (!res)
+ return true;
- /* Skip missing VLs */
- vq = sve_vq_from_vl(vl);
-
- vls[nvls++] = vl;
- }
-
- /* We need at least one VL */
- if (nvls < 1) {
- fprintf(stderr, "Only %d VL supported\n", nvls);
- return false;
- }
+ if (res == KSFT_SKIP)
+ td->result = KSFT_SKIP;
- return true;
+ return false;
}
static void setup_za_regs(void)
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h
index 11ee801e75e7..6c3b4d4f173a 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h
@@ -34,6 +34,12 @@ DECLARE_TRACE(bpf_testmod_test_write_bare,
TP_ARGS(task, ctx)
);
+/* Used in bpf_testmod_test_read() to test __nullable suffix */
+DECLARE_TRACE(bpf_testmod_test_nullable_bare,
+ TP_PROTO(struct bpf_testmod_test_read_ctx *ctx__nullable),
+ TP_ARGS(ctx__nullable)
+);
+
#undef BPF_TESTMOD_DECLARE_TRACE
#ifdef DECLARE_TRACE_WRITABLE
#define BPF_TESTMOD_DECLARE_TRACE(call, proto, args, size) \
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
index fd28c1157bd3..22807fd78fe6 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
@@ -356,6 +356,8 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj,
if (bpf_testmod_loop_test(101) > 100)
trace_bpf_testmod_test_read(current, &ctx);
+ trace_bpf_testmod_test_nullable_bare(NULL);
+
/* Magic number to enable writable tp */
if (len == 64) {
struct bpf_testmod_test_writable_ctx writable = {
@@ -432,7 +434,7 @@ uprobe_ret_handler(struct uprobe_consumer *self, unsigned long func,
struct testmod_uprobe {
struct path path;
- loff_t offset;
+ struct uprobe *uprobe;
struct uprobe_consumer consumer;
};
@@ -446,25 +448,25 @@ static int testmod_register_uprobe(loff_t offset)
{
int err = -EBUSY;
- if (uprobe.offset)
+ if (uprobe.uprobe)
return -EBUSY;
mutex_lock(&testmod_uprobe_mutex);
- if (uprobe.offset)
+ if (uprobe.uprobe)
goto out;
err = kern_path("/proc/self/exe", LOOKUP_FOLLOW, &uprobe.path);
if (err)
goto out;
- err = uprobe_register_refctr(d_real_inode(uprobe.path.dentry),
- offset, 0, &uprobe.consumer);
- if (err)
+ uprobe.uprobe = uprobe_register(d_real_inode(uprobe.path.dentry),
+ offset, 0, &uprobe.consumer);
+ if (IS_ERR(uprobe.uprobe)) {
+ err = PTR_ERR(uprobe.uprobe);
path_put(&uprobe.path);
- else
- uprobe.offset = offset;
-
+ uprobe.uprobe = NULL;
+ }
out:
mutex_unlock(&testmod_uprobe_mutex);
return err;
@@ -474,10 +476,11 @@ static void testmod_unregister_uprobe(void)
{
mutex_lock(&testmod_uprobe_mutex);
- if (uprobe.offset) {
- uprobe_unregister(d_real_inode(uprobe.path.dentry),
- uprobe.offset, &uprobe.consumer);
- uprobe.offset = 0;
+ if (uprobe.uprobe) {
+ uprobe_unregister_nosync(uprobe.uprobe, &uprobe.consumer);
+ uprobe_unregister_sync();
+ path_put(&uprobe.path);
+ uprobe.uprobe = NULL;
}
mutex_unlock(&testmod_uprobe_mutex);
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index 00965a6e83bb..61de88cf4ad0 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -3551,6 +3551,40 @@ static struct btf_raw_test raw_tests[] = {
BTF_STR_SEC("\0x\0?.foo bar:buz"),
},
{
+ .descr = "datasec: name with non-printable first char not is ok",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* VAR x */ /* [2] */
+ BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
+ BTF_VAR_STATIC,
+ /* DATASEC ?.data */ /* [3] */
+ BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(2, 0, 4),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0x\0\7foo"),
+ .err_str = "Invalid name",
+ .btf_load_err = true,
+},
+{
+ .descr = "datasec: name '\\0' is not ok",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* VAR x */ /* [2] */
+ BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
+ BTF_VAR_STATIC,
+ /* DATASEC \0 */ /* [3] */
+ BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(2, 0, 4),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0x\0"),
+ .err_str = "Invalid name",
+ .btf_load_err = true,
+},
+{
.descr = "type name '?foo' is not ok",
.raw_types = {
/* union ?foo; */
diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c
index 7cfac53c0d58..b614a5272dfd 100644
--- a/tools/testing/selftests/bpf/prog_tests/dynptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c
@@ -9,6 +9,7 @@
enum test_setup_type {
SETUP_SYSCALL_SLEEP,
SETUP_SKB_PROG,
+ SETUP_SKB_PROG_TP,
};
static struct {
@@ -28,6 +29,7 @@ static struct {
{"test_dynptr_clone", SETUP_SKB_PROG},
{"test_dynptr_skb_no_buff", SETUP_SKB_PROG},
{"test_dynptr_skb_strcmp", SETUP_SKB_PROG},
+ {"test_dynptr_skb_tp_btf", SETUP_SKB_PROG_TP},
};
static void verify_success(const char *prog_name, enum test_setup_type setup_type)
@@ -35,7 +37,7 @@ static void verify_success(const char *prog_name, enum test_setup_type setup_typ
struct dynptr_success *skel;
struct bpf_program *prog;
struct bpf_link *link;
- int err;
+ int err;
skel = dynptr_success__open();
if (!ASSERT_OK_PTR(skel, "dynptr_success__open"))
@@ -47,7 +49,7 @@ static void verify_success(const char *prog_name, enum test_setup_type setup_typ
if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
goto cleanup;
- bpf_program__set_autoload(prog, true);
+ bpf_program__set_autoload(prog, true);
err = dynptr_success__load(skel);
if (!ASSERT_OK(err, "dynptr_success__load"))
@@ -87,6 +89,37 @@ static void verify_success(const char *prog_name, enum test_setup_type setup_typ
break;
}
+ case SETUP_SKB_PROG_TP:
+ {
+ struct __sk_buff skb = {};
+ struct bpf_object *obj;
+ int aux_prog_fd;
+
+ /* Just use its test_run to trigger kfree_skb tracepoint */
+ err = bpf_prog_test_load("./test_pkt_access.bpf.o", BPF_PROG_TYPE_SCHED_CLS,
+ &obj, &aux_prog_fd);
+ if (!ASSERT_OK(err, "prog_load sched cls"))
+ goto cleanup;
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .ctx_in = &skb,
+ .ctx_size_in = sizeof(skb),
+ );
+
+ link = bpf_program__attach(prog);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach"))
+ goto cleanup;
+
+ err = bpf_prog_test_run_opts(aux_prog_fd, &topts);
+ bpf_link__destroy(link);
+
+ if (!ASSERT_OK(err, "test_run"))
+ goto cleanup;
+
+ break;
+ }
}
ASSERT_EQ(skel->bss->err, 0, "err");
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
index 9e5f38739104..6b3078dd5645 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -378,8 +378,8 @@ struct test tests[] = {
.iph_inner.ihl = 5,
.iph_inner.protocol = IPPROTO_TCP,
.iph_inner.tot_len =
- __bpf_constant_htons(MAGIC_BYTES) -
- sizeof(struct iphdr),
+ __bpf_constant_htons(MAGIC_BYTES -
+ sizeof(struct iphdr)),
.tcp.doff = 5,
.tcp.source = 80,
.tcp.dest = 8080,
@@ -407,8 +407,8 @@ struct test tests[] = {
.iph_inner.ihl = 5,
.iph_inner.protocol = IPPROTO_TCP,
.iph_inner.tot_len =
- __bpf_constant_htons(MAGIC_BYTES) -
- sizeof(struct iphdr),
+ __bpf_constant_htons(MAGIC_BYTES -
+ sizeof(struct iphdr)),
.tcp.doff = 5,
.tcp.source = 80,
.tcp.dest = 8080,
@@ -436,8 +436,8 @@ struct test tests[] = {
.iph_inner.ihl = 5,
.iph_inner.protocol = IPPROTO_TCP,
.iph_inner.tot_len =
- __bpf_constant_htons(MAGIC_BYTES) -
- sizeof(struct iphdr),
+ __bpf_constant_htons(MAGIC_BYTES -
+ sizeof(struct iphdr)),
.tcp.doff = 5,
.tcp.source = 99,
.tcp.dest = 9090,
diff --git a/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c b/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c
index 7d4a9b3d3722..e12255121c15 100644
--- a/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c
+++ b/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c
@@ -154,6 +154,51 @@ err_out:
close(sfd);
}
+/*
+ * For the given address family, connect a TCP client to a local server,
+ * attach the cgroup/getsockopt program and check that reading
+ * TCP_BPF_SOCK_OPS_CB_FLAGS on the accept()ed socket reports
+ * BPF_SOCK_OPS_STATE_CB_FLAG as set.
+ */
+static void test_nonstandard_opt(int family)
+{
+	struct bpf_link *link = NULL;
+	int srv_fd, cli_fd = -1, acc_fd = -1;
+	int cb_flags, err;
+	socklen_t cb_flags_len = sizeof(cb_flags);
+
+	memset(skel->bss, 0, sizeof(*skel->bss));
+
+	srv_fd = start_server(family, SOCK_STREAM,
+			      family == AF_INET6 ? addr6_str : addr4_str, 0, 0);
+	if (!ASSERT_GE(srv_fd, 0, "start_server"))
+		return;
+
+	cli_fd = connect_to_fd(srv_fd, 0);
+	if (!ASSERT_GE(cli_fd, 0, "connect_to_fd_server"))
+		goto err_out;
+
+	/* The cgroup/getsockopt program intercepts the getsockopt() call
+	 * below and fills in the bpf_sock_ops_cb_flags of the accept()ed TCP
+	 * socket; those flags were set earlier, in the passive established
+	 * callback, through bpf_setsockopt().
+	 */
+	link = bpf_program__attach_cgroup(skel->progs._getsockopt, cg_fd);
+	if (!ASSERT_OK_PTR(link, "getsockopt prog"))
+		goto err_out;
+
+	acc_fd = accept(srv_fd, NULL, NULL);
+	if (!ASSERT_GE(acc_fd, 0, "accept"))
+		goto err_out;
+
+	err = getsockopt(acc_fd, SOL_TCP, TCP_BPF_SOCK_OPS_CB_FLAGS,
+			 &cb_flags, &cb_flags_len);
+	if (!ASSERT_OK(err, "getsockopt_flags"))
+		goto err_out;
+
+	ASSERT_EQ(cb_flags & BPF_SOCK_OPS_STATE_CB_FLAG,
+		  BPF_SOCK_OPS_STATE_CB_FLAG, "cb_flags_set");
+
+err_out:
+	close(srv_fd);
+	if (cli_fd != -1)
+		close(cli_fd);
+	if (acc_fd != -1)
+		close(acc_fd);
+	bpf_link__destroy(link);
+}
+
void test_setget_sockopt(void)
{
cg_fd = test__join_cgroup(CG_NAME);
@@ -191,6 +236,8 @@ void test_setget_sockopt(void)
test_udp(AF_INET);
test_ktls(AF_INET6);
test_ktls(AF_INET);
+ test_nonstandard_opt(AF_INET);
+ test_nonstandard_opt(AF_INET6);
done:
setget_sockopt__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index 1337153eb0ad..82bfb266741c 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -451,11 +451,11 @@ out:
#define MAX_EVENTS 10
static void test_sockmap_skb_verdict_shutdown(void)
{
+ int n, err, map, verdict, c1 = -1, p1 = -1;
struct epoll_event ev, events[MAX_EVENTS];
- int n, err, map, verdict, s, c1 = -1, p1 = -1;
struct test_sockmap_pass_prog *skel;
- int epollfd;
int zero = 0;
+ int epollfd;
char b;
skel = test_sockmap_pass_prog__open_and_load();
@@ -469,10 +469,7 @@ static void test_sockmap_skb_verdict_shutdown(void)
if (!ASSERT_OK(err, "bpf_prog_attach"))
goto out;
- s = socket_loopback(AF_INET, SOCK_STREAM);
- if (s < 0)
- goto out;
- err = create_pair(s, AF_INET, SOCK_STREAM, &c1, &p1);
+ err = create_pair(AF_INET, SOCK_STREAM, &c1, &p1);
if (err < 0)
goto out;
@@ -506,8 +503,8 @@ out:
static void test_sockmap_skb_verdict_fionread(bool pass_prog)
{
+ int err, map, verdict, c0 = -1, c1 = -1, p0 = -1, p1 = -1;
int expected, zero = 0, sent, recvd, avail;
- int err, map, verdict, s, c0 = -1, c1 = -1, p0 = -1, p1 = -1;
struct test_sockmap_pass_prog *pass = NULL;
struct test_sockmap_drop_prog *drop = NULL;
char buf[256] = "0123456789";
@@ -534,11 +531,8 @@ static void test_sockmap_skb_verdict_fionread(bool pass_prog)
if (!ASSERT_OK(err, "bpf_prog_attach"))
goto out;
- s = socket_loopback(AF_INET, SOCK_STREAM);
- if (!ASSERT_GT(s, -1, "socket_loopback(s)"))
- goto out;
- err = create_socket_pairs(s, AF_INET, SOCK_STREAM, &c0, &c1, &p0, &p1);
- if (!ASSERT_OK(err, "create_socket_pairs(s)"))
+ err = create_socket_pairs(AF_INET, SOCK_STREAM, &c0, &c1, &p0, &p1);
+ if (!ASSERT_OK(err, "create_socket_pairs()"))
goto out;
err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST);
@@ -570,16 +564,12 @@ out:
static void test_sockmap_skb_verdict_peek_helper(int map)
{
- int err, s, c1, p1, zero = 0, sent, recvd, avail;
+ int err, c1, p1, zero = 0, sent, recvd, avail;
char snd[256] = "0123456789";
char rcv[256] = "0";
- s = socket_loopback(AF_INET, SOCK_STREAM);
- if (!ASSERT_GT(s, -1, "socket_loopback(s)"))
- return;
-
- err = create_pair(s, AF_INET, SOCK_STREAM, &c1, &p1);
- if (!ASSERT_OK(err, "create_pairs(s)"))
+ err = create_pair(AF_INET, SOCK_STREAM, &c1, &p1);
+ if (!ASSERT_OK(err, "create_pair()"))
return;
err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST);
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
index e880f97bc44d..38e35c72bdaa 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
@@ -3,6 +3,9 @@
#include <linux/vm_sockets.h>
+/*
+ * Mirrors include/linux/net.h: mask that isolates the socket type from a
+ * sotype value which may also carry flags such as SOCK_NONBLOCK.
+ */
+#define SOCK_TYPE_MASK 0xf
+
#define IO_TIMEOUT_SEC 30
#define MAX_STRERR_LEN 256
#define MAX_TEST_NAME 80
@@ -14,6 +17,17 @@
#define __always_unused __attribute__((__unused__))
+/*
+ * Borrowed from include/linux/cleanup.h.
+ *
+ * __get_and_null(p, nullvalue) evaluates to the current value of the lvalue p
+ * and then stores nullvalue into it; p is evaluated once, through its address.
+ *
+ * take_fd(fd) uses it to hand a descriptor over to the caller while leaving
+ * -EBADF behind, a negative value that close_fd() will not try to close.
+ */
+#define __get_and_null(p, nullvalue) \
+ ({ \
+ __auto_type __ptr = &(p); \
+ __auto_type __val = *__ptr; \
+ *__ptr = nullvalue; \
+ __val; \
+ })
+
+#define take_fd(fd) __get_and_null(fd, -EBADF)
+
#define _FAIL(errnum, fmt...) \
({ \
error_at_line(0, (errnum), __func__, __LINE__, fmt); \
@@ -179,6 +193,14 @@
__ret; \
})
+/* Cleanup handler: close the descriptor stored in *fd unless it is negative. */
+static inline void close_fd(int *fd)
+{
+	if (*fd < 0)
+		return;
+
+	xclose(*fd);
+}
+
+#define __close_fd __attribute__((cleanup(close_fd)))
+
static inline int poll_connect(int fd, unsigned int timeout_sec)
{
struct timeval timeout = { .tv_sec = timeout_sec };
@@ -312,54 +334,6 @@ static inline int add_to_sockmap(int sock_mapfd, int fd1, int fd2)
return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
}
-static inline int create_pair(int s, int family, int sotype, int *c, int *p)
-{
- struct sockaddr_storage addr;
- socklen_t len;
- int err = 0;
-
- len = sizeof(addr);
- err = xgetsockname(s, sockaddr(&addr), &len);
- if (err)
- return err;
-
- *c = xsocket(family, sotype, 0);
- if (*c < 0)
- return errno;
- err = xconnect(*c, sockaddr(&addr), len);
- if (err) {
- err = errno;
- goto close_cli0;
- }
-
- *p = xaccept_nonblock(s, NULL, NULL);
- if (*p < 0) {
- err = errno;
- goto close_cli0;
- }
- return err;
-close_cli0:
- close(*c);
- return err;
-}
-
-static inline int create_socket_pairs(int s, int family, int sotype,
- int *c0, int *c1, int *p0, int *p1)
-{
- int err;
-
- err = create_pair(s, family, sotype, c0, p0);
- if (err)
- return err;
-
- err = create_pair(s, family, sotype, c1, p1);
- if (err) {
- close(*c0);
- close(*p0);
- }
- return err;
-}
-
static inline int enable_reuseport(int s, int progfd)
{
int err, one = 1;
@@ -412,5 +386,84 @@ static inline int socket_loopback(int family, int sotype)
return socket_loopback_reuseport(family, sotype, -1);
}
+/*
+ * Build a connected pair of loopback sockets of the given family and type.
+ *
+ * For SOCK_DGRAM the bound loopback socket is itself connected back to the
+ * client and returned in *p0; for SOCK_STREAM and SOCK_SEQPACKET the accepted
+ * socket is returned in *p0. The client socket is returned in *p1.
+ *
+ * Returns 0 on success and a non-zero value on failure. Any descriptor that
+ * was not handed to the caller is closed automatically on return.
+ */
+static inline int create_pair(int family, int sotype, int *p0, int *p1)
+{
+	__close_fd int srv = -1;
+	__close_fd int cli = -1;
+	__close_fd int acc = -1;
+	struct sockaddr_storage ss;
+	socklen_t sslen = sizeof(ss);
+	int kind = sotype & SOCK_TYPE_MASK;
+	int rc;
+
+	srv = socket_loopback(family, sotype);
+	if (srv < 0)
+		return srv;
+
+	rc = xgetsockname(srv, sockaddr(&ss), &sslen);
+	if (rc)
+		return rc;
+
+	cli = xsocket(family, sotype, 0);
+	if (cli < 0)
+		return cli;
+
+	rc = connect(cli, sockaddr(&ss), sslen);
+	if (rc) {
+		if (errno != EINPROGRESS) {
+			FAIL_ERRNO("connect");
+			return rc;
+		}
+
+		/* Non-blocking connect still in flight: wait for it. */
+		rc = poll_connect(cli, IO_TIMEOUT_SEC);
+		if (rc) {
+			FAIL_ERRNO("poll_connect");
+			return rc;
+		}
+	}
+
+	if (kind == SOCK_DGRAM) {
+		/* Point the bound socket back at the client's address. */
+		rc = xgetsockname(cli, sockaddr(&ss), &sslen);
+		if (rc)
+			return rc;
+
+		rc = xconnect(srv, sockaddr(&ss), sslen);
+		if (rc)
+			return rc;
+
+		*p0 = take_fd(srv);
+	} else if (kind == SOCK_STREAM || kind == SOCK_SEQPACKET) {
+		acc = xaccept_nonblock(srv, NULL, NULL);
+		if (acc < 0)
+			return acc;
+
+		*p0 = take_fd(acc);
+	} else {
+		FAIL("Unsupported socket type %#x", sotype);
+		return -EOPNOTSUPP;
+	}
+
+	*p1 = take_fd(cli);
+	return 0;
+}
+
+/*
+ * Create two connected socket pairs, (c0, p0) and (c1, p1), via create_pair().
+ * If the second pair cannot be created the first one is closed again.
+ * Returns 0 on success, otherwise the non-zero result of the failing
+ * create_pair() call.
+ */
+static inline int create_socket_pairs(int family, int sotype, int *c0, int *c1,
+				      int *p0, int *p1)
+{
+	int rc = create_pair(family, sotype, c0, p0);
+
+	if (rc)
+		return rc;
+
+	rc = create_pair(family, sotype, c1, p1);
+	if (!rc)
+		return 0;
+
+	close(*c0);
+	close(*p0);
+	return rc;
+}
#endif // __SOCKMAP_HELPERS__
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index 9ce0e0e0b7da..da5a6fb03b69 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -677,7 +677,7 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
int verd_mapfd, enum redir_mode mode)
{
const char *log_prefix = redir_mode_str(mode);
- int s, c0, c1, p0, p1;
+ int c0, c1, p0, p1;
unsigned int pass;
int err, n;
u32 key;
@@ -685,13 +685,10 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
zero_verdict_count(verd_mapfd);
- s = socket_loopback(family, sotype | SOCK_NONBLOCK);
- if (s < 0)
- return;
-
- err = create_socket_pairs(s, family, sotype, &c0, &c1, &p0, &p1);
+ err = create_socket_pairs(family, sotype | SOCK_NONBLOCK, &c0, &c1,
+ &p0, &p1);
if (err)
- goto close_srv;
+ return;
err = add_to_sockmap(sock_mapfd, p0, p1);
if (err)
@@ -722,8 +719,6 @@ close:
xclose(c1);
xclose(p0);
xclose(c0);
-close_srv:
- xclose(s);
}
static void test_skb_redir_to_connected(struct test_sockmap_listen *skel,
@@ -909,7 +904,7 @@ static void test_msg_redir_to_listening_with_link(struct test_sockmap_listen *sk
static void redir_partial(int family, int sotype, int sock_map, int parser_map)
{
- int s, c0 = -1, c1 = -1, p0 = -1, p1 = -1;
+ int c0 = -1, c1 = -1, p0 = -1, p1 = -1;
int err, n, key, value;
char buf[] = "abc";
@@ -919,13 +914,10 @@ static void redir_partial(int family, int sotype, int sock_map, int parser_map)
if (err)
return;
- s = socket_loopback(family, sotype | SOCK_NONBLOCK);
- if (s < 0)
- goto clean_parser_map;
-
- err = create_socket_pairs(s, family, sotype, &c0, &c1, &p0, &p1);
+ err = create_socket_pairs(family, sotype | SOCK_NONBLOCK, &c0, &c1,
+ &p0, &p1);
if (err)
- goto close_srv;
+ goto clean_parser_map;
err = add_to_sockmap(sock_map, p0, p1);
if (err)
@@ -944,8 +936,6 @@ close:
xclose(p0);
xclose(c1);
xclose(p1);
-close_srv:
- xclose(s);
clean_parser_map:
key = 0;
@@ -1500,49 +1490,7 @@ static void test_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *ma
/* Returns two connected loopback vsock sockets */
static int vsock_socketpair_connectible(int sotype, int *v0, int *v1)
{
- struct sockaddr_storage addr;
- socklen_t len = sizeof(addr);
- int s, p, c;
-
- s = socket_loopback(AF_VSOCK, sotype);
- if (s < 0)
- return -1;
-
- c = xsocket(AF_VSOCK, sotype | SOCK_NONBLOCK, 0);
- if (c == -1)
- goto close_srv;
-
- if (getsockname(s, sockaddr(&addr), &len) < 0)
- goto close_cli;
-
- if (connect(c, sockaddr(&addr), len) < 0 && errno != EINPROGRESS) {
- FAIL_ERRNO("connect");
- goto close_cli;
- }
-
- len = sizeof(addr);
- p = accept_timeout(s, sockaddr(&addr), &len, IO_TIMEOUT_SEC);
- if (p < 0)
- goto close_cli;
-
- if (poll_connect(c, IO_TIMEOUT_SEC) < 0) {
- FAIL_ERRNO("poll_connect");
- goto close_acc;
- }
-
- *v0 = p;
- *v1 = c;
-
- return 0;
-
-close_acc:
- close(p);
-close_cli:
- close(c);
-close_srv:
- close(s);
-
- return -1;
+ return create_pair(AF_VSOCK, sotype | SOCK_NONBLOCK, v0, v1);
}
static void vsock_unix_redir_connectible(int sock_mapfd, int verd_mapfd,
@@ -1691,44 +1639,7 @@ static void test_reuseport(struct test_sockmap_listen *skel,
static int inet_socketpair(int family, int type, int *s, int *c)
{
- struct sockaddr_storage addr;
- socklen_t len;
- int p0, c0;
- int err;
-
- p0 = socket_loopback(family, type | SOCK_NONBLOCK);
- if (p0 < 0)
- return p0;
-
- len = sizeof(addr);
- err = xgetsockname(p0, sockaddr(&addr), &len);
- if (err)
- goto close_peer0;
-
- c0 = xsocket(family, type | SOCK_NONBLOCK, 0);
- if (c0 < 0) {
- err = c0;
- goto close_peer0;
- }
- err = xconnect(c0, sockaddr(&addr), len);
- if (err)
- goto close_cli0;
- err = xgetsockname(c0, sockaddr(&addr), &len);
- if (err)
- goto close_cli0;
- err = xconnect(p0, sockaddr(&addr), len);
- if (err)
- goto close_cli0;
-
- *s = p0;
- *c = c0;
- return 0;
-
-close_cli0:
- xclose(c0);
-close_peer0:
- xclose(p0);
- return err;
+ return create_pair(family, type | SOCK_NONBLOCK, s, c);
}
static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd,
@@ -1795,11 +1706,11 @@ static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd,
int sfd[2];
int err;
- if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd))
+ if (socketpair(AF_UNIX, type | SOCK_NONBLOCK, 0, sfd))
return;
c0 = sfd[0], p0 = sfd[1];
- err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1);
+ err = inet_socketpair(family, type, &p1, &c1);
if (err)
goto close;
@@ -1847,7 +1758,7 @@ static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd,
int sfd[2];
int err;
- err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0);
+ err = inet_socketpair(family, type, &p0, &c0);
if (err)
return;
@@ -1882,7 +1793,7 @@ static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel,
unix_inet_redir_to_connected(family, SOCK_DGRAM,
sock_map, -1, verdict_map,
REDIR_EGRESS, NO_FLAGS);
- unix_inet_redir_to_connected(family, SOCK_DGRAM,
+ unix_inet_redir_to_connected(family, SOCK_STREAM,
sock_map, -1, verdict_map,
REDIR_EGRESS, NO_FLAGS);
diff --git a/tools/testing/selftests/bpf/prog_tests/tp_btf_nullable.c b/tools/testing/selftests/bpf/prog_tests/tp_btf_nullable.c
new file mode 100644
index 000000000000..accc42e01f8a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tp_btf_nullable.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "test_tp_btf_nullable.skel.h"
+
+/* Run the test_tp_btf_nullable programs when env.has_testmod is set; skip otherwise. */
+void test_tp_btf_nullable(void)
+{
+	if (env.has_testmod) {
+		RUN_TESTS(test_tp_btf_nullable);
+	} else {
+		test__skip();
+	}
+}
diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c
index e35bc1eac52a..c3bc186af21e 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c
@@ -6,6 +6,7 @@
#include <stdbool.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
#include <linux/if_ether.h>
#include "bpf_misc.h"
#include "bpf_kfuncs.h"
@@ -1254,6 +1255,30 @@ int skb_invalid_ctx(void *ctx)
return 0;
}
+SEC("fentry/skb_tx_error")
+__failure __msg("must be referenced or trusted")
+int BPF_PROG(skb_invalid_ctx_fentry, void *skb)
+{
+ struct bpf_dynptr ptr;
+
+ /* this should fail: the program is annotated __failure, and the message
+  * given to __msg() above is the one expected for this call on an fentry
+  * argument
+  */
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ return 0;
+}
+
+SEC("fexit/skb_tx_error")
+__failure __msg("must be referenced or trusted")
+int BPF_PROG(skb_invalid_ctx_fexit, void *skb)
+{
+ struct bpf_dynptr ptr;
+
+ /* this should fail: the program is annotated __failure, and the message
+  * given to __msg() above is the one expected for this call on an fexit
+  * argument
+  */
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ return 0;
+}
+
/* Reject writes to dynptr slot for uninit arg */
SEC("?raw_tp")
__failure __msg("potential write to dynptr at off=-16")
diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c
index 5985920d162e..bfcc85686cf0 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_success.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_success.c
@@ -5,6 +5,7 @@
#include <stdbool.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
#include "bpf_misc.h"
#include "bpf_kfuncs.h"
#include "errno.h"
@@ -544,3 +545,25 @@ int test_dynptr_skb_strcmp(struct __sk_buff *skb)
return 1;
}
+
+SEC("tp_btf/kfree_skb") /* creates a dynptr from the tracepoint skb and checks that writing through it is refused */
+int BPF_PROG(test_dynptr_skb_tp_btf, void *skb, void *location)
+{
+ __u8 write_data[2] = {1, 2};
+ struct bpf_dynptr ptr;
+ int ret;
+
+ if (bpf_dynptr_from_skb(skb, 0, &ptr)) {
+ err = 1; /* failure code 1: dynptr creation returned non-zero */
+ return 1;
+ }
+
+ /* since tp_btf skbs are read only, writes should fail */
+ ret = bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data), 0);
+ if (ret != -EINVAL) {
+ err = 2; /* failure code 2: write did not return -EINVAL */
+ return 1;
+ }
+
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/setget_sockopt.c b/tools/testing/selftests/bpf/progs/setget_sockopt.c
index 60518aed1ffc..6dd4318debbf 100644
--- a/tools/testing/selftests/bpf/progs/setget_sockopt.c
+++ b/tools/testing/selftests/bpf/progs/setget_sockopt.c
@@ -59,6 +59,8 @@ static const struct sockopt_test sol_tcp_tests[] = {
{ .opt = TCP_THIN_LINEAR_TIMEOUTS, .flip = 1, },
{ .opt = TCP_USER_TIMEOUT, .new = 123400, .expected = 123400, },
{ .opt = TCP_NOTSENT_LOWAT, .new = 1314, .expected = 1314, },
+ { .opt = TCP_BPF_SOCK_OPS_CB_FLAGS, .new = BPF_SOCK_OPS_ALL_CB_FLAGS,
+ .expected = BPF_SOCK_OPS_ALL_CB_FLAGS, },
{ .opt = 0, },
};
@@ -353,11 +355,30 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family,
return 1;
}
+SEC("cgroup/getsockopt") /* fills optval with tp->bpf_sock_ops_cb_flags for SOL_TCP/TCP_BPF_SOCK_OPS_CB_FLAGS */
+int _getsockopt(struct bpf_sockopt *ctx)
+{
+ struct bpf_sock *sk = ctx->sk;
+ int *optval = ctx->optval;
+ struct tcp_sock *tp;
+
+ if (!sk || ctx->level != SOL_TCP || ctx->optname != TCP_BPF_SOCK_OPS_CB_FLAGS)
+ return 1; /* not the option handled here: leave ctx untouched */
+
+ tp = bpf_core_cast(sk, struct tcp_sock);
+ if (ctx->optval + sizeof(int) <= ctx->optval_end) { /* write only when a whole int fits before optval_end */
+ *optval = tp->bpf_sock_ops_cb_flags;
+ ctx->retval = 0; /* set the hook's retval to 0 once the value has been stored */
+ }
+ return 1;
+}
+
SEC("sockops")
int skops_sockopt(struct bpf_sock_ops *skops)
{
struct bpf_sock *bpf_sk = skops->sk;
struct sock *sk;
+ int flags;
if (!bpf_sk)
return 1;
@@ -384,9 +405,8 @@ int skops_sockopt(struct bpf_sock_ops *skops)
nr_passive += !(bpf_test_sockopt(skops, sk) ||
test_tcp_maxseg(skops, sk) ||
test_tcp_saved_syn(skops, sk));
- bpf_sock_ops_cb_flags_set(skops,
- skops->bpf_sock_ops_cb_flags |
- BPF_SOCK_OPS_STATE_CB_FLAG);
+ flags = skops->bpf_sock_ops_cb_flags | BPF_SOCK_OPS_STATE_CB_FLAG;
+ bpf_setsockopt(skops, SOL_TCP, TCP_BPF_SOCK_OPS_CB_FLAGS, &flags, sizeof(flags));
break;
case BPF_SOCK_OPS_STATE_CB:
if (skops->args[1] == BPF_TCP_CLOSE_WAIT)
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c
index 44ee0d037f95..eb5cca1fce16 100644
--- a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c
+++ b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c
@@ -486,17 +486,10 @@ static int tcp_validate_cookie(struct tcp_syncookie *ctx)
goto err;
mssind = (cookie & (3 << 6)) >> 6;
- if (ctx->ipv4) {
- if (mssind > ARRAY_SIZE(msstab4))
- goto err;
-
+ if (ctx->ipv4)
ctx->attrs.mss = msstab4[mssind];
- } else {
- if (mssind > ARRAY_SIZE(msstab6))
- goto err;
-
+ else
ctx->attrs.mss = msstab6[mssind];
- }
ctx->attrs.snd_wscale = cookie & BPF_SYNCOOKIE_WSCALE_MASK;
ctx->attrs.rcv_wscale = ctx->attrs.snd_wscale;
diff --git a/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c b/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c
new file mode 100644
index 000000000000..bba3e37f749b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "../bpf_testmod/bpf_testmod.h"
+#include "bpf_misc.h"
+
+SEC("tp_btf/bpf_testmod_test_nullable_bare") /* negative test (__failure): dereferences nullable_ctx without a NULL check */
+__failure __msg("R1 invalid mem access 'trusted_ptr_or_null_'")
+int BPF_PROG(handle_tp_btf_nullable_bare1, struct bpf_testmod_test_read_ctx *nullable_ctx)
+{
+ return nullable_ctx->len; /* unchecked access that should trigger the __msg() above */
+}
+
+SEC("tp_btf/bpf_testmod_test_nullable_bare") /* counterpart without __failure: same access, guarded by a NULL check */
+int BPF_PROG(handle_tp_btf_nullable_bare2, struct bpf_testmod_test_read_ctx *nullable_ctx)
+{
+ if (nullable_ctx) /* dereference only after the NULL check */
+ return nullable_ctx->len;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index 8144fd145237..1ee0ef114f9d 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -324,6 +324,25 @@ out:
return zc_avail;
}
+#define MAX_SKB_FRAGS_PATH "/proc/sys/net/core/max_skb_frags"
+static unsigned int get_max_skb_frags(void)
+{
+ unsigned int frags = 0;
+ FILE *fp = fopen(MAX_SKB_FRAGS_PATH, "r");
+
+ /* Returns the value parsed from MAX_SKB_FRAGS_PATH, or 0 when it cannot be opened or parsed. */
+ if (fp == NULL) {
+ ksft_print_msg("Error opening %s\n", MAX_SKB_FRAGS_PATH);
+ return 0;
+ }
+
+ /* On a parse failure frags keeps its initial 0. */
+ if (fscanf(fp, "%u", &frags) != 1)
+ ksft_print_msg("Error reading %s\n", MAX_SKB_FRAGS_PATH);
+
+ fclose(fp);
+ return frags;
+}
+
static struct option long_options[] = {
{"interface", required_argument, 0, 'i'},
{"busy-poll", no_argument, 0, 'b'},
@@ -2244,13 +2263,24 @@ static int testapp_poll_rxq_tmout(struct test_spec *test)
static int testapp_too_many_frags(struct test_spec *test)
{
- struct pkt pkts[2 * XSK_DESC__MAX_SKB_FRAGS + 2] = {};
+ struct pkt *pkts;
u32 max_frags, i;
+ int ret;
- if (test->mode == TEST_MODE_ZC)
+ if (test->mode == TEST_MODE_ZC) {
max_frags = test->ifobj_tx->xdp_zc_max_segs;
- else
- max_frags = XSK_DESC__MAX_SKB_FRAGS;
+ } else {
+ max_frags = get_max_skb_frags();
+ if (!max_frags) {
+ ksft_print_msg("Couldn't retrieve MAX_SKB_FRAGS from system, using default (17) value\n");
+ max_frags = 17;
+ }
+ max_frags += 1;
+ }
+
+ pkts = calloc(2 * max_frags + 2, sizeof(struct pkt));
+ if (!pkts)
+ return TEST_FAILURE;
test->mtu = MAX_ETH_JUMBO_SIZE;
@@ -2280,7 +2310,10 @@ static int testapp_too_many_frags(struct test_spec *test)
pkts[2 * max_frags + 1].valid = true;
pkt_stream_generate_custom(test, pkts, 2 * max_frags + 2);
- return testapp_validate_traffic(test);
+ ret = testapp_validate_traffic(test);
+
+ free(pkts);
+ return ret;
}
static int xsk_load_xdp_programs(struct ifobject *ifobj)
diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h
index 885c948c5d83..e46e823f6a1a 100644
--- a/tools/testing/selftests/bpf/xskxceiver.h
+++ b/tools/testing/selftests/bpf/xskxceiver.h
@@ -55,7 +55,6 @@
#define XSK_UMEM__LARGE_FRAME_SIZE (3 * 1024)
#define XSK_UMEM__MAX_FRAME_SIZE (4 * 1024)
#define XSK_DESC__INVALID_OPTION (0xffff)
-#define XSK_DESC__MAX_SKB_FRAGS 18
#define HUGEPAGE_SIZE (2 * 1024 * 1024)
#define PKT_DUMP_NB_TO_PRINT 16
#define RUN_ALL_TESTS UINT_MAX
diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh
index 7c08cc153367..03c1bdaed2c3 100755
--- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh
+++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh
@@ -84,6 +84,20 @@ echo member > test/cpuset.cpus.partition
echo "" > test/cpuset.cpus
[[ $RESULT -eq 0 ]] && skip_test "Child cgroups are using cpuset!"
+#
+# If isolated CPUs have been reserved at boot time (as shown in
+# cpuset.cpus.isolated), these isolated CPUs should be outside of CPUs 0-7
+# that will be used by this script for testing purpose. If not, some of
+# the tests may fail incorrectly. These isolated CPUs will also be removed
+# before being compared with the expected results.
+#
+BOOT_ISOLCPUS=$(cat $CGROUP2/cpuset.cpus.isolated)
+if [[ -n "$BOOT_ISOLCPUS" ]]
+then
+ [[ $(echo $BOOT_ISOLCPUS | sed -e "s/[,-].*//") -le 7 ]] &&
+ skip_test "Pre-isolated CPUs ($BOOT_ISOLCPUS) overlap CPUs to be tested"
+ echo "Pre-isolated CPUs: $BOOT_ISOLCPUS"
+fi
cleanup()
{
online_cpus
@@ -321,7 +335,7 @@ TEST_MATRIX=(
# old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS
# ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ --------
#
- # Incorrect change to cpuset.cpus invalidates partition root
+ # Incorrect change to cpuset.cpus[.exclusive] invalidates partition root
#
# Adding CPUs to partition root that are not in parent's
# cpuset.cpus is allowed, but those extra CPUs are ignored.
@@ -365,6 +379,16 @@ TEST_MATRIX=(
# cpuset.cpus can overlap with sibling cpuset.cpus.exclusive but not subsumed by it
" C0-3 . . C4-5 X5 . . . 0 A1:0-3,B1:4-5"
+ # Child partition root that tries to take all CPUs from parent partition
+ # with tasks will remain invalid.
+ " C1-4:P1:S+ P1 . . . . . . 0 A1:1-4,A2:1-4 A1:P1,A2:P-1"
+ " C1-4:P1:S+ P1 . . . C1-4 . . 0 A1,A2:1-4 A1:P1,A2:P1"
+ " C1-4:P1:S+ P1 . . T C1-4 . . 0 A1:1-4,A2:1-4 A1:P1,A2:P-1"
+
+ # Clearing of cpuset.cpus with a preset cpuset.cpus.exclusive shouldn't
+ # affect cpuset.cpus.exclusive.effective.
+ " C1-4:X3:S+ C1:X3 . . . C . . 0 A2:1-4,XA2:3"
+
# old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS
# ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ --------
# Failure cases:
@@ -632,7 +656,8 @@ check_cgroup_states()
# Note that isolated CPUs from the sched/domains context include offline
# CPUs as well as CPUs in non-isolated 1-CPU partition. Those CPUs may
# not be included in the cpuset.cpus.isolated control file which contains
-# only CPUs in isolated partitions.
+# only CPUs in isolated partitions as well as those that are isolated at
+# boot time.
#
# $1 - expected isolated cpu list(s) <isolcpus1>{,<isolcpus2>}
# <isolcpus1> - expected sched/domains value
@@ -659,18 +684,21 @@ check_isolcpus()
fi
#
- # Check the debug isolated cpumask, if present
+ # Check cpuset.cpus.isolated cpumask
#
- [[ -f $ISCPUS ]] && {
+ if [[ -z "$BOOT_ISOLCPUS" ]]
+ then
+ ISOLCPUS=$(cat $ISCPUS)
+ else
+ ISOLCPUS=$(cat $ISCPUS | sed -e "s/,*$BOOT_ISOLCPUS//")
+ fi
+ [[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && {
+ # Take a 50ms pause and try again
+ pause 0.05
ISOLCPUS=$(cat $ISCPUS)
- [[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && {
- # Take a 50ms pause and try again
- pause 0.05
- ISOLCPUS=$(cat $ISCPUS)
- }
- [[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && return 1
- ISOLCPUS=
}
+ [[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && return 1
+ ISOLCPUS=
#
# Use the sched domain in debugfs to check isolated CPUs, if available
@@ -703,6 +731,9 @@ check_isolcpus()
fi
done
[[ "$ISOLCPUS" = *- ]] && ISOLCPUS=${ISOLCPUS}$LASTISOLCPU
+ # Drop the boot-time isolated CPUs (if any) from the computed list so
+ # that only the CPUs isolated by the test are compared below.
+ [[ -n "$BOOT_ISOLCPUS" ]] &&
+ ISOLCPUS=$(echo $ISOLCPUS | sed -e "s/,*$BOOT_ISOLCPUS//")
+
[[ "$EXPECT_VAL" = "$ISOLCPUS" ]]
}
@@ -720,7 +751,8 @@ test_fail()
}
#
-# Check to see if there are unexpected isolated CPUs left
+# Check to see if there are unexpected isolated CPUs left beyond the boot
+# time isolated ones.
#
null_isolcpus_check()
{
diff --git a/tools/testing/selftests/cgroup/test_cpuset_v1_base.sh b/tools/testing/selftests/cgroup/test_cpuset_v1_base.sh
new file mode 100755
index 000000000000..42a6628fb8bc
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_cpuset_v1_base.sh
@@ -0,0 +1,77 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Basic test for cpuset v1 interfaces write/read
+#
+
+skip_test() { # print the reason given in $1, report the skip and terminate the script
+ echo "$1"
+ echo "Test SKIPPED"
+ exit 4 # ksft_skip
+}
+
+write_test() {
+ dir=$1
+ interface=$2
+ value=$3
+ original=$(cat $dir/$interface)
+ echo "testing $interface $value"
+ echo $value > $dir/$interface
+ new=$(cat $dir/$interface)
+ [[ $value -ne $(cat $dir/$interface) ]] && {
+ echo "$interface write $value failed: new:$new"
+ exit 1
+ }
+}
+
+[[ $(id -u) -eq 0 ]] || skip_test "Test must be run as root!"
+
+# Find cpuset v1 mount point
+CPUSET=$(mount -t cgroup | grep cpuset | head -1 | awk '{print $3}')
+[[ -n "$CPUSET" ]] || skip_test "cpuset v1 mount point not found!"
+
+#
+# Create a test cpuset, read write test
+#
+TDIR=test$$
+[[ -d $CPUSET/$TDIR ]] || mkdir $CPUSET/$TDIR
+
+ITF_MATRIX=(
+ #interface value expect root_only
+ 'cpuset.cpus 0-1 0-1 0'
+ 'cpuset.mem_exclusive 1 1 0'
+ 'cpuset.mem_exclusive 0 0 0'
+ 'cpuset.mem_hardwall 1 1 0'
+ 'cpuset.mem_hardwall 0 0 0'
+ 'cpuset.memory_migrate 1 1 0'
+ 'cpuset.memory_migrate 0 0 0'
+ 'cpuset.memory_spread_page 1 1 0'
+ 'cpuset.memory_spread_page 0 0 0'
+ 'cpuset.memory_spread_slab 1 1 0'
+ 'cpuset.memory_spread_slab 0 0 0'
+ 'cpuset.mems 0 0 0'
+ 'cpuset.sched_load_balance 1 1 0'
+ 'cpuset.sched_load_balance 0 0 0'
+ 'cpuset.sched_relax_domain_level 2 2 0'
+ 'cpuset.memory_pressure_enabled 1 1 1'
+ 'cpuset.memory_pressure_enabled 0 0 1'
+)
+
+# Run write_test for every ITF_MATRIX entry. Each entry holds
+# "<interface> <value> <expect> <root_only>".
+run_test()
+{
+ local entry args root_only target
+
+ for entry in "${ITF_MATRIX[@]}" ; do
+ args=($entry)
+ root_only=${args[3]}
+ # root_only entries are exercised on the cpuset mount point itself,
+ # all others inside the test directory.
+ if [[ $root_only -eq 1 ]]; then
+ target="$CPUSET"
+ else
+ target="$CPUSET/$TDIR"
+ fi
+ write_test "$target" "${args[0]}" "${args[1]}" "${args[2]}"
+ done
+}
+
+run_test
+rmdir $CPUSET/$TDIR
+echo "Test PASSED"
+exit 0
diff --git a/tools/testing/selftests/core/Makefile b/tools/testing/selftests/core/Makefile
index ce262d097269..8e99f87f5d7c 100644
--- a/tools/testing/selftests/core/Makefile
+++ b/tools/testing/selftests/core/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
CFLAGS += -g $(KHDR_INCLUDES)
-TEST_GEN_PROGS := close_range_test
+TEST_GEN_PROGS := close_range_test unshare_test
include ../lib.mk
diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c
index 12b4eb9d0434..e0d9851fe1c9 100644
--- a/tools/testing/selftests/core/close_range_test.c
+++ b/tools/testing/selftests/core/close_range_test.c
@@ -26,6 +26,10 @@
#define F_DUPFD_QUERY (F_LINUX_SPECIFIC_BASE + 3)
#endif
+#ifndef F_CREATED_QUERY
+#define F_CREATED_QUERY (F_LINUX_SPECIFIC_BASE + 4)
+#endif
+
static inline int sys_close_range(unsigned int fd, unsigned int max_fd,
unsigned int flags)
{
@@ -624,4 +628,39 @@ TEST(close_range_bitmap_corruption)
EXPECT_EQ(0, WEXITSTATUS(status));
}
+TEST(fcntl_created) /* F_CREATED_QUERY is expected to return 1 only on the fd whose open() created the file */
+{
+ for (int i = 0; i < 101; i++) { /* repeat the three checks with a distinct scratch file each round */
+ int fd;
+ char path[PATH_MAX];
+
+ fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
+ ASSERT_GE(fd, 0) {
+ if (errno == ENOENT)
+ SKIP(return,
+ "Skipping test since /dev/null does not exist");
+ }
+
+ /* We didn't create "/dev/null". */
+ EXPECT_EQ(fcntl(fd, F_CREATED_QUERY, 0), 0);
+ close(fd);
+
+ sprintf(path, "aaaa_%d", i); /* scratch file name, relative to the current directory */
+ fd = open(path, O_CREAT | O_RDONLY | O_CLOEXEC, 0600);
+ ASSERT_GE(fd, 0);
+
+ /* We created "aaaa_%d". */
+ EXPECT_EQ(fcntl(fd, F_CREATED_QUERY, 0), 1);
+ close(fd);
+
+ fd = open(path, O_RDONLY | O_CLOEXEC);
+ ASSERT_GE(fd, 0);
+
+ /* We're opening it again, so no positive creation check. */
+ EXPECT_EQ(fcntl(fd, F_CREATED_QUERY, 0), 0);
+ close(fd);
+ unlink(path); /* remove the scratch file before the next round */
+ }
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/core/unshare_test.c b/tools/testing/selftests/core/unshare_test.c
new file mode 100644
index 000000000000..7fec9dfb1b0e
--- /dev/null
+++ b/tools/testing/selftests/core/unshare_test.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/kernel.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syscall.h>
+#include <unistd.h>
+#include <sys/resource.h>
+#include <linux/close_range.h>
+
+#include "../kselftest_harness.h"
+#include "../clone3/clone3_selftests.h"
+
+TEST(unshare_EMFILE) /* unshare(CLONE_FILES) must fail with EMFILE when an open fd lies past fs.nr_open */
+{
+ pid_t pid;
+ int status;
+ struct __clone_args args = {
+ .flags = CLONE_FILES,
+ .exit_signal = SIGCHLD,
+ };
+ int fd;
+ ssize_t n, n2;
+ static char buf[512], buf2[512];
+ struct rlimit rlimit;
+ int nr_open;
+
+ fd = open("/proc/sys/fs/nr_open", O_RDWR); /* opened read-write: the sysctl is raised and later restored */
+ ASSERT_GE(fd, 0);
+
+ n = read(fd, buf, sizeof(buf)); /* buf keeps the original text so it can be written back verbatim */
+ ASSERT_GT(n, 0);
+ ASSERT_EQ(buf[n - 1], '\n');
+
+ ASSERT_EQ(sscanf(buf, "%d", &nr_open), 1);
+
+ ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit));
+
+ /* bump fs.nr_open */
+ n2 = sprintf(buf2, "%d\n", nr_open + 1024);
+ lseek(fd, 0, SEEK_SET);
+ write(fd, buf2, n2);
+
+ /* bump ulimit -n */
+ rlimit.rlim_cur = nr_open + 1024;
+ rlimit.rlim_max = nr_open + 1024;
+ EXPECT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit)) {
+ lseek(fd, 0, SEEK_SET); /* on failure: write the saved fs.nr_open text back before exiting */
+ write(fd, buf, n);
+ exit(EXIT_FAILURE);
+ }
+
+ /* get a descriptor past the old fs.nr_open */
+ EXPECT_GE(dup2(2, nr_open + 64), 0) {
+ lseek(fd, 0, SEEK_SET); /* on failure: write the saved fs.nr_open text back before exiting */
+ write(fd, buf, n);
+ exit(EXIT_FAILURE);
+ }
+
+ /* get descriptor table shared */
+ pid = sys_clone3(&args, sizeof(args));
+ EXPECT_GE(pid, 0) {
+ lseek(fd, 0, SEEK_SET); /* on failure: write the saved fs.nr_open text back before exiting */
+ write(fd, buf, n);
+ exit(EXIT_FAILURE);
+ }
+
+ if (pid == 0) { /* child: performs the actual check and reports through its exit status */
+ int err;
+
+ /* restore fs.nr_open */
+ lseek(fd, 0, SEEK_SET);
+ write(fd, buf, n);
+ /* ... and now unshare(CLONE_FILES) must fail with EMFILE */
+ err = unshare(CLONE_FILES);
+ EXPECT_EQ(err, -1)
+ exit(EXIT_FAILURE);
+ EXPECT_EQ(errno, EMFILE)
+ exit(EXIT_FAILURE);
+ exit(EXIT_SUCCESS);
+ }
+
+ EXPECT_EQ(waitpid(pid, &status, 0), pid); /* parent: the child's exit status carries the result */
+ EXPECT_EQ(true, WIFEXITED(status));
+ EXPECT_EQ(0, WEXITSTATUS(status));
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/cpufreq/cpufreq.sh b/tools/testing/selftests/cpufreq/cpufreq.sh
index a8b1dbc0a3a5..e350c521b467 100755
--- a/tools/testing/selftests/cpufreq/cpufreq.sh
+++ b/tools/testing/selftests/cpufreq/cpufreq.sh
@@ -231,6 +231,21 @@ do_suspend()
for i in `seq 1 $2`; do
printf "Starting $1\n"
+
+ if [ "$3" = "rtc" ]; then
+ if ! command -v rtcwake &> /dev/null; then
+ printf "rtcwake could not be found, please install it.\n"
+ return 1
+ fi
+
+ rtcwake -m $filename -s 15
+
+ if [ $? -ne 0 ]; then
+ printf "Failed to suspend using RTC wake alarm\n"
+ return 1
+ fi
+ fi
+
echo $filename > $SYSFS/power/state
printf "Came out of $1\n"
diff --git a/tools/testing/selftests/cpufreq/main.sh b/tools/testing/selftests/cpufreq/main.sh
index a0eb84cf7167..f12ff7416e41 100755
--- a/tools/testing/selftests/cpufreq/main.sh
+++ b/tools/testing/selftests/cpufreq/main.sh
@@ -24,6 +24,8 @@ helpme()
[-t <basic: Basic cpufreq testing
suspend: suspend/resume,
hibernate: hibernate/resume,
+ suspend_rtc: suspend/resume back using the RTC wakeup alarm,
+ hibernate_rtc: hibernate/resume back using the RTC wakeup alarm,
modtest: test driver or governor modules. Only to be used with -d or -g options,
sptest1: Simple governor switch to produce lockdep.
sptest2: Concurrent governor switch to produce lockdep.
@@ -76,7 +78,8 @@ parse_arguments()
helpme
;;
- t) # --func_type (Function to perform: basic, suspend, hibernate, modtest, sptest1/2/3/4 (default: basic))
+ t) # --func_type (Function to perform: basic, suspend, hibernate,
+ # suspend_rtc, hibernate_rtc, modtest, sptest1/2/3/4 (default: basic))
FUNC=$OPTARG
;;
@@ -121,6 +124,14 @@ do_test()
do_suspend "hibernate" 1
;;
+ "suspend_rtc")
+ do_suspend "suspend" 1 rtc
+ ;;
+
+ "hibernate_rtc")
+ do_suspend "hibernate" 1 rtc
+ ;;
+
"modtest")
# Do we have modules in place?
if [ -z $DRIVER_MOD ] && [ -z $GOVERNOR_MOD ]; then
diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile
index e54f382bcb02..39fb97a8c1df 100644
--- a/tools/testing/selftests/drivers/net/Makefile
+++ b/tools/testing/selftests/drivers/net/Makefile
@@ -1,8 +1,11 @@
# SPDX-License-Identifier: GPL-2.0
-TEST_INCLUDES := $(wildcard lib/py/*.py)
+TEST_INCLUDES := $(wildcard lib/py/*.py) \
+ ../../net/net_helper.sh \
+ ../../net/lib.sh \
TEST_PROGS := \
+ netcons_basic.sh \
ping.py \
queues.py \
stats.py \
diff --git a/tools/testing/selftests/drivers/net/config b/tools/testing/selftests/drivers/net/config
index f6a58ce8a230..a2d8af60876d 100644
--- a/tools/testing/selftests/drivers/net/config
+++ b/tools/testing/selftests/drivers/net/config
@@ -1,2 +1,6 @@
CONFIG_IPV6=y
CONFIG_NETDEVSIM=m
+CONFIG_CONFIGFS_FS=y
+CONFIG_NETCONSOLE=m
+CONFIG_NETCONSOLE_DYNAMIC=y
+CONFIG_NETCONSOLE_EXTENDED_LOG=y
diff --git a/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py
index 026d98976c35..05b6fbb3fcdd 100755
--- a/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py
+++ b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py
@@ -1,6 +1,7 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
+import errno
import time
import os
from lib.py import ksft_run, ksft_exit, ksft_pr
@@ -61,7 +62,7 @@ def test_pp_alloc(cfg, netdevnl):
try:
stats = get_stats()
except NlError as e:
- if e.nl_msg.error == -95:
+ if e.nl_msg.error == -errno.EOPNOTSUPP:
stats = {}
else:
raise
diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py
index 011508ca604b..9d7adb3cf33b 100755
--- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py
+++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py
@@ -3,7 +3,7 @@
import datetime
import random
-from lib.py import ksft_run, ksft_pr, ksft_exit, ksft_eq, ksft_ge, ksft_lt
+from lib.py import ksft_run, ksft_pr, ksft_exit, ksft_eq, ksft_ne, ksft_ge, ksft_lt
from lib.py import NetDrvEpEnv
from lib.py import EthtoolFamily, NetdevFamily
from lib.py import KsftSkipEx
@@ -90,10 +90,10 @@ def _send_traffic_check(cfg, port, name, params):
ksft_ge(directed, 20000, f"traffic on {name}: " + str(cnts))
if params.get('noise'):
ksft_lt(sum(cnts[i] for i in params['noise']), directed / 2,
- "traffic on other queues:" + str(cnts))
+ f"traffic on other queues ({name})':" + str(cnts))
if params.get('empty'):
ksft_eq(sum(cnts[i] for i in params['empty']), 0,
- "traffic on inactive queues: " + str(cnts))
+ f"traffic on inactive queues ({name}): " + str(cnts))
def test_rss_key_indir(cfg):
@@ -302,6 +302,78 @@ def test_hitless_key_update(cfg):
ksft_eq(carrier1 - carrier0, 0)
+def test_rss_context_dump(cfg):
+ """
+ Test dumping RSS contexts. This test mostly exercises the kernel APIs.
+ """
+
+ # Get a random key of the right size
+ data = get_rss(cfg)
+ if 'rss-hash-key' in data:
+ key_data = _rss_key_rand(len(data['rss-hash-key']))
+ key = _rss_key_str(key_data)
+ else:
+ key_data = []
+ key = "ba:ad"
+
+ ids = []
+ try:
+ ids.append(ethtool_create(cfg, "-X", f"context new"))
+ defer(ethtool, f"-X {cfg.ifname} context {ids[-1]} delete")
+
+ ids.append(ethtool_create(cfg, "-X", f"context new weight 1 1"))
+ defer(ethtool, f"-X {cfg.ifname} context {ids[-1]} delete")
+
+ ids.append(ethtool_create(cfg, "-X", f"context new hkey {key}"))
+ defer(ethtool, f"-X {cfg.ifname} context {ids[-1]} delete")
+ except CmdExitFailure:
+ if not ids:
+ raise KsftSkipEx("Unable to add any contexts")
+ ksft_pr(f"Added only {len(ids)} out of 3 contexts")
+
+ expect_tuples = set([(cfg.ifname, -1)] + [(cfg.ifname, ctx_id) for ctx_id in ids])
+
+ # Dump all
+ ctxs = cfg.ethnl.rss_get({}, dump=True)
+ tuples = [(c['header']['dev-name'], c.get('context', -1)) for c in ctxs]
+ ksft_eq(len(tuples), len(set(tuples)), "duplicates in context dump")
+ ctx_tuples = set([ctx for ctx in tuples if ctx[0] == cfg.ifname])
+ ksft_eq(expect_tuples, ctx_tuples)
+
+ # Sanity-check the results
+ for data in ctxs:
+ ksft_ne(set(data['indir']), {0}, "indir table is all zero")
+ ksft_ne(set(data.get('hkey', [1])), {0}, "key is all zero")
+
+ # More specific checks
+ if len(ids) > 1 and data.get('context') == ids[1]:
+ ksft_eq(set(data['indir']), {0, 1},
+ "ctx1 - indir table mismatch")
+ if len(ids) > 2 and data.get('context') == ids[2]:
+ ksft_eq(data['hkey'], bytes(key_data), "ctx2 - key mismatch")
+
+ # Ifindex filter
+ ctxs = cfg.ethnl.rss_get({'header': {'dev-name': cfg.ifname}}, dump=True)
+ tuples = [(c['header']['dev-name'], c.get('context', -1)) for c in ctxs]
+ ctx_tuples = set(tuples)
+ ksft_eq(len(tuples), len(ctx_tuples), "duplicates in context dump")
+ ksft_eq(expect_tuples, ctx_tuples)
+
+ # Skip ctx 0
+ expect_tuples.remove((cfg.ifname, -1))
+
+ ctxs = cfg.ethnl.rss_get({'start-context': 1}, dump=True)
+ tuples = [(c['header']['dev-name'], c.get('context', -1)) for c in ctxs]
+ ksft_eq(len(tuples), len(set(tuples)), "duplicates in context dump")
+ ctx_tuples = set([ctx for ctx in tuples if ctx[0] == cfg.ifname])
+ ksft_eq(expect_tuples, ctx_tuples)
+
+ # And finally both with ifindex and skip main
+ ctxs = cfg.ethnl.rss_get({'header': {'dev-name': cfg.ifname}, 'start-context': 1}, dump=True)
+ ctx_tuples = set([(c['header']['dev-name'], c.get('context', -1)) for c in ctxs])
+ ksft_eq(expect_tuples, ctx_tuples)
+
+
def test_rss_context(cfg, ctx_cnt=1, create_with_cfg=None):
"""
Test separating traffic into RSS contexts.
@@ -542,7 +614,7 @@ def main() -> None:
ksft_run([test_rss_key_indir, test_rss_queue_reconfigure,
test_rss_resize, test_hitless_key_update,
test_rss_context, test_rss_context4, test_rss_context32,
- test_rss_context_queue_reconfigure,
+ test_rss_context_dump, test_rss_context_queue_reconfigure,
test_rss_context_overlap, test_rss_context_overlap2,
test_rss_context_out_of_order, test_rss_context4_create_with_cfg],
args=(cfg, ))
diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py
index a5e800b8f103..1ea9bb695e94 100644
--- a/tools/testing/selftests/drivers/net/lib/py/env.py
+++ b/tools/testing/selftests/drivers/net/lib/py/env.py
@@ -4,6 +4,7 @@ import os
import time
from pathlib import Path
from lib.py import KsftSkipEx, KsftXfailEx
+from lib.py import ksft_setup
from lib.py import cmd, ethtool, ip
from lib.py import NetNS, NetdevSimDev
from .remote import Remote
@@ -14,7 +15,7 @@ def _load_env_file(src_path):
src_dir = Path(src_path).parent.resolve()
if not (src_dir / "net.config").exists():
- return env
+ return ksft_setup(env)
with open((src_dir / "net.config").as_posix(), 'r') as fp:
for line in fp.readlines():
@@ -30,7 +31,7 @@ def _load_env_file(src_path):
if len(pair) != 2:
raise Exception("Can't parse configuration line:", full_file)
env[pair[0]] = pair[1]
- return env
+ return ksft_setup(env)
class NetDrvEnv:
diff --git a/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh b/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh
index 877cd6df94a1..fe905a7f34b3 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh
@@ -2,6 +2,7 @@
# SPDX-License-Identifier: GPL-2.0
lib_dir=$(dirname $0)/../../../net/forwarding
+ethtool_lib_dir=$(dirname $0)/../hw
ALL_TESTS="
autoneg
@@ -11,7 +12,7 @@ ALL_TESTS="
NUM_NETIFS=2
: ${TIMEOUT:=30000} # ms
source $lib_dir/lib.sh
-source $lib_dir/ethtool_lib.sh
+source $ethtool_lib_dir/ethtool_lib.sh
setup_prepare()
{
diff --git a/tools/testing/selftests/drivers/net/netcons_basic.sh b/tools/testing/selftests/drivers/net/netcons_basic.sh
new file mode 100755
index 000000000000..06021b2059b7
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netcons_basic.sh
@@ -0,0 +1,234 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test creates two netdevsim virtual interfaces, assigns one of them (the
+# "destination interface") to a new namespace, and assigns IP addresses to both
+# interfaces.
+#
+# It listens on the destination interface using socat and configures a dynamic
+# target on netconsole, pointing to the destination IP address.
+#
+# Finally, it checks whether the message was received properly on the
+# destination interface. Note that this test may pollute the kernel log buffer
+# (dmesg) and relies on dynamic configuration and namespaces being configured.
+#
+# Author: Breno Leitao <leitao@debian.org>
+
+set -euo pipefail
+
+SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+# Simple script to test dynamic targets in netconsole
+SRCIF="" # to be populated later
+SRCIP=192.168.1.1
+DSTIF="" # to be populated later
+DSTIP=192.168.1.2
+
+PORT="6666"
+MSG="netconsole selftest"
+TARGET=$(mktemp -u netcons_XXXXX)
+DEFAULT_PRINTK_VALUES=$(cat /proc/sys/kernel/printk)
+NETCONS_CONFIGFS="/sys/kernel/config/netconsole"
+NETCONS_PATH="${NETCONS_CONFIGFS}"/"${TARGET}"
+# NAMESPACE will be populated by setup_ns with a random value
+NAMESPACE=""
+
+# IDs for netdevsim
+NSIM_DEV_1_ID=$((256 + RANDOM % 256))
+NSIM_DEV_2_ID=$((512 + RANDOM % 256))
+
+# Used to create and delete namespaces
+source "${SCRIPTDIR}"/../../net/lib.sh
+source "${SCRIPTDIR}"/../../net/net_helper.sh
+
+# Create netdevsim interfaces
+create_ifaces() {
+ local NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device
+
+ echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_NEW"
+ echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_NEW"
+ udevadm settle 2> /dev/null || true
+
+ local NSIM1=/sys/bus/netdevsim/devices/netdevsim"$NSIM_DEV_1_ID"
+ local NSIM2=/sys/bus/netdevsim/devices/netdevsim"$NSIM_DEV_2_ID"
+
+ # These are global variables
+ SRCIF=$(find "$NSIM1"/net -maxdepth 1 -type d ! \
+ -path "$NSIM1"/net -exec basename {} \;)
+ DSTIF=$(find "$NSIM2"/net -maxdepth 1 -type d ! \
+ -path "$NSIM2"/net -exec basename {} \;)
+}
+
+link_ifaces() {
+ local NSIM_DEV_SYS_LINK="/sys/bus/netdevsim/link_device"
+ local SRCIF_IFIDX=$(cat /sys/class/net/"$SRCIF"/ifindex)
+ local DSTIF_IFIDX=$(cat /sys/class/net/"$DSTIF"/ifindex)
+
+ exec {NAMESPACE_FD}</var/run/netns/"${NAMESPACE}"
+ exec {INITNS_FD}</proc/self/ns/net
+
+ # Bind the dst interface to namespace
+ ip link set "${DSTIF}" netns "${NAMESPACE}"
+
+ # Linking one device to the other one (on the other namespace)
+ if ! echo "${INITNS_FD}:$SRCIF_IFIDX $NAMESPACE_FD:$DSTIF_IFIDX" > $NSIM_DEV_SYS_LINK
+ then
+ echo "linking netdevsim1 with netdevsim2 should succeed"
+ cleanup
+ exit "${ksft_skip}"
+ fi
+}
+
+function configure_ip() {
+ # Configure the IPs for both interfaces
+ ip netns exec "${NAMESPACE}" ip addr add "${DSTIP}"/24 dev "${DSTIF}"
+ ip netns exec "${NAMESPACE}" ip link set "${DSTIF}" up
+
+ ip addr add "${SRCIP}"/24 dev "${SRCIF}"
+ ip link set "${SRCIF}" up
+}
+
+function set_network() {
+ # setup_ns function is coming from lib.sh
+ setup_ns NAMESPACE
+
+ # Create both interfaces, and assign the destination to a different
+ # namespace
+ create_ifaces
+
+ # Link both interfaces back to back
+ link_ifaces
+
+ configure_ip
+}
+
+function create_dynamic_target() {
+ DSTMAC=$(ip netns exec "${NAMESPACE}" \
+ ip link show "${DSTIF}" | awk '/ether/ {print $2}')
+
+ # Create a dynamic target
+ mkdir "${NETCONS_PATH}"
+
+ echo "${DSTIP}" > "${NETCONS_PATH}"/remote_ip
+ echo "${SRCIP}" > "${NETCONS_PATH}"/local_ip
+ echo "${DSTMAC}" > "${NETCONS_PATH}"/remote_mac
+ echo "${SRCIF}" > "${NETCONS_PATH}"/dev_name
+
+ echo 1 > "${NETCONS_PATH}"/enabled
+}
+
+# Undo everything the test set up: netconsole target, netdevsim devices,
+# namespace and printk levels.
+#
+# This runs from the EXIT trap while "set -e" is active and is also called
+# explicitly before "exit" in link_ifaces, so it may run twice or before the
+# target was ever created. Every step that can legitimately fail is therefore
+# best-effort, so that one failure does not skip the remaining steps.
+function cleanup() {
+ local NSIM_DEV_SYS_DEL="/sys/bus/netdevsim/del_device"
+
+ if [ -d "${NETCONS_PATH}" ]; then
+ # delete netconsole dynamic reconfiguration
+ echo 0 > "${NETCONS_PATH}"/enabled || true
+ # Remove the configfs entry
+ rmdir "${NETCONS_PATH}" || true
+ fi
+
+ # Delete netdevsim devices (they may not exist or be gone already)
+ echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_DEL" || true
+ echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_DEL" || true
+
+ # this is coming from lib.sh
+ cleanup_all_ns || true
+
+ # Restoring printk configurations
+ echo "${DEFAULT_PRINTK_VALUES}" > /proc/sys/kernel/printk
+}
+
+function listen_port_and_save_to() {
+ local OUTPUT=${1}
+ # Just wait for 2 seconds
+ timeout 2 ip netns exec "${NAMESPACE}" \
+ socat UDP-LISTEN:"${PORT}",fork "${OUTPUT}"
+}
+
+function validate_result() {
+ local TMPFILENAME="$1"
+
+ # Check if the file exists
+ if [ ! -f "$TMPFILENAME" ]; then
+ echo "FAIL: File was not generated." >&2
+ exit "${ksft_fail}"
+ fi
+
+ if ! grep -q "${MSG}" "${TMPFILENAME}"; then
+ echo "FAIL: ${MSG} not found in ${TMPFILENAME}" >&2
+ cat "${TMPFILENAME}" >&2
+ exit "${ksft_fail}"
+ fi
+
+ # Delete the file once it is validated, otherwise keep it
+ # for debugging purposes
+ rm "${TMPFILENAME}"
+ exit "${ksft_pass}"
+}
+
+function check_for_dependencies() { # exit with ksft_skip unless running as root with the tools, configfs dir and free addresses the test needs
+ if [ "$(id -u)" -ne 0 ]; then
+ echo "This test must be run as root" >&2
+ exit "${ksft_skip}"
+ fi
+
+ if ! which socat > /dev/null ; then
+ echo "SKIP: socat(1) is not available" >&2
+ exit "${ksft_skip}"
+ fi
+
+ if ! which ip > /dev/null ; then
+ echo "SKIP: ip(1) is not available" >&2
+ exit "${ksft_skip}"
+ fi
+
+ if ! which udevadm > /dev/null ; then
+ echo "SKIP: udevadm(1) is not available" >&2
+ exit "${ksft_skip}"
+ fi
+
+ if [ ! -d "${NETCONS_CONFIGFS}" ]; then
+ echo "SKIP: directory ${NETCONS_CONFIGFS} does not exist. Check if NETCONSOLE_DYNAMIC is enabled" >&2
+ exit "${ksft_skip}"
+ fi
+
+ if ip link show "${DSTIF}" 2> /dev/null; then # NOTE(review): DSTIF is still "" here (create_ifaces fills it in later) - confirm this check is effective
+ echo "SKIP: interface ${DSTIF} exists in the system. Not overwriting it." >&2
+ exit "${ksft_skip}"
+ fi
+
+ if ip addr list | grep -E "inet.*(${SRCIP}|${DSTIP})" 2> /dev/null; then # skip if either test address is already configured on this host
+ echo "SKIP: IPs already in use. Skipping it" >&2
+ exit "${ksft_skip}"
+ fi
+}
+
+# ========== #
+# Start here #
+# ========== #
+modprobe netdevsim 2> /dev/null || true
+modprobe netconsole 2> /dev/null || true
+
+# The content of kmsg will be saved to the following file
+OUTPUT_FILE="/tmp/${TARGET}"
+
+# Check for basic system dependency and exit if not found
+check_for_dependencies
+# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5)
+echo "6 5" > /proc/sys/kernel/printk
+# Remove the namespace, interfaces and netconsole target on exit
+trap cleanup EXIT
+# Create one namespace and two interfaces
+set_network
+# Create a dynamic target for netconsole
+create_dynamic_target
+# Listen on the netconsole port inside the namespace and destination interface
+listen_port_and_save_to "${OUTPUT_FILE}" &
+# Wait for socat to start and listen to the port.
+wait_local_port_listen "${NAMESPACE}" "${PORT}" udp
+# Send the message
+echo "${MSG}: ${TARGET}" > /dev/kmsg
+# Wait until socat saves the file to disk
+busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
+
+# Make sure the message was received in the dst part
+# and exit
+validate_result "${OUTPUT_FILE}"
diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py
index 820b8e0a22c6..63e3c045a3b2 100755
--- a/tools/testing/selftests/drivers/net/stats.py
+++ b/tools/testing/selftests/drivers/net/stats.py
@@ -1,10 +1,13 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
+import errno
from lib.py import ksft_run, ksft_exit, ksft_pr
from lib.py import ksft_ge, ksft_eq, ksft_in, ksft_true, ksft_raises, KsftSkipEx, KsftXfailEx
+from lib.py import ksft_disruptive
from lib.py import EthtoolFamily, NetdevFamily, RtnlFamily, NlError
from lib.py import NetDrvEnv
+from lib.py import ip, defer
ethnl = EthtoolFamily()
netfam = NetdevFamily()
@@ -17,7 +20,7 @@ def check_pause(cfg) -> None:
try:
ethnl.pause_get({"header": {"dev-index": cfg.ifindex}})
except NlError as e:
- if e.error == 95:
+ if e.error == errno.EOPNOTSUPP:
raise KsftXfailEx("pause not supported by the device")
raise
@@ -32,7 +35,7 @@ def check_fec(cfg) -> None:
try:
ethnl.fec_get({"header": {"dev-index": cfg.ifindex}})
except NlError as e:
- if e.error == 95:
+ if e.error == errno.EOPNOTSUPP:
raise KsftXfailEx("FEC not supported by the device")
raise
@@ -117,7 +120,7 @@ def qstat_by_ifindex(cfg) -> None:
# loopback has no stats
with ksft_raises(NlError) as cm:
netfam.qstats_get({"ifindex": 1}, dump=True)
- ksft_eq(cm.exception.nl_msg.error, -95)
+ ksft_eq(cm.exception.nl_msg.error, -errno.EOPNOTSUPP)
ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex')
# Try to get stats for lowest unused ifindex but not 0
@@ -133,9 +136,31 @@ def qstat_by_ifindex(cfg) -> None:
ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex')
+@ksft_disruptive
+def check_down(cfg) -> None:
+ try:
+ qstat = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+ except NlError as e:
+ if e.error == errno.EOPNOTSUPP:
+ raise KsftSkipEx("qstats not supported by the device")
+ raise
+
+ ip(f"link set dev {cfg.dev['ifname']} down")
+ defer(ip, f"link set dev {cfg.dev['ifname']} up")
+
+ qstat2 = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+ for k, v in qstat.items():
+ ksft_ge(qstat2[k], qstat[k], comment=f"{k} went backwards on device down")
+
+ # exercise per-queue API to make sure that "device down" state
+ # is handled correctly and doesn't crash
+ netfam.qstats_get({"ifindex": cfg.ifindex, "scope": "queue"}, dump=True)
+
+
def main() -> None:
with NetDrvEnv(__file__) as cfg:
- ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex],
+ ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex,
+ check_down],
args=(cfg, ))
ksft_exit()
diff --git a/tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c b/tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c
index ea0cdc37b44f..7ee7492138c6 100644
--- a/tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c
+++ b/tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c
@@ -257,12 +257,6 @@ TEST_F(attest_fixture, att_inval_addr)
att_inval_addr_test(&self->uvio_attest.meas_addr, _metadata, self);
}
-static void __attribute__((constructor)) __constructor_order_last(void)
-{
- if (!__constructor_order)
- __constructor_order = _CONSTRUCTOR_ORDER_BACKWARD;
-}
-
int main(int argc, char **argv)
{
int fd = open(UV_PATH, O_ACCMODE);
diff --git a/tools/testing/selftests/exec/execveat.c b/tools/testing/selftests/exec/execveat.c
index 6418ded40bdd..071e03532cba 100644
--- a/tools/testing/selftests/exec/execveat.c
+++ b/tools/testing/selftests/exec/execveat.c
@@ -117,7 +117,7 @@ static int check_execveat_invoked_rc(int fd, const char *path, int flags,
}
if ((WEXITSTATUS(status) != expected_rc) &&
(WEXITSTATUS(status) != expected_rc2)) {
- ksft_print_msg("child %d exited with %d not %d nor %d\n",
+ ksft_print_msg("child %d exited with %d neither %d nor %d\n",
child, WEXITSTATUS(status), expected_rc,
expected_rc2);
ksft_test_result_fail("%s\n", test_name);
diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c b/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c
index e044f5fc57fd..70cb0c8b21cf 100644
--- a/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c
+++ b/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c
@@ -319,8 +319,11 @@ static void test_listmount_ns(void)
* Tell our parent how many mounts we have, and then wait for it
* to tell us we're done.
*/
- write(child_ready_pipe[1], &nr_mounts, sizeof(nr_mounts));
- read(parent_ready_pipe[0], &cval, sizeof(cval));
+ if (write(child_ready_pipe[1], &nr_mounts, sizeof(nr_mounts)) !=
+ sizeof(nr_mounts))
+ ret = NSID_ERROR;
+ if (read(parent_ready_pipe[0], &cval, sizeof(cval)) != sizeof(cval))
+ ret = NSID_ERROR;
exit(NSID_PASS);
}
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc b/tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc
index c45094d1e1d2..094419e190c2 100644
--- a/tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc
+++ b/tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc
@@ -6,6 +6,18 @@ original_group=`stat -c "%g" .`
original_owner=`stat -c "%u" .`
mount_point=`stat -c '%m' .`
+
+# If stat -c '%m' does not work (e.g. busybox) or failed, try to use the
+# current working directory (which should be a tracefs) as the mount point.
+if [ ! -d "$mount_point" ]; then
+ if mount | grep -qw $PWD ; then
+ mount_point=$PWD
+ else
+ # If PWD doesn't work, that is an environmental problem.
+ exit_unresolved
+ fi
+fi
+
mount_options=`mount | grep "$mount_point" | sed -e 's/.*(\(.*\)).*/\1/'`
# find another owner and group that is not the original
@@ -83,32 +95,38 @@ run_tests() {
done
}
-mount -o remount,"$new_options" .
+# Run the tests twice as leftovers can cause issues
+for loop in 1 2 ; do
-run_tests
+ echo "Running iteration $loop"
-mount -o remount,"$mount_options" .
+ mount -o remount,"$new_options" .
-for d in "." "events" "events/sched" "events/sched/sched_switch" "events/sched/sched_switch/enable" $canary; do
- test "$d" $original_group
-done
+ run_tests
+
+ mount -o remount,"$mount_options" .
+
+ for d in "." "events" "events/sched" "events/sched/sched_switch" "events/sched/sched_switch/enable" $canary; do
+ test "$d" $original_group
+ done
# check instances as well
-chgrp $other_group instances
+ chgrp $other_group instances
-instance="$(mktemp -u test-XXXXXX)"
+ instance="$(mktemp -u test-XXXXXX)"
-mkdir instances/$instance
+ mkdir instances/$instance
-cd instances/$instance
+ cd instances/$instance
-run_tests
+ run_tests
-cd ../..
+ cd ../..
-rmdir instances/$instance
+ rmdir instances/$instance
-chgrp $original_group instances
+ chgrp $original_group instances
+done
exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_uprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_uprobe.tc
new file mode 100644
index 000000000000..a275decdc880
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_uprobe.tc
@@ -0,0 +1,26 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Generic dynamic event - add/remove/test uprobe events
+# requires: uprobe_events
+
+echo 0 > events/enable
+echo > dynamic_events
+
+echo 'cat /proc/$$/maps' | /bin/sh | \
+ grep "r-xp .*/bin/.*sh$" | \
+ awk '{printf "p:myevent %s:0x%s\n", $6,$3 }' >> uprobe_events
+
+grep -q myevent uprobe_events
+test -d events/uprobes/myevent
+
+echo 1 > events/uprobes/myevent/enable
+echo 'ls' | /bin/sh > /dev/null
+echo 0 > events/uprobes/myevent/enable
+grep -q myevent trace
+
+echo "-:myevent" >> uprobe_events
+! grep -q myevent uprobe_events
+
+echo > uprobe_events
+
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc
index 073a748b9380..263f6b798c85 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc
@@ -19,7 +19,14 @@ fail() { # mesg
FILTER=set_ftrace_filter
FUNC1="schedule"
-FUNC2="sched_tick"
+if grep '^sched_tick\b' available_filter_functions; then
+ FUNC2="sched_tick"
+elif grep '^scheduler_tick\b' available_filter_functions; then
+ FUNC2="scheduler_tick"
+else
+ exit_unresolved
+fi
+
ALL_FUNCS="#### all functions enabled ####"
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc
index e21c9c27ece4..77f4c07cdcb8 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc
@@ -1,7 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: Kprobe event char type argument
-# requires: kprobe_events
+# requires: kprobe_events available_filter_functions
case `uname -m` in
x86_64)
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
index 93217d459556..39001073f7ed 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
@@ -1,7 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: Kprobe event string type argument
-# requires: kprobe_events
+# requires: kprobe_events available_filter_functions
case `uname -m` in
x86_64)
diff --git a/tools/testing/selftests/hid/hid_bpf.c b/tools/testing/selftests/hid/hid_bpf.c
index 75b7b4ef6cfa..c4bc2aa508c3 100644
--- a/tools/testing/selftests/hid/hid_bpf.c
+++ b/tools/testing/selftests/hid/hid_bpf.c
@@ -1357,12 +1357,6 @@ static int libbpf_print_fn(enum libbpf_print_level level,
return 0;
}
-static void __attribute__((constructor)) __constructor_order_last(void)
-{
- if (!__constructor_order)
- __constructor_order = _CONSTRUCTOR_ORDER_BACKWARD;
-}
-
int main(int argc, char **argv)
{
/* Use libbpf 1.0 API mode */
diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c
index 6343f4053bd4..4927b9add5ad 100644
--- a/tools/testing/selftests/iommu/iommufd.c
+++ b/tools/testing/selftests/iommu/iommufd.c
@@ -825,7 +825,7 @@ TEST_F(iommufd_ioas, copy_area)
{
struct iommu_ioas_copy copy_cmd = {
.size = sizeof(copy_cmd),
- .flags = IOMMU_IOAS_MAP_FIXED_IOVA,
+ .flags = IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_WRITEABLE,
.dst_ioas_id = self->ioas_id,
.src_ioas_id = self->ioas_id,
.length = PAGE_SIZE,
@@ -1318,7 +1318,7 @@ TEST_F(iommufd_ioas, copy_sweep)
{
struct iommu_ioas_copy copy_cmd = {
.size = sizeof(copy_cmd),
- .flags = IOMMU_IOAS_MAP_FIXED_IOVA,
+ .flags = IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_WRITEABLE,
.src_ioas_id = self->ioas_id,
.dst_iova = MOCK_APERTURE_START,
.length = MOCK_PAGE_SIZE,
@@ -1608,7 +1608,7 @@ TEST_F(iommufd_mock_domain, user_copy)
};
struct iommu_ioas_copy copy_cmd = {
.size = sizeof(copy_cmd),
- .flags = IOMMU_IOAS_MAP_FIXED_IOVA,
+ .flags = IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_WRITEABLE,
.dst_ioas_id = self->ioas_id,
.dst_iova = MOCK_APERTURE_START,
.length = BUFFER_SIZE,
diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
index b8967b6e29d5..29fedf609611 100644
--- a/tools/testing/selftests/kselftest.h
+++ b/tools/testing/selftests/kselftest.h
@@ -61,6 +61,7 @@
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#endif
+#if defined(__i386__) || defined(__x86_64__) /* arch */
/*
* gcc cpuid.h provides __cpuid_count() since v4.4.
* Clang/LLVM cpuid.h provides __cpuid_count() since v3.4.0.
@@ -75,6 +76,7 @@
: "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
: "0" (level), "2" (count))
#endif
+#endif /* end arch */
/* define kselftest exit codes */
#define KSFT_PASS 0
@@ -371,15 +373,7 @@ static inline __noreturn __printf(1, 2) void ksft_exit_fail_msg(const char *msg,
static inline __noreturn void ksft_exit_fail_perror(const char *msg)
{
-#ifndef NOLIBC
ksft_exit_fail_msg("%s: %s (%d)\n", msg, strerror(errno), errno);
-#else
- /*
- * nolibc doesn't provide strerror() and it seems
- * inappropriate to add one, just print the errno.
- */
- ksft_exit_fail_msg("%s: %d)\n", msg, errno);
-#endif
}
static inline __noreturn void ksft_exit_xfail(void)
diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh
index 74954f6a8f94..2c3c58e65a41 100644
--- a/tools/testing/selftests/kselftest/runner.sh
+++ b/tools/testing/selftests/kselftest/runner.sh
@@ -111,8 +111,11 @@ run_one()
stdbuf="/usr/bin/stdbuf --output=L "
fi
eval kselftest_cmd_args="\$${kselftest_cmd_args_ref:-}"
- cmd="$stdbuf ./$BASENAME_TEST $kselftest_cmd_args"
- if [ ! -x "$TEST" ]; then
+ if [ -x "$TEST" ]; then
+ cmd="$stdbuf ./$BASENAME_TEST $kselftest_cmd_args"
+ elif [ -x "./ksft_runner.sh" ]; then
+ cmd="$stdbuf ./ksft_runner.sh ./$BASENAME_TEST"
+ else
echo "# Warning: file $TEST is not executable"
if [ $(head -n 1 "$TEST" | cut -c -2) = "#!" ]
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index 40723a6a083f..a5a72415e37b 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -488,12 +488,6 @@
* Use once to append a main() to the test file.
*/
#define TEST_HARNESS_MAIN \
- static void __attribute__((constructor)) \
- __constructor_order_last(void) \
- { \
- if (!__constructor_order) \
- __constructor_order = _CONSTRUCTOR_ORDER_BACKWARD; \
- } \
int main(int argc, char **argv) { \
return test_harness_run(argc, argv); \
}
@@ -824,7 +818,7 @@
item->prev = item; \
return; \
} \
- if (__constructor_order == _CONSTRUCTOR_ORDER_FORWARD) { \
+ if (__constructor_order_forward) { \
item->next = NULL; \
item->prev = head->prev; \
item->prev->next = item; \
@@ -888,10 +882,7 @@ struct __test_xfail {
}
static struct __fixture_metadata *__fixture_list = &_fixture_global;
-static int __constructor_order;
-
-#define _CONSTRUCTOR_ORDER_FORWARD 1
-#define _CONSTRUCTOR_ORDER_BACKWARD -1
+static bool __constructor_order_forward;
static inline void __register_fixture(struct __fixture_metadata *f)
{
@@ -942,7 +933,7 @@ static inline bool __test_passed(struct __test_metadata *metadata)
* list so tests are run in source declaration order.
* https://gcc.gnu.org/onlinedocs/gccint/Initialization.html
* However, it seems not all toolchains do this correctly, so use
- * __constructor_order to detect which direction is called first
+ * __constructor_order_forward to detect which direction is called first
* and adjust list building logic to get things running in the right
* direction.
*/
@@ -1337,8 +1328,7 @@ static int test_harness_run(int argc, char **argv)
static void __attribute__((constructor)) __constructor_order_first(void)
{
- if (!__constructor_order)
- __constructor_order = _CONSTRUCTOR_ORDER_FORWARD;
+ __constructor_order_forward = true;
}
#endif /* __KSELFTEST_HARNESS_H */
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 48d32c5aa3eb..0c4b254ab56b 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -152,6 +152,7 @@ TEST_GEN_PROGS_x86_64 += pre_fault_memory_test
TEST_GEN_PROGS_EXTENDED_x86_64 += x86_64/nx_huge_pages_test
TEST_GEN_PROGS_aarch64 += aarch64/aarch32_id_regs
+TEST_GEN_PROGS_aarch64 += aarch64/arch_timer_edge_cases
TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
TEST_GEN_PROGS_aarch64 += aarch64/hypercalls
TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test
@@ -163,6 +164,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq
TEST_GEN_PROGS_aarch64 += aarch64/vgic_lpi_stress
TEST_GEN_PROGS_aarch64 += aarch64/vpmu_counter_access
+TEST_GEN_PROGS_aarch64 += aarch64/no-vgic-v3
TEST_GEN_PROGS_aarch64 += access_tracking_perf_test
TEST_GEN_PROGS_aarch64 += arch_timer
TEST_GEN_PROGS_aarch64 += demand_paging_test
diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/aarch64/arch_timer_edge_cases.c
new file mode 100644
index 000000000000..a36a7e2db434
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/arch_timer_edge_cases.c
@@ -0,0 +1,1062 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * arch_timer_edge_cases.c - Tests the aarch64 timer IRQ functionality.
+ *
+ * The test validates some edge cases related to the arch-timer:
+ * - timers above the max TVAL value.
+ * - timers in the past
+ * - moving counters ahead and behind pending timers.
+ * - reprogramming timers.
+ * - timers fired multiple times.
+ * - masking/unmasking using the timer control mask.
+ *
+ * Copyright (c) 2021, Google LLC.
+ */
+
+#define _GNU_SOURCE
+
+#include <pthread.h>
+#include <sys/sysinfo.h>
+
+#include "arch_timer.h"
+#include "gic.h"
+#include "vgic.h"
+
+static const uint64_t CVAL_MAX = ~0ULL;
+/* tval is a signed 32-bit int. */
+static const int32_t TVAL_MAX = INT32_MAX;
+static const int32_t TVAL_MIN = INT32_MIN;
+
+/* After how much time we say there is no IRQ. */
+static const uint32_t TIMEOUT_NO_IRQ_US = 50000;
+
+/* A nice counter value to use as the starting one for most tests. */
+static const uint64_t DEF_CNT = (CVAL_MAX / 2);
+
+/* Number of runs. */
+static const uint32_t NR_TEST_ITERS_DEF = 5;
+
+/* Default wait test time in ms. */
+static const uint32_t WAIT_TEST_MS = 10;
+
+/* Default "long" wait test time in ms. */
+static const uint32_t LONG_WAIT_TEST_MS = 100;
+
+/* Shared with IRQ handler. */
+struct test_vcpu_shared_data {
+ atomic_t handled;
+ atomic_t spurious;
+} shared_data;
+
+struct test_args {
+ /* Virtual or physical timer and counter tests. */
+ enum arch_timer timer;
+ /* Delay used for most timer tests. */
+ uint64_t wait_ms;
+ /* Delay used in the test_long_timer_delays test. */
+ uint64_t long_wait_ms;
+ /* Number of iterations. */
+ int iterations;
+ /* Whether to test the physical timer. */
+ bool test_physical;
+ /* Whether to test the virtual timer. */
+ bool test_virtual;
+};
+
+struct test_args test_args = {
+ .wait_ms = WAIT_TEST_MS,
+ .long_wait_ms = LONG_WAIT_TEST_MS,
+ .iterations = NR_TEST_ITERS_DEF,
+ .test_physical = true,
+ .test_virtual = true,
+};
+
+static int vtimer_irq, ptimer_irq;
+
+enum sync_cmd {
+ SET_COUNTER_VALUE,
+ USERSPACE_USLEEP,
+ USERSPACE_SCHED_YIELD,
+ USERSPACE_MIGRATE_SELF,
+ NO_USERSPACE_CMD,
+};
+
+typedef void (*sleep_method_t)(enum arch_timer timer, uint64_t usec);
+
+static void sleep_poll(enum arch_timer timer, uint64_t usec);
+static void sleep_sched_poll(enum arch_timer timer, uint64_t usec);
+static void sleep_in_userspace(enum arch_timer timer, uint64_t usec);
+static void sleep_migrate(enum arch_timer timer, uint64_t usec);
+
+sleep_method_t sleep_method[] = {
+ sleep_poll,
+ sleep_sched_poll,
+ sleep_migrate,
+ sleep_in_userspace,
+};
+
+typedef void (*irq_wait_method_t)(void);
+
+static void wait_for_non_spurious_irq(void);
+static void wait_poll_for_irq(void);
+static void wait_sched_poll_for_irq(void);
+static void wait_migrate_poll_for_irq(void);
+
+irq_wait_method_t irq_wait_method[] = {
+ wait_for_non_spurious_irq,
+ wait_poll_for_irq,
+ wait_sched_poll_for_irq,
+ wait_migrate_poll_for_irq,
+};
+
+enum timer_view {
+ TIMER_CVAL,
+ TIMER_TVAL,
+};
+
+static void assert_irqs_handled(uint32_t n)
+{
+ int h = atomic_read(&shared_data.handled);
+
+ __GUEST_ASSERT(h == n, "Handled %d IRQS but expected %d", h, n);
+}
+
+static void userspace_cmd(uint64_t cmd)
+{
+ GUEST_SYNC_ARGS(cmd, 0, 0, 0, 0);
+}
+
+static void userspace_migrate_vcpu(void)
+{
+ userspace_cmd(USERSPACE_MIGRATE_SELF);
+}
+
+static void userspace_sleep(uint64_t usecs)
+{
+ GUEST_SYNC_ARGS(USERSPACE_USLEEP, usecs, 0, 0, 0);
+}
+
+static void set_counter(enum arch_timer timer, uint64_t counter)
+{
+ GUEST_SYNC_ARGS(SET_COUNTER_VALUE, counter, timer, 0, 0);
+}
+
+static void guest_irq_handler(struct ex_regs *regs)
+{
+ unsigned int intid = gic_get_and_ack_irq();
+ enum arch_timer timer;
+ uint64_t cnt, cval;
+ uint32_t ctl;
+ bool timer_condition, istatus;
+
+ if (intid == IAR_SPURIOUS) {
+ atomic_inc(&shared_data.spurious);
+ goto out;
+ }
+
+ if (intid == ptimer_irq)
+ timer = PHYSICAL;
+ else if (intid == vtimer_irq)
+ timer = VIRTUAL;
+ else
+ goto out;
+
+ ctl = timer_get_ctl(timer);
+ cval = timer_get_cval(timer);
+ cnt = timer_get_cntct(timer);
+ timer_condition = cnt >= cval;
+ istatus = (ctl & CTL_ISTATUS) && (ctl & CTL_ENABLE);
+ GUEST_ASSERT_EQ(timer_condition, istatus);
+
+ /* Disable and mask the timer. */
+ timer_set_ctl(timer, CTL_IMASK);
+
+ atomic_inc(&shared_data.handled);
+
+out:
+ gic_set_eoi(intid);
+}
+
+static void set_cval_irq(enum arch_timer timer, uint64_t cval_cycles,
+ uint32_t ctl)
+{
+ atomic_set(&shared_data.handled, 0);
+ atomic_set(&shared_data.spurious, 0);
+ timer_set_cval(timer, cval_cycles);
+ timer_set_ctl(timer, ctl);
+}
+
+static void set_tval_irq(enum arch_timer timer, uint64_t tval_cycles,
+ uint32_t ctl)
+{
+ atomic_set(&shared_data.handled, 0);
+ atomic_set(&shared_data.spurious, 0);
+ timer_set_ctl(timer, ctl);
+ timer_set_tval(timer, tval_cycles);
+}
+
+static void set_xval_irq(enum arch_timer timer, uint64_t xval, uint32_t ctl,
+ enum timer_view tv)
+{
+ switch (tv) {
+ case TIMER_CVAL:
+ set_cval_irq(timer, xval, ctl);
+ break;
+ case TIMER_TVAL:
+ set_tval_irq(timer, xval, ctl);
+ break;
+ default:
+ GUEST_FAIL("Could not get timer %d", timer);
+ }
+}
+
+/*
+ * Note that this can theoretically hang forever, so we rely on having
+ * a timeout mechanism in the "runner", like:
+ * tools/testing/selftests/kselftest/runner.sh.
+ */
+static void wait_for_non_spurious_irq(void)
+{
+ int h;
+
+ local_irq_disable();
+
+ for (h = atomic_read(&shared_data.handled); h == atomic_read(&shared_data.handled);) {
+ wfi();
+ local_irq_enable();
+ isb(); /* handle IRQ */
+ local_irq_disable();
+ }
+}
+
+/*
+ * Wait for a non-spurious IRQ by polling in the guest or in
+ * userspace (e.g. userspace_cmd=USERSPACE_SCHED_YIELD).
+ *
+ * Note that this can theoretically hang forever, so we rely on having
+ * a timeout mechanism in the "runner", like:
+ * tools/testing/selftests/kselftest/runner.sh.
+ */
+static void poll_for_non_spurious_irq(enum sync_cmd usp_cmd)
+{
+ int h;
+
+ local_irq_disable();
+
+ h = atomic_read(&shared_data.handled);
+
+ local_irq_enable();
+ while (h == atomic_read(&shared_data.handled)) {
+ if (usp_cmd == NO_USERSPACE_CMD)
+ cpu_relax();
+ else
+ userspace_cmd(usp_cmd);
+ }
+ local_irq_disable();
+}
+
+static void wait_poll_for_irq(void)
+{
+ poll_for_non_spurious_irq(NO_USERSPACE_CMD);
+}
+
+static void wait_sched_poll_for_irq(void)
+{
+ poll_for_non_spurious_irq(USERSPACE_SCHED_YIELD);
+}
+
+static void wait_migrate_poll_for_irq(void)
+{
+ poll_for_non_spurious_irq(USERSPACE_MIGRATE_SELF);
+}
+
+/*
+ * Sleep for usec microseconds by polling in the guest or in
+ * userspace (e.g. userspace_cmd=USERSPACE_SCHED_YIELD).
+ */
+static void guest_poll(enum arch_timer test_timer, uint64_t usec,
+ enum sync_cmd usp_cmd)
+{
+ uint64_t cycles = usec_to_cycles(usec);
+ /* Whichever timer we are testing with, sleep with the other. */
+ enum arch_timer sleep_timer = 1 - test_timer;
+ uint64_t start = timer_get_cntct(sleep_timer);
+
+ while ((timer_get_cntct(sleep_timer) - start) < cycles) {
+ if (usp_cmd == NO_USERSPACE_CMD)
+ cpu_relax();
+ else
+ userspace_cmd(usp_cmd);
+ }
+}
+
+static void sleep_poll(enum arch_timer timer, uint64_t usec)
+{
+ guest_poll(timer, usec, NO_USERSPACE_CMD);
+}
+
+static void sleep_sched_poll(enum arch_timer timer, uint64_t usec)
+{
+ guest_poll(timer, usec, USERSPACE_SCHED_YIELD);
+}
+
+static void sleep_migrate(enum arch_timer timer, uint64_t usec)
+{
+ guest_poll(timer, usec, USERSPACE_MIGRATE_SELF);
+}
+
+static void sleep_in_userspace(enum arch_timer timer, uint64_t usec)
+{
+ userspace_sleep(usec);
+}
+
+/*
+ * Reset the timer state to some nice values like the counter not being close
+ * to the edge, and the control register masked and disabled.
+ */
+static void reset_timer_state(enum arch_timer timer, uint64_t cnt)
+{
+ set_counter(timer, cnt);
+ timer_set_ctl(timer, CTL_IMASK);
+}
+
+static void test_timer_xval(enum arch_timer timer, uint64_t xval,
+ enum timer_view tv, irq_wait_method_t wm, bool reset_state,
+ uint64_t reset_cnt)
+{
+ local_irq_disable();
+
+ if (reset_state)
+ reset_timer_state(timer, reset_cnt);
+
+ set_xval_irq(timer, xval, CTL_ENABLE, tv);
+
+ /* This method re-enables IRQs to handle the one we're looking for. */
+ wm();
+
+ assert_irqs_handled(1);
+ local_irq_enable();
+}
+
+/*
+ * The test_timer_* functions will program the timer, wait for it, and assert
+ * the firing of the correct IRQ.
+ *
+ * These functions don't have a timeout and return as soon as they receive an
+ * IRQ. They can hang (forever), so we rely on having a timeout mechanism in
+ * the "runner", like: tools/testing/selftests/kselftest/runner.sh.
+ */
+
+static void test_timer_cval(enum arch_timer timer, uint64_t cval,
+ irq_wait_method_t wm, bool reset_state,
+ uint64_t reset_cnt)
+{
+ test_timer_xval(timer, cval, TIMER_CVAL, wm, reset_state, reset_cnt);
+}
+
+static void test_timer_tval(enum arch_timer timer, int32_t tval,
+ irq_wait_method_t wm, bool reset_state,
+ uint64_t reset_cnt)
+{
+ test_timer_xval(timer, (uint64_t) tval, TIMER_TVAL, wm, reset_state,
+ reset_cnt);
+}
+
+static void test_xval_check_no_irq(enum arch_timer timer, uint64_t xval,
+ uint64_t usec, enum timer_view timer_view,
+ sleep_method_t guest_sleep)
+{
+ local_irq_disable();
+
+ set_xval_irq(timer, xval, CTL_ENABLE | CTL_IMASK, timer_view);
+ guest_sleep(timer, usec);
+
+ local_irq_enable();
+ isb();
+
+ /* Assume success (no IRQ) after waiting usec microseconds */
+ assert_irqs_handled(0);
+}
+
+static void test_cval_no_irq(enum arch_timer timer, uint64_t cval,
+ uint64_t usec, sleep_method_t wm)
+{
+ test_xval_check_no_irq(timer, cval, usec, TIMER_CVAL, wm);
+}
+
+static void test_tval_no_irq(enum arch_timer timer, int32_t tval, uint64_t usec,
+ sleep_method_t wm)
+{
+ /* tval will be cast to an int32_t in test_xval_check_no_irq */
+ test_xval_check_no_irq(timer, (uint64_t) tval, usec, TIMER_TVAL, wm);
+}
+
+/* Test masking/unmasking a timer using the timer mask (not the IRQ mask). */
+static void test_timer_control_mask_then_unmask(enum arch_timer timer)
+{
+ reset_timer_state(timer, DEF_CNT);
+ set_tval_irq(timer, -1, CTL_ENABLE | CTL_IMASK);
+
+ /* Unmask the timer, and then get an IRQ. */
+ local_irq_disable();
+ timer_set_ctl(timer, CTL_ENABLE);
+ /* This method re-enables IRQs to handle the one we're looking for. */
+ wait_for_non_spurious_irq();
+
+ assert_irqs_handled(1);
+ local_irq_enable();
+}
+
+/* Check that timer control masks actually mask a timer being fired. */
+static void test_timer_control_masks(enum arch_timer timer)
+{
+ reset_timer_state(timer, DEF_CNT);
+
+ /* Local IRQs are not masked at this point. */
+
+ set_tval_irq(timer, -1, CTL_ENABLE | CTL_IMASK);
+
+ /* Assume no IRQ after waiting TIMEOUT_NO_IRQ_US microseconds */
+ sleep_poll(timer, TIMEOUT_NO_IRQ_US);
+
+ assert_irqs_handled(0);
+ timer_set_ctl(timer, CTL_IMASK);
+}
+
+static void test_fire_a_timer_multiple_times(enum arch_timer timer,
+ irq_wait_method_t wm, int num)
+{
+ int i;
+
+ local_irq_disable();
+ reset_timer_state(timer, DEF_CNT);
+
+ set_tval_irq(timer, 0, CTL_ENABLE);
+
+ for (i = 1; i <= num; i++) {
+ /* This method re-enables IRQs to handle the one we're looking for. */
+ wm();
+
+ /* The IRQ handler masked and disabled the timer.
+ * Enable and unmask it again.
+ */
+ timer_set_ctl(timer, CTL_ENABLE);
+
+ assert_irqs_handled(i);
+ }
+
+ local_irq_enable();
+}
+
+static void test_timers_fired_multiple_times(enum arch_timer timer)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++)
+ test_fire_a_timer_multiple_times(timer, irq_wait_method[i], 10);
+}
+
+/*
+ * Set a timer for tval=delta_1_ms then reprogram it to
+ * tval=delta_2_ms. Check that we get the timer fired. There is no
+ * timeout for the wait: we use the wfi instruction.
+ */
+static void test_reprogramming_timer(enum arch_timer timer, irq_wait_method_t wm,
+ int32_t delta_1_ms, int32_t delta_2_ms)
+{
+ local_irq_disable();
+ reset_timer_state(timer, DEF_CNT);
+
+ /* Program the timer to DEF_CNT + delta_1_ms. */
+ set_tval_irq(timer, msec_to_cycles(delta_1_ms), CTL_ENABLE);
+
+ /* Reprogram the timer to DEF_CNT + delta_2_ms. */
+ timer_set_tval(timer, msec_to_cycles(delta_2_ms));
+
+ /* This method re-enables IRQs to handle the one we're looking for. */
+ wm();
+
+ /* The IRQ should arrive at DEF_CNT + delta_2_ms (or after). */
+ GUEST_ASSERT(timer_get_cntct(timer) >=
+ DEF_CNT + msec_to_cycles(delta_2_ms));
+
+ local_irq_enable();
+ assert_irqs_handled(1);
+};
+
+static void test_reprogram_timers(enum arch_timer timer)
+{
+ int i;
+ uint64_t base_wait = test_args.wait_ms;
+
+ for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+ /*
+ * Ensure reprogramming works whether going from a
+ * longer time to a shorter or vice versa.
+ */
+ test_reprogramming_timer(timer, irq_wait_method[i], 2 * base_wait,
+ base_wait);
+ test_reprogramming_timer(timer, irq_wait_method[i], base_wait,
+ 2 * base_wait);
+ }
+}
+
+static void test_basic_functionality(enum arch_timer timer)
+{
+ int32_t tval = (int32_t) msec_to_cycles(test_args.wait_ms);
+ uint64_t cval = DEF_CNT + msec_to_cycles(test_args.wait_ms);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+ irq_wait_method_t wm = irq_wait_method[i];
+
+ test_timer_cval(timer, cval, wm, true, DEF_CNT);
+ test_timer_tval(timer, tval, wm, true, DEF_CNT);
+ }
+}
+
+/*
+ * This test checks basic timer behavior without actually firing timers, things
+ * like: the relationship between cval and tval, tval down-counting.
+ */
+static void timers_sanity_checks(enum arch_timer timer, bool use_sched)
+{
+ reset_timer_state(timer, DEF_CNT);
+
+ local_irq_disable();
+
+ /* cval in the past */
+ timer_set_cval(timer,
+ timer_get_cntct(timer) -
+ msec_to_cycles(test_args.wait_ms));
+ if (use_sched)
+ userspace_migrate_vcpu();
+ GUEST_ASSERT(timer_get_tval(timer) < 0);
+
+ /* tval in the past */
+ timer_set_tval(timer, -1);
+ if (use_sched)
+ userspace_migrate_vcpu();
+ GUEST_ASSERT(timer_get_cval(timer) < timer_get_cntct(timer));
+
+ /* tval larger than TVAL_MAX. This requires programming with
+ * timer_set_cval instead so the value is expressible
+ */
+ timer_set_cval(timer,
+ timer_get_cntct(timer) + TVAL_MAX +
+ msec_to_cycles(test_args.wait_ms));
+ if (use_sched)
+ userspace_migrate_vcpu();
+ GUEST_ASSERT(timer_get_tval(timer) <= 0);
+
+ /*
+ * tval larger than 2 * TVAL_MAX.
+ * Twice the TVAL_MAX completely loops around the TVAL.
+ */
+ timer_set_cval(timer,
+ timer_get_cntct(timer) + 2ULL * TVAL_MAX +
+ msec_to_cycles(test_args.wait_ms));
+ if (use_sched)
+ userspace_migrate_vcpu();
+ GUEST_ASSERT(timer_get_tval(timer) <=
+ msec_to_cycles(test_args.wait_ms));
+
+ /* negative tval that rollovers from 0. */
+ set_counter(timer, msec_to_cycles(1));
+ timer_set_tval(timer, -1 * msec_to_cycles(test_args.wait_ms));
+ if (use_sched)
+ userspace_migrate_vcpu();
+ GUEST_ASSERT(timer_get_cval(timer) >= (CVAL_MAX - msec_to_cycles(test_args.wait_ms)));
+
+ /* tval should keep down-counting from 0 to -1. */
+ timer_set_tval(timer, 0);
+ sleep_poll(timer, 1);
+ GUEST_ASSERT(timer_get_tval(timer) < 0);
+
+ local_irq_enable();
+
+ /* Mask and disable any pending timer. */
+ timer_set_ctl(timer, CTL_IMASK);
+}
+
+static void test_timers_sanity_checks(enum arch_timer timer)
+{
+ timers_sanity_checks(timer, false);
+ /* Check how KVM saves/restores these edge-case values. */
+ timers_sanity_checks(timer, true);
+}
+
+static void test_set_cnt_after_tval_max(enum arch_timer timer, irq_wait_method_t wm)
+{
+ local_irq_disable();
+ reset_timer_state(timer, DEF_CNT);
+
+ set_cval_irq(timer,
+ (uint64_t) TVAL_MAX +
+ msec_to_cycles(test_args.wait_ms) / 2, CTL_ENABLE);
+
+ set_counter(timer, TVAL_MAX);
+
+ /* This method re-enables IRQs to handle the one we're looking for. */
+ wm();
+
+ assert_irqs_handled(1);
+ local_irq_enable();
+}
+
+/* Test timers set for: cval = now + TVAL_MAX + wait_ms / 2 */
+static void test_timers_above_tval_max(enum arch_timer timer)
+{
+ uint64_t cval;
+ int i;
+
+ /*
+ * Test that the system is not implementing cval in terms of
+ * tval. If that was the case, setting a cval to "cval = now
+ * + TVAL_MAX + wait_ms" would wrap to "cval = now +
+ * wait_ms", and the timer would fire immediately. Test that it
+ * doesn't.
+ */
+ for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
+ reset_timer_state(timer, DEF_CNT);
+ cval = timer_get_cntct(timer) + TVAL_MAX +
+ msec_to_cycles(test_args.wait_ms);
+ test_cval_no_irq(timer, cval,
+ msecs_to_usecs(test_args.wait_ms) +
+ TIMEOUT_NO_IRQ_US, sleep_method[i]);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+ /* Get the IRQ by moving the counter forward. */
+ test_set_cnt_after_tval_max(timer, irq_wait_method[i]);
+ }
+}
+
+/*
+ * Template function to be used by the test_move_counter_ahead_* tests. It
+ * sets the counter to cnt_1, the [c|t]val, the counter to cnt_2, and
+ * then waits for an IRQ.
+ */
+static void test_set_cnt_after_xval(enum arch_timer timer, uint64_t cnt_1,
+ uint64_t xval, uint64_t cnt_2,
+ irq_wait_method_t wm, enum timer_view tv)
+{
+ local_irq_disable();
+
+ set_counter(timer, cnt_1);
+ timer_set_ctl(timer, CTL_IMASK);
+
+ set_xval_irq(timer, xval, CTL_ENABLE, tv);
+ set_counter(timer, cnt_2);
+ /* This method re-enables IRQs to handle the one we're looking for. */
+ wm();
+
+ assert_irqs_handled(1);
+ local_irq_enable();
+}
+
+/*
+ * Template function to be used by the test_move_counters_* tests. It sets
+ * the counter to cnt_1, the [c|t]val, the counter to cnt_2, and then
+ * sleeps for TIMEOUT_NO_IRQ_US and checks that no IRQ was handled.
+ */
+static void test_set_cnt_after_xval_no_irq(enum arch_timer timer,
+ uint64_t cnt_1, uint64_t xval,
+ uint64_t cnt_2,
+ sleep_method_t guest_sleep,
+ enum timer_view tv)
+{
+ local_irq_disable();
+
+ set_counter(timer, cnt_1);
+ timer_set_ctl(timer, CTL_IMASK);
+
+ set_xval_irq(timer, xval, CTL_ENABLE, tv);
+ set_counter(timer, cnt_2);
+ guest_sleep(timer, TIMEOUT_NO_IRQ_US);
+
+ local_irq_enable();
+ isb();
+
+ /* Assume no IRQ after waiting TIMEOUT_NO_IRQ_US microseconds */
+ assert_irqs_handled(0);
+ timer_set_ctl(timer, CTL_IMASK);
+}
+
+static void test_set_cnt_after_tval(enum arch_timer timer, uint64_t cnt_1,
+ int32_t tval, uint64_t cnt_2,
+ irq_wait_method_t wm)
+{
+ test_set_cnt_after_xval(timer, cnt_1, tval, cnt_2, wm, TIMER_TVAL);
+}
+
+static void test_set_cnt_after_cval(enum arch_timer timer, uint64_t cnt_1,
+ uint64_t cval, uint64_t cnt_2,
+ irq_wait_method_t wm)
+{
+ test_set_cnt_after_xval(timer, cnt_1, cval, cnt_2, wm, TIMER_CVAL);
+}
+
+static void test_set_cnt_after_tval_no_irq(enum arch_timer timer,
+ uint64_t cnt_1, int32_t tval,
+ uint64_t cnt_2, sleep_method_t wm)
+{
+ test_set_cnt_after_xval_no_irq(timer, cnt_1, tval, cnt_2, wm,
+ TIMER_TVAL);
+}
+
+static void test_set_cnt_after_cval_no_irq(enum arch_timer timer,
+ uint64_t cnt_1, uint64_t cval,
+ uint64_t cnt_2, sleep_method_t wm)
+{
+ test_set_cnt_after_xval_no_irq(timer, cnt_1, cval, cnt_2, wm,
+ TIMER_CVAL);
+}
+
+/* Set a timer and then move the counter ahead of it. */
+static void test_move_counters_ahead_of_timers(enum arch_timer timer)
+{
+ int i;
+ int32_t tval;
+
+ for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+ irq_wait_method_t wm = irq_wait_method[i];
+
+ test_set_cnt_after_cval(timer, 0, DEF_CNT, DEF_CNT + 1, wm);
+ test_set_cnt_after_cval(timer, CVAL_MAX, 1, 2, wm);
+
+ /* Move counter ahead of negative tval. */
+ test_set_cnt_after_tval(timer, 0, -1, DEF_CNT + 1, wm);
+ test_set_cnt_after_tval(timer, 0, -1, TVAL_MAX, wm);
+ tval = TVAL_MAX;
+ test_set_cnt_after_tval(timer, 0, tval, (uint64_t) tval + 1,
+ wm);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
+ sleep_method_t sm = sleep_method[i];
+
+ test_set_cnt_after_cval_no_irq(timer, 0, DEF_CNT, CVAL_MAX, sm);
+ }
+}
+
+/*
+ * Program a timer, mask it, and then change the tval or counter to cancel it.
+ * Unmask it and check that nothing fires.
+ */
+static void test_move_counters_behind_timers(enum arch_timer timer)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
+ sleep_method_t sm = sleep_method[i];
+
+ test_set_cnt_after_cval_no_irq(timer, DEF_CNT, DEF_CNT - 1, 0,
+ sm);
+ test_set_cnt_after_tval_no_irq(timer, DEF_CNT, -1, 0, sm);
+ }
+}
+
+static void test_timers_in_the_past(enum arch_timer timer)
+{
+ int32_t tval = -1 * (int32_t) msec_to_cycles(test_args.wait_ms);
+ uint64_t cval;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+ irq_wait_method_t wm = irq_wait_method[i];
+
+ /* Set a timer wait_ms in the past. */
+ cval = DEF_CNT - msec_to_cycles(test_args.wait_ms);
+ test_timer_cval(timer, cval, wm, true, DEF_CNT);
+ test_timer_tval(timer, tval, wm, true, DEF_CNT);
+
+ /* Set a timer to counter=0 (in the past) */
+ test_timer_cval(timer, 0, wm, true, DEF_CNT);
+
+ /* Set a timer for tval=0 (now) */
+ test_timer_tval(timer, 0, wm, true, DEF_CNT);
+
+ /* Set a timer to as far in the past as possible */
+ test_timer_tval(timer, TVAL_MIN, wm, true, DEF_CNT);
+ }
+
+ /*
+ * Set the counter to wait_ms, and a tval to -wait_ms. There should be no
+ * IRQ as that tval means cval=CVAL_MAX-wait_ms.
+ */
+ for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
+ sleep_method_t sm = sleep_method[i];
+
+ set_counter(timer, msec_to_cycles(test_args.wait_ms));
+ test_tval_no_irq(timer, tval, TIMEOUT_NO_IRQ_US, sm);
+ }
+}
+
+static void test_long_timer_delays(enum arch_timer timer)
+{
+ int32_t tval = (int32_t) msec_to_cycles(test_args.long_wait_ms);
+ uint64_t cval = DEF_CNT + msec_to_cycles(test_args.long_wait_ms);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+ irq_wait_method_t wm = irq_wait_method[i];
+
+ test_timer_cval(timer, cval, wm, true, DEF_CNT);
+ test_timer_tval(timer, tval, wm, true, DEF_CNT);
+ }
+}
+
+static void guest_run_iteration(enum arch_timer timer)
+{
+ test_basic_functionality(timer);
+ test_timers_sanity_checks(timer);
+
+ test_timers_above_tval_max(timer);
+ test_timers_in_the_past(timer);
+
+ test_move_counters_ahead_of_timers(timer);
+ test_move_counters_behind_timers(timer);
+ test_reprogram_timers(timer);
+
+ test_timers_fired_multiple_times(timer);
+
+ test_timer_control_mask_then_unmask(timer);
+ test_timer_control_masks(timer);
+}
+
+static void guest_code(enum arch_timer timer)
+{
+ int i;
+
+ local_irq_disable();
+
+ gic_init(GIC_V3, 1);
+
+ timer_set_ctl(VIRTUAL, CTL_IMASK);
+ timer_set_ctl(PHYSICAL, CTL_IMASK);
+
+ gic_irq_enable(vtimer_irq);
+ gic_irq_enable(ptimer_irq);
+ local_irq_enable();
+
+ for (i = 0; i < test_args.iterations; i++) {
+ GUEST_SYNC(i);
+ guest_run_iteration(timer);
+ }
+
+ test_long_timer_delays(timer);
+ GUEST_DONE();
+}
+
+static uint32_t next_pcpu(void)
+{
+ uint32_t max = get_nprocs();
+ uint32_t cur = sched_getcpu();
+ uint32_t next = cur;
+ cpu_set_t cpuset;
+
+ TEST_ASSERT(max > 1, "Need at least two physical cpus");
+
+ sched_getaffinity(0, sizeof(cpuset), &cpuset);
+
+ do {
+ next = (next + 1) % CPU_SETSIZE;
+ } while (!CPU_ISSET(next, &cpuset));
+
+ return next;
+}
+
+static void migrate_self(uint32_t new_pcpu)
+{
+ int ret;
+ cpu_set_t cpuset;
+ pthread_t thread;
+
+ thread = pthread_self();
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(new_pcpu, &cpuset);
+
+ pr_debug("Migrating from %u to %u\n", sched_getcpu(), new_pcpu);
+
+ ret = pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset);
+
+ TEST_ASSERT(ret == 0, "Failed to migrate to pCPU: %u; ret: %d\n",
+ new_pcpu, ret);
+}
+
+static void kvm_set_cntxct(struct kvm_vcpu *vcpu, uint64_t cnt,
+ enum arch_timer timer)
+{
+ if (timer == PHYSICAL)
+ vcpu_set_reg(vcpu, KVM_REG_ARM_PTIMER_CNT, cnt);
+ else
+ vcpu_set_reg(vcpu, KVM_REG_ARM_TIMER_CNT, cnt);
+}
+
+static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc)
+{
+ enum sync_cmd cmd = uc->args[1];
+ uint64_t val = uc->args[2];
+ enum arch_timer timer = uc->args[3];
+
+ switch (cmd) {
+ case SET_COUNTER_VALUE:
+ kvm_set_cntxct(vcpu, val, timer);
+ break;
+ case USERSPACE_USLEEP:
+ usleep(val);
+ break;
+ case USERSPACE_SCHED_YIELD:
+ sched_yield();
+ break;
+ case USERSPACE_MIGRATE_SELF:
+ migrate_self(next_pcpu());
+ break;
+ default:
+ break;
+ }
+}
+
+static void test_run(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ /* Start on CPU 0 */
+ migrate_self(0);
+
+ while (true) {
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ handle_sync(vcpu, &uc);
+ break;
+ case UCALL_DONE:
+ goto out;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ goto out;
+ default:
+ TEST_FAIL("Unexpected guest exit\n");
+ }
+ }
+
+ out:
+ return;
+}
+
+static void test_init_timer_irq(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+{
+ vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL,
+ KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq);
+ vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL,
+ KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq);
+
+ sync_global_to_guest(vm, ptimer_irq);
+ sync_global_to_guest(vm, vtimer_irq);
+
+ pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq);
+}
+
+static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu,
+ enum arch_timer timer)
+{
+ *vm = vm_create_with_one_vcpu(vcpu, guest_code);
+ TEST_ASSERT(*vm, "Failed to create the test VM\n");
+
+ vm_init_descriptor_tables(*vm);
+ vm_install_exception_handler(*vm, VECTOR_IRQ_CURRENT,
+ guest_irq_handler);
+
+ vcpu_init_descriptor_tables(*vcpu);
+ vcpu_args_set(*vcpu, 1, timer);
+
+ test_init_timer_irq(*vm, *vcpu);
+ vgic_v3_setup(*vm, 1, 64);
+ sync_global_to_guest(*vm, test_args);
+}
+
+/* Print command-line usage; the option letters mirror parse_args()'s getopt string. */
+static void test_print_help(char *name)
+{
+ pr_info("Usage: %s [-h] [-b] [-i iterations] [-l long_wait_ms] [-p] [-v] [-w wait_ms]\n",
+ name);
+ pr_info("\t-i: Number of iterations (default: %u)\n", NR_TEST_ITERS_DEF);
+ pr_info("\t-b: Test both physical and virtual timers (default: true)\n");
+ pr_info("\t-l: Delta (in ms) used for long wait time test (default: %u)\n",
+ LONG_WAIT_TEST_MS);
+ /* '-w' (not a second '-l') is the option that sets test_args.wait_ms. */
+ pr_info("\t-w: Delta (in ms) used for wait times (default: %u)\n", WAIT_TEST_MS);
+ pr_info("\t-p: Test physical timer (default: true)\n");
+ pr_info("\t-v: Test virtual timer (default: true)\n");
+ pr_info("\t-h: Print this help message\n");
+}
+
+static bool parse_args(int argc, char *argv[])
+{
+ int opt;
+
+ while ((opt = getopt(argc, argv, "bhi:l:pvw:")) != -1) {
+ switch (opt) {
+ case 'b':
+ test_args.test_physical = true;
+ test_args.test_virtual = true;
+ break;
+ case 'i':
+ test_args.iterations =
+ atoi_positive("Number of iterations", optarg);
+ break;
+ case 'l':
+ test_args.long_wait_ms =
+ atoi_positive("Long wait time", optarg);
+ break;
+ case 'p':
+ test_args.test_physical = true;
+ test_args.test_virtual = false;
+ break;
+ case 'v':
+ test_args.test_virtual = true;
+ test_args.test_physical = false;
+ break;
+ case 'w':
+ test_args.wait_ms = atoi_positive("Wait time", optarg);
+ break;
+ case 'h':
+ default:
+ goto err;
+ }
+ }
+
+ return true;
+
+ err:
+ test_print_help(argv[0]);
+ return false;
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ /* Tell stdout not to buffer its content */
+ setbuf(stdout, NULL);
+
+ if (!parse_args(argc, argv))
+ exit(KSFT_SKIP);
+
+ if (test_args.test_virtual) {
+ test_vm_create(&vm, &vcpu, VIRTUAL);
+ test_run(vm, vcpu);
+ kvm_vm_free(vm);
+ }
+
+ if (test_args.test_physical) {
+ test_vm_create(&vm, &vcpu, PHYSICAL);
+ test_run(vm, vcpu);
+ kvm_vm_free(vm);
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
index 4abebde78187..d43fb3f49050 100644
--- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c
+++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
@@ -40,6 +40,18 @@ static struct feature_id_reg feat_id_regs[] = {
ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
8,
1
+ },
+ {
+ ARM64_SYS_REG(3, 0, 10, 2, 4), /* POR_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
+ 16,
+ 1
+ },
+ {
+ ARM64_SYS_REG(3, 3, 10, 2, 4), /* POR_EL0 */
+ ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
+ 16,
+ 1
}
};
@@ -468,6 +480,7 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(3, 0, 10, 2, 0), /* MAIR_EL1 */
ARM64_SYS_REG(3, 0, 10, 2, 2), /* PIRE0_EL1 */
ARM64_SYS_REG(3, 0, 10, 2, 3), /* PIR_EL1 */
+ ARM64_SYS_REG(3, 0, 10, 2, 4), /* POR_EL1 */
ARM64_SYS_REG(3, 0, 10, 3, 0), /* AMAIR_EL1 */
ARM64_SYS_REG(3, 0, 12, 0, 0), /* VBAR_EL1 */
ARM64_SYS_REG(3, 0, 12, 1, 1), /* DISR_EL1 */
@@ -475,6 +488,7 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(3, 0, 13, 0, 4), /* TPIDR_EL1 */
ARM64_SYS_REG(3, 0, 14, 1, 0), /* CNTKCTL_EL1 */
ARM64_SYS_REG(3, 2, 0, 0, 0), /* CSSELR_EL1 */
+ ARM64_SYS_REG(3, 3, 10, 2, 4), /* POR_EL0 */
ARM64_SYS_REG(3, 3, 13, 0, 2), /* TPIDR_EL0 */
ARM64_SYS_REG(3, 3, 13, 0, 3), /* TPIDRRO_EL0 */
ARM64_SYS_REG(3, 3, 14, 0, 1), /* CNTPCT_EL0 */
diff --git a/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c b/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c
new file mode 100644
index 000000000000..943d65fc6b0b
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Check that, on a GICv3 system, not configuring GICv3 correctly
+// results in all of the sysregs generating an UNDEF exception.
+
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+
+static volatile bool handled;
+
+#define __check_sr_read(r) \
+ ({ \
+ uint64_t val; \
+ \
+ handled = false; \
+ dsb(sy); \
+ val = read_sysreg_s(SYS_ ## r); \
+ val; \
+ })
+
+#define __check_sr_write(r) \
+ do { \
+ handled = false; \
+ dsb(sy); \
+ write_sysreg_s(0, SYS_ ## r); \
+ isb(); \
+ } while(0)
+
+/* Fatal checks */
+#define check_sr_read(r) \
+ do { \
+ __check_sr_read(r); \
+ __GUEST_ASSERT(handled, #r " no read trap"); \
+ } while(0)
+
+#define check_sr_write(r) \
+ do { \
+ __check_sr_write(r); \
+ __GUEST_ASSERT(handled, #r " no write trap"); \
+ } while(0)
+
+#define check_sr_rw(r) \
+ do { \
+ check_sr_read(r); \
+ check_sr_write(r); \
+ } while(0)
+
+static void guest_code(void)
+{
+ uint64_t val;
+
+ /*
+ * Check that we advertise that ID_AA64PFR0_EL1.GIC == 0, having
+ * hidden the feature at runtime without any other userspace action.
+ */
+ __GUEST_ASSERT(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC),
+ read_sysreg(id_aa64pfr0_el1)) == 0,
+ "GICv3 wrongly advertised");
+
+ /*
+ * Access all GICv3 registers, and fail if we don't get an UNDEF.
+ * Note that we happily access all the APxRn registers without
+ * checking their existence, as all we want to see is a failure.
+ */
+ check_sr_rw(ICC_PMR_EL1);
+ check_sr_read(ICC_IAR0_EL1);
+ check_sr_write(ICC_EOIR0_EL1);
+ check_sr_rw(ICC_HPPIR0_EL1);
+ check_sr_rw(ICC_BPR0_EL1);
+ check_sr_rw(ICC_AP0R0_EL1);
+ check_sr_rw(ICC_AP0R1_EL1);
+ check_sr_rw(ICC_AP0R2_EL1);
+ check_sr_rw(ICC_AP0R3_EL1);
+ check_sr_rw(ICC_AP1R0_EL1);
+ check_sr_rw(ICC_AP1R1_EL1);
+ check_sr_rw(ICC_AP1R2_EL1);
+ check_sr_rw(ICC_AP1R3_EL1);
+ check_sr_write(ICC_DIR_EL1);
+ check_sr_read(ICC_RPR_EL1);
+ check_sr_write(ICC_SGI1R_EL1);
+ check_sr_write(ICC_ASGI1R_EL1);
+ check_sr_write(ICC_SGI0R_EL1);
+ check_sr_read(ICC_IAR1_EL1);
+ check_sr_write(ICC_EOIR1_EL1);
+ check_sr_rw(ICC_HPPIR1_EL1);
+ check_sr_rw(ICC_BPR1_EL1);
+ check_sr_rw(ICC_CTLR_EL1);
+ check_sr_rw(ICC_IGRPEN0_EL1);
+ check_sr_rw(ICC_IGRPEN1_EL1);
+
+ /*
+ * ICC_SRE_EL1 may not be trappable, as ICC_SRE_EL2.Enable can
+ * be RAO/WI. Engage in non-fatal accesses, starting with a
+ * write of 0 to try and disable SRE, and let's see if it
+ * sticks.
+ */
+ __check_sr_write(ICC_SRE_EL1);
+ if (!handled)
+ GUEST_PRINTF("ICC_SRE_EL1 write not trapping (OK)\n");
+
+ val = __check_sr_read(ICC_SRE_EL1);
+ if (!handled) {
+ __GUEST_ASSERT((val & BIT(0)),
+ "ICC_SRE_EL1 not trapped but ICC_SRE_EL1.SRE not set\n");
+ GUEST_PRINTF("ICC_SRE_EL1 read not trapping (OK)\n");
+ }
+
+ GUEST_DONE();
+}
+
+static void guest_undef_handler(struct ex_regs *regs)
+{
+ /* Success, we've gracefully exploded! */
+ handled = true;
+ regs->pc += 4;
+}
+
+static void test_run_vcpu(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ do {
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_PRINTF:
+ printf("%s", uc.buffer);
+ break;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ } while (uc.cmd != UCALL_DONE);
+}
+
+static void test_guest_no_gicv3(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ /* Create a VM without a GICv3 */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_EC_UNKNOWN, guest_undef_handler);
+
+ test_run_vcpu(vcpu);
+
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ uint64_t pfr0;
+
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+ vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &pfr0);
+ __TEST_REQUIRE(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), pfr0),
+ "GICv3 not supported.");
+ kvm_vm_free(vm);
+
+ test_guest_no_gicv3();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/aarch64/set_id_regs.c b/tools/testing/selftests/kvm/aarch64/set_id_regs.c
index d20981663831..2a3fe7914b72 100644
--- a/tools/testing/selftests/kvm/aarch64/set_id_regs.c
+++ b/tools/testing/selftests/kvm/aarch64/set_id_regs.c
@@ -126,6 +126,7 @@ static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = {
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV2, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, DIT, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, SEL2, 0),
+ REG_FTR_BITS(FTR_EXACT, ID_AA64PFR0_EL1, GIC, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 0),
diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/aarch64/vgic_irq.c
index a51dbd2a5f84..f4ac28d53747 100644
--- a/tools/testing/selftests/kvm/aarch64/vgic_irq.c
+++ b/tools/testing/selftests/kvm/aarch64/vgic_irq.c
@@ -269,13 +269,12 @@ static void guest_inject(struct test_args *args,
KVM_INJECT_MULTI(cmd, first_intid, num);
while (irq_handled < num) {
- asm volatile("wfi\n"
- "msr daifclr, #2\n"
- /* handle IRQ */
- "msr daifset, #2\n"
- : : : "memory");
+ wfi();
+ local_irq_enable();
+ isb(); /* handle IRQ */
+ local_irq_disable();
}
- asm volatile("msr daifclr, #2" : : : "memory");
+ local_irq_enable();
GUEST_ASSERT_EQ(irq_handled, num);
for (i = first_intid; i < num + first_intid; i++)
diff --git a/tools/testing/selftests/kvm/include/aarch64/arch_timer.h b/tools/testing/selftests/kvm/include/aarch64/arch_timer.h
index b3e97525cb55..bf461de34785 100644
--- a/tools/testing/selftests/kvm/include/aarch64/arch_timer.h
+++ b/tools/testing/selftests/kvm/include/aarch64/arch_timer.h
@@ -79,7 +79,7 @@ static inline uint64_t timer_get_cval(enum arch_timer timer)
return 0;
}
-static inline void timer_set_tval(enum arch_timer timer, uint32_t tval)
+static inline void timer_set_tval(enum arch_timer timer, int32_t tval)
{
switch (timer) {
case VIRTUAL:
@@ -95,6 +95,22 @@ static inline void timer_set_tval(enum arch_timer timer, uint32_t tval)
isb();
}
+static inline int32_t timer_get_tval(enum arch_timer timer)
+{
+ isb();
+ switch (timer) {
+ case VIRTUAL:
+ return read_sysreg(cntv_tval_el0);
+ case PHYSICAL:
+ return read_sysreg(cntp_tval_el0);
+ default:
+ GUEST_FAIL("Could not get timer %d\n", timer);
+ }
+
+ /* We should not reach here */
+ return 0;
+}
+
static inline void timer_set_ctl(enum arch_timer timer, uint32_t ctl)
{
switch (timer) {
diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h
index 9b20a355d81a..de977d131082 100644
--- a/tools/testing/selftests/kvm/include/aarch64/processor.h
+++ b/tools/testing/selftests/kvm/include/aarch64/processor.h
@@ -243,4 +243,7 @@ void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
uint64_t arg6, struct arm_smccc_res *res);
+/* Execute a Wait For Interrupt instruction. */
+void wfi(void);
+
#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index 0ac7cc89f38c..fe4dc3693112 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -639,3 +639,9 @@ void vm_vaddr_populate_bitmap(struct kvm_vm *vm)
sparsebit_set_num(vm->vpages_valid, 0,
(1ULL << vm->va_bits) >> vm->page_shift);
}
+
+/* Helper to call wfi instruction. */
+void wfi(void)
+{
+ asm volatile("wfi");
+}
diff --git a/tools/testing/selftests/lib/Makefile b/tools/testing/selftests/lib/Makefile
index ee71fc99d5b5..c52fe3ad8e98 100644
--- a/tools/testing/selftests/lib/Makefile
+++ b/tools/testing/selftests/lib/Makefile
@@ -4,6 +4,5 @@
# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
all:
-TEST_PROGS := printf.sh bitmap.sh prime_numbers.sh scanf.sh strscpy.sh
-
+TEST_PROGS := printf.sh bitmap.sh prime_numbers.sh scanf.sh
include ../lib.mk
diff --git a/tools/testing/selftests/lib/config b/tools/testing/selftests/lib/config
index 645839b50b0a..dc15aba8d0a3 100644
--- a/tools/testing/selftests/lib/config
+++ b/tools/testing/selftests/lib/config
@@ -2,5 +2,4 @@ CONFIG_TEST_PRINTF=m
CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
CONFIG_PRIME_NUMBERS=m
-CONFIG_TEST_STRSCPY=m
CONFIG_TEST_BITOPS=m
diff --git a/tools/testing/selftests/lib/strscpy.sh b/tools/testing/selftests/lib/strscpy.sh
deleted file mode 100755
index be60ef6e1a7f..000000000000
--- a/tools/testing/selftests/lib/strscpy.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0+
-$(dirname $0)/../kselftest/module.sh "strscpy*" test_strscpy
diff --git a/tools/testing/selftests/livepatch/test-livepatch.sh b/tools/testing/selftests/livepatch/test-livepatch.sh
index 65c9c058458d..bd13257bfdfe 100755
--- a/tools/testing/selftests/livepatch/test-livepatch.sh
+++ b/tools/testing/selftests/livepatch/test-livepatch.sh
@@ -139,11 +139,8 @@ load_lp $MOD_REPLACE replace=1
grep 'live patched' /proc/cmdline > /dev/kmsg
grep 'live patched' /proc/meminfo > /dev/kmsg
-mods=(/sys/kernel/livepatch/*)
-nmods=${#mods[@]}
-if [ "$nmods" -ne 1 ]; then
- die "Expecting only one moduled listed, found $nmods"
-fi
+loop_until 'mods=(/sys/kernel/livepatch/*); nmods=${#mods[@]}; [[ "$nmods" -eq 1 ]]' ||
+ die "Expecting only one module listed, found $nmods"
# These modules were disabled by the atomic replace
for mod in $MOD_LIVEPATCH3 $MOD_LIVEPATCH2 $MOD_LIVEPATCH1; do
diff --git a/tools/testing/selftests/lsm/lsm_list_modules_test.c b/tools/testing/selftests/lsm/lsm_list_modules_test.c
index 06d24d4679a6..1cc8a977c711 100644
--- a/tools/testing/selftests/lsm/lsm_list_modules_test.c
+++ b/tools/testing/selftests/lsm/lsm_list_modules_test.c
@@ -128,6 +128,9 @@ TEST(correct_lsm_list_modules)
case LSM_ID_EVM:
name = "evm";
break;
+ case LSM_ID_IPE:
+ name = "ipe";
+ break;
default:
name = "INVALID";
break;
diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile
index cfad627e8d94..e10b87376fde 100644
--- a/tools/testing/selftests/mm/Makefile
+++ b/tools/testing/selftests/mm/Makefile
@@ -90,6 +90,7 @@ CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_64bit_pr
CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_program.c -no-pie)
VMTARGETS := protection_keys
+VMTARGETS += pkey_sighandler_tests
BINARIES_32 := $(VMTARGETS:%=%_32)
BINARIES_64 := $(VMTARGETS:%=%_64)
@@ -106,7 +107,7 @@ TEST_GEN_FILES += $(BINARIES_64)
endif
else
-ifneq (,$(findstring $(ARCH),powerpc))
+ifneq (,$(filter $(ARCH),arm64 powerpc))
TEST_GEN_FILES += protection_keys
endif
diff --git a/tools/testing/selftests/mm/mseal_test.c b/tools/testing/selftests/mm/mseal_test.c
index a818f010de47..bfcea5cf9a48 100644
--- a/tools/testing/selftests/mm/mseal_test.c
+++ b/tools/testing/selftests/mm/mseal_test.c
@@ -81,17 +81,6 @@ static int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
return sret;
}
-static void *sys_mmap(void *addr, unsigned long len, unsigned long prot,
- unsigned long flags, unsigned long fd, unsigned long offset)
-{
- void *sret;
-
- errno = 0;
- sret = (void *) syscall(__NR_mmap, addr, len, prot,
- flags, fd, offset);
- return sret;
-}
-
static int sys_munmap(void *ptr, size_t size)
{
int sret;
@@ -172,7 +161,7 @@ static void setup_single_address(int size, void **ptrOut)
{
void *ptr;
- ptr = sys_mmap(NULL, size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ ptr = mmap(NULL, size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
*ptrOut = ptr;
}
@@ -181,7 +170,7 @@ static void setup_single_address_rw(int size, void **ptrOut)
void *ptr;
unsigned long mapflags = MAP_ANONYMOUS | MAP_PRIVATE;
- ptr = sys_mmap(NULL, size, PROT_READ | PROT_WRITE, mapflags, -1, 0);
+ ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, mapflags, -1, 0);
*ptrOut = ptr;
}
@@ -205,7 +194,7 @@ bool seal_support(void)
void *ptr;
unsigned long page_size = getpagesize();
- ptr = sys_mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ ptr = mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
if (ptr == (void *) -1)
return false;
@@ -481,8 +470,8 @@ static void test_seal_zero_address(void)
int prot;
/* use mmap to change protection. */
- ptr = sys_mmap(0, size, PROT_NONE,
- MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ ptr = mmap(0, size, PROT_NONE,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
FAIL_TEST_IF_FALSE(ptr == 0);
size = get_vma_size(ptr, &prot);
@@ -1209,8 +1198,8 @@ static void test_seal_mmap_overwrite_prot(bool seal)
}
/* use mmap to change protection. */
- ret2 = sys_mmap(ptr, size, PROT_NONE,
- MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ ret2 = mmap(ptr, size, PROT_NONE,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
if (seal) {
FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
FAIL_TEST_IF_FALSE(errno == EPERM);
@@ -1240,8 +1229,8 @@ static void test_seal_mmap_expand(bool seal)
}
/* use mmap to expand. */
- ret2 = sys_mmap(ptr, size, PROT_READ,
- MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ ret2 = mmap(ptr, size, PROT_READ,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
if (seal) {
FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
FAIL_TEST_IF_FALSE(errno == EPERM);
@@ -1268,8 +1257,8 @@ static void test_seal_mmap_shrink(bool seal)
}
/* use mmap to shrink. */
- ret2 = sys_mmap(ptr, 8 * page_size, PROT_READ,
- MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ ret2 = mmap(ptr, 8 * page_size, PROT_READ,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
if (seal) {
FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
FAIL_TEST_IF_FALSE(errno == EPERM);
@@ -1650,7 +1639,7 @@ static void test_seal_discard_ro_anon_on_filebacked(bool seal)
ret = fallocate(fd, 0, 0, size);
FAIL_TEST_IF_FALSE(!ret);
- ptr = sys_mmap(NULL, size, PROT_READ, mapflags, fd, 0);
+ ptr = mmap(NULL, size, PROT_READ, mapflags, fd, 0);
FAIL_TEST_IF_FALSE(ptr != MAP_FAILED);
if (seal) {
@@ -1680,7 +1669,7 @@ static void test_seal_discard_ro_anon_on_shared(bool seal)
int ret;
unsigned long mapflags = MAP_ANONYMOUS | MAP_SHARED;
- ptr = sys_mmap(NULL, size, PROT_READ, mapflags, -1, 0);
+ ptr = mmap(NULL, size, PROT_READ, mapflags, -1, 0);
FAIL_TEST_IF_FALSE(ptr != (void *)-1);
if (seal) {
diff --git a/tools/testing/selftests/mm/pkey-arm64.h b/tools/testing/selftests/mm/pkey-arm64.h
new file mode 100644
index 000000000000..580e1b0bb38e
--- /dev/null
+++ b/tools/testing/selftests/mm/pkey-arm64.h
@@ -0,0 +1,139 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023 Arm Ltd.
+ */
+
+#ifndef _PKEYS_ARM64_H
+#define _PKEYS_ARM64_H
+
+#include "vm_util.h"
+/* for signal frame parsing */
+#include "../arm64/signal/testcases/testcases.h"
+
+#ifndef SYS_mprotect_key
+# define SYS_mprotect_key 288
+#endif
+#ifndef SYS_pkey_alloc
+# define SYS_pkey_alloc 289
+# define SYS_pkey_free 290
+#endif
+#define MCONTEXT_IP(mc) mc.pc
+#define MCONTEXT_TRAPNO(mc) -1
+
+#define PKEY_MASK 0xf
+
+#define POE_NONE 0x0
+#define POE_X 0x2
+#define POE_RX 0x3
+#define POE_RWX 0x7
+
+#define NR_PKEYS 8
+#define NR_RESERVED_PKEYS 1 /* pkey-0 */
+
+#define PKEY_ALLOW_ALL 0x77777777
+
+#define PKEY_BITS_PER_PKEY 4
+#define PAGE_SIZE sysconf(_SC_PAGESIZE)
+#undef HPAGE_SIZE
+#define HPAGE_SIZE default_huge_page_size()
+
+/* 4-byte instructions * 16384 = 64K page */
+#define __page_o_noops() asm(".rept 16384 ; nop; .endr")
+
+static inline u64 __read_pkey_reg(void)
+{
+ u64 pkey_reg = 0;
+
+ // POR_EL0
+ asm volatile("mrs %0, S3_3_c10_c2_4" : "=r" (pkey_reg));
+
+ return pkey_reg;
+}
+
+static inline void __write_pkey_reg(u64 pkey_reg)
+{
+ u64 por = pkey_reg;
+
+ dprintf4("%s() changing %016llx to %016llx\n",
+ __func__, __read_pkey_reg(), pkey_reg);
+
+ // POR_EL0
+ asm volatile("msr S3_3_c10_c2_4, %0\nisb" :: "r" (por) :);
+
+ dprintf4("%s() pkey register after changing %016llx to %016llx\n",
+ __func__, __read_pkey_reg(), pkey_reg);
+}
+
+static inline int cpu_has_pkeys(void)
+{
+ /* No simple way to determine this */
+ return 1;
+}
+
+static inline u32 pkey_bit_position(int pkey)
+{
+ return pkey * PKEY_BITS_PER_PKEY;
+}
+
+static inline int get_arch_reserved_keys(void)
+{
+ return NR_RESERVED_PKEYS;
+}
+
+void expect_fault_on_read_execonly_key(void *p1, int pkey)
+{
+}
+
+void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey)
+{
+ return PTR_ERR_ENOTSUP;
+}
+
+#define set_pkey_bits set_pkey_bits
+static inline u64 set_pkey_bits(u64 reg, int pkey, u64 flags)
+{
+ u32 shift = pkey_bit_position(pkey);
+ u64 new_val = POE_RWX;
+
+ /* mask out bits from pkey in old value */
+ reg &= ~((u64)PKEY_MASK << shift);
+
+ if (flags & PKEY_DISABLE_ACCESS)
+ new_val = POE_X;
+ else if (flags & PKEY_DISABLE_WRITE)
+ new_val = POE_RX;
+
+ /* OR in new bits for pkey */
+ reg |= new_val << shift;
+
+ return reg;
+}
+
+#define get_pkey_bits get_pkey_bits
+static inline u64 get_pkey_bits(u64 reg, int pkey)
+{
+ u32 shift = pkey_bit_position(pkey);
+ /*
+ * shift down the relevant bits to the lowest four, then
+ * mask off all the other higher bits
+ */
+ u32 perm = (reg >> shift) & PKEY_MASK;
+
+ if (perm == POE_X)
+ return PKEY_DISABLE_ACCESS;
+ if (perm == POE_RX)
+ return PKEY_DISABLE_WRITE;
+ return 0;
+}
+
+static void aarch64_write_signal_pkey(ucontext_t *uctxt, u64 pkey)
+{
+ struct _aarch64_ctx *ctx = GET_UC_RESV_HEAD(uctxt);
+ struct poe_context *poe_ctx =
+ (struct poe_context *) get_header(ctx, POE_MAGIC,
+ sizeof(uctxt->uc_mcontext), NULL);
+ if (poe_ctx)
+ poe_ctx->por_el0 = pkey;
+}
+
+#endif /* _PKEYS_ARM64_H */
diff --git a/tools/testing/selftests/mm/pkey-helpers.h b/tools/testing/selftests/mm/pkey-helpers.h
index 1af3156a9db8..9ab6a3ee153b 100644
--- a/tools/testing/selftests/mm/pkey-helpers.h
+++ b/tools/testing/selftests/mm/pkey-helpers.h
@@ -79,7 +79,18 @@ extern void abort_hooks(void);
} \
} while (0)
-__attribute__((noinline)) int read_ptr(int *ptr);
+#define barrier() __asm__ __volatile__("": : :"memory")
+#ifndef noinline
+# define noinline __attribute__((noinline))
+#endif
+
+noinline int read_ptr(int *ptr)
+{
+ /* Keep GCC from optimizing this away somehow */
+ barrier();
+ return *ptr;
+}
+
void expected_pkey_fault(int pkey);
int sys_pkey_alloc(unsigned long flags, unsigned long init_val);
int sys_pkey_free(unsigned long pkey);
@@ -91,12 +102,17 @@ void record_pkey_malloc(void *ptr, long size, int prot);
#include "pkey-x86.h"
#elif defined(__powerpc64__) /* arch */
#include "pkey-powerpc.h"
+#elif defined(__aarch64__) /* arch */
+#include "pkey-arm64.h"
#else /* arch */
#error Architecture not supported
#endif /* arch */
+#ifndef PKEY_MASK
#define PKEY_MASK (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)
+#endif
+#ifndef set_pkey_bits
static inline u64 set_pkey_bits(u64 reg, int pkey, u64 flags)
{
u32 shift = pkey_bit_position(pkey);
@@ -106,7 +122,9 @@ static inline u64 set_pkey_bits(u64 reg, int pkey, u64 flags)
reg |= (flags & PKEY_MASK) << shift;
return reg;
}
+#endif
+#ifndef get_pkey_bits
static inline u64 get_pkey_bits(u64 reg, int pkey)
{
u32 shift = pkey_bit_position(pkey);
@@ -116,6 +134,7 @@ static inline u64 get_pkey_bits(u64 reg, int pkey)
*/
return ((reg >> shift) & PKEY_MASK);
}
+#endif
extern u64 shadow_pkey_reg;
diff --git a/tools/testing/selftests/mm/pkey-powerpc.h b/tools/testing/selftests/mm/pkey-powerpc.h
index ae5df26104e5..3d0c0bdae5bc 100644
--- a/tools/testing/selftests/mm/pkey-powerpc.h
+++ b/tools/testing/selftests/mm/pkey-powerpc.h
@@ -8,7 +8,10 @@
# define SYS_pkey_free 385
#endif
#define REG_IP_IDX PT_NIP
+#define MCONTEXT_IP(mc) mc.gp_regs[REG_IP_IDX]
+#define MCONTEXT_TRAPNO(mc) mc.gp_regs[REG_TRAPNO]
#define REG_TRAPNO PT_TRAP
+#define MCONTEXT_FPREGS
#define gregs gp_regs
#define fpregs fp_regs
#define si_pkey_offset 0x20
diff --git a/tools/testing/selftests/mm/pkey-x86.h b/tools/testing/selftests/mm/pkey-x86.h
index 814758e109c0..5f28e26a2511 100644
--- a/tools/testing/selftests/mm/pkey-x86.h
+++ b/tools/testing/selftests/mm/pkey-x86.h
@@ -15,6 +15,10 @@
#endif
+#define MCONTEXT_IP(mc) mc.gregs[REG_IP_IDX]
+#define MCONTEXT_TRAPNO(mc) mc.gregs[REG_TRAPNO]
+#define MCONTEXT_FPREGS
+
#ifndef PKEY_DISABLE_ACCESS
# define PKEY_DISABLE_ACCESS 0x1
#endif
diff --git a/tools/testing/selftests/mm/pkey_sighandler_tests.c b/tools/testing/selftests/mm/pkey_sighandler_tests.c
new file mode 100644
index 000000000000..a8088b645ad6
--- /dev/null
+++ b/tools/testing/selftests/mm/pkey_sighandler_tests.c
@@ -0,0 +1,481 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Tests Memory Protection Keys (see Documentation/core-api/protection-keys.rst)
+ *
+ * The testcases in this file exercise various flows related to signal handling,
+ * using an alternate signal stack, with the default pkey (pkey 0) disabled.
+ *
+ * Compile with:
+ * gcc -mxsave -o pkey_sighandler_tests -O2 -g -std=gnu99 -pthread -Wall pkey_sighandler_tests.c -I../../../../tools/include -lrt -ldl -lm
+ * gcc -mxsave -m32 -o pkey_sighandler_tests -O2 -g -std=gnu99 -pthread -Wall pkey_sighandler_tests.c -I../../../../tools/include -lrt -ldl -lm
+ */
+#define _GNU_SOURCE
+#define __SANE_USERSPACE_TYPES__
+#include <errno.h>
+#include <sys/syscall.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <signal.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <limits.h>
+
+#include "pkey-helpers.h"
+
+#define STACK_SIZE PTHREAD_STACK_MIN
+
+void expected_pkey_fault(int pkey) {}
+
+pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
+siginfo_t siginfo = {0};
+
+/*
+ * We need to use inline assembly instead of glibc's syscall because glibc's
+ * syscall will attempt to access the PLT in order to call a library function
+ * which is protected by MPK 0 which we don't have access to.
+ *
+ * Issues system call @n with arguments @a1..@a6 directly through the
+ * "syscall" (x86_64) or "int $0x80" (i386) instruction and returns the raw
+ * value left in rax/eax. Nothing here translates the result into errno;
+ * callers in this file treat a negative return value as -errno.
+ *
+ * NOTE(review): the i386 variant only loads @a1..@a5 into registers, so @a6
+ * is silently dropped there -- confirm no caller needs a sixth argument.
+ */
+static inline __always_inline
+long syscall_raw(long n, long a1, long a2, long a3, long a4, long a5, long a6)
+{
+ unsigned long ret;
+#ifdef __x86_64__
+ register long r10 asm("r10") = a4; /* 4th argument is passed in r10 */
+ register long r8 asm("r8") = a5; /* 5th argument is passed in r8 */
+ register long r9 asm("r9") = a6; /* 6th argument is passed in r9 */
+ asm volatile ("syscall"
+ : "=a"(ret)
+ : "a"(n), "D"(a1), "S"(a2), "d"(a3), "r"(r10), "r"(r8), "r"(r9)
+ : "rcx", "r11", "memory"); /* rcx and r11 are declared as clobbered */
+#elif defined __i386__
+ asm volatile ("int $0x80"
+ : "=a"(ret)
+ : "a"(n), "b"(a1), "c"(a2), "d"(a3), "S"(a4), "D"(a5)
+ : "memory"); /* a6 is not passed on this path */
+#else
+# error syscall_raw() not implemented
+#endif
+ return ret;
+}
+/*
+ * SIGSEGV handler: copies *info into the global siginfo while holding
+ * mutex, signals cond, and then ends the calling thread with a raw
+ * SYS_exit, so the handler never returns to the code that faulted.
+ * signo and ucontext are unused.
+ */
+static void sigsegv_handler(int signo, siginfo_t *info, void *ucontext)
+{
+ pthread_mutex_lock(&mutex);
+ /* Publish the fault details for the test that is waiting on cond. */
+ memcpy(&siginfo, info, sizeof(siginfo_t));
+ /* Wake the waiter, then drop the lock so it can re-check siginfo. */
+ pthread_cond_signal(&cond);
+ pthread_mutex_unlock(&mutex);
+ /* Terminate this thread via the raw syscall helper instead of returning. */
+ syscall_raw(SYS_exit, 0, 0, 0, 0, 0, 0);
+}
+/*
+ * SIGUSR1 handler: copies *info into the global siginfo while holding
+ * mutex, signals cond, and returns normally to the interrupted code.
+ * signo and ucontext are unused.
+ */
+static void sigusr1_handler(int signo, siginfo_t *info, void *ucontext)
+{
+ pthread_mutex_lock(&mutex);
+ /* Publish the signal details for the test to inspect. */
+ memcpy(&siginfo, info, sizeof(siginfo_t));
+ /* Signal cond so that a waiter re-checks siginfo.si_signo. */
+ pthread_cond_signal(&cond);
+ pthread_mutex_unlock(&mutex);
+}
+/*
+ * SIGUSR2 handler: prints its own name and returns. signo, info and
+ * ucontext are unused.
+ */
+static void sigusr2_handler(int signo, siginfo_t *info, void *ucontext)
+{
+ /*
+ * pkru should be the init_pkru value which enabled MPK 0 so
+ * we can use library functions.
+ */
+ printf("%s invoked.\n", __func__);
+}
+
+/*
+ * Send SIGUSR2 to the calling thread, using only raw system calls.
+ */
+static void raise_sigusr2(void)
+{
+	/* Ask the kernel for our own thread id without going through libc. */
+	pid_t self = syscall_raw(SYS_gettid, 0, 0, 0, 0, 0, 0);
+
+	syscall_raw(SYS_tkill, self, SIGUSR2, 0, 0, 0, 0);
+
+	/*
+	 * Once the signal handler has run we should come back here and
+	 * carry on in the interrupted thread.
+	 */
+}
+/*
+ * Thread body for test_sigsegv_handler_with_pkey0_disabled(): writes
+ * 0x55555555 to the pkey register and then stores to address 0x1 so that
+ * the thread faults. @ptr is unused.
+ */
+static void *thread_segv_with_pkey0_disabled(void *ptr)
+{
+ /* Disable MPK 0 (and all others too) */
+ __write_pkey_reg(0x55555555);
+
+ /* Segfault (with SEGV_MAPERR) */
+ *(int *) (0x1) = 1;
+ return NULL;
+}
+/*
+ * Thread body for test_sigsegv_handler_cannot_access_stack(): writes
+ * 0x55555555 to the pkey register and then simply returns. @ptr is unused.
+ */
+static void *thread_segv_pkuerr_stack(void *ptr)
+{
+ /* Disable MPK 0 (and all others too) */
+ __write_pkey_reg(0x55555555);
+
+ /* After we disable MPK 0, we can't access the stack to return */
+ return NULL;
+}
+/*
+ * Thread body used by test_sigsegv_handler_with_different_pkey_for_stack():
+ * installs the stack_t passed in @ptr as the alternate signal stack, writes
+ * 0x55555551 to the pkey register and then stores to address 1 to fault.
+ */
+static void *thread_segv_maperr_ptr(void *ptr)
+{
+ stack_t *stack = ptr;
+ int *bad = (int *)1;
+
+ /*
+ * Setup alternate signal stack, which should be pkey_mprotect()ed by
+ * MPK 0. The thread's stack cannot be used for signals because it is
+ * not accessible by the default init_pkru value of 0x55555554.
+ */
+ syscall_raw(SYS_sigaltstack, (long)stack, 0, 0, 0, 0, 0);
+
+ /* Disable MPK 0. Only MPK 1 is enabled. */
+ __write_pkey_reg(0x55555551);
+
+ /* Segfault */
+ *bad = 1;
+ syscall_raw(SYS_exit, 0, 0, 0, 0, 0, 0); /* NOTE(review): presumably only reached if the store above does not end the thread */
+ return NULL;
+}
+
+/*
+ * Verify that the sigsegv handler is invoked when pkey 0 is disabled.
+ *
+ * A detached thread disables every pkey and then writes to address 1.
+ * The handler is installed without SA_ONSTACK and no alternate signal
+ * stack is set up, so it has to run on the new thread's stack, which is
+ * protected by MPK 0. The test passes when the handler reports SIGSEGV
+ * with SEGV_MAPERR for address 1.
+ */
+static void test_sigsegv_handler_with_pkey0_disabled(void)
+{
+	struct sigaction sa = {0};
+	pthread_attr_t attr;
+	pthread_t thr;
+	int err;
+
+	sa.sa_flags = SA_SIGINFO;
+
+	sa.sa_sigaction = sigsegv_handler;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(SIGSEGV, &sa, NULL) == -1) {
+		perror("sigaction");
+		exit(EXIT_FAILURE);
+	}
+
+	/* si_signo == 0 is the "handler has not run yet" marker used below. */
+	memset(&siginfo, 0, sizeof(siginfo));
+
+	pthread_attr_init(&attr);
+	pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+
+	err = pthread_create(&thr, &attr, thread_segv_with_pkey0_disabled, NULL);
+	pthread_attr_destroy(&attr);
+	if (err) {
+		/* Without the thread nobody would ever signal cond below. */
+		errno = err;
+		perror("pthread_create");
+		exit(EXIT_FAILURE);
+	}
+
+	/* Wait until sigsegv_handler() has published the fault details. */
+	pthread_mutex_lock(&mutex);
+	while (siginfo.si_signo == 0)
+		pthread_cond_wait(&cond, &mutex);
+	pthread_mutex_unlock(&mutex);
+
+	ksft_test_result(siginfo.si_signo == SIGSEGV &&
+			 siginfo.si_code == SEGV_MAPERR &&
+			 siginfo.si_addr == (void *)1,
+			 "%s\n", __func__);
+}
+
+/*
+ * Verify that the sigsegv handler is invoked when pkey 0 is disabled.
+ * Note that the new thread stack and the alternate signal stack are
+ * protected by MPK 0, which renders them inaccessible when MPK 0
+ * is disabled. So just the return from the thread should cause a
+ * segfault with SEGV_PKUERR.
+ */
+static void test_sigsegv_handler_cannot_access_stack(void)
+{
+	struct sigaction sa = {0};
+	pthread_attr_t attr;
+	pthread_t thr;
+	int err;
+
+	sa.sa_flags = SA_SIGINFO;
+
+	sa.sa_sigaction = sigsegv_handler;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(SIGSEGV, &sa, NULL) == -1) {
+		perror("sigaction");
+		exit(EXIT_FAILURE);
+	}
+
+	/* si_signo == 0 is the "handler has not run yet" marker used below. */
+	memset(&siginfo, 0, sizeof(siginfo));
+
+	pthread_attr_init(&attr);
+	pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+
+	err = pthread_create(&thr, &attr, thread_segv_pkuerr_stack, NULL);
+	pthread_attr_destroy(&attr);
+	if (err) {
+		/* Without the thread nobody would ever signal cond below. */
+		errno = err;
+		perror("pthread_create");
+		exit(EXIT_FAILURE);
+	}
+
+	/* Wait until sigsegv_handler() has published the fault details. */
+	pthread_mutex_lock(&mutex);
+	while (siginfo.si_signo == 0)
+		pthread_cond_wait(&cond, &mutex);
+	pthread_mutex_unlock(&mutex);
+
+	ksft_test_result(siginfo.si_signo == SIGSEGV &&
+			 siginfo.si_code == SEGV_PKUERR,
+			 "%s\n", __func__);
+}
+
+/*
+ * Verify that the sigsegv handler that uses an alternate signal stack
+ * is correctly invoked for a thread which uses a non-zero MPK to protect
+ * its own stack, and disables all other MPKs (including 0).
+ *
+ * The thread is created with a raw clone() on a freshly mapped stack that
+ * is pkey_mprotect()ed with a newly allocated key; it then runs
+ * thread_segv_maperr_ptr(). The test passes when the handler reports
+ * SIGSEGV with SEGV_MAPERR for address 1.
+ */
+static void test_sigsegv_handler_with_different_pkey_for_stack(void)
+{
+	struct sigaction sa = {0};
+	/* static: the cloned child refers to it while running on its own stack */
+	static stack_t sigstack;
+	void *stack;
+	int pkey;
+	int parent_pid = 0;
+	int child_pid = 0;
+
+	sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
+
+	sa.sa_sigaction = sigsegv_handler;
+
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(SIGSEGV, &sa, NULL) == -1) {
+		perror("sigaction");
+		exit(EXIT_FAILURE);
+	}
+
+	stack = mmap(0, STACK_SIZE, PROT_READ | PROT_WRITE,
+		     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+	assert(stack != MAP_FAILED);
+
+	/* Allow access to MPK 0 and MPK 1 */
+	__write_pkey_reg(0x55555550);
+
+	/* Protect the new stack with MPK 1 */
+	pkey = pkey_alloc(0, 0);
+	pkey_mprotect(stack, STACK_SIZE, PROT_READ | PROT_WRITE, pkey);
+
+	/* Set up alternate signal stack that will use the default MPK */
+	sigstack.ss_sp = mmap(0, STACK_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC,
+			      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	assert(sigstack.ss_sp != MAP_FAILED);
+	sigstack.ss_flags = 0;
+	sigstack.ss_size = STACK_SIZE;
+
+	/* si_signo == 0 is the "handler has not run yet" marker used below. */
+	memset(&siginfo, 0, sizeof(siginfo));
+
+	/* Use clone to avoid newer glibcs using rseq on new threads */
+	long ret = syscall_raw(SYS_clone,
+			       CLONE_VM | CLONE_FS | CLONE_FILES |
+			       CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM |
+			       CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID |
+			       CLONE_DETACHED,
+			       (long) ((char *)(stack) + STACK_SIZE),
+			       (long) &parent_pid,
+			       (long) &child_pid, 0, 0);
+
+	if (ret < 0) {
+		/*
+		 * No thread was created, so nobody would ever signal cond
+		 * below: bail out instead of waiting forever.
+		 */
+		errno = -ret;
+		perror("clone");
+		exit(EXIT_FAILURE);
+	} else if (ret == 0) {
+		/* Child thread: fault with pkey 0 disabled, then exit. */
+		thread_segv_maperr_ptr(&sigstack);
+		syscall_raw(SYS_exit, 0, 0, 0, 0, 0, 0);
+	}
+
+	/* Wait until sigsegv_handler() has published the fault details. */
+	pthread_mutex_lock(&mutex);
+	while (siginfo.si_signo == 0)
+		pthread_cond_wait(&cond, &mutex);
+	pthread_mutex_unlock(&mutex);
+
+	ksft_test_result(siginfo.si_signo == SIGSEGV &&
+			 siginfo.si_code == SEGV_MAPERR &&
+			 siginfo.si_addr == (void *)1,
+			 "%s\n", __func__);
+}
+
+/*
+ * Verify that the PKRU value set by the application is correctly
+ * restored upon return from signal handling.
+ *
+ * The test writes 0x45454544 to the pkey register, raises SIGUSR1 in the
+ * calling thread, and passes when the register still reads back as
+ * 0x45454544 and sigusr1_handler() recorded SIGUSR1.
+ */
+static void test_pkru_preserved_after_sigusr1(void)
+{
+ struct sigaction sa;
+ unsigned long pkru = 0x45454544;
+
+ sa.sa_flags = SA_SIGINFO;
+
+ sa.sa_sigaction = sigusr1_handler;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(SIGUSR1, &sa, NULL) == -1) {
+ perror("sigaction");
+ exit(EXIT_FAILURE);
+ }
+ /* si_signo == 0 is the "handler has not run yet" marker used below. */
+ memset(&siginfo, 0, sizeof(siginfo));
+
+ __write_pkey_reg(pkru);
+ /* Deliver SIGUSR1 to this thread; sigusr1_handler() fills in siginfo. */
+ raise(SIGUSR1);
+
+ pthread_mutex_lock(&mutex);
+ while (siginfo.si_signo == 0)
+ pthread_cond_wait(&cond, &mutex);
+ pthread_mutex_unlock(&mutex);
+
+ /* Ensure the pkru value is the same after returning from signal. */
+ ksft_test_result(pkru == __read_pkey_reg() &&
+ siginfo.si_signo == SIGUSR1,
+ "%s\n", __func__);
+}
+
+/*
+ * Thread body for test_pkru_sigreturn(): installs the stack_t passed in
+ * @ptr as the alternate signal stack, disables MPK 0, sends SIGUSR2 to
+ * itself, and after the handler returns writes a message to stdout and
+ * exits the thread. Never returns to its caller.
+ */
+static noinline void *thread_sigusr2_self(void *ptr)
+{
+	/*
+	 * A const char array like "Resuming after SIGUSR2" won't be stored on
+	 * the stack and the code could access it via an offset from the program
+	 * counter. This makes sure it's on the function's stack frame.
+	 */
+	char str[] = {'R', 'e', 's', 'u', 'm', 'i', 'n', 'g', ' ',
+		      'a', 'f', 't', 'e', 'r', ' ',
+		      'S', 'I', 'G', 'U', 'S', 'R', '2',
+		      '.', '.', '.', '\n', '\0'};
+	stack_t *stack = ptr;
+
+	/*
+	 * Setup alternate signal stack, which should be pkey_mprotect()ed by
+	 * MPK 0. The thread's stack cannot be used for signals because it is
+	 * not accessible by the default init_pkru value of 0x55555554.
+	 *
+	 * Use syscall_raw() like every other system call issued from the
+	 * cloned threads in this file, rather than the libc syscall() wrapper.
+	 */
+	syscall_raw(SYS_sigaltstack, (long)stack, 0, 0, 0, 0, 0);
+
+	/* Disable MPK 0. Only MPK 2 is enabled. */
+	__write_pkey_reg(0x55555545);
+
+	raise_sigusr2();
+
+	/* Do something, to show the thread resumed execution after the signal */
+	syscall_raw(SYS_write, 1, (long) str, sizeof(str) - 1, 0, 0, 0);
+
+	/*
+	 * We can't return to test_pkru_sigreturn because it
+	 * will attempt to use a %rbp value which is on the stack
+	 * of the main thread.
+	 */
+	syscall_raw(SYS_exit, 0, 0, 0, 0, 0, 0);
+	return NULL;
+}
+
+/*
+ * Verify that sigreturn is able to restore altstack even if the thread had
+ * disabled pkey 0.
+ *
+ * A thread is created with a raw clone() on a stack protected by a newly
+ * allocated key and runs thread_sigusr2_self(). SIGSEGV is reset to its
+ * default action, so any fault in that flow kills the test program; the
+ * test passes once the thread is observed to have exited.
+ */
+static void test_pkru_sigreturn(void)
+{
+	struct sigaction sa = {0};
+	/* static: the cloned child refers to it while running on its own stack */
+	static stack_t sigstack;
+	void *stack;
+	int pkey;
+	int parent_pid = 0;
+	int child_pid = 0;
+
+	sa.sa_handler = SIG_DFL;
+	sa.sa_flags = 0;
+	sigemptyset(&sa.sa_mask);
+
+	/*
+	 * For this testcase, we do not want to handle SIGSEGV. Reset handler
+	 * to default so that the application can crash if it receives SIGSEGV.
+	 */
+	if (sigaction(SIGSEGV, &sa, NULL) == -1) {
+		perror("sigaction");
+		exit(EXIT_FAILURE);
+	}
+
+	sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
+	sa.sa_sigaction = sigusr2_handler;
+	sigemptyset(&sa.sa_mask);
+
+	if (sigaction(SIGUSR2, &sa, NULL) == -1) {
+		perror("sigaction");
+		exit(EXIT_FAILURE);
+	}
+
+	stack = mmap(0, STACK_SIZE, PROT_READ | PROT_WRITE,
+		     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+	assert(stack != MAP_FAILED);
+
+	/*
+	 * Allow access to MPK 0 and MPK 2. The child thread (to be created
+	 * later in this flow) will have its stack protected by MPK 2, whereas
+	 * the current thread's stack is protected by the default MPK 0. Hence
+	 * both need to be enabled.
+	 */
+	__write_pkey_reg(0x55555544);
+
+	/* Protect the stack with MPK 2 */
+	pkey = pkey_alloc(0, 0);
+	pkey_mprotect(stack, STACK_SIZE, PROT_READ | PROT_WRITE, pkey);
+
+	/* Set up alternate signal stack that will use the default MPK */
+	sigstack.ss_sp = mmap(0, STACK_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC,
+			      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	assert(sigstack.ss_sp != MAP_FAILED);
+	sigstack.ss_flags = 0;
+	sigstack.ss_size = STACK_SIZE;
+
+	/* Use clone to avoid newer glibcs using rseq on new threads */
+	long ret = syscall_raw(SYS_clone,
+			       CLONE_VM | CLONE_FS | CLONE_FILES |
+			       CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM |
+			       CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID |
+			       CLONE_DETACHED,
+			       (long) ((char *)(stack) + STACK_SIZE),
+			       (long) &parent_pid,
+			       (long) &child_pid, 0, 0);
+
+	if (ret < 0) {
+		/*
+		 * Bail out: with a negative "tid" the tkill() probe below
+		 * returns -EINVAL right away, which would end the loop and
+		 * report a pass although no thread ever ran.
+		 */
+		errno = -ret;
+		perror("clone");
+		exit(EXIT_FAILURE);
+	} else if (ret == 0) {
+		/* Child thread: take SIGUSR2 with pkey 0 disabled, then exit. */
+		thread_sigusr2_self(&sigstack);
+		syscall_raw(SYS_exit, 0, 0, 0, 0, 0, 0);
+	}
+
+	child_pid = ret;
+	/*
+	 * Check that thread exited.
+	 * NOTE(review): child_pid is also the CLONE_CHILD_CLEARTID word, so it
+	 * presumably gets zeroed when the thread exits; that would be why
+	 * -EINVAL is accepted next to -ESRCH -- confirm.
+	 */
+	do {
+		sched_yield();
+		ret = syscall_raw(SYS_tkill, child_pid, 0, 0, 0, 0, 0);
+	} while (ret != -ESRCH && ret != -EINVAL);
+
+	ksft_test_result_pass("%s\n", __func__);
+}
+/* Test cases, run in this order by main(). */
+static void (*pkey_tests[])(void) = {
+ test_sigsegv_handler_with_pkey0_disabled,
+ test_sigsegv_handler_cannot_access_stack,
+ test_sigsegv_handler_with_different_pkey_for_stack,
+ test_pkru_preserved_after_sigusr1,
+ test_pkru_sigreturn
+};
+
+/*
+ * Print the kselftest header, announce one planned result per entry of
+ * pkey_tests[], run the entries in table order and finish the report.
+ */
+int main(int argc, char *argv[])
+{
+	int idx = 0;
+
+	ksft_print_header();
+	ksft_set_plan(ARRAY_SIZE(pkey_tests));
+
+	while (idx < ARRAY_SIZE(pkey_tests)) {
+		pkey_tests[idx]();
+		idx++;
+	}
+
+	ksft_finished();
+	return 0;
+}
diff --git a/tools/testing/selftests/mm/protection_keys.c b/tools/testing/selftests/mm/protection_keys.c
index eaa6d1fc5328..4990f7ab4cb7 100644
--- a/tools/testing/selftests/mm/protection_keys.c
+++ b/tools/testing/selftests/mm/protection_keys.c
@@ -147,7 +147,7 @@ void abort_hooks(void)
* will then fault, which makes sure that the fault code handles
* execute-only memory properly.
*/
-#ifdef __powerpc64__
+#if defined(__powerpc64__) || defined(__aarch64__)
/* This way, both 4K and 64K alignment are maintained */
__attribute__((__aligned__(65536)))
#else
@@ -212,7 +212,6 @@ void pkey_disable_set(int pkey, int flags)
unsigned long syscall_flags = 0;
int ret;
int pkey_rights;
- u64 orig_pkey_reg = read_pkey_reg();
dprintf1("START->%s(%d, 0x%x)\n", __func__,
pkey, flags);
@@ -242,8 +241,6 @@ void pkey_disable_set(int pkey, int flags)
dprintf1("%s(%d) pkey_reg: 0x%016llx\n",
__func__, pkey, read_pkey_reg());
- if (flags)
- pkey_assert(read_pkey_reg() >= orig_pkey_reg);
dprintf1("END<---%s(%d, 0x%x)\n", __func__,
pkey, flags);
}
@@ -253,7 +250,6 @@ void pkey_disable_clear(int pkey, int flags)
unsigned long syscall_flags = 0;
int ret;
int pkey_rights = hw_pkey_get(pkey, syscall_flags);
- u64 orig_pkey_reg = read_pkey_reg();
pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
@@ -273,8 +269,6 @@ void pkey_disable_clear(int pkey, int flags)
dprintf1("%s(%d) pkey_reg: 0x%016llx\n", __func__,
pkey, read_pkey_reg());
- if (flags)
- assert(read_pkey_reg() <= orig_pkey_reg);
}
void pkey_write_allow(int pkey)
@@ -314,7 +308,9 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
ucontext_t *uctxt = vucontext;
int trapno;
unsigned long ip;
+#ifdef MCONTEXT_FPREGS
char *fpregs;
+#endif
#if defined(__i386__) || defined(__x86_64__) /* arch */
u32 *pkey_reg_ptr;
int pkey_reg_offset;
@@ -328,9 +324,11 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
__func__, __LINE__,
__read_pkey_reg(), shadow_pkey_reg);
- trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO];
- ip = uctxt->uc_mcontext.gregs[REG_IP_IDX];
+ trapno = MCONTEXT_TRAPNO(uctxt->uc_mcontext);
+ ip = MCONTEXT_IP(uctxt->uc_mcontext);
+#ifdef MCONTEXT_FPREGS
fpregs = (char *) uctxt->uc_mcontext.fpregs;
+#endif
dprintf2("%s() trapno: %d ip: 0x%016lx info->si_code: %s/%d\n",
__func__, trapno, ip, si_code_str(si->si_code),
@@ -359,7 +357,9 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
#endif /* arch */
dprintf1("siginfo: %p\n", si);
+#ifdef MCONTEXT_FPREGS
dprintf1(" fpregs: %p\n", fpregs);
+#endif
if ((si->si_code == SEGV_MAPERR) ||
(si->si_code == SEGV_ACCERR) ||
@@ -389,6 +389,8 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
#elif defined(__powerpc64__) /* arch */
/* restore access and let the faulting instruction continue */
pkey_access_allow(siginfo_pkey);
+#elif defined(__aarch64__)
+ aarch64_write_signal_pkey(uctxt, PKEY_ALLOW_ALL);
#endif /* arch */
pkey_faults++;
dprintf1("<<<<==================================================\n");
@@ -902,7 +904,9 @@ void expected_pkey_fault(int pkey)
* test program continue. We now have to restore it.
*/
if (__read_pkey_reg() != 0)
-#else /* arch */
+#elif defined(__aarch64__)
+ if (__read_pkey_reg() != PKEY_ALLOW_ALL)
+#else
if (__read_pkey_reg() != shadow_pkey_reg)
#endif /* arch */
pkey_assert(0);
@@ -950,16 +954,6 @@ void close_test_fds(void)
nr_test_fds = 0;
}
-#define barrier() __asm__ __volatile__("": : :"memory")
-__attribute__((noinline)) int read_ptr(int *ptr)
-{
- /*
- * Keep GCC from optimizing this away somehow
- */
- barrier();
- return *ptr;
-}
-
void test_pkey_alloc_free_attach_pkey0(int *ptr, u16 pkey)
{
int i, err;
@@ -1492,6 +1486,11 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
lots_o_noops_around_write(&scratch);
do_not_expect_pkey_fault("executing on PROT_EXEC memory");
expect_fault_on_read_execonly_key(p1, pkey);
+
+ // Reset back to PROT_EXEC | PROT_READ for architectures that support
+ // non-PKEY execute-only permissions.
+ ret = mprotect_pkey(p1, PAGE_SIZE, PROT_EXEC | PROT_READ, (u64)pkey);
+ pkey_assert(!ret);
}
void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
@@ -1665,6 +1664,84 @@ void test_ptrace_modifies_pkru(int *ptr, u16 pkey)
}
#endif
+#if defined(__aarch64__)
+/*
+ * arm64 variant: check that a tracer can read and modify the tracee's pkey
+ * register through the NT_ARM_POE regset, and that the tracee sees the new
+ * value once it resumes. @ptr and @pkey are unused.
+ */
+void test_ptrace_modifies_pkru(int *ptr, u16 pkey)
+{
+	pid_t child;
+	int status, ret;
+	struct iovec iov;
+	u64 trace_pkey;
+	/* Just a random pkey value.. */
+	u64 new_pkey = (POE_X << PKEY_BITS_PER_PKEY * 2) |
+			(POE_NONE << PKEY_BITS_PER_PKEY) |
+			POE_RWX;
+
+	child = fork();
+	pkey_assert(child >= 0);
+	dprintf3("[%d] fork() ret: %d\n", getpid(), child);
+	if (!child) {
+		ptrace(PTRACE_TRACEME, 0, 0, 0);
+
+		/* Stop and allow the tracer to modify the pkey register directly */
+		raise(SIGSTOP);
+
+		/*
+		 * need __read_pkey_reg() version so we do not do shadow_pkey_reg
+		 * checking
+		 */
+		if (__read_pkey_reg() != new_pkey)
+			exit(1);
+
+		raise(SIGSTOP);
+
+		exit(0);
+	}
+
+	pkey_assert(child == waitpid(child, &status, 0));
+	dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
+	pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
+
+	/* The regset payload is exactly the one 64-bit value in trace_pkey. */
+	iov.iov_base = &trace_pkey;
+	iov.iov_len = sizeof(trace_pkey);
+	ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_ARM_POE, &iov);
+	pkey_assert(ret == 0);
+	pkey_assert(trace_pkey == read_pkey_reg());
+
+	trace_pkey = new_pkey;
+
+	ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_ARM_POE, &iov);
+	pkey_assert(ret == 0);
+
+	/* Test that the modification is visible in ptrace before any execution */
+	memset(&trace_pkey, 0, sizeof(trace_pkey));
+	ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_ARM_POE, &iov);
+	pkey_assert(ret == 0);
+	pkey_assert(trace_pkey == new_pkey);
+
+	/* Execute the tracee */
+	ret = ptrace(PTRACE_CONT, child, 0, 0);
+	pkey_assert(ret == 0);
+
+	/* Test that the tracee saw the pkey register value change */
+	pkey_assert(child == waitpid(child, &status, 0));
+	dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
+	pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
+
+	/* Test that the modification is visible in ptrace after execution */
+	memset(&trace_pkey, 0, sizeof(trace_pkey));
+	ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_ARM_POE, &iov);
+	pkey_assert(ret == 0);
+	pkey_assert(trace_pkey == new_pkey);
+
+	/* Let the tracee run to completion and check that it exited with 0. */
+	ret = ptrace(PTRACE_CONT, child, 0, 0);
+	pkey_assert(ret == 0);
+	pkey_assert(child == waitpid(child, &status, 0));
+	dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
+	pkey_assert(WIFEXITED(status));
+	pkey_assert(WEXITSTATUS(status) == 0);
+}
+#endif
+
void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
{
int size = PAGE_SIZE;
@@ -1700,7 +1777,7 @@ void (*pkey_tests[])(int *ptr, u16 pkey) = {
test_pkey_syscalls_bad_args,
test_pkey_alloc_exhaust,
test_pkey_alloc_free_attach_pkey0,
-#if defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__)
test_ptrace_modifies_pkru,
#endif
};
diff --git a/tools/testing/selftests/mm/seal_elf.c b/tools/testing/selftests/mm/seal_elf.c
index 7aa1366063e4..d9f8ba8d5050 100644
--- a/tools/testing/selftests/mm/seal_elf.c
+++ b/tools/testing/selftests/mm/seal_elf.c
@@ -30,17 +30,6 @@ static int sys_mseal(void *start, size_t len)
return sret;
}
-static void *sys_mmap(void *addr, unsigned long len, unsigned long prot,
- unsigned long flags, unsigned long fd, unsigned long offset)
-{
- void *sret;
-
- errno = 0;
- sret = (void *) syscall(__NR_mmap, addr, len, prot,
- flags, fd, offset);
- return sret;
-}
-
static inline int sys_mprotect(void *ptr, size_t size, unsigned long prot)
{
int sret;
@@ -56,7 +45,7 @@ static bool seal_support(void)
void *ptr;
unsigned long page_size = getpagesize();
- ptr = sys_mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ ptr = mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
if (ptr == (void *) -1)
return false;
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 666ab7d9390b..1c04c780db66 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -17,6 +17,7 @@ ipv6_flowlabel
ipv6_flowlabel_mgr
log.txt
msg_zerocopy
+ncdevmem
nettest
psock_fanout
psock_snd
@@ -34,6 +35,7 @@ scm_pidfd
scm_rights
sk_bind_sendto_listen
sk_connect_zero_addr
+sk_so_peek_off
socket
so_incoming_cpu
so_netns_cookie
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 8eaffd7a641c..649f1fe0dc46 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -56,7 +56,7 @@ TEST_PROGS += ip_local_port_range.sh
TEST_PROGS += rps_default_mask.sh
TEST_PROGS += big_tcp.sh
TEST_PROGS += netns-sysctl.sh
-TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh
+TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh xfrm_policy_add_speed.sh
TEST_GEN_FILES = socket nettest
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite
@@ -80,12 +80,14 @@ TEST_PROGS += io_uring_zerocopy_tx.sh
TEST_GEN_FILES += bind_bhash
TEST_GEN_PROGS += sk_bind_sendto_listen
TEST_GEN_PROGS += sk_connect_zero_addr
+TEST_GEN_PROGS += sk_so_peek_off
TEST_PROGS += test_ingress_egress_chaining.sh
TEST_GEN_PROGS += so_incoming_cpu
TEST_PROGS += sctp_vrf.sh
TEST_GEN_FILES += sctp_hello
TEST_GEN_FILES += ip_local_port_range
-TEST_GEN_FILES += bind_wildcard
+TEST_GEN_PROGS += bind_wildcard
+TEST_GEN_PROGS += bind_timewait
TEST_PROGS += test_vxlan_mdb.sh
TEST_PROGS += test_bridge_neigh_suppress.sh
TEST_PROGS += test_vxlan_nolocalbypass.sh
@@ -95,6 +97,11 @@ TEST_PROGS += fq_band_pktlimit.sh
TEST_PROGS += vlan_hw_filter.sh
TEST_PROGS += bpf_offload.py
+# YNL files, must be before "include ../lib.mk"
+EXTRA_CLEAN += $(OUTPUT)/libynl.a
+YNL_GEN_FILES := ncdevmem
+TEST_GEN_FILES += $(YNL_GEN_FILES)
+
TEST_FILES := settings
TEST_FILES += in_netns.sh lib.sh net_helper.sh setup_loopback.sh setup_veth.sh
@@ -104,6 +111,10 @@ TEST_INCLUDES := forwarding/lib.sh
include ../lib.mk
+# YNL build
+YNL_GENS := netdev
+include ynl.mk
+
$(OUTPUT)/epoll_busy_poll: LDLIBS += -lcap
$(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma
$(OUTPUT)/tcp_mmap: LDLIBS += -lpthread -lcrypto
diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c
index 535eb2c3d7d1..3ed3882a93b8 100644
--- a/tools/testing/selftests/net/af_unix/msg_oob.c
+++ b/tools/testing/selftests/net/af_unix/msg_oob.c
@@ -525,6 +525,29 @@ TEST_F(msg_oob, ex_oob_drop_2)
}
}
+TEST_F(msg_oob, ex_oob_oob)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+ /* Receive "x" back with MSG_OOB. */
+ recvpair("x", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(true);
+ /* Send a second MSG_OOB byte after the first one has been received. */
+ sendpair("y", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+ /* NOTE(review): presumably a recv with flags 0 is expected to fail with EAGAIN here -- confirm against recvpair(). */
+ recvpair("", -EAGAIN, 1, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+ /* NOTE(review): presumably a MSG_OOB recv is then expected to fail with EINVAL -- confirm against recvpair(). */
+ recvpair("", -EINVAL, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
TEST_F(msg_oob, ex_oob_ahead_break)
{
sendpair("hello", 5, MSG_OOB);
diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh
index 386ebd829df5..899dbad0104b 100755
--- a/tools/testing/selftests/net/fcnal-test.sh
+++ b/tools/testing/selftests/net/fcnal-test.sh
@@ -4304,14 +4304,7 @@ elif [ "$TESTS" = "ipv6" ]; then
TESTS="$TESTS_IPV6"
fi
-# nettest can be run from PATH or from same directory as this selftest
-if ! which nettest >/dev/null; then
- PATH=$PWD:$PATH
- if ! which nettest >/dev/null; then
- echo "'nettest' command not found; skipping tests"
- exit $ksft_skip
- fi
-fi
+check_gen_prog "nettest"
declare -i nfail=0
declare -i nsuccess=0
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index ac0b2c6a5761..77c83d9508d3 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -78,7 +78,12 @@ log_test()
else
ret=1
nfail=$((nfail+1))
- printf "TEST: %-60s [FAIL]\n" "${msg}"
+ if [[ $rc -eq $ksft_skip ]]; then
+ printf "TEST: %-60s [SKIP]\n" "${msg}"
+ else
+ printf "TEST: %-60s [FAIL]\n" "${msg}"
+ fi
+
if [ "$VERBOSE" = "1" ]; then
echo " rc=$rc, expected $expected"
fi
@@ -923,6 +928,29 @@ ipv6_grp_fcnal()
ipv6_grp_refs
log_test $? 0 "Nexthop group replace refcounts"
+
+ #
+ # 16-bit weights.
+ #
+ run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1"
+ run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+ run_cmd "$IP nexthop add id 64 via 2001:db8:91::4 dev veth1"
+ run_cmd "$IP nexthop add id 65 via 2001:db8:91::5 dev veth1"
+ run_cmd "$IP nexthop add id 66 dev veth1"
+
+ run_cmd "$IP nexthop add id 103 group 62,1000"
+ if [[ $? == 0 ]]; then
+ local GRP="id 103 group 62,254/63,255/64,256/65,257/66,65535"
+ run_cmd "$IP nexthop replace $GRP"
+ check_nexthop "id 103" "$GRP"
+ rc=$?
+ else
+ rc=$ksft_skip
+ fi
+
+ $IP nexthop flush >/dev/null 2>&1
+
+ log_test $rc 0 "16-bit weights"
}
ipv6_res_grp_fcnal()
@@ -987,6 +1015,31 @@ ipv6_res_grp_fcnal()
check_nexthop_bucket "list id 102" \
"id 102 index 0 nhid 63 id 102 index 1 nhid 62 id 102 index 2 nhid 62 id 102 index 3 nhid 62"
log_test $? 0 "Nexthop buckets updated after replace - nECMP"
+
+ #
+ # 16-bit weights.
+ #
+ run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1"
+ run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+ run_cmd "$IP nexthop add id 64 via 2001:db8:91::4 dev veth1"
+ run_cmd "$IP nexthop add id 65 via 2001:db8:91::5 dev veth1"
+ run_cmd "$IP nexthop add id 66 dev veth1"
+
+ run_cmd "$IP nexthop add id 103 group 62,1000 type resilient buckets 32"
+ if [[ $? == 0 ]]; then
+ local GRP="id 103 group 62,254/63,255/64,256/65,257/66,65535 $(:
+ )type resilient buckets 32 idle_timer 0 $(:
+ )unbalanced_timer 0"
+ run_cmd "$IP nexthop replace $GRP"
+ check_nexthop "id 103" "$GRP unbalanced_time 0"
+ rc=$?
+ else
+ rc=$ksft_skip
+ fi
+
+ $IP nexthop flush >/dev/null 2>&1
+
+ log_test $rc 0 "16-bit weights"
}
ipv6_fcnal_runtime()
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
index 7c01f58a20de..1d58b3b87465 100755
--- a/tools/testing/selftests/net/fib_rule_tests.sh
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -35,18 +35,13 @@ log_test()
local expected=$2
local msg="$3"
- $IP rule show | grep -q l3mdev
- if [ $? -eq 0 ]; then
- msg="$msg (VRF)"
- fi
-
if [ ${rc} -eq ${expected} ]; then
nsuccess=$((nsuccess+1))
- printf "\n TEST: %-60s [ OK ]\n" "${msg}"
+ printf " TEST: %-60s [ OK ]\n" "${msg}"
else
ret=1
nfail=$((nfail+1))
- printf "\n TEST: %-60s [FAIL]\n" "${msg}"
+ printf " TEST: %-60s [FAIL]\n" "${msg}"
if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
echo
echo "hit enter to continue, 'q' to quit"
@@ -56,39 +51,6 @@ log_test()
fi
}
-log_section()
-{
- echo
- echo "######################################################################"
- echo "TEST SECTION: $*"
- echo "######################################################################"
-}
-
-check_nettest()
-{
- if which nettest > /dev/null 2>&1; then
- return 0
- fi
-
- # Add the selftest directory to PATH if not already done
- if [ "${SELFTEST_PATH}" = "" ]; then
- SELFTEST_PATH="$(dirname $0)"
- PATH="${PATH}:${SELFTEST_PATH}"
-
- # Now retry with the new path
- if which nettest > /dev/null 2>&1; then
- return 0
- fi
-
- if [ "${ret}" -eq 0 ]; then
- ret="${ksft_skip}"
- fi
- echo "nettest not found (try 'make -C ${SELFTEST_PATH} nettest')"
- fi
-
- return 1
-}
-
setup()
{
set -e
@@ -187,12 +149,17 @@ fib_rule6_test_match_n_redirect()
{
local match="$1"
local getmatch="$2"
- local description="$3"
+ local getnomatch="$3"
+ local description="$4"
+ local nomatch_description="$5"
$IP -6 rule add $match table $RTABLE
$IP -6 route get $GW_IP6 $getmatch | grep -q "table $RTABLE"
log_test $? 0 "rule6 check: $description"
+ $IP -6 route get $GW_IP6 $getnomatch 2>&1 | grep -q "table $RTABLE"
+ log_test $? 1 "rule6 check: $nomatch_description"
+
fib_rule6_del_by_pref "$match"
log_test $? 0 "rule6 del by pref: $description"
}
@@ -213,18 +180,27 @@ fib_rule6_test_reject()
fib_rule6_test()
{
+ local ext_name=$1; shift
+ local getnomatch
local getmatch
local match
local cnt
+ echo
+ echo "IPv6 FIB rule tests $ext_name"
+
# setup the fib rule redirect route
$IP -6 route add table $RTABLE default via $GW_IP6 dev $DEV onlink
match="oif $DEV"
- fib_rule6_test_match_n_redirect "$match" "$match" "oif redirect to table"
+ getnomatch="oif lo"
+ fib_rule6_test_match_n_redirect "$match" "$match" "$getnomatch" \
+ "oif redirect to table" "oif no redirect to table"
match="from $SRC_IP6 iif $DEV"
- fib_rule6_test_match_n_redirect "$match" "$match" "iif redirect to table"
+ getnomatch="from $SRC_IP6 iif lo"
+ fib_rule6_test_match_n_redirect "$match" "$match" "$getnomatch" \
+ "iif redirect to table" "iif no redirect to table"
# Reject dsfield (tos) options which have ECN bits set
for cnt in $(seq 1 3); do
@@ -238,44 +214,89 @@ fib_rule6_test()
# Using option 'tos' instead of 'dsfield' as old iproute2
# versions don't support 'dsfield' in ip rule show.
getmatch="tos $cnt"
+ getnomatch="tos 0x20"
fib_rule6_test_match_n_redirect "$match" "$getmatch" \
- "$getmatch redirect to table"
+ "$getnomatch" "$getmatch redirect to table" \
+ "$getnomatch no redirect to table"
+ done
+
+ # Re-test TOS matching, but with input routes since they are handled
+ # differently from output routes.
+ match="tos 0x10"
+ for cnt in "0x10" "0x11" "0x12" "0x13"; do
+ getmatch="tos $cnt"
+ getnomatch="tos 0x20"
+ fib_rule6_test_match_n_redirect "$match" \
+ "from $SRC_IP6 iif $DEV $getmatch" \
+ "from $SRC_IP6 iif $DEV $getnomatch" \
+ "iif $getmatch redirect to table" \
+ "iif $getnomatch no redirect to table"
done
match="fwmark 0x64"
getmatch="mark 0x64"
- fib_rule6_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table"
+ getnomatch="mark 0x63"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" "$getnomatch" \
+ "fwmark redirect to table" "fwmark no redirect to table"
fib_check_iproute_support "uidrange" "uid"
if [ $? -eq 0 ]; then
match="uidrange 100-100"
getmatch="uid 100"
- fib_rule6_test_match_n_redirect "$match" "$getmatch" "uid redirect to table"
+ getnomatch="uid 101"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "uid redirect to table" \
+ "uid no redirect to table"
fi
fib_check_iproute_support "sport" "sport"
if [ $? -eq 0 ]; then
match="sport 666 dport 777"
- fib_rule6_test_match_n_redirect "$match" "$match" "sport and dport redirect to table"
+ getnomatch="sport 667 dport 778"
+ fib_rule6_test_match_n_redirect "$match" "$match" \
+ "$getnomatch" "sport and dport redirect to table" \
+ "sport and dport no redirect to table"
fi
fib_check_iproute_support "ipproto" "ipproto"
if [ $? -eq 0 ]; then
match="ipproto tcp"
- fib_rule6_test_match_n_redirect "$match" "$match" "ipproto match"
+ getnomatch="ipproto udp"
+ fib_rule6_test_match_n_redirect "$match" "$match" \
+ "$getnomatch" "ipproto tcp match" "ipproto udp no match"
fi
fib_check_iproute_support "ipproto" "ipproto"
if [ $? -eq 0 ]; then
match="ipproto ipv6-icmp"
- fib_rule6_test_match_n_redirect "$match" "$match" "ipproto ipv6-icmp match"
+ getnomatch="ipproto tcp"
+ fib_rule6_test_match_n_redirect "$match" "$match" \
+ "$getnomatch" "ipproto ipv6-icmp match" \
+ "ipproto ipv6-tcp no match"
+ fi
+
+ fib_check_iproute_support "dscp" "tos"
+ if [ $? -eq 0 ]; then
+ match="dscp 0x3f"
+ getmatch="tos 0xfc"
+ getnomatch="tos 0xf4"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "dscp redirect to table" \
+ "dscp no redirect to table"
+
+ match="dscp 0x3f"
+ getmatch="from $SRC_IP6 iif $DEV tos 0xfc"
+ getnomatch="from $SRC_IP6 iif $DEV tos 0xf4"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "iif dscp redirect to table" \
+ "iif dscp no redirect to table"
fi
}
fib_rule6_vrf_test()
{
setup_vrf
- fib_rule6_test
+ fib_rule6_test "- with VRF"
cleanup_vrf
}
@@ -285,10 +306,8 @@ fib_rule6_connect_test()
{
local dsfield
- if ! check_nettest; then
- echo "SKIP: Could not run test without nettest tool"
- return
- fi
+ echo
+ echo "IPv6 FIB rule connect tests"
setup_peer
$IP -6 rule add dsfield 0x04 table $RTABLE_PEER
@@ -306,7 +325,45 @@ fib_rule6_connect_test()
log_test $? 0 "rule6 dsfield tcp connect (dsfield ${dsfield})"
done
+ # Check that UDP and TCP connections fail when using a DS Field that
+ # does not match the previously configured FIB rule.
+ nettest -q -6 -B -t 5 -N $testns -O $peerns -U -D \
+ -Q 0x20 -l 2001:db8::1:11 -r 2001:db8::1:11
+ log_test $? 1 "rule6 dsfield udp no connect (dsfield 0x20)"
+
+ nettest -q -6 -B -t 5 -N $testns -O $peerns -Q 0x20 \
+ -l 2001:db8::1:11 -r 2001:db8::1:11
+ log_test $? 1 "rule6 dsfield tcp no connect (dsfield 0x20)"
+
$IP -6 rule del dsfield 0x04 table $RTABLE_PEER
+
+ ip rule help 2>&1 | grep -q dscp
+ if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 iprule too old, missing dscp match"
+ cleanup_peer
+ return
+ fi
+
+ $IP -6 rule add dscp 0x3f table $RTABLE_PEER
+
+ nettest -q -6 -B -t 5 -N $testns -O $peerns -U -D -Q 0xfc \
+ -l 2001:db8::1:11 -r 2001:db8::1:11
+ log_test $? 0 "rule6 dscp udp connect"
+
+ nettest -q -6 -B -t 5 -N $testns -O $peerns -Q 0xfc \
+ -l 2001:db8::1:11 -r 2001:db8::1:11
+ log_test $? 0 "rule6 dscp tcp connect"
+
+ nettest -q -6 -B -t 5 -N $testns -O $peerns -U -D -Q 0xf4 \
+ -l 2001:db8::1:11 -r 2001:db8::1:11
+ log_test $? 1 "rule6 dscp udp no connect"
+
+ nettest -q -6 -B -t 5 -N $testns -O $peerns -Q 0xf4 \
+ -l 2001:db8::1:11 -r 2001:db8::1:11
+ log_test $? 1 "rule6 dscp tcp no connect"
+
+ $IP -6 rule del dscp 0x3f table $RTABLE_PEER
+
cleanup_peer
}
@@ -326,12 +383,17 @@ fib_rule4_test_match_n_redirect()
{
local match="$1"
local getmatch="$2"
- local description="$3"
+ local getnomatch="$3"
+ local description="$4"
+ local nomatch_description="$5"
$IP rule add $match table $RTABLE
$IP route get $GW_IP4 $getmatch | grep -q "table $RTABLE"
log_test $? 0 "rule4 check: $description"
+ $IP route get $GW_IP4 $getnomatch 2>&1 | grep -q "table $RTABLE"
+ log_test $? 1 "rule4 check: $nomatch_description"
+
fib_rule4_del_by_pref "$match"
log_test $? 0 "rule4 del by pref: $description"
}
@@ -352,23 +414,31 @@ fib_rule4_test_reject()
fib_rule4_test()
{
+ local ext_name=$1; shift
+ local getnomatch
local getmatch
local match
local cnt
+ echo
+ echo "IPv4 FIB rule tests $ext_name"
+
# setup the fib rule redirect route
$IP route add table $RTABLE default via $GW_IP4 dev $DEV onlink
match="oif $DEV"
- fib_rule4_test_match_n_redirect "$match" "$match" "oif redirect to table"
+ getnomatch="oif lo"
+ fib_rule4_test_match_n_redirect "$match" "$match" "$getnomatch" \
+ "oif redirect to table" "oif no redirect to table"
- # need enable forwarding and disable rp_filter temporarily as all the
- # addresses are in the same subnet and egress device == ingress device.
+ # Enable forwarding and disable rp_filter as all the addresses are in
+ # the same subnet and egress device == ingress device.
ip netns exec $testns sysctl -qw net.ipv4.ip_forward=1
ip netns exec $testns sysctl -qw net.ipv4.conf.$DEV.rp_filter=0
match="from $SRC_IP iif $DEV"
- fib_rule4_test_match_n_redirect "$match" "$match" "iif redirect to table"
- ip netns exec $testns sysctl -qw net.ipv4.ip_forward=0
+ getnomatch="from $SRC_IP iif lo"
+ fib_rule4_test_match_n_redirect "$match" "$match" "$getnomatch" \
+ "iif redirect to table" "iif no redirect to table"
# Reject dsfield (tos) options which have ECN bits set
for cnt in $(seq 1 3); do
@@ -382,44 +452,90 @@ fib_rule4_test()
# Using option 'tos' instead of 'dsfield' as old iproute2
# versions don't support 'dsfield' in ip rule show.
getmatch="tos $cnt"
+ getnomatch="tos 0x20"
fib_rule4_test_match_n_redirect "$match" "$getmatch" \
- "$getmatch redirect to table"
+ "$getnomatch" "$getmatch redirect to table" \
+ "$getnomatch no redirect to table"
+ done
+
+ # Re-test TOS matching, but with input routes since they are handled
+ # differently from output routes.
+ match="tos 0x10"
+ for cnt in "0x10" "0x11" "0x12" "0x13"; do
+ getmatch="tos $cnt"
+ getnomatch="tos 0x20"
+ fib_rule4_test_match_n_redirect "$match" \
+ "from $SRC_IP iif $DEV $getmatch" \
+ "from $SRC_IP iif $DEV $getnomatch" \
+ "iif $getmatch redirect to table" \
+ "iif $getnomatch no redirect to table"
done
match="fwmark 0x64"
getmatch="mark 0x64"
- fib_rule4_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table"
+ getnomatch="mark 0x63"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" "$getnomatch" \
+ "fwmark redirect to table" "fwmark no redirect to table"
fib_check_iproute_support "uidrange" "uid"
if [ $? -eq 0 ]; then
match="uidrange 100-100"
getmatch="uid 100"
- fib_rule4_test_match_n_redirect "$match" "$getmatch" "uid redirect to table"
+ getnomatch="uid 101"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "uid redirect to table" \
+ "uid no redirect to table"
fi
fib_check_iproute_support "sport" "sport"
if [ $? -eq 0 ]; then
match="sport 666 dport 777"
- fib_rule4_test_match_n_redirect "$match" "$match" "sport and dport redirect to table"
+ getnomatch="sport 667 dport 778"
+ fib_rule4_test_match_n_redirect "$match" "$match" \
+ "$getnomatch" "sport and dport redirect to table" \
+ "sport and dport no redirect to table"
fi
fib_check_iproute_support "ipproto" "ipproto"
if [ $? -eq 0 ]; then
match="ipproto tcp"
- fib_rule4_test_match_n_redirect "$match" "$match" "ipproto tcp match"
+ getnomatch="ipproto udp"
+ fib_rule4_test_match_n_redirect "$match" "$match" \
+ "$getnomatch" "ipproto tcp match" \
+ "ipproto udp no match"
fi
fib_check_iproute_support "ipproto" "ipproto"
if [ $? -eq 0 ]; then
match="ipproto icmp"
- fib_rule4_test_match_n_redirect "$match" "$match" "ipproto icmp match"
+ getnomatch="ipproto tcp"
+ fib_rule4_test_match_n_redirect "$match" "$match" \
+ "$getnomatch" "ipproto icmp match" \
+ "ipproto tcp no match"
+ fi
+
+ fib_check_iproute_support "dscp" "tos"
+ if [ $? -eq 0 ]; then
+ match="dscp 0x3f"
+ getmatch="tos 0xfc"
+ getnomatch="tos 0xf4"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "dscp redirect to table" \
+ "dscp no redirect to table"
+
+ match="dscp 0x3f"
+ getmatch="from $SRC_IP iif $DEV tos 0xfc"
+ getnomatch="from $SRC_IP iif $DEV tos 0xf4"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "iif dscp redirect to table" \
+ "iif dscp no redirect to table"
fi
}
fib_rule4_vrf_test()
{
setup_vrf
- fib_rule4_test
+ fib_rule4_test "- with VRF"
cleanup_vrf
}
@@ -429,10 +545,8 @@ fib_rule4_connect_test()
{
local dsfield
- if ! check_nettest; then
- echo "SKIP: Could not run test without nettest tool"
- return
- fi
+ echo
+ echo "IPv4 FIB rule connect tests"
setup_peer
$IP -4 rule add dsfield 0x04 table $RTABLE_PEER
@@ -450,16 +564,46 @@ fib_rule4_connect_test()
log_test $? 0 "rule4 dsfield tcp connect (dsfield ${dsfield})"
done
+ # Check that UDP and TCP connections fail when using a DS Field that
+ # does not match the previously configured FIB rule.
+ nettest -q -B -t 5 -N $testns -O $peerns -D -U -Q 0x20 \
+ -l 198.51.100.11 -r 198.51.100.11
+ log_test $? 1 "rule4 dsfield udp no connect (dsfield 0x20)"
+
+ nettest -q -B -t 5 -N $testns -O $peerns -Q 0x20 \
+ -l 198.51.100.11 -r 198.51.100.11
+ log_test $? 1 "rule4 dsfield tcp no connect (dsfield 0x20)"
+
$IP -4 rule del dsfield 0x04 table $RTABLE_PEER
- cleanup_peer
-}
-run_fibrule_tests()
-{
- log_section "IPv4 fib rule"
- fib_rule4_test
- log_section "IPv6 fib rule"
- fib_rule6_test
+ ip rule help 2>&1 | grep -q dscp
+ if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 iprule too old, missing dscp match"
+ cleanup_peer
+ return
+ fi
+
+ $IP -4 rule add dscp 0x3f table $RTABLE_PEER
+
+ nettest -q -B -t 5 -N $testns -O $peerns -D -U -Q 0xfc \
+ -l 198.51.100.11 -r 198.51.100.11
+ log_test $? 0 "rule4 dscp udp connect"
+
+ nettest -q -B -t 5 -N $testns -O $peerns -Q 0xfc \
+ -l 198.51.100.11 -r 198.51.100.11
+ log_test $? 0 "rule4 dscp tcp connect"
+
+ nettest -q -B -t 5 -N $testns -O $peerns -D -U -Q 0xf4 \
+ -l 198.51.100.11 -r 198.51.100.11
+ log_test $? 1 "rule4 dscp udp no connect"
+
+ nettest -q -B -t 5 -N $testns -O $peerns -Q 0xf4 \
+ -l 198.51.100.11 -r 198.51.100.11
+ log_test $? 1 "rule4 dscp tcp no connect"
+
+ $IP -4 rule del dscp 0x3f table $RTABLE_PEER
+
+ cleanup_peer
}
################################################################################
# usage
@@ -495,6 +639,8 @@ if [ ! -x "$(command -v ip)" ]; then
exit $ksft_skip
fi
+check_gen_prog "nettest"
+
# start clean
cleanup &> /dev/null
setup
diff --git a/tools/testing/selftests/net/forwarding/README b/tools/testing/selftests/net/forwarding/README
index 7fdb6a9ca543..a652429bfd53 100644
--- a/tools/testing/selftests/net/forwarding/README
+++ b/tools/testing/selftests/net/forwarding/README
@@ -6,7 +6,7 @@ to easily create and test complex environments.
Unfortunately, these namespaces can not be used with actual switching
ASICs, as their ports can not be migrated to other network namespaces
-(NETIF_F_NETNS_LOCAL) and most of them probably do not support the
+(dev->netns_local) and most of them probably do not support the
L1-separation provided by namespaces.
However, a similar kind of flexibility can be achieved by using VRFs and
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
index 64bd00fe9a4f..90f8a244ea90 100755
--- a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
@@ -1,7 +1,7 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn"
+ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn other_tpid"
NUM_NETIFS=4
CHECK_TC="yes"
source lib.sh
@@ -142,6 +142,58 @@ extern_learn()
bridge fdb del de:ad:be:ef:13:37 dev $swp1 master vlan 1 &> /dev/null
}
+other_tpid() # Send 802.1ad-over-802.1Q tagged frames from $h1 and check FDB learning and delivery to $h2.
+{
+ local mac=de:ad:be:ef:13:37 # destination MAC of the test frames; matched by the drop filter on $h2
+
+ # Test that packets with TPID 802.1ad VID 3 + TPID 802.1Q VID 5 are
+ # classified as untagged by a bridge with vlan_protocol 802.1Q, and
+ # are processed in the PVID of the ingress port (here 1). Not VID 3,
+ # and not VID 5.
+ RET=0
+
+ tc qdisc add dev $h2 clsact
+ tc filter add dev $h2 ingress protocol all pref 1 handle 101 \
+ flower dst_mac $mac action drop
+ ip link set $h2 promisc on
+ ethtool -K $h2 rx-vlan-filter off rx-vlan-stag-filter off
+
+ $MZ -q $h1 -c 1 -b $mac -a own "88:a8 00:03 81:00 00:05 08:00 aa-aa-aa-aa-aa-aa-aa-aa-aa"
+ sleep 1
+
+ # Match on 'self' addresses as well, for those drivers which
+ # do not push their learned addresses to the bridge software
+ # database
+ bridge -j fdb show $swp1 | \
+ jq -e ".[] | select(.mac == \"$(mac_get $h1)\") | select(.vlan == 1)" &> /dev/null
+ check_err $? "FDB entry was not learned when it should"
+
+ log_test "FDB entry in PVID for VLAN-tagged with other TPID"
+
+ RET=0
+ tc -j -s filter show dev $h2 ingress \
+ | jq -e ".[] | select(.options.handle == 101) \
+ | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+ check_err $? "Packet was not forwarded when it should" # jq -e fails unless filter 101 counted exactly 1 packet
+ log_test "Reception of VLAN with other TPID as untagged"
+
+ bridge vlan del dev $swp1 vid 1 # remove VID 1 from $swp1 before sending the same frame again
+
+ $MZ -q $h1 -c 1 -b $mac -a own "88:a8 00:03 81:00 00:05 08:00 aa-aa-aa-aa-aa-aa-aa-aa-aa"
+ sleep 1
+
+ RET=0
+ tc -j -s filter show dev $h2 ingress \
+ | jq -e ".[] | select(.options.handle == 101) \
+ | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+ check_err $? "Packet was forwarded when should not" # passes only if the counter is still exactly 1 after the second frame
+ log_test "Reception of VLAN with other TPID as untagged (no PVID)"
+
+ bridge vlan add dev $swp1 vid 1 pvid untagged # put VID 1 back on $swp1 as PVID, untagged
+ ip link set $h2 promisc off
+ tc qdisc del dev $h2 clsact
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh
index 1783c10215e5..7d531f7091e6 100755
--- a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh
+++ b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh
@@ -224,10 +224,10 @@ send_dst_ipv6()
send_flowlabel()
{
# Generate 16384 echo requests, each with a random flow label.
- for _ in $(seq 1 16384); do
- ip vrf exec v$h1 \
- $PING6 2001:db8:4::2 -F 0 -c 1 -q >/dev/null 2>&1
- done
+ # The whole loop runs in one child shell inside the VRF. {1..16384} is
+ # a bash brace expansion, so the child must be bash: a POSIX sh such as
+ # dash would iterate once over the literal string and send one ping.
+ ip vrf exec v$h1 bash -c \
+ "for _ in {1..16384}; do \
+ $PING6 2001:db8:4::2 -F 0 -c 1 -q >/dev/null 2>&1; \
+ done"
}
send_src_udp6()
diff --git a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh
index 9788bd0f6e8b..dda11a4a9450 100755
--- a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh
+++ b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh
@@ -319,10 +319,10 @@ send_dst_ipv6()
send_flowlabel()
{
# Generate 16384 echo requests, each with a random flow label.
- for _ in $(seq 1 16384); do
- ip vrf exec v$h1 \
- $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1
- done
+ # The whole loop runs in one child shell inside the VRF. {1..16384} is
+ # a bash brace expansion, so the child must be bash: a POSIX sh such as
+ # dash would iterate once over the literal string and send one ping.
+ ip vrf exec v$h1 bash -c \
+ "for _ in {1..16384}; do \
+ $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1; \
+ done"
}
send_src_udp6()
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh
index 2ab9eaaa5532..e28b4a079e52 100755
--- a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh
+++ b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh
@@ -321,10 +321,10 @@ send_dst_ipv6()
send_flowlabel()
{
# Generate 16384 echo requests, each with a random flow label.
- for _ in $(seq 1 16384); do
- ip vrf exec v$h1 \
- $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1
- done
+ # The whole loop runs in one child shell inside the VRF. {1..16384} is
+ # a bash brace expansion, so the child must be bash: a POSIX sh such as
+ # dash would iterate once over the literal string and send one ping.
+ ip vrf exec v$h1 bash -c \
+ "for _ in {1..16384}; do \
+ $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1; \
+ done"
}
send_src_udp6()
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index ff96bb7535ff..c992e385159c 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -500,6 +500,11 @@ check_err_fail()
fi
}
+xfail() # Run the given command line with FAIL_TO_XFAIL=yes set for that command only.
+{
+ FAIL_TO_XFAIL=yes "$@" # "$@" is the command plus its arguments; presumably the result-reporting helpers read FAIL_TO_XFAIL (not visible here)
+}
+
xfail_on_slow()
{
if [[ $KSFT_MACHINE_SLOW = yes ]]; then
@@ -509,6 +514,13 @@ xfail_on_slow()
fi
}
+omit_on_slow() # Run the given command line unless KSFT_MACHINE_SLOW is "yes"; otherwise do nothing.
+{
+ if [[ $KSFT_MACHINE_SLOW != yes ]]; then # an unset or empty variable also counts as "not slow"
+ "$@"
+ fi
+}
+
xfail_on_veth()
{
local dev=$1; shift
@@ -1113,6 +1125,39 @@ mac_get()
ip -j link show dev $if_name | jq -r '.[]["address"]'
}
+ether_addr_to_u64() # Print the colon-separated MAC address in $1 as a single hex integer ("0x...").
+{
+ local addr="$1"
+ local order="$((1 << 40))" # multiplier for the first (most significant) octet
+ local val=0
+ local byte
+
+ addr="${addr//:/ }" # turn every ':' into a space so the octets word-split in the loop
+
+ for byte in $addr; do
+ byte="0x$byte" # prefix so shell arithmetic parses the octet as hexadecimal
+ val=$((val + order * byte))
+ order=$((order >> 8)) # the next octet is one byte less significant
+ done
+
+ printf "0x%x" $val
+}
+
+u64_to_ether_addr() # Print the integer in $1 as six colon-separated two-digit hex octets, with no trailing newline.
+{
+ local val=$1
+ local byte
+ local i
+
+ for ((i = 40; i >= 0; i -= 8)); do # i is the bit offset of the current octet, highest first
+ byte=$(((val & (0xff << i)) >> i))
+ printf "%02x" $byte
+ if [ $i -ne 0 ]; then # no ':' after the last octet
+ printf ":"
+ fi
+ done
+}
+
ipv6_lladdr_get()
{
local if_name=$1
@@ -2229,3 +2274,22 @@ absval()
echo $((v > 0 ? v : -v))
}
+
+has_unicast_flt() # Echo "yes" or "no" depending on the promiscuity of device $1 while a temporary macvlan with a different MAC sits on it.
+{
+ local dev=$1; shift
+ local mac_addr=$(mac_get $dev)
+ local tmp=$(ether_addr_to_u64 $mac_addr)
+ local promisc
+
+ ip link set $dev up
+ ip link add link $dev name macvlan-tmp type macvlan mode private
+ ip link set macvlan-tmp address $(u64_to_ether_addr $((tmp + 1))) # $dev's own MAC address plus one
+ ip link set macvlan-tmp up
+
+ promisc=$(ip -j -d link show dev $dev | jq -r '.[].promiscuity') # sampled while macvlan-tmp is still up
+
+ ip link del macvlan-tmp
+
+ [[ $promisc == 1 ]] && echo "no" || echo "yes" # promiscuity 1 => "no"; presumably the device went promiscuous because it cannot filter the extra address
+}
diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh
index 4b364cdf3ef0..c35548767756 100755
--- a/tools/testing/selftests/net/forwarding/local_termination.sh
+++ b/tools/testing/selftests/net/forwarding/local_termination.sh
@@ -1,7 +1,9 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-ALL_TESTS="standalone bridge"
+ALL_TESTS="standalone vlan_unaware_bridge vlan_aware_bridge test_vlan \
+ vlan_over_vlan_unaware_bridged_port vlan_over_vlan_aware_bridged_port \
+ vlan_over_vlan_unaware_bridge vlan_over_vlan_aware_bridge"
NUM_NETIFS=2
PING_COUNT=1
REQUIRE_MTOOLS=yes
@@ -37,9 +39,68 @@ UNKNOWN_MACV6_MC_ADDR1="33:33:01:02:03:05"
UNKNOWN_MACV6_MC_ADDR2="33:33:01:02:03:06"
UNKNOWN_MACV6_MC_ADDR3="33:33:01:02:03:07"
-NON_IP_MC="01:02:03:04:05:06"
-NON_IP_PKT="00:04 48:45:4c:4f"
-BC="ff:ff:ff:ff:ff:ff"
+PTP_1588_L2_SYNC=" \
+01:1b:19:00:00:00 00:00:de:ad:be:ef 88:f7 00 02 \
+00 2c 00 00 02 00 00 00 00 00 00 00 00 00 00 00 \
+00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 00 00 \
+00 00 00 00 00 00 00 00 00 00"
+PTP_1588_L2_FOLLOW_UP=" \
+01:1b:19:00:00:00 00:00:de:ad:be:ef 88:f7 08 02 \
+00 2c 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \
+00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 02 00 \
+00 00 66 83 c5 f1 17 97 ed f0"
+PTP_1588_L2_PDELAY_REQ=" \
+01:80:c2:00:00:0e 00:00:de:ad:be:ef 88:f7 02 02 \
+00 36 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \
+00 00 3e 37 63 ff fe cf 17 0e 00 01 00 06 05 7f \
+00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \
+00 00 00 00"
+PTP_1588_IPV4_SYNC=" \
+01:00:5e:00:01:81 00:00:de:ad:be:ef 08:00 45 00 \
+00 48 0a 9a 40 00 01 11 cb 88 c0 00 02 01 e0 00 \
+01 81 01 3f 01 3f 00 34 a3 c8 00 02 00 2c 00 00 \
+02 00 00 00 00 00 00 00 00 00 00 00 00 00 3e 37 \
+63 ff fe cf 17 0e 00 01 00 00 00 00 00 00 00 00 \
+00 00 00 00 00 00"
+PTP_1588_IPV4_FOLLOW_UP="
+01:00:5e:00:01:81 00:00:de:ad:be:ef 08:00 45 00 \
+00 48 0a 9b 40 00 01 11 cb 87 c0 00 02 01 e0 00 \
+01 81 01 40 01 40 00 34 a3 c8 08 02 00 2c 00 00 \
+00 00 00 00 00 00 00 00 00 00 00 00 00 00 3e 37 \
+63 ff fe cf 17 0e 00 01 00 00 02 00 00 00 66 83 \
+c6 0f 1d 9a 61 87"
+PTP_1588_IPV4_PDELAY_REQ=" \
+01:00:5e:00:00:6b 00:00:de:ad:be:ef 08:00 45 00 \
+00 52 35 a9 40 00 01 11 a1 85 c0 00 02 01 e0 00 \
+00 6b 01 3f 01 3f 00 3e a2 bc 02 02 00 36 00 00 \
+00 00 00 00 00 00 00 00 00 00 00 00 00 00 3e 37 \
+63 ff fe cf 17 0e 00 01 00 01 05 7f 00 00 00 00 \
+00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00"
+PTP_1588_IPV6_SYNC=" \
+33:33:00:00:01:81 00:00:de:ad:be:ef 86:dd 60 06 \
+7c 2f 00 36 11 01 20 01 0d b8 00 01 00 00 00 00 \
+00 00 00 00 00 01 ff 0e 00 00 00 00 00 00 00 00 \
+00 00 00 00 01 81 01 3f 01 3f 00 36 2e 92 00 02 \
+00 2c 00 00 02 00 00 00 00 00 00 00 00 00 00 00 \
+00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 00 00 \
+00 00 00 00 00 00 00 00 00 00 00 00"
+PTP_1588_IPV6_FOLLOW_UP=" \
+33:33:00:00:01:81 00:00:de:ad:be:ef 86:dd 60 0a \
+00 bc 00 36 11 01 20 01 0d b8 00 01 00 00 00 00 \
+00 00 00 00 00 01 ff 0e 00 00 00 00 00 00 00 00 \
+00 00 00 00 01 81 01 40 01 40 00 36 2e 92 08 02 \
+00 2c 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \
+00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 02 00 \
+00 00 66 83 c6 2a 32 09 bd 74 00 00"
+PTP_1588_IPV6_PDELAY_REQ=" \
+33:33:00:00:00:6b 00:00:de:ad:be:ef 86:dd 60 0c \
+5c fd 00 40 11 01 fe 80 00 00 00 00 00 00 3c 37 \
+63 ff fe cf 17 0e ff 02 00 00 00 00 00 00 00 00 \
+00 00 00 00 00 6b 01 3f 01 3f 00 40 b4 54 02 02 \
+00 36 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \
+00 00 3e 37 63 ff fe cf 17 0e 00 01 00 01 05 7f \
+00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \
+00 00 00 00 00 00"
# Disable promisc to ensure we don't receive unknown MAC DA packets
export TCPDUMP_EXTRA_FLAGS="-pl"
@@ -47,13 +108,15 @@ export TCPDUMP_EXTRA_FLAGS="-pl"
h1=${NETIFS[p1]}
h2=${NETIFS[p2]}
-send_non_ip()
+send_raw()
{
- local if_name=$1
- local smac=$2
- local dmac=$3
+ local if_name=$1; shift
+ local pkt="$1"; shift
+ local smac=$(mac_get $if_name)
+
+ pkt="${pkt/00:00:de:ad:be:ef/$smac}"
- $MZ -q $if_name "$dmac $smac $NON_IP_PKT"
+ $MZ -q $if_name "$pkt"
}
send_uc_ipv4()
@@ -68,10 +131,11 @@ send_uc_ipv4()
check_rcv()
{
- local if_name=$1
- local type=$2
- local pattern=$3
- local should_receive=$4
+ local if_name=$1; shift
+ local type=$1; shift
+ local pattern=$1; shift
+ local should_receive=$1; shift
+ local test_name="$1"; shift
local should_fail=
[ $should_receive = true ] && should_fail=0 || should_fail=1
@@ -81,7 +145,7 @@ check_rcv()
check_err_fail "$should_fail" "$?" "reception"
- log_test "$if_name: $type"
+ log_test "$test_name: $type"
}
mc_route_prepare()
@@ -104,44 +168,78 @@ mc_route_destroy()
run_test()
{
- local rcv_if_name=$1
- local smac=$(mac_get $h1)
+ local send_if_name=$1; shift
+ local rcv_if_name=$1; shift
+ local skip_ptp=$1; shift
+ local no_unicast_flt=$1; shift
+ local test_name="$1"; shift
+ local smac=$(mac_get $send_if_name)
local rcv_dmac=$(mac_get $rcv_if_name)
+ local should_receive
tcpdump_start $rcv_if_name
- mc_route_prepare $h1
+ mc_route_prepare $send_if_name
mc_route_prepare $rcv_if_name
- send_uc_ipv4 $h1 $rcv_dmac
- send_uc_ipv4 $h1 $MACVLAN_ADDR
- send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR1
+ send_uc_ipv4 $send_if_name $rcv_dmac
+ send_uc_ipv4 $send_if_name $MACVLAN_ADDR
+ send_uc_ipv4 $send_if_name $UNKNOWN_UC_ADDR1
ip link set dev $rcv_if_name promisc on
- send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR2
- mc_send $h1 $UNKNOWN_IPV4_MC_ADDR2
- mc_send $h1 $UNKNOWN_IPV6_MC_ADDR2
+ send_uc_ipv4 $send_if_name $UNKNOWN_UC_ADDR2
+ mc_send $send_if_name $UNKNOWN_IPV4_MC_ADDR2
+ mc_send $send_if_name $UNKNOWN_IPV6_MC_ADDR2
ip link set dev $rcv_if_name promisc off
mc_join $rcv_if_name $JOINED_IPV4_MC_ADDR
- mc_send $h1 $JOINED_IPV4_MC_ADDR
+ mc_send $send_if_name $JOINED_IPV4_MC_ADDR
mc_leave
mc_join $rcv_if_name $JOINED_IPV6_MC_ADDR
- mc_send $h1 $JOINED_IPV6_MC_ADDR
+ mc_send $send_if_name $JOINED_IPV6_MC_ADDR
mc_leave
- mc_send $h1 $UNKNOWN_IPV4_MC_ADDR1
- mc_send $h1 $UNKNOWN_IPV6_MC_ADDR1
+ mc_send $send_if_name $UNKNOWN_IPV4_MC_ADDR1
+ mc_send $send_if_name $UNKNOWN_IPV6_MC_ADDR1
ip link set dev $rcv_if_name allmulticast on
- send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR3
- mc_send $h1 $UNKNOWN_IPV4_MC_ADDR3
- mc_send $h1 $UNKNOWN_IPV6_MC_ADDR3
+ send_uc_ipv4 $send_if_name $UNKNOWN_UC_ADDR3
+ mc_send $send_if_name $UNKNOWN_IPV4_MC_ADDR3
+ mc_send $send_if_name $UNKNOWN_IPV6_MC_ADDR3
ip link set dev $rcv_if_name allmulticast off
mc_route_destroy $rcv_if_name
- mc_route_destroy $h1
+ mc_route_destroy $send_if_name
+
+ if [ $skip_ptp = false ]; then
+ ip maddress add 01:1b:19:00:00:00 dev $rcv_if_name
+ send_raw $send_if_name "$PTP_1588_L2_SYNC"
+ send_raw $send_if_name "$PTP_1588_L2_FOLLOW_UP"
+ ip maddress del 01:1b:19:00:00:00 dev $rcv_if_name
+
+ ip maddress add 01:80:c2:00:00:0e dev $rcv_if_name
+ send_raw $send_if_name "$PTP_1588_L2_PDELAY_REQ"
+ ip maddress del 01:80:c2:00:00:0e dev $rcv_if_name
+
+ mc_join $rcv_if_name 224.0.1.129
+ send_raw $send_if_name "$PTP_1588_IPV4_SYNC"
+ send_raw $send_if_name "$PTP_1588_IPV4_FOLLOW_UP"
+ mc_leave
+
+ mc_join $rcv_if_name 224.0.0.107
+ send_raw $send_if_name "$PTP_1588_IPV4_PDELAY_REQ"
+ mc_leave
+
+ mc_join $rcv_if_name ff0e::181
+ send_raw $send_if_name "$PTP_1588_IPV6_SYNC"
+ send_raw $send_if_name "$PTP_1588_IPV6_FOLLOW_UP"
+ mc_leave
+
+ mc_join $rcv_if_name ff02::6b
+ send_raw $send_if_name "$PTP_1588_IPV6_PDELAY_REQ"
+ mc_leave
+ fi
sleep 1
@@ -149,61 +247,99 @@ run_test()
check_rcv $rcv_if_name "Unicast IPv4 to primary MAC address" \
"$smac > $rcv_dmac, ethertype IPv4 (0x0800)" \
- true
+ true "$test_name"
check_rcv $rcv_if_name "Unicast IPv4 to macvlan MAC address" \
"$smac > $MACVLAN_ADDR, ethertype IPv4 (0x0800)" \
- true
+ true "$test_name"
- xfail_on_veth $h1 \
- check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address" \
- "$smac > $UNKNOWN_UC_ADDR1, ethertype IPv4 (0x0800)" \
- false
+ [ $no_unicast_flt = true ] && should_receive=true || should_receive=false
+ check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address" \
+ "$smac > $UNKNOWN_UC_ADDR1, ethertype IPv4 (0x0800)" \
+ $should_receive "$test_name"
check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address, promisc" \
"$smac > $UNKNOWN_UC_ADDR2, ethertype IPv4 (0x0800)" \
- true
+ true "$test_name"
- xfail_on_veth $h1 \
- check_rcv $rcv_if_name \
- "Unicast IPv4 to unknown MAC address, allmulti" \
- "$smac > $UNKNOWN_UC_ADDR3, ethertype IPv4 (0x0800)" \
- false
+ [ $no_unicast_flt = true ] && should_receive=true || should_receive=false
+ check_rcv $rcv_if_name \
+ "Unicast IPv4 to unknown MAC address, allmulti" \
+ "$smac > $UNKNOWN_UC_ADDR3, ethertype IPv4 (0x0800)" \
+ $should_receive "$test_name"
check_rcv $rcv_if_name "Multicast IPv4 to joined group" \
"$smac > $JOINED_MACV4_MC_ADDR, ethertype IPv4 (0x0800)" \
- true
+ true "$test_name"
- xfail_on_veth $h1 \
+ xfail \
check_rcv $rcv_if_name \
"Multicast IPv4 to unknown group" \
"$smac > $UNKNOWN_MACV4_MC_ADDR1, ethertype IPv4 (0x0800)" \
- false
+ false "$test_name"
check_rcv $rcv_if_name "Multicast IPv4 to unknown group, promisc" \
"$smac > $UNKNOWN_MACV4_MC_ADDR2, ethertype IPv4 (0x0800)" \
- true
+ true "$test_name"
check_rcv $rcv_if_name "Multicast IPv4 to unknown group, allmulti" \
"$smac > $UNKNOWN_MACV4_MC_ADDR3, ethertype IPv4 (0x0800)" \
- true
+ true "$test_name"
check_rcv $rcv_if_name "Multicast IPv6 to joined group" \
"$smac > $JOINED_MACV6_MC_ADDR, ethertype IPv6 (0x86dd)" \
- true
+ true "$test_name"
- xfail_on_veth $h1 \
+ xfail \
check_rcv $rcv_if_name "Multicast IPv6 to unknown group" \
"$smac > $UNKNOWN_MACV6_MC_ADDR1, ethertype IPv6 (0x86dd)" \
- false
+ false "$test_name"
check_rcv $rcv_if_name "Multicast IPv6 to unknown group, promisc" \
"$smac > $UNKNOWN_MACV6_MC_ADDR2, ethertype IPv6 (0x86dd)" \
- true
+ true "$test_name"
check_rcv $rcv_if_name "Multicast IPv6 to unknown group, allmulti" \
"$smac > $UNKNOWN_MACV6_MC_ADDR3, ethertype IPv6 (0x86dd)" \
- true
+ true "$test_name"
+
+ if [ $skip_ptp = false ]; then
+ check_rcv $rcv_if_name "1588v2 over L2 transport, Sync" \
+ "ethertype PTP (0x88f7).* PTPv2.* msg type : sync msg" \
+ true "$test_name"
+
+ check_rcv $rcv_if_name "1588v2 over L2 transport, Follow-Up" \
+ "ethertype PTP (0x88f7).* PTPv2.* msg type : follow up msg" \
+ true "$test_name"
+
+ check_rcv $rcv_if_name "1588v2 over L2 transport, Peer Delay Request" \
+ "ethertype PTP (0x88f7).* PTPv2.* msg type : peer delay req msg" \
+ true "$test_name"
+
+ check_rcv $rcv_if_name "1588v2 over IPv4, Sync" \
+ "ethertype IPv4 (0x0800).* PTPv2.* msg type : sync msg" \
+ true "$test_name"
+
+ check_rcv $rcv_if_name "1588v2 over IPv4, Follow-Up" \
+ "ethertype IPv4 (0x0800).* PTPv2.* msg type : follow up msg" \
+ true "$test_name"
+
+ check_rcv $rcv_if_name "1588v2 over IPv4, Peer Delay Request" \
+ "ethertype IPv4 (0x0800).* PTPv2.* msg type : peer delay req msg" \
+ true "$test_name"
+
+ check_rcv $rcv_if_name "1588v2 over IPv6, Sync" \
+ "ethertype IPv6 (0x86dd).* PTPv2.* msg type : sync msg" \
+ true "$test_name"
+
+ check_rcv $rcv_if_name "1588v2 over IPv6, Follow-Up" \
+ "ethertype IPv6 (0x86dd).* PTPv2.* msg type : follow up msg" \
+ true "$test_name"
+
+ check_rcv $rcv_if_name "1588v2 over IPv6, Peer Delay Request" \
+ "ethertype IPv6 (0x86dd).* PTPv2.* msg type : peer delay req msg" \
+ true "$test_name"
+ fi
tcpdump_cleanup $rcv_if_name
}
@@ -228,62 +364,217 @@ h2_destroy()
simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64
}
+h1_vlan_create()
+{
+ simple_if_init $h1
+ vlan_create $h1 100 v$h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h1_vlan_destroy()
+{
+ vlan_destroy $h1 100
+ simple_if_fini $h1
+}
+
+h2_vlan_create()
+{
+ simple_if_init $h2
+ vlan_create $h2 100 v$h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+h2_vlan_destroy()
+{
+ vlan_destroy $h2 100
+ simple_if_fini $h2
+}
+
bridge_create()
{
- ip link add br0 type bridge
+ local vlan_filtering=$1
+
+ ip link add br0 type bridge vlan_filtering $vlan_filtering
ip link set br0 address $BRIDGE_ADDR
ip link set br0 up
ip link set $h2 master br0
ip link set $h2 up
-
- simple_if_init br0 $H2_IPV4/24 $H2_IPV6/64
}
bridge_destroy()
{
- simple_if_fini br0 $H2_IPV4/24 $H2_IPV6/64
-
ip link del br0
}
-standalone()
+macvlan_create()
{
- h1_create
- h2_create
+ local lower=$1
- ip link add link $h2 name macvlan0 type macvlan mode private
+ ip link add link $lower name macvlan0 type macvlan mode private
ip link set macvlan0 address $MACVLAN_ADDR
ip link set macvlan0 up
+}
- run_test $h2
-
+macvlan_destroy()
+{
ip link del macvlan0
+}
+
+standalone()
+{
+ local no_unicast_flt=true
+ local skip_ptp=false
+ if [ $(has_unicast_flt $h2) = yes ]; then
+ no_unicast_flt=false
+ fi
+
+ h1_create
+ h2_create
+ macvlan_create $h2
+
+ run_test $h1 $h2 $skip_ptp $no_unicast_flt "$h2"
+
+ macvlan_destroy
h2_destroy
h1_destroy
}
-bridge()
+test_bridge()
{
+ local no_unicast_flt=true
+ local vlan_filtering=$1
+ local skip_ptp=true
+
h1_create
- bridge_create
+ bridge_create $vlan_filtering
+ simple_if_init br0 $H2_IPV4/24 $H2_IPV6/64
+ macvlan_create br0
- ip link add link br0 name macvlan0 type macvlan mode private
- ip link set macvlan0 address $MACVLAN_ADDR
- ip link set macvlan0 up
+ run_test $h1 br0 $skip_ptp $no_unicast_flt \
+ "vlan_filtering=$vlan_filtering bridge"
- run_test br0
+ macvlan_destroy
+ simple_if_fini br0 $H2_IPV4/24 $H2_IPV6/64
+ bridge_destroy
+ h1_destroy
+}
- ip link del macvlan0
+vlan_unaware_bridge()
+{
+ test_bridge 0
+}
+
+vlan_aware_bridge()
+{
+ test_bridge 1
+}
+
+test_vlan()
+{
+ local no_unicast_flt=true
+ local skip_ptp=false
+
+ if [ $(has_unicast_flt $h2) = yes ]; then
+ no_unicast_flt=false
+ fi
+
+ h1_vlan_create
+ h2_vlan_create
+ macvlan_create $h2.100
+ run_test $h1.100 $h2.100 $skip_ptp $no_unicast_flt "VLAN upper"
+
+ macvlan_destroy
+ h2_vlan_destroy
+ h1_vlan_destroy
+}
+
+vlan_over_bridged_port()
+{
+ local no_unicast_flt=true
+ local vlan_filtering=$1
+ local skip_ptp=false
+
+ # br_manage_promisc() will not force a single vlan_filtering port to
+ # promiscuous mode, so we should still expect unicast filtering to take
+ # place if the device can do it.
+ if [ $(has_unicast_flt $h2) = yes ] && [ $vlan_filtering = 1 ]; then
+ no_unicast_flt=false
+ fi
+
+ h1_vlan_create
+ h2_vlan_create
+ bridge_create $vlan_filtering
+ macvlan_create $h2.100
+
+ run_test $h1.100 $h2.100 $skip_ptp $no_unicast_flt \
+ "VLAN over vlan_filtering=$vlan_filtering bridged port"
+
+ macvlan_destroy
bridge_destroy
- h1_destroy
+ h2_vlan_destroy
+ h1_vlan_destroy
+}
+
+vlan_over_vlan_unaware_bridged_port()
+{
+ vlan_over_bridged_port 0
+}
+
+vlan_over_vlan_aware_bridged_port()
+{
+ vlan_over_bridged_port 1
+}
+
+vlan_over_bridge()
+{
+ local no_unicast_flt=true
+ local vlan_filtering=$1
+ local skip_ptp=true
+
+ h1_vlan_create
+ bridge_create $vlan_filtering
+ simple_if_init br0
+ vlan_create br0 100 vbr0 $H2_IPV4/24 $H2_IPV6/64
+ macvlan_create br0.100
+
+ if [ $vlan_filtering = 1 ]; then
+ bridge vlan add dev $h2 vid 100 master
+ bridge vlan add dev br0 vid 100 self
+ fi
+
+ run_test $h1.100 br0.100 $skip_ptp $no_unicast_flt \
+ "VLAN over vlan_filtering=$vlan_filtering bridge"
+
+ if [ $vlan_filtering = 1 ]; then
+ bridge vlan del dev br0 vid 100 self
+ bridge vlan del dev $h2 vid 100 master
+ fi
+
+ macvlan_destroy
+ vlan_destroy br0 100
+ simple_if_fini br0
+ bridge_destroy
+ h1_vlan_destroy
+}
+
+vlan_over_vlan_unaware_bridge()
+{
+ vlan_over_bridge 0
+}
+
+vlan_over_vlan_aware_bridge()
+{
+ vlan_over_bridge 1
}
cleanup()
{
pre_cleanup
+
+ ip link set $h2 down
+ ip link set $h1 down
+
vrf_cleanup
}
diff --git a/tools/testing/selftests/net/forwarding/no_forwarding.sh b/tools/testing/selftests/net/forwarding/no_forwarding.sh
index af3b398d13f0..9e677aa64a06 100755
--- a/tools/testing/selftests/net/forwarding/no_forwarding.sh
+++ b/tools/testing/selftests/net/forwarding/no_forwarding.sh
@@ -233,6 +233,9 @@ cleanup()
{
pre_cleanup
+ ip link set dev $swp2 down
+ ip link set dev $swp1 down
+
h2_destroy
h1_destroy
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
index 2ba44247c60a..a7d8399c8d4f 100755
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
@@ -40,6 +40,7 @@ ALL_TESTS="
ping_ipv4
ping_ipv6
multipath_test
+ multipath16_test
ping_ipv4_blackhole
ping_ipv6_blackhole
nh_stats_test_v4
@@ -226,9 +227,11 @@ routing_nh_obj()
multipath4_test()
{
- local desc="$1"
- local weight_rp12=$2
- local weight_rp13=$3
+ local desc=$1; shift
+ local weight_rp12=$1; shift
+ local weight_rp13=$1; shift
+ local ports=${1-sp=1024,dp=0-32768}; shift
+
local t0_rp12 t0_rp13 t1_rp12 t1_rp13
local packets_rp12 packets_rp13
@@ -242,7 +245,8 @@ multipath4_test()
t0_rp13=$(link_stats_tx_packets_get $rp13)
ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
- -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "$ports"
+ sleep 1
t1_rp12=$(link_stats_tx_packets_get $rp12)
t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -258,9 +262,11 @@ multipath4_test()
multipath6_test()
{
- local desc="$1"
- local weight_rp12=$2
- local weight_rp13=$3
+ local desc=$1; shift
+ local weight_rp12=$1; shift
+ local weight_rp13=$1; shift
+ local ports=${1-sp=1024,dp=0-32768}; shift
+
local t0_rp12 t0_rp13 t1_rp12 t1_rp13
local packets_rp12 packets_rp13
@@ -275,7 +281,8 @@ multipath6_test()
t0_rp13=$(link_stats_tx_packets_get $rp13)
$MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
- -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "$ports"
+ sleep 1
t1_rp12=$(link_stats_tx_packets_get $rp12)
t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -313,6 +320,23 @@ multipath_test()
multipath6_test "Weighted MP 11:45" 11 45
}
+multipath16_test()
+{
+ check_nhgw16 104 || return
+
+ log_info "Running 16-bit IPv4 multipath tests"
+ multipath4_test "65535:65535" 65535 65535
+ multipath4_test "128:512" 128 512
+ omit_on_slow \
+ multipath4_test "255:65535" 255 65535 sp=1024-1026,dp=0-65535
+
+ log_info "Running 16-bit IPv6 multipath tests"
+ multipath6_test "65535:65535" 65535 65535
+ multipath6_test "128:512" 128 512
+ omit_on_slow \
+ multipath6_test "255:65535" 255 65535 sp=1024-1026,dp=0-65535
+}
+
ping_ipv4_blackhole()
{
RET=0
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh
index 2903294d8bca..507b2852dabe 100644
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh
@@ -117,3 +117,16 @@ __nh_stats_test_v6()
$MZ -6 $h1 -A 2001:db8:1::2 -B 2001:db8:2::2
sysctl_restore net.ipv6.fib_multipath_hash_policy
}
+
+check_nhgw16()
+{
+ local nhid=$1; shift
+
+ ip nexthop replace id 9999 group "$nhid,65535" &>/dev/null
+ if (( $? )); then
+ log_test_skip "16-bit multipath tests" \
+ "iproute2 or the kernel do not support 16-bit next hop weights"
+ return 1
+ fi
+ ip nexthop del id 9999 ||:
+}
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
index cd9e346436fc..88ddae05b39d 100755
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
@@ -40,6 +40,7 @@ ALL_TESTS="
ping_ipv4
ping_ipv6
multipath_test
+ multipath16_test
nh_stats_test_v4
nh_stats_test_v6
"
@@ -228,9 +229,11 @@ routing_nh_obj()
multipath4_test()
{
- local desc="$1"
- local weight_rp12=$2
- local weight_rp13=$3
+ local desc=$1; shift
+ local weight_rp12=$1; shift
+ local weight_rp13=$1; shift
+ local ports=${1-sp=1024,dp=0-32768}; shift
+
local t0_rp12 t0_rp13 t1_rp12 t1_rp13
local packets_rp12 packets_rp13
@@ -243,7 +246,8 @@ multipath4_test()
t0_rp13=$(link_stats_tx_packets_get $rp13)
ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
- -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "$ports"
+ sleep 1
t1_rp12=$(link_stats_tx_packets_get $rp12)
t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -258,9 +262,11 @@ multipath4_test()
multipath6_l4_test()
{
- local desc="$1"
- local weight_rp12=$2
- local weight_rp13=$3
+ local desc=$1; shift
+ local weight_rp12=$1; shift
+ local weight_rp13=$1; shift
+ local ports=${1-sp=1024,dp=0-32768}; shift
+
local t0_rp12 t0_rp13 t1_rp12 t1_rp13
local packets_rp12 packets_rp13
@@ -273,7 +279,8 @@ multipath6_l4_test()
t0_rp13=$(link_stats_tx_packets_get $rp13)
$MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
- -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "$ports"
+ sleep 1
t1_rp12=$(link_stats_tx_packets_get $rp12)
t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -371,6 +378,41 @@ multipath_test()
ip nexthop replace id 106 group 104,1/105,1 type resilient
}
+multipath16_test()
+{
+ check_nhgw16 104 || return
+
+ log_info "Running 16-bit IPv4 multipath tests"
+ ip nexthop replace id 103 group 101/102 type resilient idle_timer 0
+
+ ip nexthop replace id 103 group 101,65535/102,65535 type resilient
+ multipath4_test "65535:65535" 65535 65535
+
+ ip nexthop replace id 103 group 101,128/102,512 type resilient
+ multipath4_test "128:512" 128 512
+
+ ip nexthop replace id 103 group 101,255/102,65535 type resilient
+ omit_on_slow \
+ multipath4_test "255:65535" 255 65535 sp=1024-1026,dp=0-65535
+
+ ip nexthop replace id 103 group 101,1/102,1 type resilient
+
+ log_info "Running 16-bit IPv6 L4 hash multipath tests"
+ ip nexthop replace id 106 group 104/105 type resilient idle_timer 0
+
+ ip nexthop replace id 106 group 104,65535/105,65535 type resilient
+ multipath6_l4_test "65535:65535" 65535 65535
+
+ ip nexthop replace id 106 group 104,128/105,512 type resilient
+ multipath6_l4_test "128:512" 128 512
+
+ ip nexthop replace id 106 group 104,255/105,65535 type resilient
+ omit_on_slow \
+ multipath6_l4_test "255:65535" 255 65535 sp=1024-1026,dp=0-65535
+
+ ip nexthop replace id 106 group 104,1/105,1 type resilient
+}
+
nh_stats_test_v4()
{
__nh_stats_test_v4 resilient
diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh
index e2be354167a1..46f365b557b7 100755
--- a/tools/testing/selftests/net/forwarding/router_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/router_multipath.sh
@@ -180,6 +180,7 @@ multipath4_test()
ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
-d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
+ sleep 1
t1_rp12=$(link_stats_tx_packets_get $rp12)
t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -217,6 +218,7 @@ multipath6_test()
$MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
-d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
+ sleep 1
t1_rp12=$(link_stats_tx_packets_get $rp12)
t1_rp13=$(link_stats_tx_packets_get $rp13)
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
index 589629636502..ea89e558672d 100755
--- a/tools/testing/selftests/net/forwarding/tc_actions.sh
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -4,7 +4,8 @@
ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \
mirred_egress_mirror_test matchall_mirred_egress_mirror_test \
gact_trap_test mirred_egress_to_ingress_test \
- mirred_egress_to_ingress_tcp_test"
+ mirred_egress_to_ingress_tcp_test \
+ ingress_2nd_vlan_push egress_2nd_vlan_push"
NUM_NETIFS=4
source tc_common.sh
source lib.sh
@@ -244,6 +245,49 @@ mirred_egress_to_ingress_tcp_test()
log_test "mirred_egress_to_ingress_tcp ($tcflags)"
}
+ingress_2nd_vlan_push()
+{
+ tc filter add dev $swp1 ingress pref 20 chain 0 handle 20 flower \
+ $tcflags num_of_vlans 1 \
+ action vlan push id 100 protocol 0x8100 action goto chain 5
+ tc filter add dev $swp1 ingress pref 30 chain 5 handle 30 flower \
+ $tcflags num_of_vlans 2 \
+ cvlan_ethtype 0x800 action pass
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -Q 10 -q
+
+ tc_check_packets "dev $swp1 ingress" 30 1
+ check_err $? "No double-vlan packets received"
+
+ tc filter del dev $swp1 ingress pref 20 chain 0 handle 20 flower
+ tc filter del dev $swp1 ingress pref 30 chain 5 handle 30 flower
+
+ log_test "ingress_2nd_vlan_push ($tcflags)"
+}
+
+egress_2nd_vlan_push()
+{
+ tc filter add dev $h1 egress pref 20 chain 0 handle 20 flower \
+ $tcflags num_of_vlans 0 \
+ action vlan push id 10 protocol 0x8100 \
+ pipe action vlan push id 100 protocol 0x8100 action goto chain 5
+ tc filter add dev $h1 egress pref 30 chain 5 handle 30 flower \
+ $tcflags num_of_vlans 2 \
+ cvlan_ethtype 0x800 action pass
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h1 egress" 30 1
+ check_err $? "No double-vlan packets received"
+
+ tc filter del dev $h1 egress pref 20 chain 0 handle 20 flower
+ tc filter del dev $h1 egress pref 30 chain 5 handle 30 flower
+
+ log_test "egress_2nd_vlan_push ($tcflags)"
+}
+
setup_prepare()
{
h1=${NETIFS[p1]}
diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh
index 8ee4489238ca..be8707bfb46e 100644
--- a/tools/testing/selftests/net/lib.sh
+++ b/tools/testing/selftests/net/lib.sh
@@ -125,6 +125,21 @@ slowwait_for_counter()
slowwait "$timeout" until_counter_is ">= $((base + delta))" "$@"
}
+# Check for existence of tools which are built as part of selftests
+# but may also already exist in $PATH
+check_gen_prog()
+{
+ local prog_name=$1; shift
+
+ if ! which $prog_name >/dev/null 2>/dev/null; then
+ PATH=$PWD:$PATH
+ if ! which $prog_name >/dev/null; then
+ echo "'$prog_name' command not found; skipping tests"
+ exit $ksft_skip
+ fi
+ fi
+}
+
remove_ns_list()
{
local item=$1
diff --git a/tools/testing/selftests/net/lib/csum.c b/tools/testing/selftests/net/lib/csum.c
index b9f3fc3c3426..e0a34e5e8dd5 100644
--- a/tools/testing/selftests/net/lib/csum.c
+++ b/tools/testing/selftests/net/lib/csum.c
@@ -654,10 +654,16 @@ static int recv_verify_packet_ipv4(void *nh, int len)
{
struct iphdr *iph = nh;
uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto;
+ uint16_t ip_len;
if (len < sizeof(*iph) || iph->protocol != proto)
return -1;
+ ip_len = ntohs(iph->tot_len);
+ if (ip_len > len || ip_len < sizeof(*iph))
+ return -1;
+
+ len = ip_len;
iph_addr_p = &iph->saddr;
if (proto == IPPROTO_TCP)
return recv_verify_packet_tcp(iph + 1, len - sizeof(*iph));
@@ -669,16 +675,22 @@ static int recv_verify_packet_ipv6(void *nh, int len)
{
struct ipv6hdr *ip6h = nh;
uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto;
+ uint16_t ip_len;
if (len < sizeof(*ip6h) || ip6h->nexthdr != proto)
return -1;
+ ip_len = ntohs(ip6h->payload_len);
+ if (ip_len > len - sizeof(*ip6h))
+ return -1;
+
+ len = ip_len;
iph_addr_p = &ip6h->saddr;
if (proto == IPPROTO_TCP)
- return recv_verify_packet_tcp(ip6h + 1, len - sizeof(*ip6h));
+ return recv_verify_packet_tcp(ip6h + 1, len);
else
- return recv_verify_packet_udp(ip6h + 1, len - sizeof(*ip6h));
+ return recv_verify_packet_udp(ip6h + 1, len);
}
/* return whether auxdata includes TP_STATUS_CSUM_VALID */
diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py
index f26c20df9db4..477ae76de93d 100644
--- a/tools/testing/selftests/net/lib/py/ksft.py
+++ b/tools/testing/selftests/net/lib/py/ksft.py
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
import builtins
+import functools
import inspect
import sys
import time
@@ -10,6 +11,7 @@ from .utils import global_defer_queue
KSFT_RESULT = None
KSFT_RESULT_ALL = True
+KSFT_DISRUPTIVE = True
class KsftFailEx(Exception):
@@ -32,8 +34,18 @@ def _fail(*args):
global KSFT_RESULT
KSFT_RESULT = False
- frame = inspect.stack()[2]
- ksft_pr("At " + frame.filename + " line " + str(frame.lineno) + ":")
+ stack = inspect.stack()
+ started = False
+ for frame in reversed(stack[2:]):
+ # Start printing from the test case function
+ if not started:
+ if frame.function == 'ksft_run':
+ started = True
+ continue
+
+ ksft_pr("Check| At " + frame.filename + ", line " + str(frame.lineno) +
+ ", in " + frame.function + ":")
+ ksft_pr("Check| " + frame.code_context[0].strip())
ksft_pr(*args)
@@ -43,6 +55,12 @@ def ksft_eq(a, b, comment=""):
_fail("Check failed", a, "!=", b, comment)
+def ksft_ne(a, b, comment=""):
+ global KSFT_RESULT
+ if a == b:
+ _fail("Check failed", a, "==", b, comment)
+
+
def ksft_true(a, comment=""):
if not a:
_fail("Check failed", a, "does not eval to True", comment)
@@ -127,6 +145,44 @@ def ksft_flush_defer():
KSFT_RESULT = False
+def ksft_disruptive(func):
+ """
+ Decorator that marks the test as disruptive (e.g. the test
+ that can down the interface). Disruptive tests can be skipped
+ by passing DISRUPTIVE=False environment variable.
+ """
+
+ @functools.wraps(func)
+ def wrapper(*args, **kwargs):
+ if not KSFT_DISRUPTIVE:
+ raise KsftSkipEx(f"marked as disruptive")
+ return func(*args, **kwargs)
+ return wrapper
+
+
+def ksft_setup(env):
+ """
+ Setup test framework global state from the environment.
+ """
+
+ def get_bool(env, name):
+ value = env.get(name, "").lower()
+ if value in ["yes", "true"]:
+ return True
+ if value in ["no", "false"]:
+ return False
+ try:
+ return bool(int(value))
+ except:
+ raise Exception(f"failed to parse {name}")
+
+ if "DISRUPTIVE" in env:
+ global KSFT_DISRUPTIVE
+ KSFT_DISRUPTIVE = get_bool(env, "DISRUPTIVE")
+
+ return env
+
+
def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
cases = cases or []
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index 7b936a926859..5d796622e730 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -11,6 +11,8 @@ TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq
TEST_FILES := mptcp_lib.sh settings
+TEST_INCLUDES := ../lib.sh ../net_helper.sh
+
EXTRA_CLEAN := *.pcap
include ../../lib.mk
diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
index 776d43a6922d..2bd0c1eb70c5 100755
--- a/tools/testing/selftests/net/mptcp/diag.sh
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -284,7 +284,7 @@ echo "b" | \
./mptcp_connect -p 10000 -r 0 -t ${timeout_poll} -w 20 \
127.0.0.1 >/dev/null &
wait_connected $ns 10000
-chk_msk_nr 2 "after MPC handshake "
+chk_msk_nr 2 "after MPC handshake"
chk_last_time_info 10000
chk_msk_remote_key_nr 2 "....chk remote_key"
chk_msk_fallback_nr 0 "....chk no fallback"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index b77fb7065bfb..57325d57e4c6 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -345,9 +345,11 @@ do_transfer()
local addr_port
addr_port=$(printf "%s:%d" ${connect_addr} ${port})
- local result_msg
- result_msg="$(printf "%.3s %-5s -> %.3s (%-20s) %-5s" ${connector_ns} ${cl_proto} ${listener_ns} ${addr_port} ${srv_proto})"
- mptcp_lib_print_title "${result_msg}"
+ local pretty_title
+ pretty_title="$(printf "%.3s %-5s -> %.3s (%-20s) %-5s" ${connector_ns} ${cl_proto} ${listener_ns} ${addr_port} ${srv_proto})"
+ mptcp_lib_print_title "${pretty_title}"
+
+ local tap_title="${connector_ns:0:3} ${cl_proto} -> ${listener_ns:0:3} (${addr_port}) ${srv_proto}"
if $capture; then
local capuser
@@ -431,7 +433,6 @@ do_transfer()
local duration
duration=$((stop-start))
- result_msg+=" # time=${duration}ms"
printf "(duration %05sms) " "${duration}"
if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
mptcp_lib_pr_fail "client exit code $retc, server $rets"
@@ -444,7 +445,7 @@ do_transfer()
echo
cat "$capout"
- mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}"
+ mptcp_lib_result_fail "${TEST_GROUP}: ${tap_title}"
return 1
fi
@@ -544,12 +545,12 @@ do_transfer()
if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then
mptcp_lib_pr_ok "${extra:1}"
- mptcp_lib_result_pass "${TEST_GROUP}: ${result_msg}"
+ mptcp_lib_result_pass "${TEST_GROUP}: ${tap_title}"
else
if [ -n "${extra}" ]; then
mptcp_lib_print_warn "${extra:1}"
fi
- mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}"
+ mptcp_lib_result_fail "${TEST_GROUP}: ${tap_title}"
fi
cat "$capout"
@@ -848,6 +849,8 @@ stop_if_error()
make_file "$cin" "client"
make_file "$sin" "server"
+mptcp_lib_subtests_last_ts_reset
+
check_mptcp_disabled
stop_if_error "The kernel configuration is not valid for MPTCP"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 9ea6d698e9d3..e8d0a01b4144 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -61,6 +61,16 @@ unset sflags
unset fastclose
unset fullmesh
unset speed
+unset join_csum_ns1
+unset join_csum_ns2
+unset join_fail_nr
+unset join_rst_nr
+unset join_infi_nr
+unset join_corrupted_pkts
+unset join_syn_tx
+unset join_create_err
+unset join_bind_err
+unset join_connect_err
# generated using "nfbpf_compile '(ip && (ip[54] & 0xf0) == 0x30) ||
# (ip6 && (ip6[74] & 0xf0) == 0x30)'"
@@ -196,6 +206,22 @@ print_skip()
mptcp_lib_pr_skip "${@}"
}
+# $1: check name; $2: rc
+print_results()
+{
+ local check="${1}"
+ local rc=${2}
+
+ print_check "${check}"
+ if [ ${rc} = ${KSFT_PASS} ]; then
+ print_ok
+ elif [ ${rc} = ${KSFT_SKIP} ]; then
+ print_skip
+ else
+ fail_test "see above"
+ fi
+}
+
# [ $1: fail msg ]
mark_as_skipped()
{
@@ -337,7 +363,7 @@ reset_with_checksum()
local ns1_enable=$1
local ns2_enable=$2
- reset "checksum test ${1} ${2}" || return 1
+ reset "checksum test ${ns1_enable} ${ns2_enable}" || return 1
ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=$ns1_enable
ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=$ns2_enable
@@ -420,12 +446,17 @@ reset_with_fail()
fi
}
+start_events()
+{
+ mptcp_lib_events "${ns1}" "${evts_ns1}" evts_ns1_pid
+ mptcp_lib_events "${ns2}" "${evts_ns2}" evts_ns2_pid
+}
+
reset_with_events()
{
reset "${1}" || return 1
- mptcp_lib_events "${ns1}" "${evts_ns1}" evts_ns1_pid
- mptcp_lib_events "${ns2}" "${evts_ns2}" evts_ns2_pid
+ start_events
}
reset_with_tcp_filter()
@@ -436,9 +467,10 @@ reset_with_tcp_filter()
local ns="${!1}"
local src="${2}"
local target="${3}"
+ local chain="${4:-INPUT}"
if ! ip netns exec "${ns}" ${iptables} \
- -A INPUT \
+ -A "${chain}" \
-s "${src}" \
-p tcp \
-j "${target}"; then
@@ -833,7 +865,7 @@ chk_cestab_nr()
local cestab=$2
local count
- print_check "cestab $cestab"
+ print_check "currently established: $cestab"
count=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPCurrEstab")
if [ -z "$count" ]; then
print_skip
@@ -1109,28 +1141,29 @@ chk_csum_nr()
csum_ns2=${csum_ns2:1}
fi
- print_check "sum"
+ print_check "checksum server"
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtDataCsumErr")
- if [ "$count" != "$csum_ns1" ]; then
+ if [ -n "$count" ] && [ "$count" != "$csum_ns1" ]; then
extra_msg+=" ns1=$count"
fi
if [ -z "$count" ]; then
print_skip
elif { [ "$count" != $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 0 ]; } ||
- { [ "$count" -lt $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 1 ]; }; then
+ { [ "$count" -lt $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 1 ]; }; then
fail_test "got $count data checksum error[s] expected $csum_ns1"
else
print_ok
fi
- print_check "csum"
+
+ print_check "checksum client"
count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtDataCsumErr")
- if [ "$count" != "$csum_ns2" ]; then
+ if [ -n "$count" ] && [ "$count" != "$csum_ns2" ]; then
extra_msg+=" ns2=$count"
fi
if [ -z "$count" ]; then
print_skip
elif { [ "$count" != $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 0 ]; } ||
- { [ "$count" -lt $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 1 ]; }; then
+ { [ "$count" -lt $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 1 ]; }; then
fail_test "got $count data checksum error[s] expected $csum_ns2"
else
print_ok
@@ -1147,6 +1180,8 @@ chk_fail_nr()
local count
local ns_tx=$ns1
local ns_rx=$ns2
+ local tx="server"
+ local rx="client"
local extra_msg=""
local allow_tx_lost=0
local allow_rx_lost=0
@@ -1154,7 +1189,8 @@ chk_fail_nr()
if [[ $ns_invert = "invert" ]]; then
ns_tx=$ns2
ns_rx=$ns1
- extra_msg="invert"
+ tx="client"
+ rx="server"
fi
if [[ "${fail_tx}" = "-"* ]]; then
@@ -1166,29 +1202,29 @@ chk_fail_nr()
fail_rx=${fail_rx:1}
fi
- print_check "ftx"
+ print_check "fail tx ${tx}"
count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFailTx")
- if [ "$count" != "$fail_tx" ]; then
- extra_msg+=",tx=$count"
+ if [ -n "$count" ] && [ "$count" != "$fail_tx" ]; then
+ extra_msg+=" tx=$count"
fi
if [ -z "$count" ]; then
print_skip
elif { [ "$count" != "$fail_tx" ] && [ $allow_tx_lost -eq 0 ]; } ||
- { [ "$count" -gt "$fail_tx" ] && [ $allow_tx_lost -eq 1 ]; }; then
+ { [ "$count" -gt "$fail_tx" ] && [ $allow_tx_lost -eq 1 ]; }; then
fail_test "got $count MP_FAIL[s] TX expected $fail_tx"
else
print_ok
fi
- print_check "failrx"
+ print_check "fail rx ${rx}"
count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFailRx")
- if [ "$count" != "$fail_rx" ]; then
- extra_msg+=",rx=$count"
+ if [ -n "$count" ] && [ "$count" != "$fail_rx" ]; then
+ extra_msg+=" rx=$count"
fi
if [ -z "$count" ]; then
print_skip
elif { [ "$count" != "$fail_rx" ] && [ $allow_rx_lost -eq 0 ]; } ||
- { [ "$count" -gt "$fail_rx" ] && [ $allow_rx_lost -eq 1 ]; }; then
+ { [ "$count" -gt "$fail_rx" ] && [ $allow_rx_lost -eq 1 ]; }; then
fail_test "got $count MP_FAIL[s] RX expected $fail_rx"
else
print_ok
@@ -1205,37 +1241,35 @@ chk_fclose_nr()
local count
local ns_tx=$ns2
local ns_rx=$ns1
- local extra_msg=""
+ local tx="client"
+ local rx="server"
if [[ $ns_invert = "invert" ]]; then
ns_tx=$ns1
ns_rx=$ns2
- extra_msg="invert"
+ tx="server"
+ rx="client"
fi
- print_check "ctx"
+ print_check "fast close tx ${tx}"
count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFastcloseTx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$fclose_tx" ]; then
- extra_msg+=",tx=$count"
fail_test "got $count MP_FASTCLOSE[s] TX expected $fclose_tx"
else
print_ok
fi
- print_check "fclzrx"
+ print_check "fast close rx ${rx}"
count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFastcloseRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$fclose_rx" ]; then
- extra_msg+=",rx=$count"
fail_test "got $count MP_FASTCLOSE[s] RX expected $fclose_rx"
else
print_ok
fi
-
- print_info "$extra_msg"
}
chk_rst_nr()
@@ -1246,15 +1280,17 @@ chk_rst_nr()
local count
local ns_tx=$ns1
local ns_rx=$ns2
- local extra_msg=""
+ local tx="server"
+ local rx="client"
if [[ $ns_invert = "invert" ]]; then
ns_tx=$ns2
ns_rx=$ns1
- extra_msg="invert"
+ tx="client"
+ rx="server"
fi
- print_check "rtx"
+ print_check "reset tx ${tx}"
count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPRstTx")
if [ -z "$count" ]; then
print_skip
@@ -1266,7 +1302,7 @@ chk_rst_nr()
print_ok
fi
- print_check "rstrx"
+ print_check "reset rx ${rx}"
count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPRstRx")
if [ -z "$count" ]; then
print_skip
@@ -1277,8 +1313,6 @@ chk_rst_nr()
else
print_ok
fi
-
- print_info "$extra_msg"
}
chk_infi_nr()
@@ -1287,7 +1321,7 @@ chk_infi_nr()
local infi_rx=$2
local count
- print_check "itx"
+ print_check "infi tx client"
count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtInfiniteMapTx")
if [ -z "$count" ]; then
print_skip
@@ -1297,7 +1331,7 @@ chk_infi_nr()
print_ok
fi
- print_check "infirx"
+ print_check "infi rx server"
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtInfiniteMapRx")
if [ -z "$count" ]; then
print_skip
@@ -1308,17 +1342,66 @@ chk_infi_nr()
fi
}
+chk_join_tx_nr()
+{
+ local syn_tx=${join_syn_tx:-0}
+ local create=${join_create_err:-0}
+ local bind=${join_bind_err:-0}
+ local connect=${join_connect_err:-0}
+ local rc=${KSFT_PASS}
+ local count
+
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTx")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "$syn_tx" ]; then
+ rc=${KSFT_FAIL}
+ print_check "syn tx"
+ fail_test "got $count JOIN[s] syn tx expected $syn_tx"
+ fi
+
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTxCreatSkErr")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "$create" ]; then
+ rc=${KSFT_FAIL}
+ print_check "syn tx create socket error"
+ fail_test "got $count JOIN[s] syn tx create socket error expected $create"
+ fi
+
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTxBindErr")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "$bind" ]; then
+ rc=${KSFT_FAIL}
+ print_check "syn tx bind error"
+ fail_test "got $count JOIN[s] syn tx bind error expected $bind"
+ fi
+
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTxConnectErr")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "$connect" ]; then
+ rc=${KSFT_FAIL}
+ print_check "syn tx connect error"
+ fail_test "got $count JOIN[s] syn tx connect error expected $connect"
+ fi
+
+ print_results "join Tx" ${rc}
+}
+
chk_join_nr()
{
local syn_nr=$1
local syn_ack_nr=$2
local ack_nr=$3
- local csum_ns1=${4:-0}
- local csum_ns2=${5:-0}
- local fail_nr=${6:-0}
- local rst_nr=${7:-0}
- local infi_nr=${8:-0}
- local corrupted_pkts=${9:-0}
+ local csum_ns1=${join_csum_ns1:-0}
+ local csum_ns2=${join_csum_ns2:-0}
+ local fail_nr=${join_fail_nr:-0}
+ local rst_nr=${join_rst_nr:-0}
+ local infi_nr=${join_infi_nr:-0}
+ local corrupted_pkts=${join_corrupted_pkts:-0}
+ local rc=${KSFT_PASS}
local count
local with_cookie
@@ -1326,43 +1409,44 @@ chk_join_nr()
print_info "${corrupted_pkts} corrupted pkts"
fi
- print_check "syn"
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinSynRx")
if [ -z "$count" ]; then
- print_skip
+ rc=${KSFT_SKIP}
elif [ "$count" != "$syn_nr" ]; then
- fail_test "got $count JOIN[s] syn expected $syn_nr"
- else
- print_ok
+ rc=${KSFT_FAIL}
+ print_check "syn rx"
+ fail_test "got $count JOIN[s] syn rx expected $syn_nr"
fi
- print_check "synack"
with_cookie=$(ip netns exec $ns2 sysctl -n net.ipv4.tcp_syncookies)
count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckRx")
if [ -z "$count" ]; then
- print_skip
+ rc=${KSFT_SKIP}
elif [ "$count" != "$syn_ack_nr" ]; then
# simult connections exceeding the limit with cookie enabled could go up to
# synack validation as the conn limit can be enforced reliably only after
# the subflow creation
- if [ "$with_cookie" = 2 ] && [ "$count" -gt "$syn_ack_nr" ] && [ "$count" -le "$syn_nr" ]; then
- print_ok
- else
- fail_test "got $count JOIN[s] synack expected $syn_ack_nr"
+ if [ "$with_cookie" != 2 ] || [ "$count" -le "$syn_ack_nr" ] || [ "$count" -gt "$syn_nr" ]; then
+ rc=${KSFT_FAIL}
+ print_check "synack rx"
+ fail_test "got $count JOIN[s] synack rx expected $syn_ack_nr"
fi
- else
- print_ok
fi
- print_check "ack"
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckRx")
if [ -z "$count" ]; then
- print_skip
+ rc=${KSFT_SKIP}
elif [ "$count" != "$ack_nr" ]; then
- fail_test "got $count JOIN[s] ack expected $ack_nr"
- else
- print_ok
+ rc=${KSFT_FAIL}
+ print_check "ack rx"
+ fail_test "got $count JOIN[s] ack rx expected $ack_nr"
fi
+
+ print_results "join Rx" ${rc}
+
+ join_syn_tx="${join_syn_tx:-${syn_nr}}" \
+ chk_join_tx_nr
+
if $validate_checksum; then
chk_csum_nr $csum_ns1 $csum_ns2
chk_fail_nr $fail_nr $fail_nr
@@ -1423,19 +1507,21 @@ chk_add_nr()
local mis_ack_nr=0
local ns_tx=$ns1
local ns_rx=$ns2
- local extra_msg=""
+ local tx=""
+ local rx=""
local count
local timeout
if [[ $ns_invert = "invert" ]]; then
ns_tx=$ns2
ns_rx=$ns1
- extra_msg="invert"
+ tx=" client"
+ rx=" server"
fi
timeout=$(ip netns exec ${ns_tx} sysctl -n net.mptcp.add_addr_timeout)
- print_check "add"
+ print_check "add addr rx${rx}"
count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtAddAddr")
if [ -z "$count" ]; then
print_skip
@@ -1447,7 +1533,7 @@ chk_add_nr()
print_ok
fi
- print_check "echo"
+ print_check "add addr echo rx${tx}"
count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtEchoAdd")
if [ -z "$count" ]; then
print_skip
@@ -1458,7 +1544,7 @@ chk_add_nr()
fi
if [ $port_nr -gt 0 ]; then
- print_check "pt"
+ print_check "add addr rx with port${rx}"
count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtPortAdd")
if [ -z "$count" ]; then
print_skip
@@ -1468,7 +1554,7 @@ chk_add_nr()
print_ok
fi
- print_check "syn"
+ print_check "syn rx port${tx}"
count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPJoinPortSynRx")
if [ -z "$count" ]; then
print_skip
@@ -1479,7 +1565,7 @@ chk_add_nr()
print_ok
fi
- print_check "synack"
+ print_check "synack rx port${rx}"
count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPJoinPortSynAckRx")
if [ -z "$count" ]; then
print_skip
@@ -1490,7 +1576,7 @@ chk_add_nr()
print_ok
fi
- print_check "ack"
+ print_check "ack rx port${tx}"
count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPJoinPortAckRx")
if [ -z "$count" ]; then
print_skip
@@ -1501,7 +1587,7 @@ chk_add_nr()
print_ok
fi
- print_check "syn"
+ print_check "syn rx port mismatch${tx}"
count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMismatchPortSynRx")
if [ -z "$count" ]; then
print_skip
@@ -1512,7 +1598,7 @@ chk_add_nr()
print_ok
fi
- print_check "ack"
+ print_check "ack rx port mismatch${tx}"
count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMismatchPortAckRx")
if [ -z "$count" ]; then
print_skip
@@ -1523,8 +1609,6 @@ chk_add_nr()
print_ok
fi
fi
-
- print_info "$extra_msg"
}
chk_add_tx_nr()
@@ -1536,7 +1620,7 @@ chk_add_tx_nr()
timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout)
- print_check "add TX"
+ print_check "add addr tx"
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtAddAddrTx")
if [ -z "$count" ]; then
print_skip
@@ -1548,7 +1632,7 @@ chk_add_tx_nr()
print_ok
fi
- print_check "echo TX"
+ print_check "add addr echo tx"
count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtEchoAddTx")
if [ -z "$count" ]; then
print_skip
@@ -1568,6 +1652,8 @@ chk_rm_nr()
local count
local addr_ns=$ns1
local subflow_ns=$ns2
+ local addr="server"
+ local subflow="client"
local extra_msg=""
shift 2
@@ -1577,16 +1663,14 @@ chk_rm_nr()
shift
done
- if [ -z $invert ]; then
- addr_ns=$ns1
- subflow_ns=$ns2
- elif [ $invert = "true" ]; then
+ if [ "$invert" = "true" ]; then
addr_ns=$ns2
subflow_ns=$ns1
- extra_msg="invert"
+ addr="client"
+ subflow="server"
fi
- print_check "rm"
+ print_check "rm addr rx ${addr}"
count=$(mptcp_lib_get_counter ${addr_ns} "MPTcpExtRmAddr")
if [ -z "$count" ]; then
print_skip
@@ -1596,7 +1680,7 @@ chk_rm_nr()
print_ok
fi
- print_check "rmsf"
+ print_check "rm subflow ${subflow}"
count=$(mptcp_lib_get_counter ${subflow_ns} "MPTcpExtRmSubflow")
if [ -z "$count" ]; then
print_skip
@@ -1610,7 +1694,7 @@ chk_rm_nr()
count=$((count + cnt))
if [ "$count" != "$rm_subflow_nr" ]; then
suffix="$count in [$rm_subflow_nr:$((rm_subflow_nr*2))]"
- extra_msg+=" simult"
+ extra_msg="simult"
fi
if [ $count -ge "$rm_subflow_nr" ] && \
[ "$count" -le "$((rm_subflow_nr *2 ))" ]; then
@@ -1631,7 +1715,7 @@ chk_rm_tx_nr()
{
local rm_addr_tx_nr=$1
- print_check "rm TX"
+ print_check "rm addr tx client"
count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtRmAddrTx")
if [ -z "$count" ]; then
print_skip
@@ -1650,7 +1734,7 @@ chk_prio_nr()
local mpj_syn_ack=$4
local count
- print_check "ptx"
+ print_check "mp_prio tx server"
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPPrioTx")
if [ -z "$count" ]; then
print_skip
@@ -1660,7 +1744,7 @@ chk_prio_nr()
print_ok
fi
- print_check "prx"
+ print_check "mp_prio rx client"
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPPrioRx")
if [ -z "$count" ]; then
print_skip
@@ -1903,9 +1987,11 @@ subflows_error_tests()
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 0 1
pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.12.2 flags subflow
speed=slow \
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 0 0 0
+ join_bind_err=1 \
+ chk_join_nr 0 0 0
fi
# multiple subflows, with subflow creation error
@@ -1917,7 +2003,8 @@ subflows_error_tests()
pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
speed=slow \
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 1 1 1
+ join_syn_tx=2 \
+ chk_join_nr 1 1 1
fi
# multiple subflows, with subflow timeout on MPJ
@@ -1929,7 +2016,8 @@ subflows_error_tests()
pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
speed=slow \
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 1 1 1
+ join_syn_tx=2 \
+ chk_join_nr 1 1 1
fi
# multiple subflows, check that the endpoint corresponding to
@@ -1950,7 +2038,8 @@ subflows_error_tests()
# additional subflow could be created only if the PM select
# the later endpoint, skipping the already used one
- chk_join_nr 1 1 1
+ join_syn_tx=2 \
+ chk_join_nr 1 1 1
fi
}
@@ -2036,7 +2125,8 @@ signal_address_tests()
pm_nl_add_endpoint $ns1 10.0.14.1 flags signal
pm_nl_set_limits $ns2 3 3
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 1 1 1
+ join_syn_tx=3 \
+ chk_join_nr 1 1 1
chk_add_nr 3 3
fi
@@ -2204,7 +2294,8 @@ add_addr_timeout_tests()
pm_nl_set_limits $ns2 2 2
speed=10 \
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 1 1 1
+ join_syn_tx=2 \
+ chk_join_nr 1 1 1
chk_add_nr 8 0
fi
}
@@ -2304,7 +2395,8 @@ remove_tests()
pm_nl_set_limits $ns2 2 2
addr_nr_ns1=-3 speed=10 \
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 1 1 1
+ join_syn_tx=2 join_connect_err=1 \
+ chk_join_nr 1 1 1
chk_add_nr 3 3
chk_rm_nr 3 1 invert
chk_rst_nr 0 0
@@ -2369,7 +2461,8 @@ remove_tests()
pm_nl_set_limits $ns2 3 3
addr_nr_ns1=-8 speed=slow \
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 1 1 1
+ join_syn_tx=3 \
+ chk_join_nr 1 1 1
chk_add_nr 3 3
chk_rm_nr 3 1 invert
chk_rst_nr 0 0
@@ -2939,37 +3032,16 @@ syncookies_tests()
checksum_tests()
{
- # checksum test 0 0
- if reset_with_checksum 0 0; then
- pm_nl_set_limits $ns1 0 1
- pm_nl_set_limits $ns2 0 1
- run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 0 0 0
- fi
-
- # checksum test 1 1
- if reset_with_checksum 1 1; then
- pm_nl_set_limits $ns1 0 1
- pm_nl_set_limits $ns2 0 1
- run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 0 0 0
- fi
-
- # checksum test 0 1
- if reset_with_checksum 0 1; then
- pm_nl_set_limits $ns1 0 1
- pm_nl_set_limits $ns2 0 1
- run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 0 0 0
- fi
-
- # checksum test 1 0
- if reset_with_checksum 1 0; then
- pm_nl_set_limits $ns1 0 1
- pm_nl_set_limits $ns2 0 1
- run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 0 0 0
- fi
+ local checksum_enable
+ for checksum_enable in "0 0" "1 1" "0 1" "1 0"; do
+ # checksum test 0 0, 1 1, 0 1, 1 0
+ if reset_with_checksum ${checksum_enable}; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 0 0 0
+ fi
+ done
}
deny_join_id0_tests()
@@ -3058,6 +3130,9 @@ fullmesh_tests()
pm_nl_set_limits $ns1 1 3
pm_nl_set_limits $ns2 1 3
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ if mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then
+ pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,fullmesh
+ fi
fullmesh=1 speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 3 3 3
@@ -3157,7 +3232,8 @@ fastclose_tests()
MPTCP_LIB_SUBTEST_FLAKY=1
test_linkfail=1024 fastclose=server \
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 0 0 0 0 0 0 1
+ join_rst_nr=1 \
+ chk_join_nr 0 0 0
chk_fclose_nr 1 1 invert
chk_rst_nr 1 1
fi
@@ -3176,7 +3252,10 @@ fail_tests()
MPTCP_LIB_SUBTEST_FLAKY=1
test_linkfail=128 \
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 0 0 0 +1 +0 1 0 1 "$(pedit_action_pkts)"
+ join_csum_ns1=+1 join_csum_ns2=+0 \
+ join_fail_nr=1 join_rst_nr=0 join_infi_nr=1 \
+ join_corrupted_pkts="$(pedit_action_pkts)" \
+ chk_join_nr 0 0 0
chk_fail_nr 1 -1 invert
fi
@@ -3189,7 +3268,10 @@ fail_tests()
pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow
test_linkfail=1024 \
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 1 1 1 1 0 1 1 0 "$(pedit_action_pkts)"
+ join_csum_ns1=1 join_csum_ns2=0 \
+ join_fail_nr=1 join_rst_nr=1 join_infi_nr=0 \
+ join_corrupted_pkts="$(pedit_action_pkts)" \
+ chk_join_nr 1 1 1
fi
}
@@ -3331,6 +3413,36 @@ userspace_pm_chk_get_addr()
fi
}
+# Check how many events of a given type were recorded for a namespace.
+# $1: ns ; $2: event type ; $3: count
+# $2 is the *name* of an MPTCP_LIB_EVENT_* variable, not its numeric value.
+chk_evt_nr()
+{
+ local ns=${1}
+ local evt_name="${2}"
+ local exp="${3}"
+
+ local evts="${evts_ns1}"
+ # indirect expansion: numeric value of the variable named by $2
+ local evt="${!evt_name}"
+ local count
+
+ evt_name="${evt_name:16}" # without MPTCP_LIB_EVENT_
+ # events file defaults to the ns1 one; switch to ns2's when requested
+ [ "${ns}" == "ns2" ] && evts="${evts_ns2}"
+
+ print_check "event ${ns} ${evt_name} (${exp})"
+
+ # LISTENER_* events are skipped when the kernel symbol is not present
+ if [[ "${evt_name}" = "LISTENER_"* ]] &&
+ ! mptcp_lib_kallsyms_has "mptcp_event_pm_listener$"; then
+ print_skip "event not supported"
+ return
+ fi
+
+ # -w: whole-word match, so that e.g. "type:1" does not also count "type:10"
+ count=$(grep -cw "type:${evt}" "${evts}")
+ if [ "${count}" != "${exp}" ]; then
+ fail_test "got ${count} events, expected ${exp}"
+ else
+ print_ok
+ fi
+}
+
userspace_tests()
{
# userspace pm type prevents add_addr
@@ -3411,8 +3523,8 @@ userspace_tests()
continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
set_userspace_pm $ns1
pm_nl_set_limits $ns2 2 2
- speed=5 \
- run_tests $ns1 $ns2 10.0.1.1 &
+ { speed=5 \
+ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
wait_mpj $ns1
userspace_pm_add_addr $ns1 10.0.2.1 10
@@ -3427,14 +3539,12 @@ userspace_tests()
"signal"
userspace_pm_chk_get_addr "${ns1}" "10" "id 10 flags signal 10.0.2.1"
userspace_pm_chk_get_addr "${ns1}" "20" "id 20 flags signal 10.0.3.1"
- userspace_pm_rm_addr $ns1 10
userspace_pm_rm_sf $ns1 "::ffff:10.0.2.1" $MPTCP_LIB_EVENT_SUB_ESTABLISHED
userspace_pm_chk_dump_addr "${ns1}" \
- "id 20 flags signal 10.0.3.1" "after rm_addr 10"
+ "id 20 flags signal 10.0.3.1" "after rm_sf 10"
userspace_pm_rm_addr $ns1 20
- userspace_pm_rm_sf $ns1 10.0.3.1 $MPTCP_LIB_EVENT_SUB_ESTABLISHED
userspace_pm_chk_dump_addr "${ns1}" "" "after rm_addr 20"
- chk_rm_nr 2 2 invert
+ chk_rm_nr 1 1 invert
chk_mptcp_info subflows 0 subflows 0
chk_subflows_total 1 1
kill_events_pids
@@ -3446,8 +3556,8 @@ userspace_tests()
continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
set_userspace_pm $ns2
pm_nl_set_limits $ns1 0 1
- speed=5 \
- run_tests $ns1 $ns2 10.0.1.1 &
+ { speed=5 \
+ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
wait_mpj $ns2
userspace_pm_add_sf $ns2 10.0.3.2 20
@@ -3458,12 +3568,11 @@ userspace_tests()
"id 20 flags subflow 10.0.3.2" \
"subflow"
userspace_pm_chk_get_addr "${ns2}" "20" "id 20 flags subflow 10.0.3.2"
- userspace_pm_rm_addr $ns2 20
userspace_pm_rm_sf $ns2 10.0.3.2 $MPTCP_LIB_EVENT_SUB_ESTABLISHED
userspace_pm_chk_dump_addr "${ns2}" \
"" \
- "after rm_addr 20"
- chk_rm_nr 1 1
+ "after rm_sf 20"
+ chk_rm_nr 0 1
chk_mptcp_info subflows 0 subflows 0
chk_subflows_total 1 1
kill_events_pids
@@ -3475,8 +3584,8 @@ userspace_tests()
continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
set_userspace_pm $ns2
pm_nl_set_limits $ns1 0 1
- speed=5 \
- run_tests $ns1 $ns2 10.0.1.1 &
+ { speed=5 \
+ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
wait_mpj $ns2
chk_mptcp_info subflows 0 subflows 0
@@ -3496,8 +3605,8 @@ userspace_tests()
continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
set_userspace_pm $ns2
pm_nl_set_limits $ns1 0 1
- speed=5 \
- run_tests $ns1 $ns2 10.0.1.1 &
+ { speed=5 \
+ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
wait_mpj $ns2
userspace_pm_add_sf $ns2 10.0.3.2 20
@@ -3520,8 +3629,8 @@ userspace_tests()
continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
set_userspace_pm $ns1
pm_nl_set_limits $ns2 1 1
- speed=5 \
- run_tests $ns1 $ns2 10.0.1.1 &
+ { speed=5 \
+ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
wait_mpj $ns1
userspace_pm_add_addr $ns1 10.0.2.1 10
@@ -3551,8 +3660,8 @@ endpoint_tests()
pm_nl_set_limits $ns1 2 2
pm_nl_set_limits $ns2 2 2
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
- speed=slow \
- run_tests $ns1 $ns2 10.0.1.1 &
+ { speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
wait_mpj $ns1
@@ -3571,41 +3680,92 @@ endpoint_tests()
mptcp_lib_kill_wait $tests_pid
fi
- if reset "delete and re-add" &&
+ if reset_with_tcp_filter "delete and re-add" ns2 10.0.3.2 REJECT OUTPUT &&
mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
- pm_nl_set_limits $ns1 1 1
- pm_nl_set_limits $ns2 1 1
+ start_events
+ pm_nl_set_limits $ns1 0 3
+ pm_nl_set_limits $ns2 0 3
+ pm_nl_add_endpoint $ns2 10.0.1.2 id 1 dev ns2eth1 flags subflow
pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow
- test_linkfail=4 speed=20 \
- run_tests $ns1 $ns2 10.0.1.1 &
+ { test_linkfail=4 speed=5 \
+ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
wait_mpj $ns2
pm_nl_check_endpoint "creation" \
$ns2 10.0.2.2 id 2 flags subflow dev ns2eth2
- chk_subflow_nr "before delete" 2
+ chk_subflow_nr "before delete id 2" 2
chk_mptcp_info subflows 1 subflows 1
pm_nl_del_endpoint $ns2 2 10.0.2.2
sleep 0.5
- chk_subflow_nr "after delete" 1
+ chk_subflow_nr "after delete id 2" 1
chk_mptcp_info subflows 0 subflows 0
- pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow
wait_mpj $ns2
- chk_subflow_nr "after re-add" 2
+ chk_subflow_nr "after re-add id 2" 2
chk_mptcp_info subflows 1 subflows 1
+
+ pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow
+ wait_attempt_fail $ns2
+ chk_subflow_nr "after new reject" 2
+ chk_mptcp_info subflows 1 subflows 1
+
+ ip netns exec "${ns2}" ${iptables} -D OUTPUT -s "10.0.3.2" -p tcp -j REJECT
+ pm_nl_del_endpoint $ns2 3 10.0.3.2
+ pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow
+ wait_mpj $ns2
+ chk_subflow_nr "after no reject" 3
+ chk_mptcp_info subflows 2 subflows 2
+
+ local i
+ for i in $(seq 3); do
+ pm_nl_del_endpoint $ns2 1 10.0.1.2
+ sleep 0.5
+ chk_subflow_nr "after delete id 0 ($i)" 2
+ chk_mptcp_info subflows 2 subflows 2 # only decr for additional sf
+
+ pm_nl_add_endpoint $ns2 10.0.1.2 id 1 dev ns2eth1 flags subflow
+ wait_mpj $ns2
+ chk_subflow_nr "after re-add id 0 ($i)" 3
+ chk_mptcp_info subflows 3 subflows 3
+ done
+
mptcp_lib_kill_wait $tests_pid
+
+ kill_events_pids
+ chk_evt_nr ns1 MPTCP_LIB_EVENT_LISTENER_CREATED 1
+ chk_evt_nr ns1 MPTCP_LIB_EVENT_CREATED 1
+ chk_evt_nr ns1 MPTCP_LIB_EVENT_ESTABLISHED 1
+ chk_evt_nr ns1 MPTCP_LIB_EVENT_ANNOUNCED 0
+ chk_evt_nr ns1 MPTCP_LIB_EVENT_REMOVED 4
+ chk_evt_nr ns1 MPTCP_LIB_EVENT_SUB_ESTABLISHED 6
+ chk_evt_nr ns1 MPTCP_LIB_EVENT_SUB_CLOSED 4
+
+ chk_evt_nr ns2 MPTCP_LIB_EVENT_CREATED 1
+ chk_evt_nr ns2 MPTCP_LIB_EVENT_ESTABLISHED 1
+ chk_evt_nr ns2 MPTCP_LIB_EVENT_ANNOUNCED 0
+ chk_evt_nr ns2 MPTCP_LIB_EVENT_REMOVED 0
+ chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 6
+ chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_CLOSED 5 # one has been closed before estab
+
+ join_syn_tx=7 \
+ chk_join_nr 6 6 6
+ chk_rm_nr 4 4
fi
# remove and re-add
- if reset "delete re-add signal" &&
+ if reset_with_events "delete re-add signal" &&
mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
- pm_nl_set_limits $ns1 1 1
- pm_nl_set_limits $ns2 1 1
+ pm_nl_set_limits $ns1 0 3
+ pm_nl_set_limits $ns2 3 3
pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal
- test_linkfail=4 speed=20 \
- run_tests $ns1 $ns2 10.0.1.1 &
+ # multicast IP: no packet for this address will be received on ns1
+ pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal
+ pm_nl_add_endpoint $ns1 10.0.1.1 id 42 flags signal
+ { test_linkfail=4 speed=5 \
+ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
wait_mpj $ns2
@@ -3615,17 +3775,91 @@ endpoint_tests()
chk_mptcp_info subflows 1 subflows 1
pm_nl_del_endpoint $ns1 1 10.0.2.1
+ pm_nl_del_endpoint $ns1 2 224.0.0.1
sleep 0.5
chk_subflow_nr "after delete" 1
chk_mptcp_info subflows 0 subflows 0
- pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal
+ wait_mpj $ns2
+ chk_subflow_nr "after re-add" 3
+ chk_mptcp_info subflows 2 subflows 2
+
+ pm_nl_del_endpoint $ns1 42 10.0.1.1
+ sleep 0.5
+ chk_subflow_nr "after delete ID 0" 2
+ chk_mptcp_info subflows 2 subflows 2
+
+ pm_nl_add_endpoint $ns1 10.0.1.1 id 99 flags signal
+ wait_mpj $ns2
+ chk_subflow_nr "after re-add ID 0" 3
+ chk_mptcp_info subflows 3 subflows 3
+
+ pm_nl_del_endpoint $ns1 99 10.0.1.1
+ sleep 0.5
+ chk_subflow_nr "after re-delete ID 0" 2
+ chk_mptcp_info subflows 2 subflows 2
+
+ pm_nl_add_endpoint $ns1 10.0.1.1 id 88 flags signal
+ wait_mpj $ns2
+ chk_subflow_nr "after re-re-add ID 0" 3
+ chk_mptcp_info subflows 3 subflows 3
+ mptcp_lib_kill_wait $tests_pid
+
+ kill_events_pids
+ chk_evt_nr ns1 MPTCP_LIB_EVENT_LISTENER_CREATED 1
+ chk_evt_nr ns1 MPTCP_LIB_EVENT_CREATED 1
+ chk_evt_nr ns1 MPTCP_LIB_EVENT_ESTABLISHED 1
+ chk_evt_nr ns1 MPTCP_LIB_EVENT_ANNOUNCED 0
+ chk_evt_nr ns1 MPTCP_LIB_EVENT_REMOVED 0
+ chk_evt_nr ns1 MPTCP_LIB_EVENT_SUB_ESTABLISHED 5
+ chk_evt_nr ns1 MPTCP_LIB_EVENT_SUB_CLOSED 3
+
+ chk_evt_nr ns2 MPTCP_LIB_EVENT_CREATED 1
+ chk_evt_nr ns2 MPTCP_LIB_EVENT_ESTABLISHED 1
+ chk_evt_nr ns2 MPTCP_LIB_EVENT_ANNOUNCED 6
+ chk_evt_nr ns2 MPTCP_LIB_EVENT_REMOVED 4
+ chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 5
+ chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_CLOSED 3
+
+ join_connect_err=1 \
+ chk_join_nr 5 5 5
+ chk_add_nr 6 6
+ chk_rm_nr 4 3 invert
+ fi
+
+ # flush and re-add
+ if reset_with_tcp_filter "flush re-add" ns2 10.0.3.2 REJECT OUTPUT &&
+ mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 1 2
+ # multicast IP: no packet for this address will be received on ns1
+ pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal
+ pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow
+ { test_linkfail=4 speed=20 \
+ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
+ local tests_pid=$!
+
+ wait_attempt_fail $ns2
+ chk_subflow_nr "before flush" 1
+ chk_mptcp_info subflows 0 subflows 0
+
+ pm_nl_flush_endpoint $ns2
+ pm_nl_flush_endpoint $ns1
+ wait_rm_addr $ns2 0
+ ip netns exec "${ns2}" ${iptables} -D OUTPUT -s "10.0.3.2" -p tcp -j REJECT
+ pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow
+ wait_mpj $ns2
+ pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal
wait_mpj $ns2
- chk_subflow_nr "after re-add" 2
- chk_mptcp_info subflows 1 subflows 1
mptcp_lib_kill_wait $tests_pid
- fi
+ join_syn_tx=3 join_connect_err=1 \
+ chk_join_nr 2 2 2
+ chk_add_nr 2 2
+ chk_rm_nr 1 0 invert
+ fi
}
# [$1: error message]
@@ -3727,9 +3961,11 @@ if [ ${#tests[@]} -eq 0 ]; then
tests=("${all_tests_names[@]}")
fi
+mptcp_lib_subtests_last_ts_reset
for subtests in "${tests[@]}"; do
"${subtests}"
done
+append_prev_results
if [ ${ret} -ne 0 ]; then
echo
@@ -3740,7 +3976,6 @@ if [ ${ret} -ne 0 ]; then
echo
fi
-append_prev_results
mptcp_lib_result_print_all_tap
exit $ret
diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
index 438280e68434..975d4d4c862a 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
@@ -12,10 +12,14 @@ readonly KSFT_SKIP=4
readonly KSFT_TEST="${MPTCP_LIB_KSFT_TEST:-$(basename "${0}" .sh)}"
# These variables are used in some selftests, read-only
+declare -rx MPTCP_LIB_EVENT_CREATED=1 # MPTCP_EVENT_CREATED
+declare -rx MPTCP_LIB_EVENT_ESTABLISHED=2 # MPTCP_EVENT_ESTABLISHED
+declare -rx MPTCP_LIB_EVENT_CLOSED=3 # MPTCP_EVENT_CLOSED
declare -rx MPTCP_LIB_EVENT_ANNOUNCED=6 # MPTCP_EVENT_ANNOUNCED
declare -rx MPTCP_LIB_EVENT_REMOVED=7 # MPTCP_EVENT_REMOVED
declare -rx MPTCP_LIB_EVENT_SUB_ESTABLISHED=10 # MPTCP_EVENT_SUB_ESTABLISHED
declare -rx MPTCP_LIB_EVENT_SUB_CLOSED=11 # MPTCP_EVENT_SUB_CLOSED
+declare -rx MPTCP_LIB_EVENT_SUB_PRIORITY=13 # MPTCP_EVENT_SUB_PRIORITY
declare -rx MPTCP_LIB_EVENT_LISTENER_CREATED=15 # MPTCP_EVENT_LISTENER_CREATED
declare -rx MPTCP_LIB_EVENT_LISTENER_CLOSED=16 # MPTCP_EVENT_LISTENER_CLOSED
@@ -25,6 +29,7 @@ declare -rx MPTCP_LIB_AF_INET6=10
MPTCP_LIB_SUBTESTS=()
MPTCP_LIB_SUBTESTS_DUPLICATED=0
MPTCP_LIB_SUBTEST_FLAKY=0
+MPTCP_LIB_SUBTESTS_LAST_TS_MS=
MPTCP_LIB_TEST_COUNTER=0
MPTCP_LIB_TEST_FORMAT="%02u %-50s"
MPTCP_LIB_IP_MPTCP=0
@@ -201,6 +206,11 @@ mptcp_lib_kversion_ge() {
mptcp_lib_fail_if_expected_feature "kernel version ${1} lower than ${v}"
}
+# Store the current time, in milliseconds (date +%s%3N), in
+# MPTCP_LIB_SUBTESTS_LAST_TS_MS.
+mptcp_lib_subtests_last_ts_reset() {
+ MPTCP_LIB_SUBTESTS_LAST_TS_MS="$(date +%s%3N)"
+}
+mptcp_lib_subtests_last_ts_reset
+
__mptcp_lib_result_check_duplicated() {
local subtest
@@ -215,13 +225,22 @@ __mptcp_lib_result_check_duplicated() {
__mptcp_lib_result_add() {
local result="${1}"
+ local time="time="
+ local ts_prev_ms
shift
local id=$((${#MPTCP_LIB_SUBTESTS[@]} + 1))
__mptcp_lib_result_check_duplicated "${*}"
- MPTCP_LIB_SUBTESTS+=("${result} ${id} - ${KSFT_TEST}: ${*}")
+ # not to add two '#'
+ [[ "${*}" != *"#"* ]] && time="# ${time}"
+
+ ts_prev_ms="${MPTCP_LIB_SUBTESTS_LAST_TS_MS}"
+ mptcp_lib_subtests_last_ts_reset
+ time+="$((MPTCP_LIB_SUBTESTS_LAST_TS_MS - ts_prev_ms))ms"
+
+ MPTCP_LIB_SUBTESTS+=("${result} ${id} - ${KSFT_TEST}: ${*} ${time}")
}
# $1: test name
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
index 68899a303a1a..5e8d5b83e2d0 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
@@ -349,6 +349,7 @@ init
make_file "$cin" "client" 1
make_file "$sin" "server" 1
trap cleanup EXIT
+mptcp_lib_subtests_last_ts_reset
run_tests $ns1 $ns2 10.0.1.1
run_tests $ns1 $ns2 dead:beef:1::1
diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh
index 2757378b1b13..2e6648a2b2c0 100755
--- a/tools/testing/selftests/net/mptcp/pm_netlink.sh
+++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh
@@ -137,6 +137,8 @@ check()
fi
}
+mptcp_lib_subtests_last_ts_reset
+
check "show_endpoints" "" "defaults addr list"
default_limits="$(get_limits)"
diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
index 7ad5a59adff2..994a556f46c1 100644
--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -19,12 +19,6 @@
#include "linux/mptcp.h"
-#ifndef MPTCP_PM_NAME
-#define MPTCP_PM_NAME "mptcp_pm"
-#endif
-#ifndef MPTCP_PM_EVENTS
-#define MPTCP_PM_EVENTS "mptcp_pm_events"
-#endif
#ifndef IPPROTO_MPTCP
#define IPPROTO_MPTCP 262
#endif
@@ -116,7 +110,7 @@ static int capture_events(int fd, int event_group)
if (setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
&event_group, sizeof(event_group)) < 0)
- error(1, errno, "could not join the " MPTCP_PM_EVENTS " mcast group");
+ error(1, errno, "could not join the " MPTCP_PM_EV_GRP_NAME " mcast group");
do {
FD_ZERO(&rfds);
@@ -288,7 +282,7 @@ static int genl_parse_getfamily(struct nlmsghdr *nlh, int *pm_family,
if (grp->rta_type == CTRL_ATTR_MCAST_GRP_ID)
*events_mcast_grp = *(__u32 *)RTA_DATA(grp);
else if (grp->rta_type == CTRL_ATTR_MCAST_GRP_NAME &&
- !strcmp(RTA_DATA(grp), MPTCP_PM_EVENTS))
+ !strcmp(RTA_DATA(grp), MPTCP_PM_EV_GRP_NAME))
got_events_grp = 1;
grp = RTA_NEXT(grp, grp_len);
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index f74e1c3c126d..8fa77c8e9b65 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -286,6 +286,7 @@ while getopts "bcdhi" option;do
done
setup
+mptcp_lib_subtests_last_ts_reset
run_test 10 10 0 0 "balanced bwidth"
run_test 10 10 1 25 "balanced bwidth with unbalanced delay"
diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh
index 9cb05978269d..3651f73451cf 100755
--- a/tools/testing/selftests/net/mptcp/userspace_pm.sh
+++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh
@@ -150,6 +150,7 @@ mptcp_lib_events "${ns2}" "${client_evts}" client_evts_pid
server_evts=$(mktemp)
mptcp_lib_events "${ns1}" "${server_evts}" server_evts_pid
sleep 0.5
+mptcp_lib_subtests_last_ts_reset
print_title "Init"
print_test "Created network namespaces ns1, ns2"
diff --git a/tools/testing/selftests/net/ncdevmem.c b/tools/testing/selftests/net/ncdevmem.c
new file mode 100644
index 000000000000..64d6805381c5
--- /dev/null
+++ b/tools/testing/selftests/net/ncdevmem.c
@@ -0,0 +1,570 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#define __EXPORTED_HEADERS__
+
+#include <linux/uio.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <string.h>
+#include <errno.h>
+#define __iovec_defined
+#include <fcntl.h>
+#include <malloc.h>
+#include <error.h>
+
+#include <arpa/inet.h>
+#include <sys/socket.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+
+#include <linux/memfd.h>
+#include <linux/dma-buf.h>
+#include <linux/udmabuf.h>
+#include <libmnl/libmnl.h>
+#include <linux/types.h>
+#include <linux/netlink.h>
+#include <linux/genetlink.h>
+#include <linux/netdev.h>
+#include <time.h>
+#include <net/if.h>
+
+#include "netdev-user.h"
+#include <ynl.h>
+
+#define PAGE_SHIFT 12
+#define TEST_PREFIX "ncdevmem"
+#define NUM_PAGES 16000
+
+#ifndef MSG_SOCK_DEVMEM
+#define MSG_SOCK_DEVMEM 0x2000000
+#endif
+
+/*
+ * tcpdevmem netcat. Works similarly to netcat but does device memory TCP
+ * instead of regular TCP. Uses udmabuf to mock a dmabuf provider.
+ *
+ * Usage:
+ *
+ * On server:
+ * ncdevmem -s <server IP> -c <client IP> -f eth1 -l -p 5201 -v 7
+ *
+ * On client:
+ * yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06) | \
+ * tr \\n \\0 | \
+ * head -c 5G | \
+ * nc <server IP> 5201 -p 5201
+ *
+ * Note this is compatible with regular netcat. i.e. the sender or receiver can
+ * be replaced with regular netcat to test the RX or TX path in isolation.
+ */
+
+static char *server_ip = "192.168.1.4";
+static char *client_ip = "192.168.1.2";
+static char *port = "5201";
+static size_t do_validation;
+static int start_queue = 8;
+static int num_queues = 8;
+static char *ifname = "eth1";
+static unsigned int ifindex;
+static unsigned int dmabuf_id;
+
+/*
+ * Dump @size bytes starting at @ptr to stdout as two-digit uppercase hex
+ * values, each followed by a space, then a terminating newline.
+ */
+void print_bytes(void *ptr, size_t size)
+{
+	unsigned char *p = ptr;
+	size_t i;	/* same type as @size: no signed/unsigned comparison */
+
+	for (i = 0; i < size; i++)
+		printf("%02hhX ", p[i]);
+	printf("\n");
+}
+
+/*
+ * Write @size raw bytes starting at @ptr to stdout with putchar(), followed
+ * by a newline.
+ *
+ * NOTE: despite the name, no filtering is done here: every byte, including
+ * zero bytes, is written.
+ */
+void print_nonzero_bytes(void *ptr, size_t size)
+{
+	unsigned char *p = ptr;
+	size_t i;	/* as wide as @size, so the loop always terminates */
+
+	for (i = 0; i < size; i++)
+		putchar(p[i]);
+	printf("\n");
+}
+
+/*
+ * Check that the @size bytes at @line continue the expected byte sequence.
+ *
+ * The expected value is kept in a static counter that starts at 1, is
+ * incremented after every byte and is reset to 0 whenever it reaches the
+ * file-scope do_validation value; the state therefore carries over from one
+ * call to the next.
+ *
+ * Each mismatch is reported on stderr. After more than 20 mismatches within
+ * a single call the program is terminated through error(1, ...).
+ */
+void validate_buffer(void *line, size_t size)
+{
+	static unsigned char seed = 1;
+	unsigned char *ptr = line;
+	int errors = 0;
+	size_t i;
+
+	for (i = 0; i < size; i++) {
+		if (ptr[i] != seed) {
+			/* %zu is the matching conversion for a size_t index */
+			fprintf(stderr,
+				"Failed validation: expected=%u, actual=%u, index=%zu\n",
+				seed, ptr[i], i);
+			errors++;
+			if (errors > 20)
+				error(1, 0, "validation failed.");
+		}
+		seed++;
+		if (seed == do_validation)
+			seed = 0;
+	}
+
+	fprintf(stdout, "Validated buffer\n");
+}
+
+/*
+ * Build a shell command from a printf-style format and arguments, echo it to
+ * stdout and execute it with system(). The command is formatted into a
+ * 256-byte buffer, so anything longer is truncated by snprintf(). The macro
+ * is a statement expression whose value is the return value of system().
+ */
+#define run_command(cmd, ...) \
+ ({ \
+ char command[256]; \
+ memset(command, 0, sizeof(command)); \
+ snprintf(command, sizeof(command), cmd, ##__VA_ARGS__); \
+ printf("Running: %s\n", command); \
+ system(command); \
+ })
+
+/*
+ * Switch the ntuple feature of the interface off and then on again using
+ * ethtool. Returns the status of the "off" command if it is non-zero,
+ * otherwise the status of the "on" command.
+ * NOTE(review): presumably done to drop previously installed flow rules -
+ * confirm against the ethtool documentation.
+ */
+static int reset_flow_steering(void)
+{
+	int err = run_command("sudo ethtool -K %s ntuple off", ifname);
+
+	if (err)
+		return err;
+
+	return run_command("sudo ethtool -K %s ntuple on", ifname);
+}
+
+/*
+ * Run "ethtool -G <ifname> tcp-data-split on|off" (through sudo), selecting
+ * "on" when @on is true. Returns the value produced by run_command().
+ */
+static int configure_headersplit(bool on)
+{
+ return run_command("sudo ethtool -G %s tcp-data-split %s", ifname,
+ on ? "on" : "off");
+}
+
+/*
+ * Run "ethtool -X <ifname> equal <start_queue>" (through sudo) and return the
+ * value produced by run_command().
+ * NOTE(review): presumably this restricts RSS to the queues below
+ * start_queue so that regular traffic avoids the devmem queues - confirm.
+ */
+static int configure_rss(void)
+{
+ return run_command("sudo ethtool -X %s equal %d", ifname, start_queue);
+}
+
+/*
+ * Run "ethtool -L <ifname> rx <rx> tx <tx>" (through sudo) and return the
+ * value produced by run_command().
+ */
+static int configure_channels(unsigned int rx, unsigned int tx)
+{
+ return run_command("sudo ethtool -L %s rx %u tx %u", ifname, rx, tx);
+}
+
+/*
+ * Install, with "ethtool -N", a tcp4 flow rule on ifname matching
+ * client_ip -> server_ip with "port" as both source and destination port,
+ * and directing it to queue start_queue. Returns the value produced by
+ * run_command().
+ */
+static int configure_flow_steering(void)
+{
+ return run_command("sudo ethtool -N %s flow-type tcp4 src-ip %s dst-ip %s src-port %s dst-port %s queue %d",
+ ifname, client_ip, server_ip, port, port, start_queue);
+}
+
+/*
+ * Send a netdev "bind rx" request for interface @ifindex, passing the file
+ * descriptor @dmabuf_fd and the @n_queue_index entries of @queues.
+ *
+ * A YNL socket is created and stored in *@ys. On success it is left open for
+ * the caller and the id returned in the response is saved in the file-scope
+ * dmabuf_id.
+ *
+ * Returns 0 on success, -1 on failure. On failure after the socket was
+ * created, the request, the response (if any) and the socket are released.
+ */
+static int bind_rx_queue(unsigned int ifindex, unsigned int dmabuf_fd,
+			 struct netdev_queue_id *queues,
+			 unsigned int n_queue_index, struct ynl_sock **ys)
+{
+	struct netdev_bind_rx_req *req = NULL;
+	struct netdev_bind_rx_rsp *rsp = NULL;
+	struct ynl_error yerr;
+
+	*ys = ynl_sock_create(&ynl_netdev_family, &yerr);
+	if (!*ys) {
+		fprintf(stderr, "YNL: %s\n", yerr.msg);
+		return -1;
+	}
+
+	req = netdev_bind_rx_req_alloc();
+	netdev_bind_rx_req_set_ifindex(req, ifindex);
+	netdev_bind_rx_req_set_fd(req, dmabuf_fd);
+	__netdev_bind_rx_req_set_queues(req, queues, n_queue_index);
+
+	rsp = netdev_bind_rx(*ys, req);
+	if (!rsp) {
+		perror("netdev_bind_rx");
+		goto err_close;
+	}
+
+	if (!rsp->_present.id) {
+		/*
+		 * No errno-setting call failed here, so perror() would only
+		 * append an unrelated errno description.
+		 */
+		fprintf(stderr, "id not present\n");
+		goto err_close;
+	}
+
+	printf("got dmabuf id=%d\n", rsp->id);
+	dmabuf_id = rsp->id;
+
+	netdev_bind_rx_req_free(req);
+	netdev_bind_rx_rsp_free(rsp);
+
+	return 0;
+
+err_close:
+	fprintf(stderr, "YNL failed: %s\n", (*ys)->err.msg);
+	netdev_bind_rx_req_free(req);
+	/* rsp is non-NULL when we get here from the "id not present" check */
+	if (rsp)
+		netdev_bind_rx_rsp_free(rsp);
+	ynl_sock_destroy(*ys);
+	return -1;
+}
+
+/*
+ * Create a udmabuf of @dmabuf_size bytes backed by a memfd.
+ *
+ * On return *@devfd holds the fd of /dev/udmabuf, *@memfd the fd of the
+ * backing memfd and *@buf the fd returned by the UDMABUF_CREATE ioctl.
+ * Every failure is reported with error() using a non-zero exit status, so
+ * the function only returns when all steps succeeded.
+ */
+static void create_udmabuf(int *devfd, int *memfd, int *buf, size_t dmabuf_size)
+{
+ struct udmabuf_create create;
+ int ret;
+
+ *devfd = open("/dev/udmabuf", O_RDWR);
+ if (*devfd < 0) {
+ error(70, 0,
+ "%s: [skip,no-udmabuf: Unable to access DMA buffer device file]\n",
+ TEST_PREFIX);
+ }
+
+ /* MFD_ALLOW_SEALING is needed for the F_ADD_SEALS call below */
+ *memfd = memfd_create("udmabuf-test", MFD_ALLOW_SEALING);
+ if (*memfd < 0)
+ error(70, 0, "%s: [skip,no-memfd]\n", TEST_PREFIX);
+
+ /* Required for udmabuf */
+ ret = fcntl(*memfd, F_ADD_SEALS, F_SEAL_SHRINK);
+ if (ret < 0)
+ error(73, 0, "%s: [skip,fcntl-add-seals]\n", TEST_PREFIX);
+
+ /* size the memfd to the requested buffer size */
+ ret = ftruncate(*memfd, dmabuf_size);
+ if (ret == -1)
+ error(74, 0, "%s: [FAIL,memfd-truncate]\n", TEST_PREFIX);
+
+ memset(&create, 0, sizeof(create));
+
+ /* describe the whole memfd, starting at offset 0 */
+ create.memfd = *memfd;
+ create.offset = 0;
+ create.size = dmabuf_size;
+ *buf = ioctl(*devfd, UDMABUF_CREATE, &create);
+ if (*buf < 0)
+ error(75, 0, "%s: [FAIL, create udmabuf]\n", TEST_PREFIX);
+}
+
+int do_server(void)
+{
+ char ctrl_data[sizeof(int) * 20000];
+ struct netdev_queue_id *queues;
+ size_t non_page_aligned_frags = 0;
+ struct sockaddr_in client_addr;
+ struct sockaddr_in server_sin;
+ size_t page_aligned_frags = 0;
+ int devfd, memfd, buf, ret;
+ size_t total_received = 0;
+ socklen_t client_addr_len;
+ bool is_devmem = false;
+ char *buf_mem = NULL;
+ struct ynl_sock *ys;
+ size_t dmabuf_size;
+ char iobuf[819200];
+ char buffer[256];
+ int socket_fd;
+ int client_fd;
+ size_t i = 0;
+ int opt = 1;
+
+ dmabuf_size = getpagesize() * NUM_PAGES;
+
+ create_udmabuf(&devfd, &memfd, &buf, dmabuf_size);
+
+ if (reset_flow_steering())
+ error(1, 0, "Failed to reset flow steering\n");
+
+ /* Configure RSS to divert all traffic from our devmem queues */
+ if (configure_rss())
+ error(1, 0, "Failed to configure rss\n");
+
+ /* Flow steer our devmem flows to start_queue */
+ if (configure_flow_steering())
+ error(1, 0, "Failed to configure flow steering\n");
+
+ sleep(1);
+
+ queues = malloc(sizeof(*queues) * num_queues);
+
+ for (i = 0; i < num_queues; i++) {
+ queues[i]._present.type = 1;
+ queues[i]._present.id = 1;
+ queues[i].type = NETDEV_QUEUE_TYPE_RX;
+ queues[i].id = start_queue + i;
+ }
+
+ if (bind_rx_queue(ifindex, buf, queues, num_queues, &ys))
+ error(1, 0, "Failed to bind\n");
+
+ buf_mem = mmap(NULL, dmabuf_size, PROT_READ | PROT_WRITE, MAP_SHARED,
+ buf, 0);
+ if (buf_mem == MAP_FAILED)
+ error(1, 0, "mmap()");
+
+ server_sin.sin_family = AF_INET;
+ server_sin.sin_port = htons(atoi(port));
+
+ ret = inet_pton(server_sin.sin_family, server_ip, &server_sin.sin_addr);
+ if (socket < 0)
+ error(79, 0, "%s: [FAIL, create socket]\n", TEST_PREFIX);
+
+ socket_fd = socket(server_sin.sin_family, SOCK_STREAM, 0);
+ if (socket < 0)
+ error(errno, errno, "%s: [FAIL, create socket]\n", TEST_PREFIX);
+
+ ret = setsockopt(socket_fd, SOL_SOCKET, SO_REUSEPORT, &opt,
+ sizeof(opt));
+ if (ret)
+ error(errno, errno, "%s: [FAIL, set sock opt]\n", TEST_PREFIX);
+
+ ret = setsockopt(socket_fd, SOL_SOCKET, SO_REUSEADDR, &opt,
+ sizeof(opt));
+ if (ret)
+ error(errno, errno, "%s: [FAIL, set sock opt]\n", TEST_PREFIX);
+
+ printf("binding to address %s:%d\n", server_ip,
+ ntohs(server_sin.sin_port));
+
+ ret = bind(socket_fd, &server_sin, sizeof(server_sin));
+ if (ret)
+ error(errno, errno, "%s: [FAIL, bind]\n", TEST_PREFIX);
+
+ ret = listen(socket_fd, 1);
+ if (ret)
+ error(errno, errno, "%s: [FAIL, listen]\n", TEST_PREFIX);
+
+ client_addr_len = sizeof(client_addr);
+
+ inet_ntop(server_sin.sin_family, &server_sin.sin_addr, buffer,
+ sizeof(buffer));
+ printf("Waiting or connection on %s:%d\n", buffer,
+ ntohs(server_sin.sin_port));
+ client_fd = accept(socket_fd, &client_addr, &client_addr_len);
+
+ inet_ntop(client_addr.sin_family, &client_addr.sin_addr, buffer,
+ sizeof(buffer));
+ printf("Got connection from %s:%d\n", buffer,
+ ntohs(client_addr.sin_port));
+
+ while (1) {
+ struct iovec iov = { .iov_base = iobuf,
+ .iov_len = sizeof(iobuf) };
+ struct dmabuf_cmsg *dmabuf_cmsg = NULL;
+ struct dma_buf_sync sync = { 0 };
+ struct cmsghdr *cm = NULL;
+ struct msghdr msg = { 0 };
+ struct dmabuf_token token;
+ ssize_t ret;
+
+ is_devmem = false;
+ printf("\n\n");
+
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = ctrl_data;
+ msg.msg_controllen = sizeof(ctrl_data);
+ ret = recvmsg(client_fd, &msg, MSG_SOCK_DEVMEM);
+ printf("recvmsg ret=%ld\n", ret);
+ if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
+ continue;
+ if (ret < 0) {
+ perror("recvmsg");
+ continue;
+ }
+ if (ret == 0) {
+ printf("client exited\n");
+ goto cleanup;
+ }
+
+ i++;
+ for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
+ if (cm->cmsg_level != SOL_SOCKET ||
+ (cm->cmsg_type != SCM_DEVMEM_DMABUF &&
+ cm->cmsg_type != SCM_DEVMEM_LINEAR)) {
+ fprintf(stdout, "skipping non-devmem cmsg\n");
+ continue;
+ }
+
+ dmabuf_cmsg = (struct dmabuf_cmsg *)CMSG_DATA(cm);
+ is_devmem = true;
+
+ if (cm->cmsg_type == SCM_DEVMEM_LINEAR) {
+ /* TODO: process data copied from skb's linear
+ * buffer.
+ */
+ fprintf(stdout,
+ "SCM_DEVMEM_LINEAR. dmabuf_cmsg->frag_size=%u\n",
+ dmabuf_cmsg->frag_size);
+
+ continue;
+ }
+
+ token.token_start = dmabuf_cmsg->frag_token;
+ token.token_count = 1;
+
+ total_received += dmabuf_cmsg->frag_size;
+ printf("received frag_page=%llu, in_page_offset=%llu, frag_offset=%llu, frag_size=%u, token=%u, total_received=%lu, dmabuf_id=%u\n",
+ dmabuf_cmsg->frag_offset >> PAGE_SHIFT,
+ dmabuf_cmsg->frag_offset % getpagesize(),
+ dmabuf_cmsg->frag_offset, dmabuf_cmsg->frag_size,
+ dmabuf_cmsg->frag_token, total_received,
+ dmabuf_cmsg->dmabuf_id);
+
+ if (dmabuf_cmsg->dmabuf_id != dmabuf_id)
+ error(1, 0,
+ "received on wrong dmabuf_id: flow steering error\n");
+
+ if (dmabuf_cmsg->frag_size % getpagesize())
+ non_page_aligned_frags++;
+ else
+ page_aligned_frags++;
+
+ sync.flags = DMA_BUF_SYNC_READ | DMA_BUF_SYNC_START;
+ ioctl(buf, DMA_BUF_IOCTL_SYNC, &sync);
+
+ if (do_validation)
+ validate_buffer(
+ ((unsigned char *)buf_mem) +
+ dmabuf_cmsg->frag_offset,
+ dmabuf_cmsg->frag_size);
+ else
+ print_nonzero_bytes(
+ ((unsigned char *)buf_mem) +
+ dmabuf_cmsg->frag_offset,
+ dmabuf_cmsg->frag_size);
+
+ sync.flags = DMA_BUF_SYNC_READ | DMA_BUF_SYNC_END;
+ ioctl(buf, DMA_BUF_IOCTL_SYNC, &sync);
+
+ ret = setsockopt(client_fd, SOL_SOCKET,
+ SO_DEVMEM_DONTNEED, &token,
+ sizeof(token));
+ if (ret != 1)
+ error(1, 0,
+ "SO_DEVMEM_DONTNEED not enough tokens");
+ }
+ if (!is_devmem)
+ error(1, 0, "flow steering error\n");
+
+ printf("total_received=%lu\n", total_received);
+ }
+
+ fprintf(stdout, "%s: ok\n", TEST_PREFIX);
+
+ fprintf(stdout, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n",
+ page_aligned_frags, non_page_aligned_frags);
+
+ fprintf(stdout, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n",
+ page_aligned_frags, non_page_aligned_frags);
+
+cleanup:
+
+ munmap(buf_mem, dmabuf_size);
+ close(client_fd);
+ close(socket_fd);
+ close(buf);
+ close(memfd);
+ close(devfd);
+ ynl_sock_destroy(ys);
+
+ return 0;
+}
+
+/*
+ * Allocate a zeroed array of num_queues queue ids for one bind request.
+ *
+ * When @populate is non-zero every entry is filled in to describe RX queue
+ * start_queue + i; otherwise the entries are left zeroed (nothing marked
+ * present), which is what the "empty queues" negative test passes in.
+ *
+ * NOTE(review): the YNL request setter used by bind_rx_queue() appears to
+ * take ownership of the array and free it together with the request --
+ * confirm. Handing out a fresh array per bind call is safe either way,
+ * whereas reusing one array across calls is a use-after-free if it does.
+ */
+static struct netdev_queue_id *alloc_test_queues(int populate)
+{
+	struct netdev_queue_id *queues;
+	size_t i;
+
+	queues = calloc(num_queues, sizeof(*queues));
+	if (!queues)
+		error(1, 0, "Failed to allocate queues\n");
+
+	if (!populate)
+		return queues;
+
+	for (i = 0; i < num_queues; i++) {
+		queues[i]._present.type = 1;
+		queues[i]._present.id = 1;
+		queues[i].type = NETDEV_QUEUE_TYPE_RX;
+		queues[i].id = start_queue + i;
+	}
+
+	return queues;
+}
+
+/*
+ * Negative/positive sanity tests for dmabuf binding, run before the server:
+ *  - binding an array of unset queue ids must fail,
+ *  - binding with header split disabled must fail,
+ *  - binding with header split enabled must succeed,
+ *  - shrinking the channel count below a bound queue must fail.
+ * Any unexpected result aborts the program through error().
+ */
+void run_devmem_tests(void)
+{
+	int devfd, memfd, buf;
+	struct ynl_sock *ys;
+	size_t dmabuf_size;
+
+	dmabuf_size = getpagesize() * NUM_PAGES;
+
+	create_udmabuf(&devfd, &memfd, &buf, dmabuf_size);
+
+	/* Configure RSS to divert all traffic from our devmem queues */
+	if (configure_rss())
+		error(1, 0, "rss error\n");
+
+	if (configure_headersplit(1))
+		error(1, 0, "Failed to configure header split\n");
+
+	if (!bind_rx_queue(ifindex, buf, alloc_test_queues(0), num_queues, &ys))
+		error(1, 0, "Binding empty queues array should have failed\n");
+
+	if (configure_headersplit(0))
+		error(1, 0, "Failed to configure header split\n");
+
+	if (!bind_rx_queue(ifindex, buf, alloc_test_queues(1), num_queues, &ys))
+		error(1, 0, "Configure dmabuf with header split off should have failed\n");
+
+	if (configure_headersplit(1))
+		error(1, 0, "Failed to configure header split\n");
+
+	if (bind_rx_queue(ifindex, buf, alloc_test_queues(1), num_queues, &ys))
+		error(1, 0, "Failed to bind\n");
+
+	/* Deactivating a bound queue should not be legal */
+	if (!configure_channels(num_queues, num_queues - 1))
+		error(1, 0, "Deactivating a bound queue should be illegal.\n");
+
+	/* Closing the netlink socket does an implicit unbind */
+	ynl_sock_destroy(ys);
+
+	/* Release the udmabuf created above; do_server() creates its own. */
+	close(buf);
+	close(memfd);
+	close(devfd);
+}
+
+/*
+ * Entry point: parse the command line into the file-level settings, resolve
+ * the interface name to an index, run the binding sanity tests and, when -l
+ * was given, start the devmem TCP server.
+ *
+ * Options:
+ *   -l        run as server (listen)
+ *   -s <ip>   server address
+ *   -c <ip>   client address
+ *   -p <port> server port
+ *   -v <n>    value stored in do_validation
+ *   -q <n>    number of queues to bind
+ *   -t <n>    first queue to bind
+ *   -f <if>   interface name
+ */
+int main(int argc, char *argv[])
+{
+	int is_server = 0, opt;
+
+	while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:")) != -1) {
+		switch (opt) {
+		case 'l':
+			is_server = 1;
+			break;
+		case 's':
+			server_ip = optarg;
+			break;
+		case 'c':
+			client_ip = optarg;
+			break;
+		case 'p':
+			port = optarg;
+			break;
+		case 'v':
+			do_validation = atoll(optarg);
+			break;
+		case 'q':
+			num_queues = atoi(optarg);
+			break;
+		case 't':
+			start_queue = atoi(optarg);
+			break;
+		case 'f':
+			ifname = optarg;
+			break;
+		case '?':
+			printf("unknown option: %c\n", optopt);
+			break;
+		}
+	}
+
+	if (!ifname)
+		error(1, 0, "Missing -f argument\n");
+
+	/* if_nametoindex() returns 0 when the interface does not exist;
+	 * fail here instead of running every test against ifindex 0.
+	 */
+	ifindex = if_nametoindex(ifname);
+	if (!ifindex)
+		error(1, errno, "Unknown interface %s\n", ifname);
+
+	for (; optind < argc; optind++)
+		printf("extra arguments: %s\n", argv[optind]);
+
+	run_devmem_tests();
+
+	if (is_server)
+		return do_server();
+
+	return 0;
+}
diff --git a/tools/testing/selftests/net/netdevice.sh b/tools/testing/selftests/net/netdevice.sh
index e3afcb424710..438f7b2acc5f 100755
--- a/tools/testing/selftests/net/netdevice.sh
+++ b/tools/testing/selftests/net/netdevice.sh
@@ -67,8 +67,12 @@ kci_net_setup()
return $ksft_skip
fi
- # TODO what ipaddr to set ? DHCP ?
- echo "SKIP: $netdev: set IP address"
+ if [ "$veth_created" ]; then
+ echo "XFAIL: $netdev: set IP address unsupported for veth*"
+ else
+ # TODO what ipaddr to set ? DHCP ?
+ echo "SKIP: $netdev: set IP address"
+ fi
return $ksft_skip
}
@@ -86,7 +90,7 @@ kci_netdev_ethtool_test()
ret=$?
if [ $ret -ne 0 ];then
if [ $ret -eq "$1" ];then
- echo "SKIP: $netdev: ethtool $2 not supported"
+ echo "XFAIL: $netdev: ethtool $2 not supported"
return $ksft_skip
else
echo "FAIL: $netdev: ethtool $2"
@@ -124,11 +128,45 @@ kci_netdev_ethtool()
return 1
fi
echo "PASS: $netdev: ethtool list features"
- #TODO for each non fixed features, try to turn them on/off
+
+ while read -r FEATURE VALUE FIXED; do
+ [ "$FEATURE" != "Features" ] || continue # Skip "Features"
+ [ "$FIXED" != "[fixed]" ] || continue # Skip fixed features
+ feature="${FEATURE%:*}"
+
+ ethtool --offload "$netdev" "$feature" off
+ if [ $? -eq 0 ]; then
+ echo "PASS: $netdev: Turned off feature: $feature"
+ else
+ echo "FAIL: $netdev: Failed to turn off feature:" \
+ "$feature"
+ fi
+
+ ethtool --offload "$netdev" "$feature" on
+ if [ $? -eq 0 ]; then
+ echo "PASS: $netdev: Turned on feature: $feature"
+ else
+ echo "FAIL: $netdev: Failed to turn on feature:" \
+ "$feature"
+ fi
+
+ #restore the feature to its initial state
+ ethtool --offload "$netdev" "$feature" "$VALUE"
+ if [ $? -eq 0 ]; then
+ echo "PASS: $netdev: Restore feature $feature" \
+ "to initial state $VALUE"
+ else
+ echo "FAIL: $netdev: Failed to restore feature" \
+ "$feature to initial state $VALUE"
+ fi
+
+ done < "$TMP_ETHTOOL_FEATURES"
+
rm "$TMP_ETHTOOL_FEATURES"
kci_netdev_ethtool_test 74 'dump' "ethtool -d $netdev"
kci_netdev_ethtool_test 94 'stats' "ethtool -S $netdev"
+
return 0
}
@@ -196,10 +234,24 @@ if [ ! -e "$TMP_LIST_NETDEV" ];then
fi
ip link show |grep '^[0-9]' | grep -oE '[[:space:]].*eth[0-9]*:|[[:space:]].*enp[0-9]s[0-9]:' | cut -d\ -f2 | cut -d: -f1> "$TMP_LIST_NETDEV"
+
+if [ ! -s "$TMP_LIST_NETDEV" ]; then
+ echo "No valid network device found, creating veth pair"
+ ip link add veth0 type veth peer name veth1
+ echo "veth0" > "$TMP_LIST_NETDEV"
+ veth_created=1
+fi
+
while read netdev
do
kci_test_netdev "$netdev"
done < "$TMP_LIST_NETDEV"
+#clean up veth interface pair if it was created
+if [ "$veth_created" ]; then
+ ip link delete veth0
+ echo "Removed veth pair"
+fi
+
rm "$TMP_LIST_NETDEV"
exit 0
diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config
index 63ef80ef47a4..b2dd4db45215 100644
--- a/tools/testing/selftests/net/netfilter/config
+++ b/tools/testing/selftests/net/netfilter/config
@@ -87,3 +87,5 @@ CONFIG_XFRM_USER=m
CONFIG_XFRM_STATISTICS=y
CONFIG_NET_PKTGEN=m
CONFIG_TUN=m
+CONFIG_INET_DIAG=m
+CONFIG_SCTP_DIAG=m
diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh
index c61d23a8c88d..d66e3c4dfec6 100755
--- a/tools/testing/selftests/net/netfilter/nft_queue.sh
+++ b/tools/testing/selftests/net/netfilter/nft_queue.sh
@@ -8,7 +8,7 @@
source lib.sh
ret=0
-timeout=2
+timeout=5
cleanup()
{
@@ -25,6 +25,9 @@ cleanup()
}
checktool "nft --version" "test without nft tool"
+checktool "socat -h" "run test without socat"
+
+modprobe -q sctp
trap cleanup EXIT
@@ -36,7 +39,9 @@ TMPFILE2=$(mktemp)
TMPFILE3=$(mktemp)
TMPINPUT=$(mktemp)
-dd conv=sparse status=none if=/dev/zero bs=1M count=200 of="$TMPINPUT"
+COUNT=200
+[ "$KSFT_MACHINE_SLOW" = "yes" ] && COUNT=25
+dd conv=sparse status=none if=/dev/zero bs=1M count=$COUNT of="$TMPINPUT"
if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1; then
echo "SKIP: No virtual ethernet pair device support in kernel"
@@ -250,45 +255,49 @@ listener_ready()
test_tcp_forward()
{
- ip netns exec "$nsrouter" ./nf_queue -q 2 -t "$timeout" &
+ ip netns exec "$nsrouter" ./nf_queue -q 2 &
local nfqpid=$!
timeout 5 ip netns exec "$ns2" socat -u TCP-LISTEN:12345 STDOUT >/dev/null &
local rpid=$!
busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2"
+ busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 2
ip netns exec "$ns1" socat -u STDIN TCP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null
wait "$rpid" && echo "PASS: tcp and nfqueue in forward chain"
+ kill "$nfqpid"
}
test_tcp_localhost()
{
- dd conv=sparse status=none if=/dev/zero bs=1M count=200 of="$TMPINPUT"
timeout 5 ip netns exec "$nsrouter" socat -u TCP-LISTEN:12345 STDOUT >/dev/null &
local rpid=$!
- ip netns exec "$nsrouter" ./nf_queue -q 3 -t "$timeout" &
+ ip netns exec "$nsrouter" ./nf_queue -q 3 &
local nfqpid=$!
busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter"
+ busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 3
ip netns exec "$nsrouter" socat -u STDIN TCP:127.0.0.1:12345 <"$TMPINPUT" >/dev/null
wait "$rpid" && echo "PASS: tcp via loopback"
- wait 2>/dev/null
+ kill "$nfqpid"
}
test_tcp_localhost_connectclose()
{
- ip netns exec "$nsrouter" ./connect_close -p 23456 -t "$timeout" &
- ip netns exec "$nsrouter" ./nf_queue -q 3 -t "$timeout" &
+ ip netns exec "$nsrouter" ./nf_queue -q 3 &
+ local nfqpid=$!
busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 3
+ timeout 10 ip netns exec "$nsrouter" ./connect_close -p 23456 -t 3
+
+ kill "$nfqpid"
wait && echo "PASS: tcp via loopback with connect/close"
- wait 2>/dev/null
}
test_tcp_localhost_requeue()
@@ -353,7 +362,7 @@ table inet filter {
}
}
EOF
- ip netns exec "$ns1" ./nf_queue -q 1 -t "$timeout" &
+ ip netns exec "$ns1" ./nf_queue -q 1 &
local nfqpid=$!
busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$ns1" 1
@@ -363,6 +372,7 @@ EOF
for n in output post; do
for d in tvrf eth0; do
if ! ip netns exec "$ns1" nft list chain inet filter "$n" | grep -q "oifname \"$d\" icmp type echo-request counter packets 1"; then
+ kill "$nfqpid"
echo "FAIL: chain $n: icmp packet counter mismatch for device $d" 1>&2
ip netns exec "$ns1" nft list ruleset
ret=1
@@ -371,8 +381,96 @@ EOF
done
done
- wait "$nfqpid" && echo "PASS: icmp+nfqueue via vrf"
- wait 2>/dev/null
+ kill "$nfqpid"
+ echo "PASS: icmp+nfqueue via vrf"
+}
+
+# Succeeds once ss reports a listening SCTP socket on port 12345 in the
+# network namespace named by $1. Intended as a busywait predicate.
+sctp_listener_ready()
+{
+	ss -S -N "$1" -ln -o "sport = :12345" | grep -q 12345
+}
+
+# Compare the sent file ($1) with the received file ($2). On mismatch,
+# report it on stderr under the test label $3, list both files and set
+# the global 'ret' to 1.
+check_output_files()
+{
+	local sent="$1" received="$2" label="$3"
+
+	# identical content: nothing to report
+	cmp "$sent" "$received" && return
+
+	{
+		echo "FAIL: $label: input and output file differ"
+		echo -n " Input file"
+		ls -l "$sent"
+		echo -n "Output file"
+		ls -l "$received"
+	} 1>&2
+	ret=1
+}
+
+# Send $TMPINPUT over SCTP from ns1 to ns2 while the router's forward chain
+# queues the flow to userspace (queue 10, nf_queue started with -G), then
+# verify that the receiver got an identical copy.
+test_sctp_forward()
+{
+	ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
+flush ruleset
+table inet sctpq {
+	chain forward {
+		type filter hook forward priority 0; policy accept;
+		sctp dport 12345 queue num 10
+	}
+}
+EOF
+	timeout 60 ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" &
+	local rpid=$!
+
+	busywait "$BUSYWAIT_TIMEOUT" sctp_listener_ready "$ns2"
+
+	ip netns exec "$nsrouter" ./nf_queue -q 10 -G &
+	local nfqpid=$!
+
+	# Like the other tests: do not send traffic before the queue has a
+	# userspace listener, the rule has no bypass.
+	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 10
+
+	ip netns exec "$ns1" socat -u STDIN SCTP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null
+
+	if ! ip netns exec "$nsrouter" nft delete table inet sctpq; then
+		echo "FAIL: Could not delete sctpq table"
+		# do not leave the helper running behind us
+		kill "$nfqpid"
+		exit 1
+	fi
+
+	wait "$rpid" && echo "PASS: sctp and nfqueue in forward chain"
+	kill "$nfqpid"
+
+	check_output_files "$TMPINPUT" "$TMPFILE1" "sctp forward"
+}
+
+# Send a (smaller) $TMPINPUT over SCTP from ns1 to ns2 while ns1's output
+# chain queues the flow to userspace (queue 11), then verify that the
+# receiver got an identical copy.
+test_sctp_output()
+{
+	ip netns exec "$ns1" nft -f /dev/stdin <<EOF
+table inet sctpq {
+	chain output {
+		type filter hook output priority 0; policy accept;
+		sctp dport 12345 queue num 11
+	}
+}
+EOF
+	# reduce test file size, software segmentation causes sk wmem increase.
+	dd conv=sparse status=none if=/dev/zero bs=1M count=$((COUNT/2)) of="$TMPINPUT"
+
+	timeout 60 ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" &
+	local rpid=$!
+
+	busywait "$BUSYWAIT_TIMEOUT" sctp_listener_ready "$ns2"
+
+	ip netns exec "$ns1" ./nf_queue -q 11 &
+	local nfqpid=$!
+
+	# Like the other tests: do not send traffic before the queue has a
+	# userspace listener, the rule has no bypass.
+	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$ns1" 11
+
+	ip netns exec "$ns1" socat -u STDIN SCTP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null
+
+	if ! ip netns exec "$ns1" nft delete table inet sctpq; then
+		echo "FAIL: Could not delete sctpq table"
+		# do not leave the helper running behind us
+		kill "$nfqpid"
+		exit 1
+	fi
+
+	# must wait before checking completeness of output file.
+	wait "$rpid" && echo "PASS: sctp and nfqueue in output chain with GSO"
+	kill "$nfqpid"
+
+	check_output_files "$TMPINPUT" "$TMPFILE1" "sctp output"
+}
test_queue_removal()
@@ -388,7 +486,7 @@ table ip filter {
}
}
EOF
- ip netns exec "$ns1" ./nf_queue -q 0 -d 30000 -t "$timeout" &
+ ip netns exec "$ns1" ./nf_queue -q 0 -d 30000 &
local nfqpid=$!
busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$ns1" 0
@@ -443,11 +541,16 @@ test_queue 10
# same. We queue to a second program as well.
load_ruleset "filter2" 20
test_queue 20
+ip netns exec "$ns1" nft flush ruleset
test_tcp_forward
test_tcp_localhost
test_tcp_localhost_connectclose
test_tcp_localhost_requeue
+test_sctp_forward
+test_sctp_output
+
+# should be last, adds vrf device in ns1 and changes routes
test_icmp_vrf
test_queue_removal
diff --git a/tools/testing/selftests/net/packetdrill/Makefile b/tools/testing/selftests/net/packetdrill/Makefile
new file mode 100644
index 000000000000..31cfb666ba8b
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TEST_INCLUDES := ksft_runner.sh \
+ defaults.sh \
+ set_sysctls.py \
+ ../../kselftest/ktap_helpers.sh
+
+TEST_PROGS := $(wildcard *.pkt)
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/packetdrill/config b/tools/testing/selftests/net/packetdrill/config
new file mode 100644
index 000000000000..0237ed98f3c0
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/config
@@ -0,0 +1,11 @@
+CONFIG_IPV6=y
+CONFIG_HZ_1000=y
+CONFIG_HZ=1000
+CONFIG_NET_NS=y
+CONFIG_NET_SCH_FIFO=y
+CONFIG_NET_SCH_FQ=y
+CONFIG_PROC_SYSCTL=y
+CONFIG_SYN_COOKIES=y
+CONFIG_TCP_CONG_CUBIC=y
+CONFIG_TCP_MD5SIG=y
+CONFIG_TUN=y
diff --git a/tools/testing/selftests/net/packetdrill/defaults.sh b/tools/testing/selftests/net/packetdrill/defaults.sh
new file mode 100755
index 000000000000..1095a7b22f44
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/defaults.sh
@@ -0,0 +1,63 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Set standard production config values that relate to TCP behavior.
+
+# Flush old cached data (fastopen cookies).
+ip tcp_metrics flush all > /dev/null 2>&1
+
+# TCP min, default, and max receive and send buffer sizes.
+sysctl -q net.ipv4.tcp_rmem="4096 540000 $((15*1024*1024))"
+sysctl -q net.ipv4.tcp_wmem="4096 $((256*1024)) 4194304"
+
+# TCP timestamps.
+sysctl -q net.ipv4.tcp_timestamps=1
+
+# TCP SYN(ACK) retry thresholds
+sysctl -q net.ipv4.tcp_syn_retries=5
+sysctl -q net.ipv4.tcp_synack_retries=5
+
+# TCP Forward RTO-Recovery, RFC 5682.
+sysctl -q net.ipv4.tcp_frto=2
+
+# TCP Selective Acknowledgements (SACK)
+sysctl -q net.ipv4.tcp_sack=1
+
+# TCP Duplicate Selective Acknowledgements (DSACK)
+sysctl -q net.ipv4.tcp_dsack=1
+
+# TCP FACK (Forward Acknowledgement)
+sysctl -q net.ipv4.tcp_fack=0
+
+# TCP reordering degree ("dupthresh" threshold for entering Fast Recovery).
+sysctl -q net.ipv4.tcp_reordering=3
+
+# TCP congestion control.
+sysctl -q net.ipv4.tcp_congestion_control=cubic
+
+# TCP slow start after idle.
+sysctl -q net.ipv4.tcp_slow_start_after_idle=0
+
+# TCP RACK and TLP.
+sysctl -q net.ipv4.tcp_early_retrans=4 net.ipv4.tcp_recovery=1
+
+# TCP method for deciding when to defer sending to accumulate big TSO packets.
+sysctl -q net.ipv4.tcp_tso_win_divisor=3
+
+# TCP Explicit Congestion Notification (ECN)
+sysctl -q net.ipv4.tcp_ecn=0
+
+sysctl -q net.ipv4.tcp_pacing_ss_ratio=200
+sysctl -q net.ipv4.tcp_pacing_ca_ratio=120
+sysctl -q net.ipv4.tcp_notsent_lowat=4294967295 > /dev/null 2>&1
+
+sysctl -q net.ipv4.tcp_fastopen=0x70403
+sysctl -q net.ipv4.tcp_fastopen_key=a1a1a1a1-b2b2b2b2-c3c3c3c3-d4d4d4d4
+
+sysctl -q net.ipv4.tcp_syncookies=1
+
+# Override the default qdisc on the tun device.
+# Many tests fail with timing errors if the default
+# is FQ and that paces their flows.
+tc qdisc add dev tun0 root pfifo
+
diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh
new file mode 100755
index 000000000000..7478c0c0c9aa
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a single packetdrill script twice, once over IPv4 and once over IPv6,
+# each in a fresh network namespace, and report both results as KTAP.
+#
+# usage: ksft_runner.sh <script>
+
+source "$(dirname "$(realpath "$0")")/../../kselftest/ktap_helpers.sh"
+
+# One array element per argv word, so the arrays can be expanded quoted
+# instead of relying on word splitting of strings with trailing spaces.
+readonly ipv4_args=(--ip_version=ipv4
+		    --local_ip=192.168.0.1
+		    --gateway_ip=192.168.0.1
+		    --netmask_ip=255.255.0.0
+		    --remote_ip=192.0.2.1
+		    -D CMSG_LEVEL_IP=SOL_IP
+		    -D CMSG_TYPE_RECVERR=IP_RECVERR)
+
+readonly ipv6_args=(--ip_version=ipv6
+		    --mtu=1520
+		    --local_ip=fd3d:0a0b:17d6::1
+		    --gateway_ip=fd3d:0a0b:17d6:8888::1
+		    --remote_ip=fd3d:fa7b:d17d::1
+		    -D CMSG_LEVEL_IP=SOL_IPV6
+		    -D CMSG_TYPE_RECVERR=IPV6_RECVERR)
+
+if [ $# -ne 1 ]; then
+	ktap_exit_fail_msg "usage: $0 <script>"
+	exit "$KSFT_FAIL"
+fi
+script="$1"
+
+if ! command -v packetdrill > /dev/null; then
+	ktap_skip_all "packetdrill not found in PATH"
+	exit "$KSFT_SKIP"
+fi
+
+ktap_print_header
+ktap_set_plan 2
+
+# if/else rather than "a && pass || fail", which would also report a
+# failure if the pass helper itself returned non-zero.
+if unshare -n packetdrill "${ipv4_args[@]}" "$(basename "$script")" > /dev/null; then
+	ktap_test_pass "ipv4"
+else
+	ktap_test_fail "ipv4"
+fi
+
+if unshare -n packetdrill "${ipv6_args[@]}" "$(basename "$script")" > /dev/null; then
+	ktap_test_pass "ipv6"
+else
+	ktap_test_fail "ipv6"
+fi
+
+ktap_finished
diff --git a/tools/testing/selftests/net/packetdrill/set_sysctls.py b/tools/testing/selftests/net/packetdrill/set_sysctls.py
new file mode 100755
index 000000000000..5ddf456ae973
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/set_sysctls.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""Sets sysctl values and writes a file that restores them.
+
+The arguments are of the form "<proc-file>=<val>" separated by spaces.
+The program first reads the current value of the proc-file and creates
+a shell script named "/tmp/sysctl_restore_${PACKETDRILL_PID}.sh" which
+restores the values when executed. It then sets the new values.
+
+PACKETDRILL_PID is set by packetdrill to the pid of itself, so a .pkt
+file could restore sysctls by running `/tmp/sysctl_restore_${PPID}.sh`
+at the end.
+"""
+
+import os
+import stat
+import subprocess
+import sys
+
+filename = '/tmp/sysctl_restore_%s.sh' % os.environ['PACKETDRILL_PID']
+
+# The restore script is closed (and therefore flushed) by the with block
+# before it is made executable below.
+with open(filename, 'w') as restore_file:
+    print('#!/bin/bash', file=restore_file)
+
+    for a in sys.argv[1:]:
+        # Split on the first '=' only, so a value may itself contain '='.
+        proc_file, new_val = a.split('=', 1)
+
+        # read current value and add restore command to file
+        cur_val = subprocess.check_output(['cat', proc_file],
+                                          universal_newlines=True)
+        print('echo "%s" > %s' % (cur_val.strip(), proc_file),
+              file=restore_file)
+
+        # set new value; kept as a shell echo so that a rejected write stays
+        # non-fatal, as before
+        os.system('echo "%s" > %s' % (new_val, proc_file))
+
+# Equivalent of "chmod u+x" without spawning a shell.
+os.chmod(filename, os.stat(filename).st_mode | stat.S_IXUSR)
diff --git a/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt
new file mode 100644
index 000000000000..df49c67645ac
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test TCP_INQ and TCP_CM_INQ on the client side.
+`./defaults.sh
+`
+
+// Create a socket and set it to non-blocking.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR)
+ +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+// Connect to the server and enable TCP_INQ.
+ +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+ +0 setsockopt(3, SOL_TCP, TCP_INQ, [1], 4) = 0
+
+ +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
+ +.01 < S. 0:0(0) ack 1 win 5792 <mss 1460,sackOK,TS val 700 ecr 100,nop,wscale 7>
+ +0 > . 1:1(0) ack 1 <nop,nop,TS val 200 ecr 700>
+
+// Now we have 10K of data ready on the socket.
+ +0 < . 1:10001(10000) ack 1 win 514
+ +0 > . 1:1(0) ack 10001 <nop,nop,TS val 200 ecr 700>
+
+// We read 1K and we should have 9K ready to read.
+ +0 recvmsg(3, {msg_name(...)=...,
+ msg_iov(1)=[{..., 1000}],
+ msg_flags=0,
+ msg_control=[{cmsg_level=SOL_TCP,
+ cmsg_type=TCP_CM_INQ,
+ cmsg_data=9000}]}, 0) = 1000
+// We read 9K and we should have no further data ready to read.
+ +0 recvmsg(3, {msg_name(...)=...,
+ msg_iov(1)=[{..., 9000}],
+ msg_flags=0,
+ msg_control=[{cmsg_level=SOL_TCP,
+ cmsg_type=TCP_CM_INQ,
+ cmsg_data=0}]}, 0) = 9000
+
+// Server sends more data and closes the connection.
+ +0 < F. 10001:20001(10000) ack 1 win 514
+ +0 > . 1:1(0) ack 20002 <nop,nop,TS val 200 ecr 700>
+
+// We read 10K and we should have one "fake" byte because the connection is
+// closed.
+ +0 recvmsg(3, {msg_name(...)=...,
+ msg_iov(1)=[{..., 10000}],
+ msg_flags=0,
+ msg_control=[{cmsg_level=SOL_TCP,
+ cmsg_type=TCP_CM_INQ,
+ cmsg_data=1}]}, 0) = 10000
+// Now, receive EOF.
+ +0 read(3, ..., 2000) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt b/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt
new file mode 100644
index 000000000000..04a5e2590c62
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test TCP_INQ and TCP_CM_INQ on the server side.
+`./defaults.sh
+`
+
+// Initialize connection
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.01 < . 1:1(0) ack 1 win 514
+
+// Accept the connection and enable TCP_INQ.
+ +0 accept(3, ..., ...) = 4
+ +0 setsockopt(4, SOL_TCP, TCP_INQ, [1], 4) = 0
+
+// Now we have 10K of data ready on the socket.
+ +0 < . 1:10001(10000) ack 1 win 514
+ +0 > . 1:1(0) ack 10001
+
+// We read 2K and we should have 8K ready to read.
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{..., 2000}],
+ msg_flags=0,
+ msg_control=[{cmsg_level=SOL_TCP,
+ cmsg_type=TCP_CM_INQ,
+ cmsg_data=8000}]}, 0) = 2000
+// We read 8K and we should have no further data ready to read.
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{..., 8000}],
+ msg_flags=0,
+ msg_control=[{cmsg_level=SOL_TCP,
+ cmsg_type=TCP_CM_INQ,
+ cmsg_data=0}]}, 0) = 8000
+// Client sends more data and closes the connection.
+ +0 < F. 10001:20001(10000) ack 1 win 514
+ +0 > . 1:1(0) ack 20002
+
+// We read 10K and we should have one "fake" byte because the connection is
+// closed.
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{..., 10000}],
+ msg_flags=0,
+ msg_control=[{cmsg_level=SOL_TCP,
+ cmsg_type=TCP_CM_INQ,
+ cmsg_data=1}]}, 0) = 10000
+// Now, receive error.
+ +0 read(3, ..., 2000) = -1 ENOTCONN (Transport endpoint is not connected)
diff --git a/tools/testing/selftests/net/packetdrill/tcp_md5_md5-only-on-client-ack.pkt b/tools/testing/selftests/net/packetdrill/tcp_md5_md5-only-on-client-ack.pkt
new file mode 100644
index 000000000000..25dfef95d3f8
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_md5_md5-only-on-client-ack.pkt
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test what happens when client does not provide MD5 on SYN,
+// but then does on the ACK that completes the three-way handshake.
+
+`./defaults.sh`
+
+// Establish a connection.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+// Ooh, weird: client provides MD5 option on the ACK:
+ +.01 < . 1:1(0) ack 1 win 514 <md5 000102030405060708090a0b0c0d0e0f,nop,nop>
+ +.01 < . 1:1(0) ack 1 win 514 <md5 000102030405060708090a0b0c0d0e0f,nop,nop>
+
+// The TCP listener refcount should be 2, but on buggy kernels it can be 0:
+ +0 `grep " 0A " /proc/net/tcp /proc/net/tcp6 | grep ":1F90"`
+
+// Now here comes the legit ACK:
+ +.01 < . 1:1(0) ack 1 win 514
+
+// Make sure the connection is OK:
+ +0 accept(3, ..., ...) = 4
+
+ +.01 write(4, ..., 1000) = 1000
diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-1pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-1pkt.pkt
new file mode 100644
index 000000000000..795c476d222d
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-1pkt.pkt
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test of slow start when not application-limited, so that
+// the cwnd continues to grow.
+// In this variant, the receiver ACKs every packet.
+
+// Set up config. To keep things simple, disable the
+// mechanism that defers sending in order to send bigger TSO packets.
+`./defaults.sh
+sysctl -q net.ipv4.tcp_tso_win_divisor=100`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 257
+ +0 accept(3, ..., ...) = 4
+ +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0
+
+ +0 write(4, ..., 30000) = 30000
+ +0 > P. 1:10001(10000) ack 1
+ +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
++.105 < . 1:1(0) ack 1001 win 257
+ +0 > P. 10001:12001(2000) ack 1
+
+ +0 < . 1:1(0) ack 2001 win 257
+ +0 > P. 12001:14001(2000) ack 1
+
++.005 < . 1:1(0) ack 3001 win 257
+ +0 > P. 14001:16001(2000) ack 1
+
+ +0 < . 1:1(0) ack 4001 win 257
+ +0 > P. 16001:18001(2000) ack 1
+
++.005 < . 1:1(0) ack 5001 win 257
+ +0 > P. 18001:20001(2000) ack 1
+
+ +0 < . 1:1(0) ack 6001 win 257
+ +0 > P. 20001:22001(2000) ack 1
+
++.005 < . 1:1(0) ack 7001 win 257
+ +0 > P. 22001:24001(2000) ack 1
+
+ +0 < . 1:1(0) ack 8001 win 257
+ +0 > P. 24001:26001(2000) ack 1
+
++.005 < . 1:1(0) ack 9001 win 257
+ +0 > P. 26001:28001(2000) ack 1
+
+ +0 < . 1:1(0) ack 10001 win 257
+ +0 > P. 28001:30001(2000) ack 1
+
+ +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-5pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-5pkt.pkt
new file mode 100644
index 000000000000..9212ae1fd0f2
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-5pkt.pkt
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test of slow start when an outstanding flight of packets is
+// less than the current cwnd, and not big enough to bump up cwnd.
+//
+// In this variant, the receiver ACKs every other packet,
+// approximating standard delayed ACKs.
+
+// Set up config.
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 257
+ +0 accept(3, ..., ...) = 4
+
+// Only send 5 packets.
+ +0 write(4, ..., 5000) = 5000
+ +0 > P. 1:5001(5000) ack 1
+ +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
+ +0 < . 1:1(0) ack 2001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 10, 'cwnd=%d' % tcpi_snd_cwnd }%
+
+ +0 < . 1:1(0) ack 4001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 10, 'cwnd=%d' % tcpi_snd_cwnd }%
+
+ +0 < . 1:1(0) ack 5001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 10, 'cwnd=%d' % tcpi_snd_cwnd }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-6pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-6pkt.pkt
new file mode 100644
index 000000000000..416c901ddf51
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-6pkt.pkt
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test of slow start when an outstanding flight of packets is
+// less than the current cwnd, but still big enough that in slow
+// start we want to increase our cwnd a little.
+//
+// In this variant, the receiver ACKs every other packet,
+// approximating standard delayed ACKs.
+
+// Set up config.
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 257
+ +0 accept(3, ..., ...) = 4
+
+// Only send 6 packets.
+ +0 write(4, ..., 6000) = 6000
+ +0 > P. 1:6001(6000) ack 1
+ +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
+ +0 < . 1:1(0) ack 2001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 12, 'cwnd=%d' % tcpi_snd_cwnd }%
+
+ +0 < . 1:1(0) ack 4001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 12, 'cwnd=%d' % tcpi_snd_cwnd }%
+
+ +0 < . 1:1(0) ack 6001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 12, 'cwnd=%d' % tcpi_snd_cwnd }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt.pkt
new file mode 100644
index 000000000000..a894b7d4559c
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt.pkt
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test of slow start when not application-limited, so that
+// the cwnd continues to grow.
+// In this variant, the receiver ACKs every other packet,
+// approximating standard delayed ACKs.
+
+// Set up config. To keep things simple, disable the
+// mechanism that defers sending in order to send bigger TSO packets.
+`./defaults.sh
+sysctl -q net.ipv4.tcp_tso_win_divisor=100`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 257
+ +0 accept(3, ..., ...) = 4
+ +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0
+
+ +0 write(4, ..., 30000) = 30000
+ +0 > P. 1:10001(10000) ack 1
+ +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
++.105 < . 1:1(0) ack 2001 win 257
+ +0 > P. 10001:14001(4000) ack 1
+
++.005 < . 1:1(0) ack 4001 win 257
+ +0 > P. 14001:18001(4000) ack 1
+
++.005 < . 1:1(0) ack 6001 win 257
+ +0 > P. 18001:22001(4000) ack 1
+
++.005 < . 1:1(0) ack 8001 win 257
+ +0 > P. 22001:26001(4000) ack 1
+
++.005 < . 1:1(0) ack 10001 win 257
+ +0 > P. 26001:30001(4000) ack 1
+
+ +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-4pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-4pkt.pkt
new file mode 100644
index 000000000000..065fae9e9abd
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-4pkt.pkt
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test of slow start when not application-limited, so that
+// the cwnd continues to grow.
+// In this variant, the receiver sends one ACK per 4 packets.
+
+// Set up config. To keep things simple, disable the
+// mechanism that defers sending in order to send bigger TSO packets.
+`./defaults.sh
+sysctl -q net.ipv4.tcp_tso_win_divisor=100`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 257
+ +0 accept(3, ..., ...) = 4
+ +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0
+
+ +0 write(4, ..., 30000) = 30000
+ +0 > P. 1:10001(10000) ack 1
+ +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
+ +.11 < . 1:1(0) ack 4001 win 257
+ +0 > P. 10001:18001(8000) ack 1
+
+ +.01 < . 1:1(0) ack 8001 win 257
+ +0 > P. 18001:26001(8000) ack 1
+
++.005 < . 1:1(0) ack 10001 win 257
+ +0 > P. 26001:30001(4000) ack 1
+
+ +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-idle.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-idle.pkt
new file mode 100644
index 000000000000..11b213be1138
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-idle.pkt
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test of slow start after idle
+// This test expects tso size to be at least initial cwnd * mss
+
+`./defaults.sh
+./set_sysctls.py /proc/sys/net/ipv4/tcp_slow_start_after_idle=1 \
+ /proc/sys/net/ipv4/tcp_min_tso_segs=10`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 65535 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 511
+ +0 accept(3, ..., ...) = 4
+ +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0
+
+ +0 write(4, ..., 26000) = 26000
+ +0 > P. 1:5001(5000) ack 1
+ +0 > P. 5001:10001(5000) ack 1
+ +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
+ +.1 < . 1:1(0) ack 10001 win 511
+ +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }%
+ +0 > P. 10001:20001(10000) ack 1
+ +0 > P. 20001:26001(6000) ack 1
+
+ +.1 < . 1:1(0) ack 26001 win 511
+ +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }%
+
+ +2 write(4, ..., 20000) = 20000
+// If slow start after idle works properly, we should send 5 MSS here (cwnd/2)
+ +0 > P. 26001:31001(5000) ack 1
+ +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
+// Reset sysctls
+`/tmp/sysctl_restore_${PPID}.sh`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-win-update.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-win-update.pkt
new file mode 100644
index 000000000000..577ed8c8852c
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-win-update.pkt
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test of slow start after window update
+// This test expects tso size to be at least initial cwnd * mss
+
+`./defaults.sh
+./set_sysctls.py /proc/sys/net/ipv4/tcp_slow_start_after_idle=1 \
+ /proc/sys/net/ipv4/tcp_min_tso_segs=10`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 65535 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 511
+ +0 accept(3, ..., ...) = 4
+ +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0
+
+ +0 write(4, ..., 26000) = 26000
+ +0 > P. 1:5001(5000) ack 1
+ +0 > P. 5001:10001(5000) ack 1
+ +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
+ +.1 < . 1:1(0) ack 10001 win 511
+ +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }%
+ +0 > P. 10001:20001(10000) ack 1
+ +0 > P. 20001:26001(6000) ack 1
+
+ +.1 < . 1:1(0) ack 26001 win 0
+ +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }%
+
+ +0 write(4, ..., 20000) = 20000
+// 1st win0 probe
++.3~+.310 > . 26000:26000(0) ack 1
+ +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }%
+
+// 2nd win0 probe
++.6~+.620 > . 26000:26000(0) ack 1
+ +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }%
+
+// 3rd win0 probe
++1.2~+1.240 > . 26000:26000(0) ack 1
+ +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }%
+
+ +.9 < . 1:1(0) ack 26001 win 511
+ +0 > P. 26001:31001(5000) ack 1
+
+// Reset sysctls
+`/tmp/sysctl_restore_${PPID}.sh`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited-9-packets-out.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited-9-packets-out.pkt
new file mode 100644
index 000000000000..869f32c35a2a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited-9-packets-out.pkt
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test of slow start when application-limited: in this case,
+// with IW10, if we don't fully use our cwnd but instead
+// send just 9 packets, then cwnd should grow to twice that
+// value, or 18 packets.
+
+// Set up config.
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 257
+ +0 accept(3, ..., ...) = 4
+ +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0
+
+ +0 write(4, ..., 9000) = 9000
+ +0 > P. 1:9001(9000) ack 1
+ +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
++.105 < . 1:1(0) ack 2001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 12, tcpi_snd_cwnd }%
+
++.005 < . 1:1(0) ack 4001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 14, tcpi_snd_cwnd }%
+
++.005 < . 1:1(0) ack 6001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 16, tcpi_snd_cwnd }%
+
++.005 < . 1:1(0) ack 8001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 18, tcpi_snd_cwnd }%
+
++.005 < . 1:1(0) ack 9001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 18, tcpi_snd_cwnd }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited.pkt
new file mode 100644
index 000000000000..0f77b7955db6
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited.pkt
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test of slow start when application-limited: in this case,
+// with IW10, if we send exactly 10 packets then cwnd should grow to 20.
+
+// Set up config.
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 257
+ +0 accept(3, ..., ...) = 4
+ +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0
+
+ +0 write(4, ..., 10000) = 10000
+ +0 > P. 1:10001(10000) ack 1
+ +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
++.105 < . 1:1(0) ack 2001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 12, tcpi_snd_cwnd }%
+
++.005 < . 1:1(0) ack 4001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 14, tcpi_snd_cwnd }%
+
++.005 < . 1:1(0) ack 6001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 16, tcpi_snd_cwnd }%
+
++.005 < . 1:1(0) ack 8001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 18, tcpi_snd_cwnd }%
+
++.005 < . 1:1(0) ack 10001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-fq-ack-per-2pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-fq-ack-per-2pkt.pkt
new file mode 100644
index 000000000000..7e9c83d617c2
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-fq-ack-per-2pkt.pkt
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test of slow start when not application-limited, so that
+// the cwnd continues to grow, even if TSQ triggers.
+// In this variant, the receiver ACKs every other packet,
+// approximating standard delayed ACKs.
+
+// Note we use FQ/pacing to check if TCP Small Queues is not hurting
+
+`./defaults.sh
+tc qdisc replace dev tun0 root fq
+sysctl -q net/ipv4/tcp_pacing_ss_ratio=200
+sysctl -e -q net.ipv4.tcp_min_tso_segs=2`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +.1 < S 0:0(0) win 32792 <mss 1460,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 500
+ +0 accept(3, ..., ...) = 4
+ +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0
+
+ +0 write(4, ..., 40000) = 40000
+// This might change if we cook the initial packet with 10 MSS.
+ +0 > P. 1:2921(2920) ack 1
+ +0 > P. 2921:5841(2920) ack 1
+ +0 > P. 5841:8761(2920) ack 1
+ +0 > P. 8761:11681(2920) ack 1
+ +0 > P. 11681:14601(2920) ack 1
+ +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
++.105 < . 1:1(0) ack 2921 win 500
+ +0 %{ assert tcpi_snd_cwnd == 12, tcpi_snd_cwnd }%
+
+// Note: since commit "net_sched: sch_fq: account for schedule/timers drifts",
+// FQ notices that this packet missed the 'time to send next packet' computed
+// when prior packet (11681:14601(2920)) was sent.
+// So FQ will allow following packet to be sent a bit earlier (quantum/2)
+// (FQ commit allows an application/cwnd limited flow to get at most quantum/2 extra credit)
+ +0 > P. 14601:17521(2920) ack 1
+
++.003 < . 1:1(0) ack 5841 win 500
+ +0 %{ assert tcpi_snd_cwnd == 14, tcpi_snd_cwnd }%
+
++.001 > P. 17521:20441(2920) ack 1
+
++.001 < . 1:1(0) ack 8761 win 500
+ +0 %{ assert tcpi_snd_cwnd == 16, tcpi_snd_cwnd }%
+
+// remaining packets are delivered at a constant rate.
++.007 > P. 20441:23361(2920) ack 1
+
++.002 < . 1:1(0) ack 11681 win 500
+ +0 %{ assert tcpi_snd_cwnd == 18, tcpi_snd_cwnd }%
++.001 < . 1:1(0) ack 14601 win 500
+
++.004 > P. 23361:26281(2920) ack 1
+
++.007 > P. 26281:29201(2920) ack 1
+
+ +0 %{ assert tcpi_snd_cwnd == 20, 'cwnd=%d' % tcpi_snd_cwnd }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt
new file mode 100644
index 000000000000..a82c8899d36b
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+// basic zerocopy test:
+//
+// send a packet with MSG_ZEROCOPY and receive the notification ID
+// repeat and verify IDs are consecutive
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 1:4001(4000) ack 1
+ +0 < . 1:1(0) ack 4001 win 257
+
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=0,
+ ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+ ee_type=0,
+ ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+ ee_info=0,
+ ee_data=0}}
+ ]}, MSG_ERRQUEUE) = 0
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 4001:8001(4000) ack 1
+ +0 < . 1:1(0) ack 8001 win 257
+
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=0,
+ ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+ ee_type=0,
+ ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+ ee_info=1,
+ ee_data=1}}
+ ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt
new file mode 100644
index 000000000000..c01915e7f4a1
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+// batch zerocopy test:
+//
+// send multiple packets, then read one range of all notifications.
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+ +0 setsockopt(4, SOL_SOCKET, SO_MARK, [666], 4) = 0
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 1:4001(4000) ack 1
+ +0 < . 1:1(0) ack 4001 win 257
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 4001:8001(4000) ack 1
+ +0 < . 1:1(0) ack 8001 win 257
+
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=0,
+ ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+ ee_type=0,
+ ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+ ee_info=0,
+ ee_data=1}}
+ ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt
new file mode 100644
index 000000000000..6509882932e9
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+// Minimal client-side zerocopy test
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
+ +0 setsockopt(4, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+ +0...0 connect(4, ..., ...) = 0
+
+ +0 > S 0:0(0) <mss 1460,sackOK,TS val 0 ecr 0,nop,wscale 8>
+ +0 < S. 0:0(0) ack 1 win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > . 1:1(0) ack 1
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 1:4001(4000) ack 1
+ +0 < . 1:1(0) ack 4001 win 257
+
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=0,
+ ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+ ee_type=0,
+ ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+ ee_info=0,
+ ee_data=0}}
+ ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt
new file mode 100644
index 000000000000..2cd78755cb2a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+// send with MSG_ZEROCOPY on a non-established socket
+//
+// verify that a send in state TCP_CLOSE correctly aborts the zerocopy
+// operation, specifically it does not increment the zerocopy counter.
+//
+// First send on a closed socket and wait for (absent) notification.
+// Then connect and send and verify that notification nr. is zero.
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
+ +0 setsockopt(4, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = -1 EPIPE (Broken pipe)
+
+ +0.1 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[]}, MSG_ERRQUEUE) = -1 EAGAIN (Resource temporarily unavailable)
+
+ +0...0 connect(4, ..., ...) = 0
+
+ +0 > S 0:0(0) <mss 1460,sackOK,TS val 0 ecr 0,nop,wscale 8>
+ +0 < S. 0:0(0) ack 1 win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > . 1:1(0) ack 1
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 1:4001(4000) ack 1
+ +0 < . 1:1(0) ack 4001 win 257
+
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=0,
+ ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+ ee_type=0,
+ ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+ ee_info=0,
+ ee_data=0}}
+ ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt
new file mode 100644
index 000000000000..7671c20e01cf
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+// epoll zerocopy test:
+//
+// EPOLLERR is known to be not edge-triggered unlike EPOLLIN and EPOLLOUT but
+// it is not level-triggered either.
+//
+// fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR
+// is correctly fired only once, when EPOLLET is set. send another packet with
+// MSG_ZEROCOPY. confirm that EPOLLERR is correctly fired again only once.
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+ +0 epoll_create(1) = 5
+ +0 epoll_ctl(5, EPOLL_CTL_ADD, 4, {events=EPOLLOUT|EPOLLET, fd=4}) = 0
+ +0 epoll_wait(5, {events=EPOLLOUT, fd=4}, 1, 0) = 1
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 1:4001(4000) ack 1
+ +0 < . 1:1(0) ack 4001 win 257
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 4001:8001(4000) ack 1
+ +0 < . 1:1(0) ack 8001 win 257
+
+// receive only one EPOLLERR for the two sends above.
+ +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1
+ +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 8001:12001(4000) ack 1
+ +0 < . 1:1(0) ack 12001 win 257
+
+// receive only one EPOLLERR for the third send above.
+ +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1
+ +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0
+
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=0,
+ ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+ ee_type=0,
+ ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+ ee_info=0,
+ ee_data=2}}
+ ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt
new file mode 100644
index 000000000000..fadc480fdb7f
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+// epoll zerocopy test:
+//
+// EPOLLERR is known to be not edge-triggered unlike EPOLLIN and EPOLLOUT but
+// it is not level-triggered either. this test verifies that the same behavior is
+// maintained when we have EPOLLEXCLUSIVE.
+//
+// fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR
+// is correctly fired only once, when EPOLLET is set. send another packet with
+// MSG_ZEROCOPY. confirm that EPOLLERR is correctly fired again only once.
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+ +0 epoll_create(1) = 5
+ +0 epoll_ctl(5, EPOLL_CTL_ADD, 4,
+ {events=EPOLLOUT|EPOLLET|EPOLLEXCLUSIVE, fd=4}) = 0
+ +0 epoll_wait(5, {events=EPOLLOUT, fd=4}, 1, 0) = 1
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 1:4001(4000) ack 1
+ +0 < . 1:1(0) ack 4001 win 257
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 4001:8001(4000) ack 1
+ +0 < . 1:1(0) ack 8001 win 257
+
+// receive only one EPOLLERR for the two sends above.
+ +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1
+ +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 8001:12001(4000) ack 1
+ +0 < . 1:1(0) ack 12001 win 257
+
+// receive only one EPOLLERR for the third send above.
+ +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1
+ +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0
+
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=0,
+ ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+ ee_type=0,
+ ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+ ee_info=0,
+ ee_data=2}}
+ ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt
new file mode 100644
index 000000000000..5bfa0d1d2f4a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+// epoll zerocopy test:
+//
+// This is a test to confirm that EPOLLERR is only fired once for an FD when
+// EPOLLONESHOT is set.
+//
+// fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR
+// is correctly fired only once, when EPOLLONESHOT is set. send another packet
+// with MSG_ZEROCOPY. confirm that EPOLLERR is not fired. Rearm the FD and
+// confirm that EPOLLERR is correctly set.
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+ +0 epoll_create(1) = 5
+ +0 epoll_ctl(5, EPOLL_CTL_ADD, 4,
+ {events=EPOLLOUT|EPOLLET|EPOLLONESHOT, fd=4}) = 0
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 1:4001(4000) ack 1
+ +0 < . 1:1(0) ack 4001 win 257
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 4001:8001(4000) ack 1
+ +0 < . 1:1(0) ack 8001 win 257
+
+// receive only one EPOLLERR for the two sends above.
+ +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1
+ +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 8001:12001(4000) ack 1
+ +0 < . 1:1(0) ack 12001 win 257
+
+// receive no EPOLLERR for the third send above.
+ +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0
+
+// rearm the FD and verify the EPOLLERR is fired again.
+ +0 epoll_ctl(5, EPOLL_CTL_MOD, 4, {events=EPOLLOUT|EPOLLONESHOT, fd=4}) = 0
+ +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1
+ +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0
+
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=0,
+ ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+ ee_type=0,
+ ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+ ee_info=0,
+ ee_data=2}}
+ ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt
new file mode 100644
index 000000000000..4a73bbf46961
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+// Fastopen client zerocopy test:
+//
+// send data with MSG_FASTOPEN | MSG_ZEROCOPY and verify that the
+// kernel returns the notification ID.
+//
+// Fastopen requires a stored cookie. Create two sockets. The first
+// one will have no data in the initial send. On return 0 the
+// zerocopy notification counter is not incremented. Verify this too.
+
+`./defaults.sh`
+
+// Send a FastOpen request, no cookie yet so no data in SYN
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+ +0 sendto(3, ..., 500, MSG_FASTOPEN|MSG_ZEROCOPY, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+ +0 > S 0:0(0) <mss 1460,sackOK,TS val 1000 ecr 0,nop,wscale 8,FO,nop,nop>
+ +.01 < S. 123:123(0) ack 1 win 14600 <mss 940,TS val 2000 ecr 1000,sackOK,nop,wscale 6, FO abcd1234,nop,nop>
+ +0 > . 1:1(0) ack 1 <nop,nop,TS val 1001 ecr 2000>
+
+// Read from error queue: no zerocopy notification
+ +1 recvmsg(3, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[]}, MSG_ERRQUEUE) = -1 EAGAIN (Resource temporarily unavailable)
+
+ +.01 close(3) = 0
+ +0 > F. 1:1(0) ack 1 <nop,nop,TS val 1002 ecr 2000>
+ +.01 < F. 1:1(0) ack 2 win 92 <nop,nop,TS val 2001 ecr 1002>
+ +0 > . 2:2(0) ack 2 <nop,nop,TS val 1003 ecr 2001>
+
+// Send another Fastopen request, now SYN will have data
+ +.07 `sysctl -q net.ipv4.tcp_timestamps=0`
+ +.1 socket(..., SOCK_STREAM, IPPROTO_TCP) = 5
+ +0 fcntl(5, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+ +0 setsockopt(5, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+ +0 sendto(5, ..., 500, MSG_FASTOPEN|MSG_ZEROCOPY, ..., ...) = 500
+ +0 > S 0:500(500) <mss 1460,nop,nop,sackOK,nop,wscale 8,FO abcd1234,nop,nop>
+ +.05 < S. 5678:5678(0) ack 501 win 14600 <mss 1460,nop,nop,sackOK,nop,wscale 6>
+ +0 > . 501:501(0) ack 1
+
+// Read from error queue: now has first zerocopy notification
+ +0.5 recvmsg(5, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=0,
+ ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+ ee_type=0,
+ ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+ ee_info=0,
+ ee_data=0}}
+ ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt
new file mode 100644
index 000000000000..36086c5877ce
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+// Fastopen server zerocopy test:
+//
+// send data with MSG_FASTOPEN | MSG_ZEROCOPY and verify that the
+// kernel returns the notification ID.
+
+`./defaults.sh
+ ./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen=0x207`
+
+// Set up a TFO server listening socket.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +.1 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [2], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+
+// Client sends a SYN with data.
+ +.1 < S 0:1000(1000) win 32792 <mss 1460,sackOK,nop,nop>
+ +0 > S. 0:0(0) ack 1001 <mss 1460,nop,nop,sackOK>
+
+// Server accepts and replies with data.
++.005 accept(3, ..., ...) = 4
+ +0 read(4, ..., 1024) = 1000
+ +0 sendto(4, ..., 1000, MSG_ZEROCOPY, ..., ...) = 1000
+ +0 > P. 1:1001(1000) ack 1001
+ +.05 < . 1001:1001(0) ack 1001 win 32792
+
+// Read from error queue: now has first zerocopy notification
+ +0.1 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=0,
+ ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+ ee_type=0,
+ ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+ ee_info=0,
+ ee_data=0}}
+ ]}, MSG_ERRQUEUE) = 0
+
+`/tmp/sysctl_restore_${PPID}.sh`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt
new file mode 100644
index 000000000000..672f817faca0
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0
+// tcp_MAX_SKB_FRAGS test
+//
+// Verify that sending an iovec of tcp_MAX_SKB_FRAGS + 1 elements will
+// 1) fit in a single packet without zerocopy
+// 2) spill over into a second packet with zerocopy,
+// because each iovec element becomes a frag
+// 3) the PSH bit is set on an skb when it runs out of fragments
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+
+ // Each pinned zerocopy page is fully accounted to skb->truesize.
+ // This test generates a worst case packet with each frag storing
+ // one byte, but increasing truesize with a page (64KB on PPC).
+ +0 setsockopt(3, SOL_SOCKET, SO_SNDBUF, [2000000], 4) = 0
+
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+ // send an iov of 18 elements: just becomes a linear skb
+ +0 sendmsg(4, {msg_name(...)=...,
+ msg_iov(18)=[{..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}],
+ msg_flags=0}, 0) = 18
+
+ +0 > P. 1:19(18) ack 1
+ +0 < . 1:1(0) ack 19 win 257
+
+ // send a zerocopy iov of 18 elements:
+ +1 sendmsg(4, {msg_name(...)=...,
+ msg_iov(18)=[{..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}],
+ msg_flags=0}, MSG_ZEROCOPY) = 18
+
+ // verify that it is split in one skb of 17 frags + 1 of 1 frag
+ // verify that both have the PSH bit set
+ +0 > P. 19:36(17) ack 1
+ +0 < . 1:1(0) ack 36 win 257
+
+ +0 > P. 36:37(1) ack 1
+ +0 < . 1:1(0) ack 37 win 257
+
+ +1 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=0,
+ ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+ ee_type=0,
+ ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+ ee_info=0,
+ ee_data=0}}
+ ]}, MSG_ERRQUEUE) = 0
+
+ // send a zerocopy iov of 64 elements:
+ +0 sendmsg(4, {msg_name(...)=...,
+ msg_iov(64)=[{..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1}],
+ msg_flags=0}, MSG_ZEROCOPY) = 64
+
+ // verify that it is split in skbs with 17 frags
+ +0 > P. 37:54(17) ack 1
+ +0 < . 1:1(0) ack 54 win 257
+
+ +0 > P. 54:71(17) ack 1
+ +0 < . 1:1(0) ack 71 win 257
+
+ +0 > P. 71:88(17) ack 1
+ +0 < . 1:1(0) ack 88 win 257
+
+ +0 > P. 88:101(13) ack 1
+ +0 < . 1:1(0) ack 101 win 257
+
+ +1 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=0,
+ ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+ ee_type=0,
+ ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+ ee_info=1,
+ ee_data=1}}
+ ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt
new file mode 100644
index 000000000000..a9a1ac0aea4f
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+// small packet zerocopy test:
+//
+// verify that SO_EE_CODE_ZEROCOPY_COPIED is set on zerocopy
+// packets of all sizes, including the smallest payload, 1B.
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+ // send 1B
+ +0 send(4, ..., 1, MSG_ZEROCOPY) = 1
+ +0 > P. 1:2(1) ack 1
+ +0 < . 1:1(0) ack 2 win 257
+
+ +1 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=0,
+ ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+ ee_type=0,
+ ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+ ee_info=0,
+ ee_data=0}}
+ ]}, MSG_ERRQUEUE) = 0
+
+ // send 1B again
+ +0 send(4, ..., 1, MSG_ZEROCOPY) = 1
+ +0 > P. 2:3(1) ack 1
+ +0 < . 1:1(0) ack 3 win 257
+
+ +1 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=0,
+ ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+ ee_type=0,
+ ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+ ee_info=1,
+ ee_data=1}}
+ ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 5175c0c83a23..569bce8b6383 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -681,13 +681,7 @@ setup_xfrm() {
}
setup_nettest_xfrm() {
- if ! which nettest >/dev/null; then
- PATH=$PWD:$PATH
- if ! which nettest >/dev/null; then
- echo "'nettest' command not found; skipping tests"
- return 1
- fi
- fi
+ check_gen_prog "nettest"
[ ${1} -eq 6 ] && proto="-6" || proto=""
port=${2}
@@ -1447,7 +1441,7 @@ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() {
size=$(du -sb $tmpoutfile)
size=${size%%/tmp/*}
- [ $size -ne 1048576 ] && err "File size $size mismatches exepcted value in locally bridged vxlan test" && return 1
+ [ $size -ne 1048576 ] && err "File size $size mismatches expected value in locally bridged vxlan test" && return 1
done
rm -f "$tmpoutfile"
diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c
index 1a736f700be4..4f31e92ebd96 100644
--- a/tools/testing/selftests/net/psock_fanout.c
+++ b/tools/testing/selftests/net/psock_fanout.c
@@ -165,9 +165,9 @@ static void sock_fanout_set_ebpf(int fd)
attr.insns = (unsigned long) prog;
attr.insn_cnt = ARRAY_SIZE(prog);
attr.license = (unsigned long) "GPL";
- attr.log_buf = (unsigned long) log_buf,
- attr.log_size = sizeof(log_buf),
- attr.log_level = 1,
+ attr.log_buf = (unsigned long) log_buf;
+ attr.log_size = sizeof(log_buf);
+ attr.log_level = 1;
pfd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
if (pfd < 0) {
diff --git a/tools/testing/selftests/net/rds/Makefile b/tools/testing/selftests/net/rds/Makefile
new file mode 100644
index 000000000000..da9714bc7aad
--- /dev/null
+++ b/tools/testing/selftests/net/rds/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+
+all:
+ @echo mk_build_dir="$(shell pwd)" > include.sh
+
+# Only run.sh is an executable test entry point. include.sh (generated by
+# the "all" target) and test.py are support files that run.sh uses; they
+# need to be installed alongside it but must not be executed as tests.
+TEST_PROGS := run.sh
+
+TEST_FILES := include.sh \
+	test.py
+
+EXTRA_CLEAN := /tmp/rds_logs
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/rds/README.txt b/tools/testing/selftests/net/rds/README.txt
new file mode 100644
index 000000000000..cbde2951ab13
--- /dev/null
+++ b/tools/testing/selftests/net/rds/README.txt
@@ -0,0 +1,41 @@
+RDS self-tests
+==============
+
+These scripts provide a coverage test for RDS-TCP by creating two
+network namespaces and running rds packets between them. A loopback
+network is provisioned with optional probability of packet loss or
+corruption. A workload of 50000 hashes, each 64 characters in size,
+is passed over an RDS socket on this test network. A passing test means
+the RDS-TCP stack was able to recover properly. The provided config.sh
+can be used to configure the kernel with the necessary gcov options. The
+kernel may optionally be configured to omit the coverage report as well.
+
+USAGE:
+ run.sh [-d logdir] [-l packet_loss] [-c packet_corruption]
+ [-u packet_duplicate]
+
+OPTIONS:
+ -d Log directory. Defaults to tools/testing/selftests/net/rds/rds_logs
+
+ -l Simulates a percentage of packet loss
+
+ -c Simulates a percentage of packet corruption
+
+ -u Simulates a percentage of packet duplication.
+
+EXAMPLE:
+
+ # Create a suitable gcov enabled .config
+ tools/testing/selftests/net/rds/config.sh -g
+
+ # Alternately create a gcov disabled .config
+ tools/testing/selftests/net/rds/config.sh
+
+ # build the kernel
+ vng --build --config tools/testing/selftests/net/config
+
+ # launch the tests in a VM
+ vng -v --rwdir ./ --run . --user root --cpus 4 -- \
+ "export PYTHONPATH=tools/testing/selftests/net/; tools/testing/selftests/net/rds/run.sh"
+
+An HTML coverage report will be output in tools/testing/selftests/net/rds/rds_logs/coverage/.
diff --git a/tools/testing/selftests/net/rds/config.sh b/tools/testing/selftests/net/rds/config.sh
new file mode 100755
index 000000000000..791c8dbe1095
--- /dev/null
+++ b/tools/testing/selftests/net/rds/config.sh
@@ -0,0 +1,53 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+set -u
+set -x
+
+unset KBUILD_OUTPUT
+
+GENERATE_GCOV_REPORT=0
+# The leading ':' puts getopts in silent mode: for an unknown option it sets
+# opt to '?' and OPTARG to the offending character. Without it OPTARG is
+# unset on errors and the "${OPTARG}" expansion below aborts under "set -u".
+while getopts ":g" opt; do
+	case ${opt} in
+		g)
+			GENERATE_GCOV_REPORT=1
+			;;
+		:)
+			echo "USAGE: config.sh [-g]"
+			exit 1
+			;;
+		?)
+			echo "Invalid option: -${OPTARG}."
+			exit 1
+			;;
+	esac
+done
+
+CONF_FILE="tools/testing/selftests/net/config"
+
+# no modules
+scripts/config --file "$CONF_FILE" --disable CONFIG_MODULES
+
+# enable RDS
+scripts/config --file "$CONF_FILE" --enable CONFIG_RDS
+scripts/config --file "$CONF_FILE" --enable CONFIG_RDS_TCP
+
+if [ "$GENERATE_GCOV_REPORT" -eq 1 ]; then
+ # instrument RDS and only RDS
+ scripts/config --file "$CONF_FILE" --enable CONFIG_GCOV_KERNEL
+ scripts/config --file "$CONF_FILE" --disable GCOV_PROFILE_ALL
+ scripts/config --file "$CONF_FILE" --enable GCOV_PROFILE_RDS
+else
+ scripts/config --file "$CONF_FILE" --disable CONFIG_GCOV_KERNEL
+ scripts/config --file "$CONF_FILE" --disable GCOV_PROFILE_ALL
+ scripts/config --file "$CONF_FILE" --disable GCOV_PROFILE_RDS
+fi
+
+# need network namespaces to run tests with veth network interfaces
+scripts/config --file "$CONF_FILE" --enable CONFIG_NET_NS
+scripts/config --file "$CONF_FILE" --enable CONFIG_VETH
+
+# simulate packet loss
+scripts/config --file "$CONF_FILE" --enable CONFIG_NET_SCH_NETEM
+
diff --git a/tools/testing/selftests/net/rds/run.sh b/tools/testing/selftests/net/rds/run.sh
new file mode 100755
index 000000000000..8aee244f582a
--- /dev/null
+++ b/tools/testing/selftests/net/rds/run.sh
@@ -0,0 +1,224 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+set -u
+
+unset KBUILD_OUTPUT
+
+current_dir="$(realpath "$(dirname "$0")")"
+build_dir="$current_dir"
+
+build_include="$current_dir/include.sh"
+if test -f "$build_include"; then
+ # this include will define "$mk_build_dir" as the location the test was
+ # built. We will need this if the tests are installed in a location
+ # other than the kernel source
+
+ source "$build_include"
+ build_dir="$mk_build_dir"
+fi
+
+# This test requires kernel source and the *.gcda data therein
+# Locate the top level of the kernel source, and the net/rds
+# subfolder with the appropriate *.gcno object files
+ksrc_dir="$(realpath "$build_dir"/../../../../../)"
+kconfig="$ksrc_dir/.config"
+obj_dir="$ksrc_dir/net/rds"
+
+GCOV_CMD=gcov
+
+#check to see if the host has the required packages to generate a gcov report
+check_gcov_env()
+{
+ if ! which "$GCOV_CMD" > /dev/null 2>&1; then
+ echo "Warning: Could not find gcov. "
+ GENERATE_GCOV_REPORT=0
+ return
+ fi
+
+ # the gcov version must match the gcc version
+ GCC_VER=$(gcc -dumpfullversion)
+ GCOV_VER=$($GCOV_CMD -v | grep gcov | awk '{print $3}'| awk 'BEGIN {FS="-"}{print $1}')
+ if [ "$GCOV_VER" != "$GCC_VER" ]; then
+ #attempt to find a matching gcov version
+ GCOV_CMD=gcov-$(gcc -dumpversion)
+
+ if ! which "$GCOV_CMD" > /dev/null 2>&1; then
+ echo "Warning: Could not find an appropriate gcov installation. \
+ gcov version must match gcc version"
+ GENERATE_GCOV_REPORT=0
+ return
+ fi
+
+ #recheck version number of found gcov executable
+ GCOV_VER=$($GCOV_CMD -v | grep gcov | awk '{print $3}'| \
+ awk 'BEGIN {FS="-"}{print $1}')
+ if [ "$GCOV_VER" != "$GCC_VER" ]; then
+ echo "Warning: Could not find an appropriate gcov installation. \
+ gcov version must match gcc version"
+ GENERATE_GCOV_REPORT=0
+ else
+ echo "Warning: Mismatched gcc and gcov detected. Using $GCOV_CMD"
+ fi
+ fi
+}
+
+# Check to see if the kconfig has the required configs to generate a coverage report
+check_gcov_conf()
+{
+ if ! grep -x "CONFIG_GCOV_PROFILE_RDS=y" "$kconfig" > /dev/null 2>&1; then
+ echo "INFO: CONFIG_GCOV_PROFILE_RDS should be enabled for coverage reports"
+ GENERATE_GCOV_REPORT=0
+ fi
+ if ! grep -x "CONFIG_GCOV_KERNEL=y" "$kconfig" > /dev/null 2>&1; then
+ echo "INFO: CONFIG_GCOV_KERNEL should be enabled for coverage reports"
+ GENERATE_GCOV_REPORT=0
+ fi
+ if grep -x "CONFIG_GCOV_PROFILE_ALL=y" "$kconfig" > /dev/null 2>&1; then
+ echo "INFO: CONFIG_GCOV_PROFILE_ALL should be disabled for coverage reports"
+ GENERATE_GCOV_REPORT=0
+ fi
+
+ if [ "$GENERATE_GCOV_REPORT" -eq 0 ]; then
+ echo "To enable gcov reports, please run "\
+ "\"tools/testing/selftests/net/rds/config.sh -g\" and rebuild the kernel"
+ else
+ # if we have the required kernel configs, proceed to check the environment to
+ # ensure we have the required gcov packages
+ check_gcov_env
+ fi
+}
+
+# Kselftest framework requirement - SKIP code is 4.
+check_conf_enabled() {
+ if ! grep -x "$1=y" "$kconfig" > /dev/null 2>&1; then
+ echo "selftests: [SKIP] This test requires $1 enabled"
+ echo "Please run tools/testing/selftests/net/rds/config.sh and rebuild the kernel"
+ exit 4
+ fi
+}
+check_conf_disabled() {
+ if grep -x "$1=y" "$kconfig" > /dev/null 2>&1; then
+ echo "selftests: [SKIP] This test requires $1 disabled"
+ echo "Please run tools/testing/selftests/net/rds/config.sh and rebuild the kernel"
+ exit 4
+ fi
+}
+check_conf() {
+ check_conf_enabled CONFIG_NET_SCH_NETEM
+ check_conf_enabled CONFIG_VETH
+ check_conf_enabled CONFIG_NET_NS
+ check_conf_enabled CONFIG_RDS_TCP
+ check_conf_enabled CONFIG_RDS
+ check_conf_disabled CONFIG_MODULES
+}
+
+check_env()
+{
+ if ! test -d "$obj_dir"; then
+ echo "selftests: [SKIP] This test requires a kernel source tree"
+ exit 4
+ fi
+ if ! test -e "$kconfig"; then
+ echo "selftests: [SKIP] This test requires a configured kernel source tree"
+ exit 4
+ fi
+ if ! which strace > /dev/null 2>&1; then
+ echo "selftests: [SKIP] Could not run test without strace"
+ exit 4
+ fi
+ if ! which tcpdump > /dev/null 2>&1; then
+ echo "selftests: [SKIP] Could not run test without tcpdump"
+ exit 4
+ fi
+
+ if ! which python3 > /dev/null 2>&1; then
+ echo "selftests: [SKIP] Could not run test without python3"
+ exit 4
+ fi
+
+ python_major=$(python3 -c "import sys; print(sys.version_info[0])")
+ python_minor=$(python3 -c "import sys; print(sys.version_info[1])")
+ if [[ python_major -lt 3 || ( python_major -eq 3 && python_minor -lt 9 ) ]] ; then
+ echo "selftests: [SKIP] Could not run test without at least python3.9"
+ python3 -V
+ exit 4
+ fi
+}
+
+LOG_DIR="$current_dir"/rds_logs
+PLOSS=0
+PCORRUPT=0
+PDUP=0
+GENERATE_GCOV_REPORT=1
+# Parse command line options.
+# The leading ':' selects getopts silent mode: a missing option argument is
+# reported as ':' and an unknown option as '?', both with OPTARG set to the
+# option character. Without it the ':' branch is unreachable and OPTARG is
+# unset on errors, which aborts the "${OPTARG}" expansion under "set -u".
+while getopts ":d:l:c:u:" opt; do
+	case ${opt} in
+		d)
+			LOG_DIR=${OPTARG}
+			;;
+		l)
+			PLOSS=${OPTARG}
+			;;
+		c)
+			PCORRUPT=${OPTARG}
+			;;
+		u)
+			PDUP=${OPTARG}
+			;;
+		:)
+			# only list options that the optstring above really accepts
+			echo "USAGE: run.sh [-d logdir] [-l packet_loss] [-c packet_corruption]" \
+				"[-u packet_duplicate]"
+			exit 1
+			;;
+		?)
+			echo "Invalid option: -${OPTARG}."
+			exit 1
+			;;
+	esac
+done
+
+
+check_env
+check_conf
+check_gcov_conf
+
+
+rm -fr "$LOG_DIR"
+TRACE_FILE="${LOG_DIR}/rds-strace.txt"
+COVR_DIR="${LOG_DIR}/coverage/"
+mkdir -p "$LOG_DIR"
+mkdir -p "$COVR_DIR"
+
+set +e
+echo running RDS tests...
+echo Traces will be logged to "$TRACE_FILE"
+rm -f "$TRACE_FILE"
+strace -T -tt -o "$TRACE_FILE" python3 "$(dirname "$0")/test.py" --timeout 400 -d "$LOG_DIR" \
+ -l "$PLOSS" -c "$PCORRUPT" -u "$PDUP"
+
+test_rc=$?
+dmesg > "${LOG_DIR}/dmesg.out"
+
+if [ "$GENERATE_GCOV_REPORT" -eq 1 ]; then
+ echo saving coverage data...
+ (set +x; cd /sys/kernel/debug/gcov; find ./* -name '*.gcda' | \
+ while read -r f
+ do
+ cat < "/sys/kernel/debug/gcov/$f" > "/$f"
+ done)
+
+ echo running gcovr...
+ gcovr -s --html-details --gcov-executable "$GCOV_CMD" --gcov-ignore-parse-errors \
+ -o "${COVR_DIR}/gcovr" "${ksrc_dir}/net/rds/"
+else
+ echo "Coverage report will be skipped"
+fi
+
+if [ "$test_rc" -eq 0 ]; then
+ echo "PASS: Test completed successfully"
+else
+ echo "FAIL: Test failed"
+fi
+
+exit "$test_rc"
diff --git a/tools/testing/selftests/net/rds/test.py b/tools/testing/selftests/net/rds/test.py
new file mode 100644
index 000000000000..e6bb109bcead
--- /dev/null
+++ b/tools/testing/selftests/net/rds/test.py
@@ -0,0 +1,262 @@
+#! /usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import argparse
+import ctypes
+import errno
+import hashlib
+import os
+import select
+import signal
+import socket
+import subprocess
+import sys
+import atexit
+from pwd import getpwuid
+from os import stat
+from lib.py import ip
+
+
+libc = ctypes.cdll.LoadLibrary('libc.so.6')
+setns = libc.setns
+
+net0 = 'net0'
+net1 = 'net1'
+
+veth0 = 'veth0'
+veth1 = 'veth1'
+
+# Helper function for creating a socket inside a network namespace.
+# We need this because otherwise RDS will detect that the two TCP
+# sockets are on the same interface and use the loop transport instead
+# of the TCP transport.
+def netns_socket(netns, *args):
+ u0, u1 = socket.socketpair(socket.AF_UNIX, socket.SOCK_SEQPACKET)
+
+ child = os.fork()
+ if child == 0:
+ # change network namespace
+ with open(f'/var/run/netns/{netns}') as f:
+ try:
+ ret = setns(f.fileno(), 0)
+ except IOError as e:
+ print(e.errno)
+ print(e)
+
+ # create socket in target namespace
+ s = socket.socket(*args)
+
+ # send resulting socket to parent
+ socket.send_fds(u0, [], [s.fileno()])
+
+ sys.exit(0)
+
+ # receive socket from child
+ _, s, _, _ = socket.recv_fds(u1, 0, 1)
+ os.waitpid(child, 0)
+ u0.close()
+ u1.close()
+ return socket.fromfd(s[0], *args)
+
+def signal_handler(sig, frame):
+ print('Test timed out')
+ sys.exit(1)
+
+#Parse out command line arguments. We take an optional
+# timeout parameter and an optional log output folder
+parser = argparse.ArgumentParser(description="init script args",
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+parser.add_argument("-d", "--logdir", action="store",
+ help="directory to store logs", default="/tmp")
+parser.add_argument('--timeout', help="timeout to terminate hung test",
+ type=int, default=0)
+parser.add_argument('-l', '--loss', help="Simulate tcp packet loss",
+ type=int, default=0)
+parser.add_argument('-c', '--corruption', help="Simulate tcp packet corruption",
+ type=int, default=0)
+parser.add_argument('-u', '--duplicate', help="Simulate tcp packet duplication",
+ type=int, default=0)
+args = parser.parse_args()
+logdir=args.logdir
+packet_loss=str(args.loss)+'%'
+packet_corruption=str(args.corruption)+'%'
+packet_duplicate=str(args.duplicate)+'%'
+
+ip(f"netns add {net0}")
+ip(f"netns add {net1}")
+ip(f"link add type veth")
+
+addrs = [
+ # we technically don't need different port numbers, but this will
+ # help identify traffic in the network analyzer
+ ('10.0.0.1', 10000),
+ ('10.0.0.2', 20000),
+]
+
+# move interfaces to separate namespaces so they can no longer be
+# bound directly; this prevents rds from switching over from the tcp
+# transport to the loop transport.
+ip(f"link set {veth0} netns {net0} up")
+ip(f"link set {veth1} netns {net1} up")
+
+
+
+# add addresses
+ip(f"-n {net0} addr add {addrs[0][0]}/32 dev {veth0}")
+ip(f"-n {net1} addr add {addrs[1][0]}/32 dev {veth1}")
+
+# add routes
+ip(f"-n {net0} route add {addrs[1][0]}/32 dev {veth0}")
+ip(f"-n {net1} route add {addrs[0][0]}/32 dev {veth1}")
+
+# sanity check that our two interfaces/addresses are correctly set up
+# and communicating by doing a single ping
+ip(f"netns exec {net0} ping -c 1 {addrs[1][0]}")
+
+# Start a packet capture on each network
+for net in [net0, net1]:
+ tcpdump_pid = os.fork()
+ if tcpdump_pid == 0:
+ pcap = logdir+'/'+net+'.pcap'
+ subprocess.check_call(['touch', pcap])
+ user = getpwuid(stat(pcap).st_uid).pw_name
+ ip(f"netns exec {net} /usr/sbin/tcpdump -Z {user} -i any -w {pcap}")
+ sys.exit(0)
+
+# simulate packet loss, duplication and corruption
+for net, iface in [(net0, veth0), (net1, veth1)]:
+ ip(f"netns exec {net} /usr/sbin/tc qdisc add dev {iface} root netem \
+ corrupt {packet_corruption} loss {packet_loss} duplicate \
+ {packet_duplicate}")
+
+# add a timeout
+if args.timeout > 0:
+ signal.alarm(args.timeout)
+ signal.signal(signal.SIGALRM, signal_handler)
+
+sockets = [
+ netns_socket(net0, socket.AF_RDS, socket.SOCK_SEQPACKET),
+ netns_socket(net1, socket.AF_RDS, socket.SOCK_SEQPACKET),
+]
+
+for s, addr in zip(sockets, addrs):
+ s.bind(addr)
+ s.setblocking(0)
+
+fileno_to_socket = {
+ s.fileno(): s for s in sockets
+}
+
+addr_to_socket = {
+ addr: s for addr, s in zip(addrs, sockets)
+}
+
+socket_to_addr = {
+ s: addr for addr, s in zip(addrs, sockets)
+}
+
+send_hashes = {}
+recv_hashes = {}
+
+ep = select.epoll()
+
+for s in sockets:
+ ep.register(s, select.EPOLLRDNORM)
+
+n = 50000
+nr_send = 0
+nr_recv = 0
+
+while nr_send < n:
+ # Send as much as we can without blocking
+ print("sending...", nr_send, nr_recv)
+ while nr_send < n:
+ send_data = hashlib.sha256(
+ f'packet {nr_send}'.encode('utf-8')).hexdigest().encode('utf-8')
+
+ # pseudo-random send/receive pattern
+ sender = sockets[nr_send % 2]
+ receiver = sockets[1 - (nr_send % 3) % 2]
+
+ try:
+ sender.sendto(send_data, socket_to_addr[receiver])
+ send_hashes.setdefault((sender.fileno(), receiver.fileno()),
+ hashlib.sha256()).update(f'<{send_data}>'.encode('utf-8'))
+ nr_send = nr_send + 1
+ except BlockingIOError as e:
+ break
+ except OSError as e:
+ if e.errno in [errno.ENOBUFS, errno.ECONNRESET, errno.EPIPE]:
+ break
+ raise
+
+ # Receive as much as we can without blocking
+ print("receiving...", nr_send, nr_recv)
+ while nr_recv < nr_send:
+ for fileno, eventmask in ep.poll():
+ receiver = fileno_to_socket[fileno]
+
+ if eventmask & select.EPOLLRDNORM:
+ while True:
+ try:
+ recv_data, address = receiver.recvfrom(1024)
+ sender = addr_to_socket[address]
+ recv_hashes.setdefault((sender.fileno(),
+ receiver.fileno()), hashlib.sha256()).update(
+ f'<{recv_data}>'.encode('utf-8'))
+ nr_recv = nr_recv + 1
+ except BlockingIOError as e:
+ break
+
+ # exercise net/rds/tcp.c:rds_tcp_sysctl_reset()
+ for net in [net0, net1]:
+ ip(f"netns exec {net} /usr/sbin/sysctl net.rds.tcp.rds_tcp_rcvbuf=10000")
+ ip(f"netns exec {net} /usr/sbin/sysctl net.rds.tcp.rds_tcp_sndbuf=10000")
+
+print("done", nr_send, nr_recv)
+
+# the Python socket module doesn't know these
+RDS_INFO_FIRST = 10000
+RDS_INFO_LAST = 10017
+
+nr_success = 0
+nr_error = 0
+
+for s in sockets:
+ for optname in range(RDS_INFO_FIRST, RDS_INFO_LAST + 1):
+ # Sigh, the Python socket module doesn't allow us to pass
+ # buffer lengths greater than 1024 for some reason. RDS
+ # wants multiple pages.
+ try:
+ s.getsockopt(socket.SOL_RDS, optname, 1024)
+ nr_success = nr_success + 1
+ except OSError as e:
+ nr_error = nr_error + 1
+ if e.errno == errno.ENOSPC:
+ # ignore
+ pass
+
+print(f"getsockopt(): {nr_success}/{nr_error}")
+
+print("Stopping network packet captures")
+subprocess.check_call(['killall', '-q', 'tcpdump'])
+
+# We're done sending and receiving stuff, now let's check if what
+# we received is what we sent.
+for (sender, receiver), send_hash in send_hashes.items():
+ recv_hash = recv_hashes.get((sender, receiver))
+
+ if recv_hash is None:
+ print("FAIL: No data received")
+ sys.exit(1)
+
+ if send_hash.hexdigest() != recv_hash.hexdigest():
+ print("FAIL: Send/recv mismatch")
+ print("hash expected:", send_hash.hexdigest())
+ print("hash received:", recv_hash.hexdigest())
+ sys.exit(1)
+
+ print(f"{sender}/{receiver}: ok")
+
+print("Success")
+sys.exit(0)
diff --git a/tools/testing/selftests/net/rxtimestamp.c b/tools/testing/selftests/net/rxtimestamp.c
index 9eb42570294d..16ac4df55fdb 100644
--- a/tools/testing/selftests/net/rxtimestamp.c
+++ b/tools/testing/selftests/net/rxtimestamp.c
@@ -57,6 +57,8 @@ static struct sof_flag sof_flags[] = {
SOF_FLAG(SOF_TIMESTAMPING_SOFTWARE),
SOF_FLAG(SOF_TIMESTAMPING_RX_SOFTWARE),
SOF_FLAG(SOF_TIMESTAMPING_RX_HARDWARE),
+ SOF_FLAG(SOF_TIMESTAMPING_OPT_RX_FILTER),
+ SOF_FLAG(SOF_TIMESTAMPING_RAW_HARDWARE),
};
static struct socket_type socket_types[] = {
@@ -98,6 +100,22 @@ static struct test_case test_cases[] = {
{}
},
{
+ { .so_timestamping = SOF_TIMESTAMPING_RAW_HARDWARE
+ | SOF_TIMESTAMPING_OPT_RX_FILTER },
+ {}
+ },
+ {
+ { .so_timestamping = SOF_TIMESTAMPING_SOFTWARE
+ | SOF_TIMESTAMPING_OPT_RX_FILTER },
+ {}
+ },
+ {
+ { .so_timestamping = SOF_TIMESTAMPING_SOFTWARE
+ | SOF_TIMESTAMPING_RX_SOFTWARE
+ | SOF_TIMESTAMPING_OPT_RX_FILTER },
+ { .swtstamp = true }
+ },
+ {
{ .so_timestamping = SOF_TIMESTAMPING_SOFTWARE
| SOF_TIMESTAMPING_RX_SOFTWARE },
{ .swtstamp = true }
diff --git a/tools/testing/selftests/net/sk_so_peek_off.c b/tools/testing/selftests/net/sk_so_peek_off.c
new file mode 100644
index 000000000000..d87dd8d8d491
--- /dev/null
+++ b/tools/testing/selftests/net/sk_so_peek_off.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include "../kselftest.h"
+
+/* Return a printable "<protocol>/<family>" label for log messages. */
+static char *afstr(int af, int proto)
+{
+	const int is_v4 = (af == AF_INET);
+
+	/* anything that is not TCP is labelled as UDP */
+	if (proto != IPPROTO_TCP)
+		return is_v4 ? "UDP/IPv4" : "UDP/IPv6";
+
+	return is_v4 ? "TCP/IPv4" : "TCP/IPv6";
+}
+
+/*
+ * Probe whether a socket of the given family and protocol accepts
+ * SO_PEEK_OFF.
+ *
+ * Returns 1 if setsockopt(SO_PEEK_OFF) succeeds on a temporary socket,
+ * 0 if it fails or if the temporary socket cannot be created.
+ */
+int sk_peek_offset_probe(sa_family_t af, int proto)
+{
+	int type = (proto == IPPROTO_TCP ? SOCK_STREAM : SOCK_DGRAM);
+	int optv = 0;
+	int ret = 0;
+	int s;
+
+	s = socket(af, type, proto);
+	if (s < 0) {
+		/* reached for UDP as well, so keep the message protocol-neutral */
+		ksft_perror("Temporary probe socket creation failed");
+	} else {
+		if (!setsockopt(s, SOL_SOCKET, SO_PEEK_OFF, &optv, sizeof(int)))
+			ret = 1;
+		else
+			printf("%s does not support SO_PEEK_OFF\n", afstr(af, proto));
+		close(s);
+	}
+	return ret;
+}
+
+/*
+ * Set SO_PEEK_OFF on socket s to offset.
+ * A setsockopt() failure is only reported through ksft_perror(); nothing is
+ * returned to the caller.
+ */
+static void sk_peek_offset_set(int s, int offset)
+{
+ if (setsockopt(s, SOL_SOCKET, SO_PEEK_OFF, &offset, sizeof(offset)))
+ ksft_perror("Failed to set SO_PEEK_OFF value\n");
+}
+
+/*
+ * Read the current SO_PEEK_OFF value of socket s.
+ *
+ * Returns the value reported by getsockopt(). If getsockopt() fails the
+ * error is reported and -2 is returned.
+ */
+static int sk_peek_offset_get(int s)
+{
+	/*
+	 * Start from a defined value so a failed getsockopt() does not
+	 * return indeterminate stack contents. -2 differs from every offset
+	 * the test compares against (-1, 0, 1 and 2).
+	 */
+	int offset = -2;
+	socklen_t len = sizeof(offset);
+
+	if (getsockopt(s, SOL_SOCKET, SO_PEEK_OFF, &offset, &len))
+		ksft_perror("Failed to get SO_PEEK_OFF value\n");
+	return offset;
+}
+
+/*
+ * Exercise SO_PEEK_OFF on a connected pair of sockets of the given family
+ * and protocol.
+ *
+ * The test checks that the initial offset is -1, sets it to 0, sends "ab",
+ * peeks the data one byte at a time while verifying that the offset moves
+ * forward, then flushes the data and verifies the offset is back at 0.
+ *
+ * Returns 1 if every step succeeds, 0 otherwise.
+ */
+static int sk_peek_offset_test(sa_family_t af, int proto)
+{
+	int type = (proto == IPPROTO_TCP ? SOCK_STREAM : SOCK_DGRAM);
+	union {
+		struct sockaddr sa;
+		struct sockaddr_in a4;
+		struct sockaddr_in6 a6;
+	} a;
+	int res = 0;
+	int s[2] = {-1, -1};
+	int recv_sock = -1;
+	int offset = 0;
+	ssize_t len;
+	char buf[2];
+
+	memset(&a, 0, sizeof(a));
+	a.sa.sa_family = af;
+
+	/* For UDP the bound socket is also the receiver; TCP replaces it below */
+	s[0] = recv_sock = socket(af, type, proto);
+	s[1] = socket(af, type, proto);
+
+	if (s[0] < 0 || s[1] < 0) {
+		ksft_perror("Temporary socket creation failed\n");
+		goto out;
+	}
+	if (bind(s[0], &a.sa, sizeof(a)) < 0) {
+		ksft_perror("Temporary socket bind() failed\n");
+		goto out;
+	}
+	/* Fetch the address that bind() picked so the peer can connect to it */
+	if (getsockname(s[0], &a.sa, &((socklen_t) { sizeof(a) })) < 0) {
+		ksft_perror("Temporary socket getsockname() failed\n");
+		goto out;
+	}
+	if (proto == IPPROTO_TCP && listen(s[0], 0) < 0) {
+		ksft_perror("Temporary socket listen() failed\n");
+		goto out;
+	}
+	if (connect(s[1], &a.sa, sizeof(a)) < 0) {
+		ksft_perror("Temporary socket connect() failed\n");
+		goto out;
+	}
+	if (proto == IPPROTO_TCP) {
+		recv_sock = accept(s[0], NULL, NULL);
+		/* accept() signals failure with -1; 0 is a valid descriptor */
+		if (recv_sock < 0) {
+			ksft_perror("Temporary socket accept() failed\n");
+			goto out;
+		}
+	}
+
+	/* Some basic tests of getting/setting offset */
+	offset = sk_peek_offset_get(recv_sock);
+	if (offset != -1) {
+		ksft_perror("Initial value of socket offset not -1\n");
+		goto out;
+	}
+	sk_peek_offset_set(recv_sock, 0);
+	offset = sk_peek_offset_get(recv_sock);
+	if (offset != 0) {
+		ksft_perror("Failed to set socket offset to 0\n");
+		goto out;
+	}
+
+	/* Transfer a message */
+	if (send(s[1], (char *)("ab"), 2, 0) != 2) {
+		ksft_perror("Temporary probe socket send() failed\n");
+		goto out;
+	}
+	/* Read first byte */
+	len = recv(recv_sock, buf, 1, MSG_PEEK);
+	if (len != 1 || buf[0] != 'a') {
+		ksft_perror("Failed to read first byte of message\n");
+		goto out;
+	}
+	offset = sk_peek_offset_get(recv_sock);
+	if (offset != 1) {
+		ksft_perror("Offset not forwarded correctly at first byte\n");
+		goto out;
+	}
+	/* Try to read beyond last byte */
+	len = recv(recv_sock, buf, 2, MSG_PEEK);
+	if (len != 1 || buf[0] != 'b') {
+		ksft_perror("Failed to read last byte of message\n");
+		goto out;
+	}
+	offset = sk_peek_offset_get(recv_sock);
+	if (offset != 2) {
+		ksft_perror("Offset not forwarded correctly at last byte\n");
+		goto out;
+	}
+	/* Flush message */
+	len = recv(recv_sock, buf, 2, MSG_TRUNC);
+	if (len != 2) {
+		ksft_perror("Failed to flush message\n");
+		goto out;
+	}
+	offset = sk_peek_offset_get(recv_sock);
+	if (offset != 0) {
+		ksft_perror("Offset not reverted correctly after flush\n");
+		goto out;
+	}
+
+	printf("%s with SO_PEEK_OFF works correctly\n", afstr(af, proto));
+	res = 1;
+out:
+	/*
+	 * recv_sock aliases s[0] unless accept() returned a new descriptor.
+	 * Close it separately only in that case, so that an early TCP
+	 * failure does not close s[0] twice.
+	 */
+	if (recv_sock >= 0 && recv_sock != s[0])
+		close(recv_sock);
+	if (s[1] >= 0)
+		close(s[1]);
+	if (s[0] >= 0)
+		close(s[0]);
+	return res;
+}
+
+/*
+ * Probe both address families for SO_PEEK_OFF support on the given protocol
+ * and run the offset test on every family that passed the probe.
+ *
+ * Returns KSFT_SKIP if neither family passed the probe, KSFT_FAIL if either
+ * result is 0 after testing, and KSFT_PASS otherwise.
+ *
+ * NOTE(review): a family that fails the probe keeps its result at 0, so the
+ * final check returns KSFT_FAIL when only one family supports SO_PEEK_OFF -
+ * confirm this is intended.
+ */
+static int do_test(int proto)
+{
+ int res4, res6;
+
+ res4 = sk_peek_offset_probe(AF_INET, proto);
+ res6 = sk_peek_offset_probe(AF_INET6, proto);
+
+ if (!res4 && !res6)
+ return KSFT_SKIP;
+
+ if (res4)
+ res4 = sk_peek_offset_test(AF_INET, proto);
+
+ if (res6)
+ res6 = sk_peek_offset_test(AF_INET6, proto);
+
+ if (!res4 || !res6)
+ return KSFT_FAIL;
+
+ return KSFT_PASS;
+}
+
+/*
+ * Run the SO_PEEK_OFF checks for TCP and UDP.
+ *
+ * Returns KSFT_FAIL if either protocol failed, KSFT_SKIP if both protocols
+ * were skipped (so that a run which tested nothing is not reported as a
+ * pass), and KSFT_PASS otherwise.
+ */
+int main(void)
+{
+	int restcp, resudp;
+
+	restcp = do_test(IPPROTO_TCP);
+	resudp = do_test(IPPROTO_UDP);
+	if (restcp == KSFT_FAIL || resudp == KSFT_FAIL)
+		return KSFT_FAIL;
+	if (restcp == KSFT_SKIP && resudp == KSFT_SKIP)
+		return KSFT_SKIP;
+
+	return KSFT_PASS;
+}
diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile
index bd88b90b902b..5b0205c70c39 100644
--- a/tools/testing/selftests/net/tcp_ao/Makefile
+++ b/tools/testing/selftests/net/tcp_ao/Makefile
@@ -31,7 +31,8 @@ CFLAGS += $(KHDR_INCLUDES)
CFLAGS += -iquote ./lib/ -I ../../../../include/
# Library
-LIBSRC := kconfig.c netlink.c proc.c repair.c setup.c sock.c utils.c
+LIBSRC := ftrace.c ftrace-tcp.c kconfig.c netlink.c
+LIBSRC += proc.c repair.c setup.c sock.c utils.c
LIBOBJ := $(LIBSRC:%.c=$(LIBDIR)/%.o)
EXTRA_CLEAN += $(LIBOBJ) $(LIB)
diff --git a/tools/testing/selftests/net/tcp_ao/bench-lookups.c b/tools/testing/selftests/net/tcp_ao/bench-lookups.c
index a1e6e007c291..6736484996a3 100644
--- a/tools/testing/selftests/net/tcp_ao/bench-lookups.c
+++ b/tools/testing/selftests/net/tcp_ao/bench-lookups.c
@@ -355,6 +355,6 @@ static void *client_fn(void *arg)
int main(int argc, char *argv[])
{
- test_init(30, server_fn, client_fn);
+ test_init(31, server_fn, client_fn);
return 0;
}
diff --git a/tools/testing/selftests/net/tcp_ao/config b/tools/testing/selftests/net/tcp_ao/config
index d3277a9de987..3605e38711cb 100644
--- a/tools/testing/selftests/net/tcp_ao/config
+++ b/tools/testing/selftests/net/tcp_ao/config
@@ -7,4 +7,5 @@ CONFIG_NET_L3_MASTER_DEV=y
CONFIG_NET_VRF=y
CONFIG_TCP_AO=y
CONFIG_TCP_MD5SIG=y
+CONFIG_TRACEPOINTS=y
CONFIG_VETH=m
diff --git a/tools/testing/selftests/net/tcp_ao/connect-deny.c b/tools/testing/selftests/net/tcp_ao/connect-deny.c
index 185a2f6e5ff3..d418162d335f 100644
--- a/tools/testing/selftests/net/tcp_ao/connect-deny.c
+++ b/tools/testing/selftests/net/tcp_ao/connect-deny.c
@@ -71,10 +71,12 @@ static void try_accept(const char *tst_name, unsigned int port, const char *pwd,
}
}
+ synchronize_threads(); /* before counter checks */
if (pwd && test_get_tcp_ao_counters(lsk, &ao_cnt2))
test_error("test_get_tcp_ao_counters()");
close(lsk);
+
if (pwd)
test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected);
@@ -84,10 +86,10 @@ static void try_accept(const char *tst_name, unsigned int port, const char *pwd,
after_cnt = netstat_get_one(cnt_name, NULL);
if (after_cnt <= before_cnt) {
- test_fail("%s: %s counter did not increase: %zu <= %zu",
+ test_fail("%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64,
tst_name, cnt_name, after_cnt, before_cnt);
} else {
- test_ok("%s: counter %s increased %zu => %zu",
+ test_ok("%s: counter %s increased %" PRIu64 " => %" PRIu64,
tst_name, cnt_name, before_cnt, after_cnt);
}
@@ -180,6 +182,7 @@ static void try_connect(const char *tst_name, unsigned int port,
timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
ret = _test_connect_socket(sk, this_ip_dest, port, timeout);
+ synchronize_threads(); /* before counter checks */
if (ret < 0) {
if (fault(KEYREJECT) && ret == -EKEYREJECTED) {
test_ok("%s: connect() was prevented", tst_name);
@@ -212,30 +215,44 @@ out:
static void *client_fn(void *arg)
{
- union tcp_addr wrong_addr, network_addr;
+ union tcp_addr wrong_addr, network_addr, addr_any = {};
unsigned int port = test_server_port;
if (inet_pton(TEST_FAMILY, TEST_WRONG_IP, &wrong_addr) != 1)
test_error("Can't convert ip address %s", TEST_WRONG_IP);
+ trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest,
+ -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1);
try_connect("Non-AO server + AO client", port++, DEFAULT_TEST_PASSWORD,
this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
+ trace_hash_event_expect(TCP_HASH_AO_REQUIRED, this_ip_addr, this_ip_dest,
+ -1, port, 0, 0, 1, 0, 0, 0);
try_connect("AO server + Non-AO client", port++, NULL,
this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
+ trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_addr, this_ip_dest,
+ -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1);
try_connect("Wrong password", port++, DEFAULT_TEST_PASSWORD,
this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
+ trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest,
+ -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1);
try_connect("Wrong rcv id", port++, DEFAULT_TEST_PASSWORD,
this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
+ trace_ao_event_sk_expect(TCP_AO_SYNACK_NO_KEY, this_ip_dest, addr_any,
+ port, 0, 100, 100);
try_connect("Wrong snd id", port++, DEFAULT_TEST_PASSWORD,
this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
+ trace_ao_event_expect(TCP_AO_WRONG_MACLEN, this_ip_addr, this_ip_dest,
+ -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1);
try_connect("Different maclen", port++, DEFAULT_TEST_PASSWORD,
this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
+ trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest,
+ -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1);
try_connect("Server: Wrong addr", port++, DEFAULT_TEST_PASSWORD,
this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
@@ -259,6 +276,6 @@ static void *client_fn(void *arg)
int main(int argc, char *argv[])
{
- test_init(21, server_fn, client_fn);
+ test_init(22, server_fn, client_fn);
return 0;
}
diff --git a/tools/testing/selftests/net/tcp_ao/connect.c b/tools/testing/selftests/net/tcp_ao/connect.c
index 81653b47f303..f1d8d29e393f 100644
--- a/tools/testing/selftests/net/tcp_ao/connect.c
+++ b/tools/testing/selftests/net/tcp_ao/connect.c
@@ -67,14 +67,14 @@ static void *client_fn(void *arg)
netstat_free(ns_after);
if (nr_packets > (after_aogood - before_aogood)) {
- test_fail("TCPAOGood counter mismatch: %zu > (%zu - %zu)",
+ test_fail("TCPAOGood counter mismatch: %zu > (%" PRIu64 " - %" PRIu64 ")",
nr_packets, after_aogood, before_aogood);
return NULL;
}
if (test_tcp_ao_counters_cmp("connect", &ao1, &ao2, TEST_CNT_GOOD))
return NULL;
- test_ok("connect TCPAOGood %" PRIu64 "/%" PRIu64 "/%" PRIu64 " => %" PRIu64 "/%" PRIu64 "/%" PRIu64 ", sent %" PRIu64,
+ test_ok("connect TCPAOGood %" PRIu64 "/%" PRIu64 "/%" PRIu64 " => %" PRIu64 "/%" PRIu64 "/%" PRIu64 ", sent %zu",
before_aogood, ao1.ao_info_pkt_good,
ao1.key_cnts[0].pkt_good,
after_aogood, ao2.ao_info_pkt_good,
@@ -85,6 +85,6 @@ static void *client_fn(void *arg)
int main(int argc, char *argv[])
{
- test_init(1, server_fn, client_fn);
+ test_init(2, server_fn, client_fn);
return 0;
}
diff --git a/tools/testing/selftests/net/tcp_ao/icmps-discard.c b/tools/testing/selftests/net/tcp_ao/icmps-discard.c
index d69bcba3c929..a1614f0d8c44 100644
--- a/tools/testing/selftests/net/tcp_ao/icmps-discard.c
+++ b/tools/testing/selftests/net/tcp_ao/icmps-discard.c
@@ -444,6 +444,6 @@ static void *client_fn(void *arg)
int main(int argc, char *argv[])
{
- test_init(3, server_fn, client_fn);
+ test_init(4, server_fn, client_fn);
return 0;
}
diff --git a/tools/testing/selftests/net/tcp_ao/key-management.c b/tools/testing/selftests/net/tcp_ao/key-management.c
index 24e62120b792..d4385b52c10b 100644
--- a/tools/testing/selftests/net/tcp_ao/key-management.c
+++ b/tools/testing/selftests/net/tcp_ao/key-management.c
@@ -965,7 +965,7 @@ static void end_client(const char *tst_name, int sk, unsigned int nr_keys,
synchronize_threads(); /* 5: counters */
}
-static void try_unmatched_keys(int sk, int *rnext_index)
+static void try_unmatched_keys(int sk, int *rnext_index, unsigned int port)
{
struct test_key *key;
unsigned int i = 0;
@@ -1013,6 +1013,9 @@ static void try_unmatched_keys(int sk, int *rnext_index)
test_error("all keys on server match the client");
if (test_set_key(sk, -1, key->server_keyid))
test_error("Can't change the current key");
+ trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, this_ip_addr, this_ip_dest,
+ -1, port, 0, -1, -1, -1, -1, -1,
+ -1, key->server_keyid, -1);
if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC))
test_fail("verify failed");
*rnext_index = i;
@@ -1054,6 +1057,10 @@ static void check_current_back(const char *tst_name, unsigned int port,
return;
if (test_set_key(sk, collection.keys[rotate_to_index].client_keyid, -1))
test_error("Can't change the current key");
+ trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, this_ip_dest, this_ip_addr,
+ port, -1, 0, -1, -1, -1, -1, -1,
+ collection.keys[rotate_to_index].client_keyid,
+ collection.keys[current_index].client_keyid, -1);
if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC))
test_fail("verify failed");
/* There is a race here: between setting the current_key with
@@ -1085,6 +1092,11 @@ static void roll_over_keys(const char *tst_name, unsigned int port,
for (i = rnext_index + 1; rotations > 0; i++, rotations--) {
if (i >= collection.nr_keys)
i = 0;
+ trace_ao_event_expect(TCP_AO_RNEXT_REQUEST,
+ this_ip_addr, this_ip_dest,
+ -1, port, 0, -1, -1, -1, -1, -1,
+ i == 0 ? -1 : collection.keys[i - 1].server_keyid,
+ collection.keys[i].server_keyid, -1);
if (test_set_key(sk, -1, collection.keys[i].server_keyid))
test_error("Can't change the Rnext key");
if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) {
@@ -1124,7 +1136,7 @@ static void try_client_match(const char *tst_name, unsigned int port,
rnext_index, msg_len, nr_packets);
if (sk < 0)
return;
- try_unmatched_keys(sk, &rnext_index);
+ try_unmatched_keys(sk, &rnext_index, port);
end_client(tst_name, sk, nr_keys, current_index, rnext_index, NULL);
}
@@ -1181,6 +1193,6 @@ static void *client_fn(void *arg)
int main(int argc, char *argv[])
{
- test_init(120, server_fn, client_fn);
+ test_init(121, server_fn, client_fn);
return 0;
}
diff --git a/tools/testing/selftests/net/tcp_ao/lib/aolib.h b/tools/testing/selftests/net/tcp_ao/lib/aolib.h
index fbc7f6111815..db44e77428dd 100644
--- a/tools/testing/selftests/net/tcp_ao/lib/aolib.h
+++ b/tools/testing/selftests/net/tcp_ao/lib/aolib.h
@@ -37,17 +37,58 @@ extern void __test_xfail(const char *buf);
extern void __test_error(const char *buf);
extern void __test_skip(const char *buf);
-__attribute__((__format__(__printf__, 2, 3)))
-static inline void __test_print(void (*fn)(const char *), const char *fmt, ...)
+/*
+ * Format @fmt with @vargs into a freshly malloc()ed, NUL-terminated string.
+ * Returns NULL if formatting or the allocation fails; otherwise the caller
+ * owns the result and has to free() it. The caller still does va_end() on
+ * @vargs.
+ */
+static inline char *test_snprintf(const char *fmt, va_list vargs)
 {
-#define TEST_MSG_BUFFER_SIZE 4096
- char buf[TEST_MSG_BUFFER_SIZE];
- va_list arg;
-
- va_start(arg, fmt);
- vsnprintf(buf, sizeof(buf), fmt, arg);
- va_end(arg);
- fn(buf);
+	va_list probe;
+	size_t size;
+	char *ret;
+	int n;
+
+	/* First pass, on a copy of the arguments, only measures the output */
+	va_copy(probe, vargs);
+	n = vsnprintf(NULL, 0, fmt, probe);
+	va_end(probe); /* every va_copy() needs a matching va_end() */
+	if (n < 0)
+		return NULL;
+
+	size = (size_t)n + 1;
+	ret = malloc(size);
+	if (!ret)
+		return NULL;
+
+	n = vsnprintf(ret, size, fmt, vargs);
+	/* Compare as size_t: mixing int and size_t would convert a negative n */
+	if (n < 0 || (size_t)n >= size) {
+		free(ret);
+		return NULL;
+	}
+	return ret;
+}
+
+/*
+ * printf()-like front end of test_snprintf(): returns a malloc()ed string
+ * (to be free()d by the caller) or NULL on failure.
+ */
+static __printf(1, 2) inline char *test_sprintf(const char *fmt, ...)
+{
+	char *formatted;
+	va_list ap;
+
+	va_start(ap, fmt);
+	formatted = test_snprintf(fmt, ap);
+	va_end(ap);
+	return formatted;
+}
+
+/*
+ * Format the message and hand it to @fn; nothing is reported if the message
+ * can't be formatted or allocated.
+ */
+static __printf(2, 3) inline void __test_print(void (*fn)(const char *),
+					       const char *fmt, ...)
+{
+	va_list ap;
+	char *text;
+
+	va_start(ap, fmt);
+	text = test_snprintf(fmt, ap);
+	va_end(ap);
+
+	if (text) {
+		fn(text);
+		free(text);
+	}
 }
#define test_print(fmt, ...) \
@@ -103,6 +144,7 @@ enum test_needs_kconfig {
KCONFIG_TCP_AO, /* required */
KCONFIG_TCP_MD5, /* optional, for TCP-MD5 features */
KCONFIG_NET_VRF, /* optional, for L3/VRF testing */
+ KCONFIG_FTRACE, /* optional, for tracepoints checks */
__KCONFIG_LAST__
};
extern bool kernel_config_has(enum test_needs_kconfig k);
@@ -142,6 +184,8 @@ static inline void test_init2(unsigned int ntests,
__test_init(ntests, family, prefix, taddr1, taddr2, peer1, peer2);
}
extern void test_add_destructor(void (*d)(void));
+extern void test_init_ftrace(int nsfd1, int nsfd2);
+extern int test_setup_tracing(void);
/* To adjust optmem socket limit, approximately estimate a number,
* that is bigger than sizeof(struct tcp_ao_key).
@@ -216,12 +260,17 @@ static inline void test_init(unsigned int ntests,
}
extern void synchronize_threads(void);
extern void switch_ns(int fd);
+extern int switch_save_ns(int fd);
+extern void switch_close_ns(int fd);
extern __thread union tcp_addr this_ip_addr;
extern __thread union tcp_addr this_ip_dest;
extern int test_family;
extern void randomize_buffer(void *buf, size_t buflen);
+extern __printf(3, 4) int test_echo(const char *fname, bool append,
+ const char *fmt, ...);
+
extern int open_netns(void);
extern int unshare_open_netns(void);
extern const char veth_name[];
@@ -602,4 +651,115 @@ static inline int test_add_repaired_key(int sk,
return test_verify_socket_key(sk, &tmp);
}
+#define DEFAULT_FTRACE_BUFFER_KB 10000
+#define DEFAULT_TRACER_LINES_ARR 200
+struct test_ftracer;
+extern uint64_t ns_cookie1, ns_cookie2;
+
+/*
+ * Verdict returned by a tracer's process_line() callback for one trace line.
+ * The aolib callback returns FTRACER_LINE_DISCARD for a line that matched an
+ * expected event and FTRACER_LINE_PRESERVE for everything else; preserved
+ * lines are later reported as unexpected.
+ * NOTE(review): FTRACER_EXIT presumably stops the tracer - confirm in ftrace.c.
+ */
+enum ftracer_op {
+ FTRACER_LINE_DISCARD = 0,
+ FTRACER_LINE_PRESERVE,
+ FTRACER_EXIT,
+};
+
+extern struct test_ftracer *create_ftracer(const char *name,
+ enum ftracer_op (*process_line)(const char *line),
+ void (*destructor)(struct test_ftracer *tracer),
+ bool (*expecting_more)(void),
+ size_t lines_buf_sz, size_t buffer_size_kb);
+extern int setup_trace_event(struct test_ftracer *tracer,
+ const char *event, const char *filter);
+extern void destroy_ftracer(struct test_ftracer *tracer);
+extern const size_t tracer_get_savedlines_nr(struct test_ftracer *tracer);
+extern const char **tracer_get_savedlines(struct test_ftracer *tracer);
+
+/*
+ * TCP trace events the library can expect. The values are used as indexes
+ * into trace_event_names[] in ftrace-tcp.c, so both lists have to stay in
+ * the same order; __MAX_TRACE_EVENTS is the size of that array.
+ */
+enum trace_events {
+ /* TCP_HASH_EVENT */
+ TCP_HASH_BAD_HEADER = 0,
+ TCP_HASH_MD5_REQUIRED,
+ TCP_HASH_MD5_UNEXPECTED,
+ TCP_HASH_MD5_MISMATCH,
+ TCP_HASH_AO_REQUIRED,
+ /* TCP_AO_EVENT */
+ TCP_AO_HANDSHAKE_FAILURE,
+ TCP_AO_WRONG_MACLEN,
+ TCP_AO_MISMATCH,
+ TCP_AO_KEY_NOT_FOUND,
+ TCP_AO_RNEXT_REQUEST,
+ /* TCP_AO_EVENT_SK */
+ TCP_AO_SYNACK_NO_KEY,
+ /* TCP_AO_EVENT_SNE */
+ TCP_AO_SND_SNE_UPDATE,
+ TCP_AO_RCV_SNE_UPDATE,
+ __MAX_TRACE_EVENTS
+};
+
+extern int __trace_event_expect(enum trace_events type, int family,
+ union tcp_addr src, union tcp_addr dst,
+ int src_port, int dst_port, int L3index,
+ int fin, int syn, int rst, int psh, int ack,
+ int keyid, int rnext, int maclen, int sne);
+
+/*
+ * Expect a TCP_HASH_* style event: addresses, ports, L3index and TCP flags
+ * are taken from the arguments, keyid/rnext/maclen/sne are passed as -1.
+ * Calls test_error() if the expectation can't be registered.
+ */
+static inline void trace_hash_event_expect(enum trace_events type,
+				union tcp_addr src, union tcp_addr dst,
+				int src_port, int dst_port, int L3index,
+				int fin, int syn, int rst, int psh, int ack)
+{
+	int rc = __trace_event_expect(type, TEST_FAMILY, src, dst,
+				      src_port, dst_port, L3index,
+				      fin, syn, rst, psh, ack,
+				      -1, -1, -1, -1);
+
+	if (rc != 0)
+		test_error("Couldn't add a trace event: %d", rc);
+}
+
+/*
+ * Expect a TCP_AO_* packet event: like trace_hash_event_expect(), plus the
+ * keyid/rnext/maclen fields; sne is passed as -1.
+ * Calls test_error() if the expectation can't be registered.
+ */
+static inline void trace_ao_event_expect(enum trace_events type,
+				union tcp_addr src, union tcp_addr dst,
+				int src_port, int dst_port, int L3index,
+				int fin, int syn, int rst, int psh, int ack,
+				int keyid, int rnext, int maclen)
+{
+	int rc = __trace_event_expect(type, TEST_FAMILY, src, dst,
+				      src_port, dst_port, L3index,
+				      fin, syn, rst, psh, ack,
+				      keyid, rnext, maclen, -1);
+
+	if (rc != 0)
+		test_error("Couldn't add a trace event: %d", rc);
+}
+
+/*
+ * Expect a socket-level TCP-AO event: only addresses, ports, keyid and rnext
+ * are given; L3index, the TCP flags, maclen and sne are passed as -1.
+ * Calls test_error() if the expectation can't be registered.
+ */
+static inline void trace_ao_event_sk_expect(enum trace_events type,
+				union tcp_addr src, union tcp_addr dst,
+				int src_port, int dst_port,
+				int keyid, int rnext)
+{
+	int rc = __trace_event_expect(type, TEST_FAMILY, src, dst,
+				      src_port, dst_port, -1,
+				      -1, -1, -1, -1, -1,
+				      keyid, rnext, -1, -1);
+
+	if (rc != 0)
+		test_error("Couldn't add a trace event: %d", rc);
+}
+
+/*
+ * Expect an SNE update event: only addresses, ports and sne are given, every
+ * other field is passed as -1.
+ * Calls test_error() if the expectation can't be registered.
+ */
+static inline void trace_ao_event_sne_expect(enum trace_events type,
+				union tcp_addr src, union tcp_addr dst,
+				int src_port, int dst_port, int sne)
+{
+	int rc = __trace_event_expect(type, TEST_FAMILY, src, dst,
+				      src_port, dst_port, -1,
+				      -1, -1, -1, -1, -1,
+				      -1, -1, -1, sne);
+
+	if (rc != 0)
+		test_error("Couldn't add a trace event: %d", rc);
+}
+
+extern int setup_aolib_ftracer(void);
+
#endif /* _AOLIB_H_ */
diff --git a/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c b/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c
new file mode 100644
index 000000000000..24380c68fec6
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c
@@ -0,0 +1,559 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <pthread.h>
+#include "aolib.h"
+
+/* Trace event name for every enum trace_events value */
+static const char *trace_event_names[__MAX_TRACE_EVENTS] = {
+	/* TCP_HASH_EVENT */
+	[TCP_HASH_BAD_HEADER]		= "tcp_hash_bad_header",
+	[TCP_HASH_MD5_REQUIRED]		= "tcp_hash_md5_required",
+	[TCP_HASH_MD5_UNEXPECTED]	= "tcp_hash_md5_unexpected",
+	[TCP_HASH_MD5_MISMATCH]		= "tcp_hash_md5_mismatch",
+	[TCP_HASH_AO_REQUIRED]		= "tcp_hash_ao_required",
+	/* TCP_AO_EVENT */
+	[TCP_AO_HANDSHAKE_FAILURE]	= "tcp_ao_handshake_failure",
+	[TCP_AO_WRONG_MACLEN]		= "tcp_ao_wrong_maclen",
+	[TCP_AO_MISMATCH]		= "tcp_ao_mismatch",
+	[TCP_AO_KEY_NOT_FOUND]		= "tcp_ao_key_not_found",
+	[TCP_AO_RNEXT_REQUEST]		= "tcp_ao_rnext_request",
+	/* TCP_AO_EVENT_SK */
+	[TCP_AO_SYNACK_NO_KEY]		= "tcp_ao_synack_no_key",
+	/* TCP_AO_EVENT_SNE */
+	[TCP_AO_SND_SNE_UPDATE]		= "tcp_ao_snd_sne_update",
+	[TCP_AO_RCV_SNE_UPDATE]		= "tcp_ao_rcv_sne_update",
+};
+
+/* One expectation registered through __trace_event_expect() */
+struct expected_trace_point {
+ /* required: always compared against the parsed trace line */
+ enum trace_events type;
+ int family;
+ union tcp_addr src;
+ union tcp_addr dst;
+
+ /* optional: a negative value makes the matcher skip the field */
+ int src_port;
+ int dst_port;
+ int L3index;
+
+ int fin;
+ int syn;
+ int rst;
+ int psh;
+ int ack;
+
+ int keyid;
+ int rnext;
+ int maclen;
+ int sne;
+
+ /* incremented each time a trace line matches this expectation */
+ size_t matched;
+};
+
+static struct expected_trace_point *exp_tps;
+static size_t exp_tps_nr;
+static size_t exp_tps_size;
+static pthread_mutex_t exp_tps_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/*
+ * Append one expectation to the exp_tps[] array, growing it on demand.
+ * @type, @family, @src and @dst are always compared by the matcher; the
+ * ports, @L3index, the TCP flags, @keyid, @rnext, @maclen and @sne may be
+ * negative to make the matcher skip them.
+ *
+ * Returns 0 on success (also when the kernel config lacks ftrace, in which
+ * case nothing is recorded) or -ENOMEM if the array can't be grown.
+ */
+int __trace_event_expect(enum trace_events type, int family,
+			 union tcp_addr src, union tcp_addr dst,
+			 int src_port, int dst_port, int L3index,
+			 int fin, int syn, int rst, int psh, int ack,
+			 int keyid, int rnext, int maclen, int sne)
+{
+	struct expected_trace_point new_tp = {
+		.type		= type,
+		.family		= family,
+		.src		= src,
+		.dst		= dst,
+		.src_port	= src_port,
+		.dst_port	= dst_port,
+		.L3index	= L3index,
+		.fin		= fin,
+		.syn		= syn,
+		.rst		= rst,
+		.psh		= psh,
+		.ack		= ack,
+		.keyid		= keyid,
+		.rnext		= rnext,
+		.maclen		= maclen,
+		.sne		= sne,
+		.matched	= 0,
+	};
+	int ret = 0;
+
+	if (!kernel_config_has(KCONFIG_FTRACE))
+		return 0;
+
+	pthread_mutex_lock(&exp_tps_mutex);
+	if (exp_tps_nr == exp_tps_size) {
+		struct expected_trace_point *tmp;
+		size_t new_size;
+
+		/* Start with 10 slots, then grow by a factor of 1.6 */
+		if (exp_tps_size == 0)
+			new_size = 10;
+		else
+			new_size = exp_tps_size + exp_tps_size * 3 / 5;
+
+		tmp = reallocarray(exp_tps, new_size, sizeof(exp_tps[0]));
+		if (!tmp) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		/*
+		 * Publish the new capacity only once the array really grew:
+		 * otherwise a later call would store past the old allocation.
+		 */
+		exp_tps = tmp;
+		exp_tps_size = new_size;
+	}
+	exp_tps[exp_tps_nr] = new_tp;
+	exp_tps_nr++;
+out:
+	pthread_mutex_unlock(&exp_tps_mutex);
+	return ret;
+}
+
+/* Release the expectations array and reset its bookkeeping. */
+static void free_expected_events(void)
+{
+	/* We're from the process destructor - not taking the mutex */
+	free(exp_tps); /* free first: clearing the pointer first leaks the array */
+	exp_tps = NULL;
+	exp_tps_size = 0;
+	exp_tps_nr = 0; /* no stale count left to index the freed array */
+}
+
+/*
+ * Fields parsed from one trace line by tracer_scan_event(); which of them
+ * are filled in depends on the event type.
+ */
+struct trace_point {
+ int family;
+ union tcp_addr src;
+ union tcp_addr dst;
+ unsigned int src_port;
+ unsigned int dst_port;
+ int L3index;
+ /* TCP flags: 1 when the flag character was present in the line */
+ unsigned int fin:1,
+ syn:1,
+ rst:1,
+ psh:1,
+ ack:1;
+
+ unsigned int keyid;
+ unsigned int rnext;
+ unsigned int maclen;
+
+ unsigned int sne;
+};
+
+/*
+ * Find the first registered expectation that the parsed event @e of type
+ * @event_type satisfies and bump its match counter. Expectation fields that
+ * are negative are not compared. Returns true if an expectation matched.
+ */
+static bool lookup_expected_event(int event_type, struct trace_point *e)
+{
+	bool found = false;
+	size_t idx;
+
+	pthread_mutex_lock(&exp_tps_mutex);
+	for (idx = 0; idx < exp_tps_nr && !found; idx++) {
+		struct expected_trace_point *p = &exp_tps[idx];
+		size_t addr_len;
+
+		if (p->type != event_type || p->family != e->family)
+			continue;
+
+		addr_len = (p->family == AF_INET) ? sizeof(p->src.a4)
+						  : sizeof(p->src.a6);
+		if (memcmp(&p->src, &e->src, addr_len) ||
+		    memcmp(&p->dst, &e->dst, addr_len))
+			continue;
+
+		if ((p->src_port >= 0 && p->src_port != e->src_port) ||
+		    (p->dst_port >= 0 && p->dst_port != e->dst_port) ||
+		    (p->L3index >= 0 && p->L3index != e->L3index))
+			continue;
+
+		if ((p->fin >= 0 && p->fin != e->fin) ||
+		    (p->syn >= 0 && p->syn != e->syn) ||
+		    (p->rst >= 0 && p->rst != e->rst) ||
+		    (p->psh >= 0 && p->psh != e->psh) ||
+		    (p->ack >= 0 && p->ack != e->ack))
+			continue;
+
+		if ((p->keyid >= 0 && p->keyid != e->keyid) ||
+		    (p->rnext >= 0 && p->rnext != e->rnext) ||
+		    (p->maclen >= 0 && p->maclen != e->maclen) ||
+		    (p->sne >= 0 && p->sne != e->sne))
+			continue;
+
+		p->matched++;
+		found = true;
+	}
+	pthread_mutex_unlock(&exp_tps_mutex);
+
+	return found;
+}
+
+/*
+ * Return the enum trace_events value whose name @line starts with, or -1.
+ * A linear scan instead of a set/hashmap: it's a selftest, so... KISS.
+ */
+static int check_event_type(const char *line)
+{
+	size_t type = 0;
+
+	while (type < __MAX_TRACE_EVENTS) {
+		const char *name = trace_event_names[type];
+
+		if (strncmp(name, line, strlen(name)) == 0)
+			return type;
+		type++;
+	}
+	return -1;
+}
+
+/* Tell whether trace lines of @event carry the [FSRP.] TCP flags field */
+static bool event_has_flags(enum trace_events event)
+{
+	static const bool with_flags[__MAX_TRACE_EVENTS] = {
+		[TCP_HASH_BAD_HEADER]		= true,
+		[TCP_HASH_MD5_REQUIRED]		= true,
+		[TCP_HASH_MD5_UNEXPECTED]	= true,
+		[TCP_HASH_MD5_MISMATCH]		= true,
+		[TCP_HASH_AO_REQUIRED]		= true,
+		[TCP_AO_HANDSHAKE_FAILURE]	= true,
+		[TCP_AO_WRONG_MACLEN]		= true,
+		[TCP_AO_MISMATCH]		= true,
+		[TCP_AO_KEY_NOT_FOUND]		= true,
+		[TCP_AO_RNEXT_REQUEST]		= true,
+	};
+
+	/* Anything outside of the table has no flags */
+	if ((unsigned int)event >= __MAX_TRACE_EVENTS)
+		return false;
+	return with_flags[event];
+}
+
+/*
+ * Split an "addr:port" (AF_INET) or "[addr]:port" (AF_INET6) token in place:
+ * the separators in @src are overwritten with NULs and *@addr / *@port are
+ * pointed into @src. Returns 0, -EINVAL on malformed input or -EAFNOSUPPORT
+ * for any other @family.
+ */
+static int tracer_ip_split(int family, char *src, char **addr, char **port)
+{
+	char *open_br, *close_br, *colon;
+
+	switch (family) {
+	case AF_INET:
+		/* format is <addr>:port, i.e.: 10.0.254.1:7015 */
+		*addr = src;
+		colon = strchr(src, ':');
+		if (colon == NULL) {
+			test_print("Couldn't parse trace event addr:port %s", src);
+			return -EINVAL;
+		}
+		*colon = '\0';
+		*port = colon + 1;
+		return 0;
+	case AF_INET6:
+		break;
+	default:
+		return -EAFNOSUPPORT;
+	}
+
+	/* format is [<addr>]:port, i.e.: [2001:db8:254::1]:7013 */
+	open_br = strchr(src, '[');
+	close_br = strchr(src, ']');
+	*addr = open_br;
+	if (close_br == NULL || open_br == NULL) {
+		test_print("Couldn't parse trace event [addr]:port %s", src);
+		return -EINVAL;
+	}
+
+	*addr = open_br + 1;	/* skip '[' */
+	*close_br = '\0';	/* cut at ']' */
+	colon = close_br + 1;
+	if (*colon != ':') {
+		test_print("Couldn't parse trace event :port %s", colon);
+		return -EINVAL;
+	}
+	*colon = '\0';
+	*port = colon + 1;
+	return 0;
+}
+
+/*
+ * Parse an "addr:port" / "[addr]:port" token from a trace line.
+ * @src is modified in place by the split. On success the binary address is
+ * stored into @dst, the port number into @port and 0 is returned; otherwise
+ * a negative errno value is returned.
+ */
+static int tracer_scan_address(int family, char *src,
+			       union tcp_addr *dst, unsigned int *port)
+{
+	char *addr, *port_str, *end;
+	unsigned long val;
+	int ret;
+
+	ret = tracer_ip_split(family, src, &addr, &port_str);
+	if (ret)
+		return ret;
+
+	if (inet_pton(family, addr, dst) != 1) {
+		test_print("Couldn't parse trace event addr %s", addr);
+		return -EINVAL;
+	}
+
+	errno = 0;
+	val = strtoul(port_str, &end, 10);
+	/*
+	 * strtoul() reports only overflow through errno: an empty string or
+	 * trailing garbage has to be detected with the end pointer. Also
+	 * reject values that can't be a port number.
+	 */
+	if (errno != 0 || end == port_str || *end != '\0' || val > 65535) {
+		/* Save errno: printing the message may overwrite it */
+		ret = errno ? -errno : -EINVAL;
+		test_print("Couldn't parse trace event port %s", port_str);
+		return ret;
+	}
+	*port = (unsigned int)val;
+	return 0;
+}
+
+/*
+ * Parse the trace line @line of the event type @event into @out.
+ * Which fields are expected in the line depends on the event type.
+ * Returns 0 on success, -1 for an unknown event type, and -EINVAL if the
+ * line doesn't have the expected layout, a field can't be parsed, or the
+ * line belongs to a network namespace other than the two test ones.
+ */
+static int tracer_scan_event(const char *line, enum trace_events event,
+			     struct trace_point *out)
+{
+	char *src = NULL, *dst = NULL, *family = NULL;
+	char fin = ' ', syn = ' ', rst = ' ', psh = ' ', ack = ' ';
+	int nr_matched, nr_expected, ret = 0;
+	uint64_t netns_cookie = 0;
+
+	switch (event) {
+	case TCP_HASH_BAD_HEADER:
+	case TCP_HASH_MD5_REQUIRED:
+	case TCP_HASH_MD5_UNEXPECTED:
+	case TCP_HASH_MD5_MISMATCH:
+	case TCP_HASH_AO_REQUIRED:
+		nr_expected = 10;
+		nr_matched = sscanf(line, "%*s net=%" PRIu64 " state%*s family=%ms src=%ms dest=%ms L3index=%d [%c%c%c%c%c]",
+				    &netns_cookie, &family,
+				    &src, &dst, &out->L3index,
+				    &fin, &syn, &rst, &psh, &ack);
+		break;
+	case TCP_AO_HANDSHAKE_FAILURE:
+	case TCP_AO_WRONG_MACLEN:
+	case TCP_AO_MISMATCH:
+	case TCP_AO_KEY_NOT_FOUND:
+	case TCP_AO_RNEXT_REQUEST:
+		nr_expected = 13;
+		nr_matched = sscanf(line, "%*s net=%" PRIu64 " state%*s family=%ms src=%ms dest=%ms L3index=%d [%c%c%c%c%c] keyid=%u rnext=%u maclen=%u",
+				    &netns_cookie, &family,
+				    &src, &dst, &out->L3index,
+				    &fin, &syn, &rst, &psh, &ack,
+				    &out->keyid, &out->rnext, &out->maclen);
+		break;
+	case TCP_AO_SYNACK_NO_KEY:
+		nr_expected = 6;
+		nr_matched = sscanf(line, "%*s net=%" PRIu64 " state%*s family=%ms src=%ms dest=%ms keyid=%u rnext=%u",
+				    &netns_cookie, &family,
+				    &src, &dst, &out->keyid, &out->rnext);
+		break;
+	case TCP_AO_SND_SNE_UPDATE:
+	case TCP_AO_RCV_SNE_UPDATE:
+		nr_expected = 5;
+		nr_matched = sscanf(line, "%*s net=%" PRIu64 " state%*s family=%ms src=%ms dest=%ms sne=%u",
+				    &netns_cookie, &family,
+				    &src, &dst, &out->sne);
+		break;
+	default:
+		return -1;
+	}
+
+	/*
+	 * A short match leaves the remaining output arguments untouched:
+	 * don't go on interpreting them, only free what sscanf() allocated.
+	 */
+	if (nr_matched != nr_expected) {
+		test_print("Couldn't parse trace event, matched = %d/%d",
+			   nr_matched, nr_expected);
+		ret = -EINVAL;
+		goto out_free;
+	}
+
+	if (family) {
+		if (!strcmp(family, "AF_INET")) {
+			out->family = AF_INET;
+		} else if (!strcmp(family, "AF_INET6")) {
+			out->family = AF_INET6;
+		} else {
+			test_print("Couldn't parse trace event family %s", family);
+			ret = -EINVAL;
+			goto out_free;
+		}
+	}
+
+	if (event_has_flags(event)) {
+		out->fin = (fin == 'F');
+		out->syn = (syn == 'S');
+		out->rst = (rst == 'R');
+		out->psh = (psh == 'P');
+		out->ack = (ack == '.');
+
+		/* Each position holds either its flag character or a space */
+		if ((fin != 'F' && fin != ' ') ||
+		    (syn != 'S' && syn != ' ') ||
+		    (rst != 'R' && rst != ' ') ||
+		    (psh != 'P' && psh != ' ') ||
+		    (ack != '.' && ack != ' ')) {
+			test_print("Couldn't parse trace event flags %c%c%c%c%c",
+				   fin, syn, rst, psh, ack);
+			ret = -EINVAL;
+			goto out_free;
+		}
+	}
+
+	if (src && tracer_scan_address(out->family, src, &out->src, &out->src_port)) {
+		ret = -EINVAL;
+		goto out_free;
+	}
+
+	if (dst && tracer_scan_address(out->family, dst, &out->dst, &out->dst_port)) {
+		ret = -EINVAL;
+		goto out_free;
+	}
+
+	if (netns_cookie != ns_cookie1 && netns_cookie != ns_cookie2) {
+		test_print("Net namespace filter for trace event didn't work: %" PRIu64 " != %" PRIu64 " OR %" PRIu64,
+			   netns_cookie, ns_cookie1, ns_cookie2);
+		ret = -EINVAL;
+	}
+
+out_free:
+	/* Strings allocated by the %ms conversions */
+	free(src);
+	free(dst);
+	free(family);
+	return ret;
+}
+
+/*
+ * Per-line tracer callback: a line is discarded only if it is a known event,
+ * parses cleanly and matches a registered expectation; any other line is
+ * preserved.
+ */
+static enum ftracer_op aolib_tracer_process_event(const char *line)
+{
+	struct trace_point parsed = {};
+	int type = check_event_type(line);
+
+	if (type >= 0 &&
+	    !tracer_scan_event(line, type, &parsed) &&
+	    lookup_expected_event(type, &parsed))
+		return FTRACER_LINE_DISCARD;
+
+	return FTRACER_LINE_PRESERVE;
+}
+
+/* Render a tri-state flag: set (> 0), required clear (0) or ignored (< 0) */
+static const char *tp_flag_str(int val, const char *set, const char *clear)
+{
+	if (val > 0)
+		return set;
+	return (val == 0) ? clear : "";
+}
+
+/* Print one expectation together with its match counter */
+static void dump_trace_event(struct expected_trace_point *e)
+{
+	char src_str[INET6_ADDRSTRLEN];
+	char dst_str[INET6_ADDRSTRLEN];
+
+	if (inet_ntop(e->family, &e->src, src_str, sizeof(src_str)) == NULL)
+		test_error("inet_ntop()");
+	if (inet_ntop(e->family, &e->dst, dst_str, sizeof(dst_str)) == NULL)
+		test_error("inet_ntop()");
+
+	test_print("trace event filter %s [%s:%d => %s:%d, L3index %d, flags: %s%s%s%s%s, keyid: %d, rnext: %d, maclen: %d, sne: %d] = %zu",
+		   trace_event_names[e->type],
+		   src_str, e->src_port, dst_str, e->dst_port, e->L3index,
+		   tp_flag_str(e->fin, "F", "!F"),
+		   tp_flag_str(e->syn, "S", "!S"),
+		   tp_flag_str(e->rst, "R", "!R"),
+		   tp_flag_str(e->psh, "P", "!P"),
+		   tp_flag_str(e->ack, ".", "!."),
+		   e->keyid, e->rnext, e->maclen, e->sne, e->matched);
+}
+
+/*
+ * Summarize how the registered expectations were matched.
+ * All expectations are dumped if something went wrong, i.e. there were
+ * @unexpected_events or an expectation was never matched. The test verdict
+ * is reported here only when there were no unexpected events.
+ */
+static void print_match_stats(bool unexpected_events)
+{
+	size_t matches_per_type[__MAX_TRACE_EVENTS] = {};
+	bool expected_but_none = false;
+	size_t i, total_matched = 0;
+	char *stat_line = NULL;
+
+	for (i = 0; i < exp_tps_nr; i++) {
+		struct expected_trace_point *e = &exp_tps[i];
+
+		total_matched += e->matched;
+		matches_per_type[e->type] += e->matched;
+		if (!e->matched)
+			expected_but_none = true;
+	}
+	for (i = 0; i < __MAX_TRACE_EVENTS; i++) {
+		char *longer;
+
+		if (!matches_per_type[i])
+			continue;
+		longer = test_sprintf("%s%s[%zu] ", stat_line ?: "",
+				      trace_event_names[i],
+				      matches_per_type[i]);
+		/* The old string was copied into the new one: don't leak it */
+		free(stat_line);
+		stat_line = longer;
+		if (!stat_line)
+			test_error("test_sprintf()");
+	}
+
+	if (unexpected_events || expected_but_none) {
+		for (i = 0; i < exp_tps_nr; i++)
+			dump_trace_event(&exp_tps[i]);
+	}
+
+	if (unexpected_events)
+		goto out;
+
+	if (expected_but_none)
+		test_fail("Some trace events were expected, but didn't occur");
+	else if (total_matched)
+		test_ok("Trace events matched expectations: %zu %s",
+			total_matched, stat_line);
+	else
+		test_ok("No unexpected trace events during the test run");
+out:
+	free(stat_line);
+}
+
+#define dump_events(fmt, ...)				\
+	__test_print(__test_msg, fmt, ##__VA_ARGS__)
+/*
+ * Report the match statistics and then every trace line the tracer saved,
+ * walking the saved lines from the last index down to 0.
+ */
+static void check_free_events(struct test_ftracer *tracer)
+{
+	const char **saved;
+	size_t left;
+
+	if (!kernel_config_has(KCONFIG_FTRACE)) {
+		test_skip("kernel config doesn't have ftrace - no checks");
+		return;
+	}
+
+	left = tracer_get_savedlines_nr(tracer);
+	saved = tracer_get_savedlines(tracer);
+	print_match_stats(left != 0);
+	if (left == 0)
+		return;
+
+	errno = 0;
+	test_xfail("Trace events [%zu] were not expected:", left);
+	for (; left > 0; left--)
+		dump_events("\t%s", saved[left - 1]);
+}
+
+/*
+ * Enable every event from trace_event_names[] on @tracer, filtered by the
+ * net cookies of the two test namespaces.
+ * Returns 0 on success, -ENOMEM if a string can't be allocated, or the
+ * error returned by setup_trace_event().
+ */
+static int setup_tcp_trace_events(struct test_ftracer *tracer)
+{
+	char *filter;
+	int ret = 0;
+	size_t i;
+
+	/* The cookies are uint64_t: %zu is the wrong conversion on 32-bit */
+	filter = test_sprintf("net_cookie == %" PRIu64 " || net_cookie == %" PRIu64,
+			      ns_cookie1, ns_cookie2);
+	if (!filter)
+		return -ENOMEM;
+
+	for (i = 0; i < __MAX_TRACE_EVENTS; i++) {
+		char *event_name = test_sprintf("tcp/%s", trace_event_names[i]);
+
+		if (!event_name) {
+			ret = -ENOMEM;
+			break;
+		}
+		ret = setup_trace_event(tracer, event_name, filter);
+		free(event_name);
+		if (ret)
+			break;
+	}
+
+	free(filter);
+	return ret;
+}
+
+/*
+ * Tracer destructor callback: report how the recorded trace lines compare
+ * with the expectations, then release the expectations array.
+ */
+static void aolib_tracer_destroy(struct test_ftracer *tracer)
+{
+ check_free_events(tracer);
+ free_expected_events();
+}
+
+/* Return true while at least one registered expectation has no match yet */
+static bool aolib_tracer_expecting_more(void)
+{
+	const struct expected_trace_point *tp = exp_tps;
+	size_t left;
+
+	for (left = exp_tps_nr; left > 0; left--, tp++) {
+		if (tp->matched == 0)
+			return true;
+	}
+	return false;
+}
+
+/*
+ * Create the "aolib" tracer with the aolib callbacks and enable the TCP
+ * trace events on it. Returns 0 on success, -1 if the tracer can't be
+ * created, or the error from setting up the trace events.
+ */
+int setup_aolib_ftracer(void)
+{
+	struct test_ftracer *f;
+
+	/* create_ftracer() takes lines_buf_sz first and buffer_size_kb last */
+	f = create_ftracer("aolib", aolib_tracer_process_event,
+			   aolib_tracer_destroy, aolib_tracer_expecting_more,
+			   DEFAULT_TRACER_LINES_ARR, DEFAULT_FTRACE_BUFFER_KB);
+	if (!f)
+		return -1;
+
+	return setup_tcp_trace_events(f);
+}
diff --git a/tools/testing/selftests/net/tcp_ao/lib/ftrace.c b/tools/testing/selftests/net/tcp_ao/lib/ftrace.c
new file mode 100644
index 000000000000..e4d0b173bc94
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/lib/ftrace.c
@@ -0,0 +1,543 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mount.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include "../../../../../include/linux/kernel.h"
+#include "aolib.h"
+
+/* mkdtemp() template, turned into the tracefs mount point by mount_ftrace() */
+static char ftrace_path[] = "ksft-ftrace-XXXXXX";
+/* set once tracefs was successfully mounted on ftrace_path */
+static bool ftrace_mounted;
+/* net-namespace cookies filled in by test_init_ftrace(), used in event filters */
+uint64_t ns_cookie1, ns_cookie2;
+
+/* One ftrace instance together with the thread that reads its trace_pipe. */
+struct test_ftracer {
+ /* thread running tracer_thread_func() */
+ pthread_t tracer_thread;
+ /* first error recorded by tracer_set_error(), 0 if none */
+ int error;
+ /* heap-allocated path of the instance directory created with mkdtemp() */
+ char *instance_path;
+ /* stream opened on <instance_path>/trace_pipe */
+ FILE *trace_pipe;
+
+ /* called for every line read; the result decides the line's fate */
+ enum ftracer_op (*process_line)(const char *line);
+ /* optional, called from __destroy_ftracer() */
+ void (*destructor)(struct test_ftracer *tracer);
+ /* optional, returns true while more events are awaited */
+ bool (*expecting_more)(void);
+
+ /* array of saved_lines_size line pointers filled by getline() */
+ char **saved_lines;
+ size_t saved_lines_size;
+ /* index of the next free slot == number of preserved lines */
+ size_t next_line_ind;
+
+ /* signalled by the tracer thread once expecting_more() returns false */
+ pthread_cond_t met_all_expected;
+ pthread_mutex_t met_all_expected_lock;
+
+ /* link in the ftracers list */
+ struct test_ftracer *next;
+};
+
+/* singly-linked list of live tracers, protected by ftracers_lock */
+static struct test_ftracer *ftracers;
+static pthread_mutex_t ftracers_lock = PTHREAD_MUTEX_INITIALIZER;
+
+/*
+ * Create a temporary directory from the ftrace_path template and mount
+ * tracefs on it. Failing to create the directory is reported through
+ * test_error(); a failed mount() is returned as negative errno.
+ */
+static int mount_ftrace(void)
+{
+	int err = 0;
+
+	if (mkdtemp(ftrace_path) == NULL)
+		test_error("Can't create temp dir");
+
+	if (mount("tracefs", ftrace_path, "tracefs", 0, "rw") == 0)
+		ftrace_mounted = true;
+	else
+		err = -errno;
+
+	return err;
+}
+
+/*
+ * Undo mount_ftrace(): unmount tracefs if it was mounted (a failure is
+ * only logged) and remove the temporary directory (a failure goes to
+ * test_error()).
+ */
+static void unmount_ftrace(void)
+{
+	if (ftrace_mounted) {
+		if (umount(ftrace_path) != 0)
+			test_print("Failed on cleanup: can't unmount tracefs: %m");
+	}
+
+	if (rmdir(ftrace_path) != 0)
+		test_error("Failed on cleanup: can't remove ftrace dir %s",
+			   ftrace_path);
+}
+
+/* Node of the list of option strings disable_trace_options() writes back. */
+struct opts_list_t {
+ /* heap-allocated "no<option>" string built from a trace_options line */
+ char *opt_name;
+ struct opts_list_t *next;
+};
+
+/*
+ * Turn off every trace option that is currently enabled in
+ * <ftrace_path>/trace_options.
+ *
+ * The file is read first, collecting a "no<option>" string for each line
+ * that does not already start with "no"; the strings are then written back
+ * one at a time. Writes are best effort: their results are not checked.
+ *
+ * Returns 0, -ENOMEM or the negative errno of a failed fopen(). Nothing is
+ * written if collecting the options failed.
+ */
+static int disable_trace_options(const char *ftrace_path)
+{
+	struct opts_list_t *head = NULL, *node;
+	char *opts_path, *buf = NULL;
+	size_t buf_sz = 0;
+	int err = 0;
+	FILE *fp;
+
+	opts_path = test_sprintf("%s/%s", ftrace_path, "trace_options");
+	if (!opts_path)
+		return -ENOMEM;
+
+	fp = fopen(opts_path, "r+");
+	if (!fp) {
+		err = -errno;
+		goto out_free_path;
+	}
+
+	while (getline(&buf, &buf_sz, fp) != -1) {
+		/* already disabled */
+		if (strncmp(buf, "no", 2) == 0)
+			continue;
+
+		node = malloc(sizeof(*node));
+		if (!node) {
+			err = -ENOMEM;
+			break;
+		}
+		node->opt_name = test_sprintf("no%s", buf);
+		if (!node->opt_name) {
+			free(node);
+			err = -ENOMEM;
+			break;
+		}
+		node->next = head;
+		head = node;
+	}
+
+	/* Write the options back (only if all were collected) and free the list */
+	while (head) {
+		node = head;
+		head = node->next;
+
+		if (!err) {
+			/* the seek also separates the writes of single options */
+			fseek(fp, 0, SEEK_SET);
+			fwrite(node->opt_name, 1, strlen(node->opt_name), fp);
+		}
+		free(node->opt_name);
+		free(node);
+	}
+
+	free(buf);
+	fclose(fp);
+out_free_path:
+	free(opts_path);
+	return err;
+}
+
+/*
+ * Write @sz to <ftrace_path>/buffer_size_kb.
+ *
+ * Returns 0 on success, -ENOMEM if the path can't be allocated (like the
+ * other helpers in this file do), or the error from test_echo().
+ */
+static int setup_buffer_size(const char *ftrace_path, size_t sz)
+{
+	char *fbuf_size = test_sprintf("%s/buffer_size_kb", ftrace_path);
+	int ret;
+
+	if (!fbuf_size)
+		return -ENOMEM;
+
+	ret = test_echo(fbuf_size, 0, "%zu", sz);
+	free(fbuf_size);
+	return ret;
+}
+
+/*
+ * Create a uniquely named ftrace instance directory
+ * <ftrace_path>/instances/ksft-<name>-XXXXXX and remember its heap-allocated
+ * path in tracer->instance_path.
+ *
+ * Returns 0, -ENOMEM or the negative errno of a failed mkdtemp().
+ */
+static int setup_ftrace_instance(struct test_ftracer *tracer, const char *name)
+{
+	char *tmp;
+
+	tmp = test_sprintf("%s/instances/ksft-%s-XXXXXX", ftrace_path, name);
+	if (!tmp)
+		return -ENOMEM;
+
+	/* on success mkdtemp() returns @tmp itself, modified in place */
+	tracer->instance_path = mkdtemp(tmp);
+	if (!tracer->instance_path) {
+		/* save errno before free(), which may modify it */
+		int err = -errno;
+
+		free(tmp);
+		return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Remove the instance directory of @tracer (a failure is only logged) and
+ * free the path string.
+ */
+static void remove_ftrace_instance(struct test_ftracer *tracer)
+{
+	char *path = tracer->instance_path;
+
+	if (rmdir(path) != 0)
+		test_print("Failed on cleanup: can't remove ftrace instance %s",
+			   path);
+	free(path);
+}
+
+/*
+ * Cleanup handler of the tracer thread (see pthread_cleanup_push() in
+ * tracer_thread_func()): closes the trace_pipe stream.
+ * @arg is the struct test_ftracer of the thread.
+ */
+static void tracer_cleanup(void *arg)
+{
+ struct test_ftracer *tracer = arg;
+
+ fclose(tracer->trace_pipe);
+}
+
+static void tracer_set_error(struct test_ftracer *tracer, int error)
+{
+ if (!tracer->error)
+ tracer->error = error;
+}
+
+/* Number of lines preserved so far (the index of the next free slot). */
+const size_t tracer_get_savedlines_nr(struct test_ftracer *tracer)
+{
+ return tracer->next_line_ind;
+}
+
+/*
+ * Read-only view of the saved-lines array; the tracer keeps ownership.
+ * Use tracer_get_savedlines_nr() for the number of preserved lines.
+ */
+const char **tracer_get_savedlines(struct test_ftracer *tracer)
+{
+ return (const char **)tracer->saved_lines;
+}
+
+/*
+ * Body of the tracer thread: reads trace_pipe line by line into the next
+ * free slot of saved_lines[] and hands each line to process_line().
+ *
+ *  FTRACER_LINE_PRESERVE - keep the line, advance to the next slot
+ *  FTRACER_LINE_DISCARD  - reuse the slot (and its buffer) for the next line
+ *  FTRACER_EXIT          - stop reading
+ *
+ * After every line the waiter in final_wait_for_events() is signalled if
+ * expecting_more() reports that nothing else is awaited.
+ *
+ * The loop ends when the slots run out, on getline() failure or on
+ * FTRACER_EXIT; normally the thread is cancelled by stop_trace_thread().
+ * The pipe is closed by the tracer_cleanup() handler in either case.
+ */
+static void *tracer_thread_func(void *arg)
+{
+	struct test_ftracer *tracer = arg;
+	/*
+	 * Capacity of the buffer in the current slot. It has to survive
+	 * iterations: calling getline() with *n == 0 on a slot that already
+	 * holds a buffer makes glibc allocate a new one and leak the old.
+	 */
+	size_t buf_len = 0;
+
+	pthread_cleanup_push(tracer_cleanup, arg);
+
+	while (tracer->next_line_ind < tracer->saved_lines_size) {
+		char **lp = &tracer->saved_lines[tracer->next_line_ind];
+		enum ftracer_op op;
+		ssize_t line_len;
+
+		line_len = getline(lp, &buf_len, tracer->trace_pipe);
+		if (line_len == -1)
+			break;
+
+		/* don't let the thread be cancelled inside the callback */
+		pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
+		op = tracer->process_line(*lp);
+		pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
+
+		if (tracer->expecting_more) {
+			pthread_mutex_lock(&tracer->met_all_expected_lock);
+			if (!tracer->expecting_more())
+				pthread_cond_signal(&tracer->met_all_expected);
+			pthread_mutex_unlock(&tracer->met_all_expected_lock);
+		}
+
+		/* the buffer in *lp is reused by the next getline() */
+		if (op == FTRACER_LINE_DISCARD)
+			continue;
+		if (op == FTRACER_EXIT)
+			break;
+		if (op != FTRACER_LINE_PRESERVE)
+			test_error("unexpected tracer command %d", op);
+
+		/* the next slot is still empty: start with no buffer */
+		tracer->next_line_ind++;
+		buf_len = 0;
+	}
+
+	/* only complain if the loop really ended for the lack of slots */
+	if (tracer->next_line_ind >= tracer->saved_lines_size)
+		test_print("too many lines in ftracer buffer %zu, exiting tracer",
+			   tracer->next_line_ind);
+
+	pthread_cleanup_pop(1);
+	return NULL;
+}
+
+/*
+ * Open <instance_path>/trace_pipe and start the thread reading it.
+ *
+ * Returns 0 on success, -ENOMEM, the negative errno of a failed fopen() or
+ * the negated error number of a failed pthread_create().
+ */
+static int setup_trace_thread(struct test_ftracer *tracer)
+{
+	int ret = 0;
+	char *path;
+
+	path = test_sprintf("%s/trace_pipe", tracer->instance_path);
+	if (!path)
+		return -ENOMEM;
+
+	tracer->trace_pipe = fopen(path, "r");
+	if (!tracer->trace_pipe) {
+		ret = -errno;
+		goto out_free;
+	}
+
+	/*
+	 * pthread_create() returns the error number and doesn't set errno:
+	 * using -errno here could report a failure as success.
+	 */
+	ret = pthread_create(&tracer->tracer_thread, NULL,
+			     tracer_thread_func, (void *)tracer);
+	if (ret) {
+		ret = -ret;
+		fclose(tracer->trace_pipe);
+		tracer->trace_pipe = NULL;
+	}
+
+out_free:
+	free(path);
+	return ret;
+}
+
+/*
+ * Cancel and join the tracer thread. Any problem is logged and recorded
+ * with tracer_set_error(); if the tracer has an error recorded at the end,
+ * the test is failed.
+ *
+ * pthread_cancel()/pthread_join() return the error number instead of
+ * setting errno, so their return values are what gets reported.
+ */
+static void stop_trace_thread(struct test_ftracer *tracer)
+{
+	void *res = NULL;
+	int err;
+
+	err = pthread_cancel(tracer->tracer_thread);
+	if (err) {
+		test_print("Can't stop tracer pthread: %s", strerror(err));
+		tracer_set_error(tracer, -err);
+	}
+
+	err = pthread_join(tracer->tracer_thread, &res);
+	if (err) {
+		/* @res is not valid without a successful join */
+		test_print("Can't join tracer pthread: %s", strerror(err));
+		tracer_set_error(tracer, -err);
+	} else if (res != PTHREAD_CANCELED) {
+		/* the thread stopped reading the pipe on its own */
+		test_print("Tracer thread wasn't canceled");
+		tracer_set_error(tracer, -EIO);
+	}
+
+	/* errors are stored negated, strerror() wants the positive number */
+	if (tracer->error)
+		test_fail("tracer errored by %s", strerror(-tracer->error));
+}
+
+/*
+ * Give the tracer thread up to @timeout_sec seconds to see the events that
+ * expecting_more() still reports as awaited. Returns immediately if the
+ * tracer has no expecting_more() callback.
+ */
+static void final_wait_for_events(struct test_ftracer *tracer,
+				  unsigned timeout_sec)
+{
+	struct timespec timeout;
+	struct timeval now;
+	int ret = 0;
+
+	if (!tracer->expecting_more)
+		return;
+
+	pthread_mutex_lock(&tracer->met_all_expected_lock);
+	/* absolute deadline, as pthread_cond_timedwait() requires */
+	gettimeofday(&now, NULL);
+	timeout.tv_sec = now.tv_sec + timeout_sec;
+	timeout.tv_nsec = now.tv_usec * 1000;
+
+	/*
+	 * Stop on the timeout and on any other error as well: retrying on,
+	 * e.g. EINVAL would spin forever as the call fails immediately.
+	 */
+	while (tracer->expecting_more() && !ret)
+		ret = pthread_cond_timedwait(&tracer->met_all_expected,
+				&tracer->met_all_expected_lock, &timeout);
+	pthread_mutex_unlock(&tracer->met_all_expected_lock);
+}
+
+/*
+ * Set @filter on the trace event @event ("<subsystem>/<name>") of the
+ * tracer's instance and then enable the event.
+ *
+ * Returns 0 on success, -ENOMEM or the error from test_echo(). The event
+ * is not enabled if setting the filter failed.
+ */
+int setup_trace_event(struct test_ftracer *tracer,
+		      const char *event, const char *filter)
+{
+	const char *dir = tracer->instance_path;
+	char *enable_file, *filter_file;
+	int err = -ENOMEM;
+
+	enable_file = test_sprintf("%s/events/%s/enable", dir, event);
+	if (!enable_file)
+		return -ENOMEM;
+
+	filter_file = test_sprintf("%s/events/%s/filter", dir, event);
+	if (filter_file) {
+		err = test_echo(filter_file, 0, "%s", filter);
+		if (err == 0)
+			err = test_echo(enable_file, 0, "1");
+	}
+
+	free(filter_file);
+	free(enable_file);
+	return err;
+}
+
+/*
+ * Create an ftrace instance named after @name and start a thread reading
+ * its trace_pipe.
+ *
+ * @process_line:   called for every line read from the pipe
+ * @destructor:     optional, called when the tracer is destroyed
+ * @expecting_more: optional, returns true while events are still awaited
+ * @lines_buf_sz:   number of lines that can be preserved
+ * @buffer_size_kb: value written to the instance's buffer_size_kb
+ *
+ * On success the tracer is linked into the ftracers list and returned;
+ * on failure the reason is logged, everything is undone and NULL returned.
+ */
+struct test_ftracer *create_ftracer(const char *name,
+				enum ftracer_op (*process_line)(const char *line),
+				void (*destructor)(struct test_ftracer *tracer),
+				bool (*expecting_more)(void),
+				size_t lines_buf_sz, size_t buffer_size_kb)
+{
+	struct test_ftracer *tracer;
+	int err;
+
+	/* XXX: separate __create_ftracer() helper and do here
+	 * if (!kernel_config_has(KCONFIG_FTRACE))
+	 *	return NULL;
+	 */
+
+	tracer = malloc(sizeof(*tracer));
+	if (!tracer) {
+		test_print("malloc()");
+		return NULL;
+	}
+
+	memset(tracer, 0, sizeof(*tracer));
+
+	err = setup_ftrace_instance(tracer, name);
+	if (err) {
+		test_print("setup_ftrace_instance(): %d", err);
+		goto err_free;
+	}
+
+	err = disable_trace_options(tracer->instance_path);
+	if (err) {
+		test_print("disable_trace_options(): %d", err);
+		goto err_remove;
+	}
+
+	err = setup_buffer_size(tracer->instance_path, buffer_size_kb);
+	if (err) {
+		/* name the function that actually failed */
+		test_print("setup_buffer_size(): %d", err);
+		goto err_remove;
+	}
+
+	tracer->saved_lines = calloc(lines_buf_sz, sizeof(tracer->saved_lines[0]));
+	if (!tracer->saved_lines) {
+		test_print("calloc()");
+		goto err_remove;
+	}
+	tracer->saved_lines_size = lines_buf_sz;
+
+	tracer->process_line = process_line;
+	tracer->destructor = destructor;
+	tracer->expecting_more = expecting_more;
+
+	err = pthread_cond_init(&tracer->met_all_expected, NULL);
+	if (err) {
+		test_print("pthread_cond_init(): %d", err);
+		goto err_free_lines;
+	}
+
+	err = pthread_mutex_init(&tracer->met_all_expected_lock, NULL);
+	if (err) {
+		test_print("pthread_mutex_init(): %d", err);
+		goto err_cond_destroy;
+	}
+
+	/* everything the thread uses has to be set up before it starts */
+	err = setup_trace_thread(tracer);
+	if (err) {
+		test_print("setup_trace_thread(): %d", err);
+		goto err_mutex_destroy;
+	}
+
+	pthread_mutex_lock(&ftracers_lock);
+	tracer->next = ftracers;
+	ftracers = tracer;
+	pthread_mutex_unlock(&ftracers_lock);
+
+	return tracer;
+
+err_mutex_destroy:
+	pthread_mutex_destroy(&tracer->met_all_expected_lock);
+err_cond_destroy:
+	pthread_cond_destroy(&tracer->met_all_expected);
+err_free_lines:
+	free(tracer->saved_lines);
+err_remove:
+	remove_ftrace_instance(tracer);
+err_free:
+	free(tracer);
+	return NULL;
+}
+
+/*
+ * Tear down a tracer that is already unlinked from the ftracers list:
+ * wait up to TEST_TIMEOUT_SEC for still expected events, stop the thread,
+ * remove the ftrace instance, run the destructor and release all memory.
+ */
+static void __destroy_ftracer(struct test_ftracer *tracer)
+{
+	size_t i;
+
+	final_wait_for_events(tracer, TEST_TIMEOUT_SEC);
+	stop_trace_thread(tracer);
+	remove_ftrace_instance(tracer);
+	/* the destructor may still look at the saved lines */
+	if (tracer->destructor)
+		tracer->destructor(tracer);
+	for (i = 0; i < tracer->saved_lines_size; i++)
+		free(tracer->saved_lines[i]);
+	/* the array itself was allocated in create_ftracer() */
+	free(tracer->saved_lines);
+	pthread_cond_destroy(&tracer->met_all_expected);
+	pthread_mutex_destroy(&tracer->met_all_expected_lock);
+	free(tracer);
+}
+
+/*
+ * Unlink @tracer from the ftracers list and destroy it.
+ * A tracer that is not on the list (e.g. destroyed twice) is reported with
+ * test_error() and left untouched.
+ */
+void destroy_ftracer(struct test_ftracer *tracer)
+{
+	struct test_ftracer **pp;
+
+	pthread_mutex_lock(&ftracers_lock);
+	/* find the link pointing to @tracer; works for an empty list, too */
+	for (pp = &ftracers; *pp && *pp != tracer; pp = &(*pp)->next)
+		;
+	if (!*pp) {
+		/* don't report the error with the list lock held */
+		pthread_mutex_unlock(&ftracers_lock);
+		test_error("tracers list corruption or double free %p",
+			   (void *)tracer);
+		return;
+	}
+	*pp = tracer->next;
+	tracer->next = NULL;
+	pthread_mutex_unlock(&ftracers_lock);
+
+	__destroy_ftracer(tracer);
+}
+
+/*
+ * Detach the whole ftracers list under the lock and destroy the tracers
+ * one by one with the lock released.
+ */
+static void destroy_all_ftracers(void)
+{
+	struct test_ftracer *cur, *following;
+
+	pthread_mutex_lock(&ftracers_lock);
+	cur = ftracers;
+	ftracers = NULL;
+	pthread_mutex_unlock(&ftracers_lock);
+
+	for (; cur; cur = following) {
+		/* @cur is freed by __destroy_ftracer() */
+		following = cur->next;
+		cur->next = NULL;
+		__destroy_ftracer(cur);
+	}
+}
+
+/*
+ * Test destructor registered by test_setup_tracing(): destroys all tracers
+ * first, then unmounts tracefs and removes its temporary directory.
+ */
+static void test_unset_tracing(void)
+{
+ destroy_all_ftracers();
+ unmount_ftrace();
+}
+
+/*
+ * One-time setup of tracing for the test: mounts tracefs, registers the
+ * cleanup destructor and creates the aolib tracer.
+ *
+ * Returns 0 on success; -1 if called again or if the two net-namespace
+ * cookies are equal; otherwise the error of mount_ftrace() or
+ * setup_aolib_ftracer().
+ */
+int test_setup_tracing(void)
+{
+	/*
+	 * Guards against repeated calls only - this is expected to be called
+	 * once, from lib/kconfig. It is not thread safe, which is acceptable
+	 * that early, before any threads are created.
+	 */
+	static int already_set;
+	int err;
+
+	if (already_set)
+		return -1;
+
+	/* The event filters need distinct net-namespace cookies */
+	if (ns_cookie1 == ns_cookie2) {
+		test_print("net-namespace cookies: %" PRIu64 " == %" PRIu64 ", can't set up tracing",
+			   ns_cookie1, ns_cookie2);
+		return -1;
+	}
+
+	already_set = 1;
+
+	/* registered before mounting, so a failed setup is cleaned up as well */
+	test_add_destructor(test_unset_tracing);
+
+	err = mount_ftrace();
+	if (!err)
+		return setup_aolib_ftracer();
+
+	test_print("failed to mount_ftrace(): %d", err);
+	return err;
+}
+
+/*
+ * Store the cookie of the network namespace @nsfd in @out.
+ *
+ * The calling thread is switched into the namespace for the time of the
+ * query; the previous namespace is restored (and its fd closed) on every
+ * path, including the failing ones.
+ *
+ * Returns 0 or the negative errno of the failed socket()/getsockopt().
+ */
+static int get_ns_cookie(int nsfd, uint64_t *out)
+{
+	int old_ns = switch_save_ns(nsfd);
+	socklen_t size = sizeof(*out);
+	int ret = 0;
+	int sk;
+
+	sk = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+	if (sk < 0) {
+		/* save errno before further calls get a chance to change it */
+		ret = -errno;
+		test_print("socket(): %m");
+		goto out_restore_ns;
+	}
+
+	if (getsockopt(sk, SOL_SOCKET, SO_NETNS_COOKIE, out, &size)) {
+		ret = -errno;
+		test_print("getsockopt(SO_NETNS_COOKIE): %m");
+	}
+
+	close(sk);
+out_restore_ns:
+	switch_close_ns(old_ns);
+	return ret;
+}
+
+/*
+ * Fetch the cookies of the two test network namespaces into
+ * ns_cookie1/ns_cookie2 and trigger the ftrace kconfig check.
+ * The results of get_ns_cookie() are not checked here; test_setup_tracing()
+ * refuses to set up tracing if the two cookies end up equal.
+ */
+void test_init_ftrace(int nsfd1, int nsfd2)
+{
+ get_ns_cookie(nsfd1, &ns_cookie1);
+ get_ns_cookie(nsfd2, &ns_cookie2);
+ /* Populate kernel config state */
+ kernel_config_has(KCONFIG_FTRACE);
+}
diff --git a/tools/testing/selftests/net/tcp_ao/lib/kconfig.c b/tools/testing/selftests/net/tcp_ao/lib/kconfig.c
index f279ffc3843b..9f1c175846f8 100644
--- a/tools/testing/selftests/net/tcp_ao/lib/kconfig.c
+++ b/tools/testing/selftests/net/tcp_ao/lib/kconfig.c
@@ -6,7 +6,7 @@
#include "aolib.h"
struct kconfig_t {
- int _errno; /* the returned error if not supported */
+ int _error; /* negative errno if not supported */
int (*check_kconfig)(int *error);
};
@@ -62,7 +62,7 @@ static int has_tcp_ao(int *err)
memcpy(&tmp.addr, &addr, sizeof(addr));
*err = 0;
if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp)) < 0) {
- *err = errno;
+ *err = -errno;
if (errno != ENOPROTOOPT)
ret = -errno;
}
@@ -87,7 +87,7 @@ static int has_tcp_md5(int *err)
*/
*err = 0;
if (test_set_md5(sk, addr_any, 0, -1, DEFAULT_TEST_PASSWORD)) {
- *err = errno;
+ *err = -errno;
if (errno != ENOPROTOOPT && errno == ENOMEM) {
test_print("setsockopt(TCP_MD5SIG_EXT): %m");
ret = -errno;
@@ -116,13 +116,21 @@ static int has_vrfs(int *err)
return ret;
}
+/*
+ * kconfig check for ftrace: the result of test_setup_tracing() is stored
+ * in @err. The check itself always returns 0, so a failed tracing setup is
+ * not treated as a failure to run the check.
+ */
+static int has_ftrace(int *err)
+{
+ *err = test_setup_tracing();
+ return 0;
+}
+
+#define KCONFIG_UNKNOWN 1
static pthread_mutex_t kconfig_lock = PTHREAD_MUTEX_INITIALIZER;
static struct kconfig_t kconfig[__KCONFIG_LAST__] = {
- { -1, has_net_ns },
- { -1, has_veth },
- { -1, has_tcp_ao },
- { -1, has_tcp_md5 },
- { -1, has_vrfs },
+ { KCONFIG_UNKNOWN, has_net_ns },
+ { KCONFIG_UNKNOWN, has_veth },
+ { KCONFIG_UNKNOWN, has_tcp_ao },
+ { KCONFIG_UNKNOWN, has_tcp_md5 },
+ { KCONFIG_UNKNOWN, has_vrfs },
+ { KCONFIG_UNKNOWN, has_ftrace },
};
const char *tests_skip_reason[__KCONFIG_LAST__] = {
@@ -131,6 +139,7 @@ const char *tests_skip_reason[__KCONFIG_LAST__] = {
"Tests require TCP-AO support (CONFIG_TCP_AO)",
"setsockopt(TCP_MD5SIG_EXT) is not supported (CONFIG_TCP_MD5)",
"VRFs are not supported (CONFIG_NET_VRF)",
+ "Ftrace points are not supported (CONFIG_TRACEPOINTS)",
};
bool kernel_config_has(enum test_needs_kconfig k)
@@ -138,11 +147,11 @@ bool kernel_config_has(enum test_needs_kconfig k)
bool ret;
pthread_mutex_lock(&kconfig_lock);
- if (kconfig[k]._errno == -1) {
- if (kconfig[k].check_kconfig(&kconfig[k]._errno))
+ if (kconfig[k]._error == KCONFIG_UNKNOWN) {
+ if (kconfig[k].check_kconfig(&kconfig[k]._error))
test_error("Failed to initialize kconfig %u", k);
}
- ret = kconfig[k]._errno == 0;
+ ret = kconfig[k]._error == 0;
pthread_mutex_unlock(&kconfig_lock);
return ret;
}
diff --git a/tools/testing/selftests/net/tcp_ao/lib/setup.c b/tools/testing/selftests/net/tcp_ao/lib/setup.c
index e408b9243b2c..a27cc03c9fbd 100644
--- a/tools/testing/selftests/net/tcp_ao/lib/setup.c
+++ b/tools/testing/selftests/net/tcp_ao/lib/setup.c
@@ -111,7 +111,7 @@ static void sig_int(int signo)
int open_netns(void)
{
- const char *netns_path = "/proc/self/ns/net";
+ const char *netns_path = "/proc/thread-self/ns/net";
int fd;
fd = open(netns_path, O_RDONLY);
@@ -142,6 +142,13 @@ int switch_save_ns(int new_ns)
return ret;
}
+/*
+ * Switch the calling thread into the network namespace @fd and close @fd.
+ * A failing setns() is reported with test_error().
+ */
+void switch_close_ns(int fd)
+{
+ if (setns(fd, CLONE_NEWNET))
+ test_error("setns()");
+ close(fd);
+}
+
static int nsfd_outside = -1;
static int nsfd_parent = -1;
static int nsfd_child = -1;
@@ -243,9 +250,9 @@ void __test_init(unsigned int ntests, int family, unsigned int prefix,
test_print("rand seed %u", (unsigned int)seed);
srand(seed);
-
ksft_print_header();
init_namespaces();
+ test_init_ftrace(nsfd_parent, nsfd_child);
if (add_veth(veth_name, nsfd_parent, nsfd_child))
test_error("Failed to add veth");
@@ -296,7 +303,7 @@ static bool is_optmem_namespaced(void)
int old_ns = switch_save_ns(nsfd_child);
optmem_ns = !access(optmem_file, F_OK);
- switch_ns(old_ns);
+ switch_close_ns(old_ns);
}
return !!optmem_ns;
}
@@ -317,7 +324,7 @@ size_t test_get_optmem(void)
test_error("can't read from %s", optmem_file);
fclose(foptmem);
if (!is_optmem_namespaced())
- switch_ns(old_ns);
+ switch_close_ns(old_ns);
return ret;
}
@@ -339,7 +346,7 @@ static void __test_set_optmem(size_t new, size_t *old)
test_error("can't write %zu to %s", new, optmem_file);
fclose(foptmem);
if (!is_optmem_namespaced())
- switch_ns(old_ns);
+ switch_close_ns(old_ns);
}
static void test_revert_optmem(void)
diff --git a/tools/testing/selftests/net/tcp_ao/lib/sock.c b/tools/testing/selftests/net/tcp_ao/lib/sock.c
index 15aeb0963058..0ffda966c677 100644
--- a/tools/testing/selftests/net/tcp_ao/lib/sock.c
+++ b/tools/testing/selftests/net/tcp_ao/lib/sock.c
@@ -379,7 +379,6 @@ int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out)
key_dump[0].nkeys = nr_keys;
key_dump[0].get_all = 1;
- key_dump[0].get_all = 1;
err = getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS,
key_dump, &key_dump_sz);
if (err) {
diff --git a/tools/testing/selftests/net/tcp_ao/lib/utils.c b/tools/testing/selftests/net/tcp_ao/lib/utils.c
index 372daca525f5..bdf5522c9213 100644
--- a/tools/testing/selftests/net/tcp_ao/lib/utils.c
+++ b/tools/testing/selftests/net/tcp_ao/lib/utils.c
@@ -21,6 +21,32 @@ void randomize_buffer(void *buf, size_t buflen)
}
}
+/*
+ * printf()-like helper writing the formatted message to the file @fname,
+ * which is truncated first unless @append is set.
+ *
+ * Returns 0 on success, the negative errno if the file can't be opened or
+ * the data can't be flushed to it, -1 if the message can't be formatted
+ * or is written only partially.
+ */
+__printf(3, 4) int test_echo(const char *fname, bool append,
+			     const char *fmt, ...)
+{
+	size_t len, written;
+	va_list vargs;
+	int ret = 0;
+	char *msg;
+	FILE *f;
+
+	f = fopen(fname, append ? "a" : "w");
+	if (!f)
+		return -errno;
+
+	va_start(vargs, fmt);
+	msg = test_snprintf(fmt, vargs);
+	va_end(vargs);
+	if (!msg) {
+		fclose(f);
+		return -1;
+	}
+	len = strlen(msg);
+	written = fwrite(msg, 1, len, f);
+	if (written != len)
+		ret = -1;
+	/*
+	 * The stream is buffered: the data may reach the file only on
+	 * fclose(), so that is where a rejected write is reported.
+	 */
+	if (fclose(f) && !ret)
+		ret = -errno;
+	free(msg);
+	return ret;
+}
+
const struct sockaddr_in6 addr_any6 = {
.sin6_family = AF_INET6,
};
diff --git a/tools/testing/selftests/net/tcp_ao/restore.c b/tools/testing/selftests/net/tcp_ao/restore.c
index 8fdc808df325..ecc6f1e3a414 100644
--- a/tools/testing/selftests/net/tcp_ao/restore.c
+++ b/tools/testing/selftests/net/tcp_ao/restore.c
@@ -64,6 +64,7 @@ static void try_server_run(const char *tst_name, unsigned int port,
else
test_ok("%s: server alive", tst_name);
}
+ synchronize_threads(); /* 3: counters checks */
if (test_get_tcp_ao_counters(sk, &ao2))
test_error("test_get_tcp_ao_counters()");
after_cnt = netstat_get_one(cnt_name, NULL);
@@ -71,10 +72,10 @@ static void try_server_run(const char *tst_name, unsigned int port,
test_tcp_ao_counters_cmp(tst_name, &ao1, &ao2, cnt_expected);
if (after_cnt <= before_cnt) {
- test_fail("%s: %s counter did not increase: %zu <= %zu",
+ test_fail("%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64,
tst_name, cnt_name, after_cnt, before_cnt);
} else {
- test_ok("%s: counter %s increased %zu => %zu",
+ test_ok("%s: counter %s increased %" PRIu64 " => %" PRIu64,
tst_name, cnt_name, before_cnt, after_cnt);
}
@@ -82,7 +83,7 @@ static void try_server_run(const char *tst_name, unsigned int port,
* Before close() as that will send FIN and move the peer in TCP_CLOSE
* and that will prevent reading AO counters from the peer's socket.
*/
- synchronize_threads(); /* 3: verified => closed */
+ synchronize_threads(); /* 4: verified => closed */
out:
close(sk);
}
@@ -176,6 +177,7 @@ static void test_sk_restore(const char *tst_name, unsigned int server_port,
else
test_ok("%s: post-migrate connection is alive", tst_name);
}
+ synchronize_threads(); /* 3: counters checks */
if (test_get_tcp_ao_counters(sk, &ao2))
test_error("test_get_tcp_ao_counters()");
after_cnt = netstat_get_one(cnt_name, NULL);
@@ -183,13 +185,13 @@ static void test_sk_restore(const char *tst_name, unsigned int server_port,
test_tcp_ao_counters_cmp(tst_name, &ao1, &ao2, cnt_expected);
if (after_cnt <= before_cnt) {
- test_fail("%s: %s counter did not increase: %zu <= %zu",
+ test_fail("%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64,
tst_name, cnt_name, after_cnt, before_cnt);
} else {
- test_ok("%s: counter %s increased %zu => %zu",
+ test_ok("%s: counter %s increased %" PRIu64 " => %" PRIu64,
tst_name, cnt_name, before_cnt, after_cnt);
}
- synchronize_threads(); /* 3: verified => closed */
+ synchronize_threads(); /* 4: verified => closed */
close(sk);
}
@@ -206,22 +208,36 @@ static void *client_fn(void *arg)
test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
ao_img.snt_isn += 1;
+ trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_addr, this_ip_dest,
+ -1, port, 0, -1, -1, -1, -1, -1, 100, 100, -1);
+ trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_dest, this_ip_addr,
+ port, -1, 0, -1, -1, -1, -1, -1, 100, 100, -1);
test_sk_restore("TCP-AO with wrong send ISN", port++,
&saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_BAD);
test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
ao_img.rcv_isn += 1;
+ trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_addr, this_ip_dest,
+ -1, port, 0, -1, -1, -1, -1, -1, 100, 100, -1);
+ trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_dest, this_ip_addr,
+ port, -1, 0, -1, -1, -1, -1, -1, 100, 100, -1);
test_sk_restore("TCP-AO with wrong receive ISN", port++,
&saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_BAD);
test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
ao_img.snd_sne += 1;
+ trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_addr, this_ip_dest,
+ -1, port, 0, -1, -1, -1, -1, -1, 100, 100, -1);
+ /* not expecting server => client mismatches as only snd sne is broken */
test_sk_restore("TCP-AO with wrong send SEQ ext number", port++,
&saddr, &tcp_img, &ao_img, FAULT_TIMEOUT,
TEST_CNT_NS_BAD | TEST_CNT_GOOD);
test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
ao_img.rcv_sne += 1;
+ /* not expecting client => server mismatches as only rcv sne is broken */
+ trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_dest, this_ip_addr,
+ port, -1, 0, -1, -1, -1, -1, -1, 100, 100, -1);
test_sk_restore("TCP-AO with wrong receive SEQ ext number", port++,
&saddr, &tcp_img, &ao_img, FAULT_TIMEOUT,
TEST_CNT_NS_GOOD | TEST_CNT_BAD);
@@ -231,6 +247,6 @@ static void *client_fn(void *arg)
int main(int argc, char *argv[])
{
- test_init(20, server_fn, client_fn);
+ test_init(21, server_fn, client_fn);
return 0;
}
diff --git a/tools/testing/selftests/net/tcp_ao/rst.c b/tools/testing/selftests/net/tcp_ao/rst.c
index a2fe88d35ac0..6364facaa63e 100644
--- a/tools/testing/selftests/net/tcp_ao/rst.c
+++ b/tools/testing/selftests/net/tcp_ao/rst.c
@@ -455,6 +455,6 @@ static void *client_fn(void *arg)
int main(int argc, char *argv[])
{
- test_init(14, server_fn, client_fn);
+ test_init(15, server_fn, client_fn);
return 0;
}
diff --git a/tools/testing/selftests/net/tcp_ao/self-connect.c b/tools/testing/selftests/net/tcp_ao/self-connect.c
index a5698b0a3718..3ecd2b58de6a 100644
--- a/tools/testing/selftests/net/tcp_ao/self-connect.c
+++ b/tools/testing/selftests/net/tcp_ao/self-connect.c
@@ -87,7 +87,7 @@ static void tcp_self_connect(const char *tst, unsigned int port,
netstat_free(ns_after);
if (after_aogood <= before_aogood) {
- test_fail("%s: TCPAOGood counter mismatch: %zu <= %zu",
+ test_fail("%s: TCPAOGood counter mismatch: %" PRIu64 " <= %" PRIu64,
tst, after_aogood, before_aogood);
close(sk);
return;
@@ -148,7 +148,7 @@ static void tcp_self_connect(const char *tst, unsigned int port,
netstat_free(ns_after);
close(sk);
if (after_aogood <= before_aogood) {
- test_fail("%s: TCPAOGood counter mismatch: %zu <= %zu",
+ test_fail("%s: TCPAOGood counter mismatch: %" PRIu64 " <= %" PRIu64,
tst, after_aogood, before_aogood);
return;
}
@@ -163,17 +163,26 @@ static void *client_fn(void *arg)
setup_lo_intf("lo");
tcp_self_connect("self-connect(same keyids)", port++, false, false);
+
+ /* expecting rnext to change based on the first segment RNext != Current */
+ trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, local_addr, local_addr,
+ port, port, 0, -1, -1, -1, -1, -1, 7, 5, -1);
tcp_self_connect("self-connect(different keyids)", port++, true, false);
tcp_self_connect("self-connect(restore)", port, false, true);
- port += 2;
+ port += 2; /* restore test restores over different port */
+ trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, local_addr, local_addr,
+ port, port, 0, -1, -1, -1, -1, -1, 7, 5, -1);
+ /* intentionally on restore they are added to the socket in different order */
+ trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, local_addr, local_addr,
+ port + 1, port + 1, 0, -1, -1, -1, -1, -1, 5, 7, -1);
tcp_self_connect("self-connect(restore, different keyids)", port, true, true);
- port += 2;
+ port += 2; /* restore test restores over different port */
return NULL;
}
int main(int argc, char *argv[])
{
- test_init(4, client_fn, NULL);
+ test_init(5, client_fn, NULL);
return 0;
}
diff --git a/tools/testing/selftests/net/tcp_ao/seq-ext.c b/tools/testing/selftests/net/tcp_ao/seq-ext.c
index ad4e77d6823e..8901a6785dc8 100644
--- a/tools/testing/selftests/net/tcp_ao/seq-ext.c
+++ b/tools/testing/selftests/net/tcp_ao/seq-ext.c
@@ -116,7 +116,15 @@ static void *server_fn(void *arg)
sk = test_sk_restore(&img, &ao_img, &saddr, this_ip_dest,
client_new_port, &ao1);
- synchronize_threads(); /* 5: verify counters during SEQ-number rollover */
+ trace_ao_event_sne_expect(TCP_AO_SND_SNE_UPDATE, this_ip_addr,
+ this_ip_dest, test_server_port + 1, client_new_port, 1);
+ trace_ao_event_sne_expect(TCP_AO_SND_SNE_UPDATE, this_ip_dest,
+ this_ip_addr, client_new_port, test_server_port + 1, 1);
+ trace_ao_event_sne_expect(TCP_AO_RCV_SNE_UPDATE, this_ip_addr,
+ this_ip_dest, test_server_port + 1, client_new_port, 1);
+ trace_ao_event_sne_expect(TCP_AO_RCV_SNE_UPDATE, this_ip_dest,
+ this_ip_addr, client_new_port, test_server_port + 1, 1);
+ synchronize_threads(); /* 5: verify the connection during SEQ-number rollover */
bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC);
if (bytes != quota) {
if (bytes > 0)
@@ -127,6 +135,7 @@ static void *server_fn(void *arg)
test_ok("server alive");
}
+ synchronize_threads(); /* 6: verify counters after SEQ-number rollover */
if (test_get_tcp_ao_counters(sk, &ao2))
test_error("test_get_tcp_ao_counters()");
after_good = netstat_get_one("TCPAOGood", NULL);
@@ -134,15 +143,15 @@ static void *server_fn(void *arg)
test_tcp_ao_counters_cmp(NULL, &ao1, &ao2, TEST_CNT_GOOD);
if (after_good <= before_good) {
- test_fail("TCPAOGood counter did not increase: %zu <= %zu",
+ test_fail("TCPAOGood counter did not increase: %" PRIu64 " <= %" PRIu64,
after_good, before_good);
} else {
- test_ok("TCPAOGood counter increased %zu => %zu",
+ test_ok("TCPAOGood counter increased %" PRIu64 " => %" PRIu64,
before_good, after_good);
}
after_bad = netstat_get_one("TCPAOBad", NULL);
if (after_bad)
- test_fail("TCPAOBad counter is non-zero: %zu", after_bad);
+ test_fail("TCPAOBad counter is non-zero: %" PRIu64, after_bad);
else
test_ok("TCPAOBad counter didn't increase");
test_enable_repair(sk);
@@ -206,12 +215,13 @@ static void *client_fn(void *arg)
sk = test_sk_restore(&img, &ao_img, &saddr, this_ip_dest,
test_server_port + 1, &ao1);
- synchronize_threads(); /* 5: verify counters during SEQ-number rollover */
+ synchronize_threads(); /* 5: verify the connection during SEQ-number rollover */
if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC))
test_fail("post-migrate verify failed");
else
test_ok("post-migrate connection alive");
+ synchronize_threads(); /* 6: verify counters after SEQ-number rollover */
if (test_get_tcp_ao_counters(sk, &ao2))
test_error("test_get_tcp_ao_counters()");
after_good = netstat_get_one("TCPAOGood", NULL);
@@ -219,15 +229,15 @@ static void *client_fn(void *arg)
test_tcp_ao_counters_cmp(NULL, &ao1, &ao2, TEST_CNT_GOOD);
if (after_good <= before_good) {
- test_fail("TCPAOGood counter did not increase: %zu <= %zu",
+ test_fail("TCPAOGood counter did not increase: %" PRIu64 " <= %" PRIu64,
after_good, before_good);
} else {
- test_ok("TCPAOGood counter increased %zu => %zu",
+ test_ok("TCPAOGood counter increased %" PRIu64 " => %" PRIu64,
before_good, after_good);
}
after_bad = netstat_get_one("TCPAOBad", NULL);
if (after_bad)
- test_fail("TCPAOBad counter is non-zero: %zu", after_bad);
+ test_fail("TCPAOBad counter is non-zero: %" PRIu64, after_bad);
else
test_ok("TCPAOBad counter didn't increase");
@@ -240,6 +250,6 @@ static void *client_fn(void *arg)
int main(int argc, char *argv[])
{
- test_init(7, server_fn, client_fn);
+ test_init(8, server_fn, client_fn);
return 0;
}
diff --git a/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c
index 517930f9721b..084db4ecdff6 100644
--- a/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c
+++ b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c
@@ -30,8 +30,8 @@ static void test_vefify_ao_info(int sk, struct tcp_ao_info_opt *info,
#define __cmp_ao(member) \
do { \
if (info->member != tmp.member) { \
- test_fail("%s: getsockopt(): " __stringify(member) " %zu != %zu", \
- tst, (size_t)info->member, (size_t)tmp.member); \
+ test_fail("%s: getsockopt(): " __stringify(member) " %" PRIu64 " != %" PRIu64, \
+ tst, (uint64_t)info->member, (uint64_t)tmp.member); \
return; \
} \
} while(0)
@@ -830,6 +830,6 @@ static void *client_fn(void *arg)
int main(int argc, char *argv[])
{
- test_init(120, client_fn, NULL);
+ test_init(121, client_fn, NULL);
return 0;
}
diff --git a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c
index 6b59a652159f..f779e5892bc1 100644
--- a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c
+++ b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c
@@ -70,6 +70,7 @@ static void try_accept(const char *tst_name, unsigned int port,
timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
err = test_wait_fd(lsk, timeout, 0);
+ synchronize_threads(); /* connect()/accept() timeouts */
if (err == -ETIMEDOUT) {
if (!fault(TIMEOUT))
test_fail("timed out for accept()");
@@ -100,10 +101,10 @@ static void try_accept(const char *tst_name, unsigned int port,
after_cnt = netstat_get_one(cnt_name, NULL);
if (after_cnt <= before_cnt) {
- test_fail("%s: %s counter did not increase: %zu <= %zu",
+ test_fail("%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64,
tst_name, cnt_name, after_cnt, before_cnt);
} else {
- test_ok("%s: counter %s increased %zu => %zu",
+ test_ok("%s: counter %s increased %" PRIu64 " => %" PRIu64,
tst_name, cnt_name, before_cnt, after_cnt);
}
if (ao_addr)
@@ -283,6 +284,7 @@ static void try_connect(const char *tst_name, unsigned int port,
timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
ret = _test_connect_socket(sk, this_ip_dest, port, timeout);
+ synchronize_threads(); /* connect()/accept() timeouts */
if (ret < 0) {
if (fault(KEYREJECT) && ret == -EKEYREJECTED)
test_ok("%s: connect() was prevented", tst_name);
@@ -451,6 +453,7 @@ static void try_to_add(const char *tst_name, unsigned int port,
timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
ret = _test_connect_socket(sk, this_ip_dest, port, timeout);
+ synchronize_threads(); /* connect()/accept() timeouts */
if (ret <= 0) {
test_error("%s: connect() returned %d", tst_name, ret);
goto out;
@@ -671,24 +674,38 @@ static void *client_fn(void *arg)
try_connect("AO server (INADDR_ANY): AO client", port++, NULL, 0,
&addr_any, 0, 100, 100, 0, 0, 0, &this_ip_addr);
+ trace_hash_event_expect(TCP_HASH_MD5_UNEXPECTED, this_ip_addr,
+ this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0);
try_connect("AO server (INADDR_ANY): MD5 client", port++, &addr_any, 0,
NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr);
+ trace_hash_event_expect(TCP_HASH_AO_REQUIRED, this_ip_addr,
+ this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0);
try_connect("AO server (INADDR_ANY): unsigned client", port++, NULL, 0,
NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &this_ip_addr);
try_connect("AO server (AO_REQUIRED): AO client", port++, NULL, 0,
&addr_any, 0, 100, 100, 0, 0, 0, &this_ip_addr);
+ trace_hash_event_expect(TCP_HASH_AO_REQUIRED, client2,
+ this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0);
try_connect("AO server (AO_REQUIRED): unsigned client", port++, NULL, 0,
NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &client2);
+ trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest,
+ -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1);
try_connect("MD5 server (INADDR_ANY): AO client", port++, NULL, 0,
&addr_any, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr);
try_connect("MD5 server (INADDR_ANY): MD5 client", port++, &addr_any, 0,
NULL, 0, 100, 100, 0, 0, 1, &this_ip_addr);
+ trace_hash_event_expect(TCP_HASH_MD5_REQUIRED, this_ip_addr,
+ this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0);
try_connect("MD5 server (INADDR_ANY): no sign client", port++, NULL, 0,
NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr);
+ trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest,
+ -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1);
try_connect("no sign server: AO client", port++, NULL, 0,
&addr_any, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &this_ip_addr);
+ trace_hash_event_expect(TCP_HASH_MD5_UNEXPECTED, this_ip_addr,
+ this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0);
try_connect("no sign server: MD5 client", port++, &addr_any, 0,
NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr);
try_connect("no sign server: no sign client", port++, NULL, 0,
@@ -696,25 +713,37 @@ static void *client_fn(void *arg)
try_connect("AO+MD5 server: AO client (matching)", port++, NULL, 0,
&addr_any, 0, 100, 100, 0, 0, 1, &client2);
+ trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest,
+ -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1);
try_connect("AO+MD5 server: AO client (misconfig, matching MD5)",
port++, NULL, 0, &addr_any, 0, 100, 100, 0,
FAULT_TIMEOUT, 1, &this_ip_addr);
+ trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, client3, this_ip_dest,
+ -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1);
try_connect("AO+MD5 server: AO client (misconfig, non-matching)",
port++, NULL, 0, &addr_any, 0, 100, 100, 0,
FAULT_TIMEOUT, 1, &client3);
try_connect("AO+MD5 server: MD5 client (matching)", port++, &addr_any, 0,
NULL, 0, 100, 100, 0, 0, 1, &this_ip_addr);
+ trace_hash_event_expect(TCP_HASH_MD5_UNEXPECTED, client2,
+ this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0);
try_connect("AO+MD5 server: MD5 client (misconfig, matching AO)",
port++, &addr_any, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT,
1, &client2);
+ trace_hash_event_expect(TCP_HASH_MD5_UNEXPECTED, client3,
+ this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0);
try_connect("AO+MD5 server: MD5 client (misconfig, non-matching)",
port++, &addr_any, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT,
1, &client3);
try_connect("AO+MD5 server: no sign client (unmatched)",
port++, NULL, 0, NULL, 0, 100, 100, 0, 0, 1, &client3);
+ trace_hash_event_expect(TCP_HASH_AO_REQUIRED, client2,
+ this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0);
try_connect("AO+MD5 server: no sign client (misconfig, matching AO)",
port++, NULL, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT,
1, &client2);
+ trace_hash_event_expect(TCP_HASH_MD5_REQUIRED, this_ip_addr,
+ this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0);
try_connect("AO+MD5 server: no sign client (misconfig, matching MD5)",
port++, NULL, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT,
1, &this_ip_addr);
@@ -736,6 +765,6 @@ static void *client_fn(void *arg)
int main(int argc, char *argv[])
{
- test_init(72, server_fn, client_fn);
+ test_init(73, server_fn, client_fn);
return 0;
}
diff --git a/tools/testing/selftests/net/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c
index ec60a16c9307..d626f22f9550 100644
--- a/tools/testing/selftests/net/txtimestamp.c
+++ b/tools/testing/selftests/net/txtimestamp.c
@@ -356,8 +356,12 @@ static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len)
}
}
- if (batch > 1)
+ if (batch > 1) {
fprintf(stderr, "batched %d timestamps\n", batch);
+ } else if (!batch) {
+ fprintf(stderr, "Failed to report timestamps\n");
+ test_failed = true;
+ }
}
static int recv_errmsg(int fd)
diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh
index 11a1ebda564f..d5ffd8c9172e 100755
--- a/tools/testing/selftests/net/udpgro.sh
+++ b/tools/testing/selftests/net/udpgro.sh
@@ -7,8 +7,6 @@ source net_helper.sh
readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
-BPF_FILE="xdp_dummy.bpf.o"
-
# set global exit status, but never reset nonzero one.
check_err()
{
@@ -38,7 +36,7 @@ cfg_veth() {
ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24
ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad
ip -netns "${PEER_NS}" link set dev veth1 up
- ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp
+ ip netns exec "${PEER_NS}" ethtool -K veth1 gro on
}
run_one() {
@@ -46,17 +44,19 @@ run_one() {
local -r all="$@"
local -r tx_args=${all%rx*}
local -r rx_args=${all#*rx}
+ local ret=0
cfg_veth
- ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} && \
- echo "ok" || \
- echo "failed" &
+ ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} &
+ local PID1=$!
wait_local_port_listen ${PEER_NS} 8000 udp
./udpgso_bench_tx ${tx_args}
- ret=$?
- wait $(jobs -p)
+ check_err $?
+ wait ${PID1}
+ check_err $?
+ [ "$ret" -eq 0 ] && echo "ok" || echo "failed"
return $ret
}
@@ -73,6 +73,7 @@ run_one_nat() {
local -r all="$@"
local -r tx_args=${all%rx*}
local -r rx_args=${all#*rx}
+ local ret=0
if [[ ${tx_args} = *-4* ]]; then
ipt_cmd=iptables
@@ -93,16 +94,17 @@ run_one_nat() {
# ... so that GRO will match the UDP_GRO enabled socket, but packets
# will land on the 'plain' one
ip netns exec "${PEER_NS}" ./udpgso_bench_rx -G ${family} -b ${addr1} -n 0 &
- pid=$!
- ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${family} -b ${addr2%/*} ${rx_args} && \
- echo "ok" || \
- echo "failed"&
+ local PID1=$!
+ ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${family} -b ${addr2%/*} ${rx_args} &
+ local PID2=$!
wait_local_port_listen "${PEER_NS}" 8000 udp
./udpgso_bench_tx ${tx_args}
- ret=$?
- kill -INT $pid
- wait $(jobs -p)
+ check_err $?
+ kill -INT ${PID1}
+ wait ${PID2}
+ check_err $?
+ [ "$ret" -eq 0 ] && echo "ok" || echo "failed"
return $ret
}
@@ -111,20 +113,26 @@ run_one_2sock() {
local -r all="$@"
local -r tx_args=${all%rx*}
local -r rx_args=${all#*rx}
+ local ret=0
cfg_veth
ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} -p 12345 &
- ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 10 ${rx_args} && \
- echo "ok" || \
- echo "failed" &
+ local PID1=$!
+ ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 10 ${rx_args} &
+ local PID2=$!
wait_local_port_listen "${PEER_NS}" 12345 udp
./udpgso_bench_tx ${tx_args} -p 12345
+ check_err $?
wait_local_port_listen "${PEER_NS}" 8000 udp
./udpgso_bench_tx ${tx_args}
- ret=$?
- wait $(jobs -p)
+ check_err $?
+ wait ${PID1}
+ check_err $?
+ wait ${PID2}
+ check_err $?
+ [ "$ret" -eq 0 ] && echo "ok" || echo "failed"
return $ret
}
@@ -196,11 +204,6 @@ run_all() {
return $ret
}
-if [ ! -f ${BPF_FILE} ]; then
- echo "Missing ${BPF_FILE}. Run 'make' first"
- exit -1
-fi
-
if [[ $# -eq 0 ]]; then
run_all
elif [[ $1 == "__subprocess" ]]; then
diff --git a/tools/testing/selftests/net/unicast_extensions.sh b/tools/testing/selftests/net/unicast_extensions.sh
index f52aa5f7da52..3e751234ccfe 100755
--- a/tools/testing/selftests/net/unicast_extensions.sh
+++ b/tools/testing/selftests/net/unicast_extensions.sh
@@ -30,14 +30,7 @@
source lib.sh
-# nettest can be run from PATH or from same directory as this selftest
-if ! which nettest >/dev/null; then
- PATH=$PWD:$PATH
- if ! which nettest >/dev/null; then
- echo "'nettest' command not found; skipping tests"
- exit $ksft_skip
- fi
-fi
+check_gen_prog "nettest"
result=0
diff --git a/tools/testing/selftests/net/vrf_route_leaking.sh b/tools/testing/selftests/net/vrf_route_leaking.sh
index 152171fb1fc8..e9c2f71da207 100755
--- a/tools/testing/selftests/net/vrf_route_leaking.sh
+++ b/tools/testing/selftests/net/vrf_route_leaking.sh
@@ -59,7 +59,6 @@
# while it is forwarded between different vrfs.
source lib.sh
-PATH=$PWD:$PWD/tools/testing/selftests/net:$PATH
VERBOSE=0
PAUSE_ON_FAIL=no
DEFAULT_TTYPE=sym
@@ -636,6 +635,8 @@ EOF
# Some systems don't have a ping6 binary anymore
command -v ping6 > /dev/null 2>&1 && ping6=$(command -v ping6) || ping6=$(command -v ping)
+check_gen_prog "nettest"
+
TESTS_IPV4="ipv4_ping_ttl ipv4_traceroute ipv4_ping_frag ipv4_ping_local ipv4_tcp_local
ipv4_udp_local ipv4_ping_ttl_asym ipv4_traceroute_asym"
TESTS_IPV6="ipv6_ping_ttl ipv6_traceroute ipv6_ping_local ipv6_tcp_local ipv6_udp_local
diff --git a/tools/testing/selftests/net/xfrm_policy_add_speed.sh b/tools/testing/selftests/net/xfrm_policy_add_speed.sh
new file mode 100755
index 000000000000..2fab29d3cb91
--- /dev/null
+++ b/tools/testing/selftests/net/xfrm_policy_add_speed.sh
@@ -0,0 +1,83 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+source lib.sh
+
+timeout=4m
+ret=0
+tmp=$(mktemp)
+cleanup() {
+ cleanup_all_ns
+ rm -f "$tmp"
+}
+
+trap cleanup EXIT
+
+maxpolicies=100000
+[ "$KSFT_MACHINE_SLOW" = "yes" ] && maxpolicies=10000
+
+do_dummies4() {
+ local dir="$1"
+ local max="$2"
+
+ local policies
+ local pfx
+ pfx=30
+ policies=0
+
+ ip netns exec "$ns" ip xfrm policy flush
+
+ for i in $(seq 1 100);do
+ local s
+ local d
+ for j in $(seq 1 255);do
+ s=$((i+0))
+ d=$((i+100))
+
+ for a in $(seq 1 8 255); do
+ policies=$((policies+1))
+ [ "$policies" -gt "$max" ] && return
+ echo xfrm policy add src 10.$s.$j.0/30 dst 10.$d.$j.$a/$pfx dir $dir action block
+ done
+ for a in $(seq 1 8 255); do
+ policies=$((policies+1))
+ [ "$policies" -gt "$max" ] && return
+ echo xfrm policy add src 10.$s.$j.$a/30 dst 10.$d.$j.0/$pfx dir $dir action block
+ done
+ done
+ done
+}
+
+setup_ns ns
+
+do_bench()
+{
+ local max="$1"
+
+ start=$(date +%s%3N)
+ do_dummies4 "out" "$max" > "$tmp"
+ if ! timeout "$timeout" ip netns exec "$ns" ip -batch "$tmp";then
+ echo "WARNING: policy insertion cancelled after $timeout"
+ ret=1
+ fi
+ stop=$(date +%s%3N)
+
+ result=$((stop-start))
+
+ policies=$(wc -l < "$tmp")
+ printf "Inserted %-06s policies in $result ms\n" $policies
+
+ have=$(ip netns exec "$ns" ip xfrm policy show | grep "action block" | wc -l)
+ if [ "$have" -ne "$policies" ]; then
+ echo "WARNING: mismatch, have $have policies, expected $policies"
+ ret=1
+ fi
+}
+
+p=100
+while [ $p -le "$maxpolicies" ]; do
+ do_bench "$p"
+ p="${p}0"
+done
+
+exit $ret
diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile
index 3fbabab46958..8de98ea7af80 100644
--- a/tools/testing/selftests/nolibc/Makefile
+++ b/tools/testing/selftests/nolibc/Makefile
@@ -1,19 +1,21 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for nolibc tests
-include ../../../scripts/Makefile.include
-include ../../../scripts/utilities.mak
-# We need this for the "cc-option" macro.
-include ../../../build/Build.include
+# we're in ".../tools/testing/selftests/nolibc"
+ifeq ($(srctree),)
+srctree := $(patsubst %/tools/testing/selftests/,%,$(dir $(CURDIR)))
+endif
+
+include $(srctree)/tools/scripts/utilities.mak
+# We need this for the "__cc-option" macro.
+include $(srctree)/scripts/Makefile.compiler
ifneq ($(O),)
ifneq ($(call is-absolute,$(O)),y)
$(error Only absolute O= parameters are supported)
endif
-endif
-
-# we're in ".../tools/testing/selftests/nolibc"
-ifeq ($(srctree),)
-srctree := $(patsubst %/tools/testing/selftests/,%,$(dir $(CURDIR)))
+objtree := $(O)
+else
+objtree ?= $(srctree)
endif
ifeq ($(ARCH),)
@@ -21,7 +23,7 @@ include $(srctree)/scripts/subarch.include
ARCH = $(SUBARCH)
endif
-objtree ?= $(srctree)
+cc-option = $(call __cc-option, $(CC),$(CLANG_CROSS_FLAGS),$(1),$(2))
# XARCH extends the kernel's ARCH with a few variants of the same
# architecture that only differ by the configuration, the toolchain
@@ -155,9 +157,22 @@ CFLAGS ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 -W -Wall -Wex
$(CFLAGS_$(XARCH)) $(CFLAGS_STACKPROTECTOR) $(CFLAGS_EXTRA)
LDFLAGS :=
+LIBGCC := -lgcc
+
+ifneq ($(LLVM),)
+# Not needed for clang
+LIBGCC :=
+endif
+
+# Modify CFLAGS based on LLVM=
+include $(srctree)/tools/scripts/Makefile.include
+
+# GCC uses "s390", clang "systemz"
+CLANG_CROSS_FLAGS := $(subst --target=s390-linux,--target=systemz-linux,$(CLANG_CROSS_FLAGS))
+
REPORT ?= awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{if (!f) printf("\n"); f++; print;} /\[SKIPPED\][\r]*$$/{s++} \
END{ printf("\n%3d test(s): %3d passed, %3d skipped, %3d failed => status: ", p+s+f, p, s, f); \
- if (f) printf("failure\n"); else if (s) printf("warning\n"); else printf("success\n");; \
+ if (f || !p) printf("failure\n"); else if (s) printf("warning\n"); else printf("success\n");; \
printf("\nSee all results in %s\n", ARGV[1]); }'
help:
@@ -204,11 +219,11 @@ sysroot/$(ARCH)/include:
ifneq ($(NOLIBC_SYSROOT),0)
nolibc-test: nolibc-test.c nolibc-test-linkage.c sysroot/$(ARCH)/include
$(QUIET_CC)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ \
- -nostdlib -nostdinc -static -Isysroot/$(ARCH)/include nolibc-test.c nolibc-test-linkage.c -lgcc
+ -nostdlib -nostdinc -static -Isysroot/$(ARCH)/include nolibc-test.c nolibc-test-linkage.c $(LIBGCC)
else
nolibc-test: nolibc-test.c nolibc-test-linkage.c
$(QUIET_CC)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ \
- -nostdlib -static -include $(srctree)/tools/include/nolibc/nolibc.h nolibc-test.c nolibc-test-linkage.c -lgcc
+ -nostdlib -static -include $(srctree)/tools/include/nolibc/nolibc.h nolibc-test.c nolibc-test-linkage.c $(LIBGCC)
endif
libc-test: nolibc-test.c nolibc-test-linkage.c
diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index 093d0512f4c5..6fba7025c5e3 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -542,7 +542,7 @@ int expect_strzr(const char *expr, int llen)
{
int ret = 0;
- llen += printf(" = <%s> ", expr);
+ llen += printf(" = <%s> ", expr ? expr : "(null)");
if (expr) {
ret = 1;
result(llen, FAIL);
@@ -561,7 +561,7 @@ int expect_strnz(const char *expr, int llen)
{
int ret = 0;
- llen += printf(" = <%s> ", expr);
+ llen += printf(" = <%s> ", expr ? expr : "(null)");
if (!expr) {
ret = 1;
result(llen, FAIL);
@@ -686,9 +686,10 @@ static void constructor1(void)
}
__attribute__((constructor))
-static void constructor2(void)
+static void constructor2(int argc, char **argv, char **envp)
{
- constructor_test_value *= 2;
+ if (argc && argv && envp)
+ constructor_test_value *= 2;
}
int run_startup(int min, int max)
diff --git a/tools/testing/selftests/nolibc/run-tests.sh b/tools/testing/selftests/nolibc/run-tests.sh
index 0446e6326a40..e7ecda4ae796 100755
--- a/tools/testing/selftests/nolibc/run-tests.sh
+++ b/tools/testing/selftests/nolibc/run-tests.sh
@@ -15,10 +15,11 @@ download_location="${cache_dir}/crosstools/"
build_location="$(realpath "${cache_dir}"/nolibc-tests/)"
perform_download=0
test_mode=system
-CFLAGS_EXTRA="-Werror"
+werror=1
+llvm=
archs="i386 x86_64 arm64 arm mips32le mips32be ppc ppc64 ppc64le riscv s390 loongarch"
-TEMP=$(getopt -o 'j:d:c:b:a:m:peh' -n "$0" -- "$@")
+TEMP=$(getopt -o 'j:d:c:b:a:m:pelh' -n "$0" -- "$@")
eval set -- "$TEMP"
unset TEMP
@@ -42,6 +43,7 @@ Options:
-b [DIR] Build location (default: ${build_location})
-m [MODE] Test mode user/system (default: ${test_mode})
-e Disable -Werror
+ -l Build with LLVM/clang
EOF
}
@@ -69,7 +71,10 @@ while true; do
test_mode="$2"
shift 2; continue ;;
'-e')
- CFLAGS_EXTRA=""
+ werror=0
+ shift; continue ;;
+ '-l')
+ llvm=1
shift; continue ;;
'-h')
print_usage
@@ -140,7 +145,10 @@ test_arch() {
ct_abi=$(crosstool_abi "$1")
cross_compile=$(realpath "${download_location}gcc-${crosstool_version}-nolibc/${ct_arch}-${ct_abi}/bin/${ct_arch}-${ct_abi}-")
build_dir="${build_location}/${arch}"
- MAKE=(make -j"${nproc}" XARCH="${arch}" CROSS_COMPILE="${cross_compile}" O="${build_dir}")
+ if [ "$werror" -ne 0 ]; then
+ CFLAGS_EXTRA="$CFLAGS_EXTRA -Werror"
+ fi
+ MAKE=(make -j"${nproc}" XARCH="${arch}" CROSS_COMPILE="${cross_compile}" LLVM="${llvm}" O="${build_dir}")
mkdir -p "$build_dir"
if [ "$test_mode" = "system" ] && [ ! -f "${build_dir}/.config" ]; then
diff --git a/tools/testing/selftests/powerpc/benchmarks/Makefile b/tools/testing/selftests/powerpc/benchmarks/Makefile
index 1321922038d0..ca4483c238b9 100644
--- a/tools/testing/selftests/powerpc/benchmarks/Makefile
+++ b/tools/testing/selftests/powerpc/benchmarks/Makefile
@@ -18,4 +18,4 @@ $(OUTPUT)/context_switch: LDLIBS += -lpthread
$(OUTPUT)/fork: LDLIBS += -lpthread
-$(OUTPUT)/exec_target: CFLAGS += -static -nostartfiles
+$(OUTPUT)/exec_target: CFLAGS += -nostartfiles
diff --git a/tools/testing/selftests/powerpc/benchmarks/exec_target.c b/tools/testing/selftests/powerpc/benchmarks/exec_target.c
index c14b0fc1edde..a6408d3f26cd 100644
--- a/tools/testing/selftests/powerpc/benchmarks/exec_target.c
+++ b/tools/testing/selftests/powerpc/benchmarks/exec_target.c
@@ -7,10 +7,22 @@
*/
#define _GNU_SOURCE
-#include <unistd.h>
#include <sys/syscall.h>
void _start(void)
{
- syscall(SYS_exit, 0);
+ asm volatile (
+ "li %%r0, %[sys_exit];"
+ "li %%r3, 0;"
+ "sc;"
+ :
+ : [sys_exit] "i" (SYS_exit)
+ /*
+ * "sc" will clobber r0, r3-r13, cr0, ctr, xer and memory.
+ * Even though sys_exit never returns, handle clobber
+ * registers.
+ */
+ : "r0", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
+ "r11", "r12", "r13", "cr0", "ctr", "xer", "memory"
+ );
}
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index b33cd8753689..ad79784e552d 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -68,6 +68,8 @@ config_override_param "--gdb options" KcList "$TORTURE_KCONFIG_GDB_ARG"
config_override_param "--kasan options" KcList "$TORTURE_KCONFIG_KASAN_ARG"
config_override_param "--kcsan options" KcList "$TORTURE_KCONFIG_KCSAN_ARG"
config_override_param "--kconfig argument" KcList "$TORTURE_KCONFIG_ARG"
+config_override_param "$config_dir/CFcommon.$(uname -m)" KcList \
+ "`cat $config_dir/CFcommon.$(uname -m) 2> /dev/null`"
cp $T/KcList $resdir/ConfigFragment
base_resdir=`echo $resdir | sed -e 's/\.[0-9]\+$//'`
diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index 990d24696fd3..0447c4a00cc4 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -19,10 +19,10 @@ PATH=${RCUTORTURE}/bin:$PATH; export PATH
TORTURE_ALLOTED_CPUS="`identify_qemu_vcpus`"
MAKE_ALLOTED_CPUS=$((TORTURE_ALLOTED_CPUS*2))
-HALF_ALLOTED_CPUS=$((TORTURE_ALLOTED_CPUS/2))
-if test "$HALF_ALLOTED_CPUS" -lt 1
+SCALE_ALLOTED_CPUS=$((TORTURE_ALLOTED_CPUS/2))
+if test "$SCALE_ALLOTED_CPUS" -lt 1
then
- HALF_ALLOTED_CPUS=1
+ SCALE_ALLOTED_CPUS=1
fi
VERBOSE_BATCH_CPUS=$((TORTURE_ALLOTED_CPUS/16))
if test "$VERBOSE_BATCH_CPUS" -lt 2
@@ -90,6 +90,7 @@ usage () {
echo " --do-scftorture / --do-no-scftorture / --no-scftorture"
echo " --do-srcu-lockdep / --do-no-srcu-lockdep / --no-srcu-lockdep"
echo " --duration [ <minutes> | <hours>h | <days>d ]"
+ echo " --guest-cpu-limit N"
echo " --kcsan-kmake-arg kernel-make-arguments"
exit 1
}
@@ -203,6 +204,21 @@ do
duration_base=$(($ts*mult))
shift
;;
+ --guest-cpu-limit|--guest-cpu-lim)
+ checkarg --guest-cpu-limit "(number)" "$#" "$2" '^[0-9]*$' '^--'
+ if (("$2" <= "$TORTURE_ALLOTED_CPUS" / 2))
+ then
+ SCALE_ALLOTED_CPUS="$2"
+ VERBOSE_BATCH_CPUS="$((SCALE_ALLOTED_CPUS/8))"
+ if (("$VERBOSE_BATCH_CPUS" < 2))
+ then
+ VERBOSE_BATCH_CPUS=0
+ fi
+ else
+ echo "Ignoring value of $2 for --guest-cpu-limit which is greater than (("$TORTURE_ALLOTED_CPUS" / 2))."
+ fi
+ shift
+ ;;
--kcsan-kmake-arg|--kcsan-kmake-args)
checkarg --kcsan-kmake-arg "(kernel make arguments)" $# "$2" '.*' '^error$'
kcsan_kmake_args="`echo "$kcsan_kmake_args $2" | sed -e 's/^ *//' -e 's/ *$//'`"
@@ -425,9 +441,9 @@ fi
if test "$do_scftorture" = "yes"
then
# Scale memory based on the number of CPUs.
- scfmem=$((3+HALF_ALLOTED_CPUS/16))
- torture_bootargs="scftorture.nthreads=$HALF_ALLOTED_CPUS torture.disable_onoff_at_boot csdlock_debug=1"
- torture_set "scftorture" tools/testing/selftests/rcutorture/bin/kvm.sh --torture scf --allcpus --duration "$duration_scftorture" --configs "$configs_scftorture" --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory ${scfmem}G --trust-make
+ scfmem=$((3+SCALE_ALLOTED_CPUS/16))
+ torture_bootargs="scftorture.nthreads=$SCALE_ALLOTED_CPUS torture.disable_onoff_at_boot csdlock_debug=1"
+ torture_set "scftorture" tools/testing/selftests/rcutorture/bin/kvm.sh --torture scf --allcpus --duration "$duration_scftorture" --configs "$configs_scftorture" --kconfig "CONFIG_NR_CPUS=$SCALE_ALLOTED_CPUS" --memory ${scfmem}G --trust-make
fi
if test "$do_rt" = "yes"
@@ -471,8 +487,8 @@ for prim in $primlist
do
if test -n "$firsttime"
then
- torture_bootargs="refscale.scale_type="$prim" refscale.nreaders=$HALF_ALLOTED_CPUS refscale.loops=10000 refscale.holdoff=20 torture.disable_onoff_at_boot"
- torture_set "refscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture refscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --bootargs "refscale.verbose_batched=$VERBOSE_BATCH_CPUS torture.verbose_sleep_frequency=8 torture.verbose_sleep_duration=$VERBOSE_BATCH_CPUS" --trust-make
+ torture_bootargs="refscale.scale_type="$prim" refscale.nreaders=$SCALE_ALLOTED_CPUS refscale.loops=10000 refscale.holdoff=20 torture.disable_onoff_at_boot"
+ torture_set "refscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture refscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$SCALE_ALLOTED_CPUS" --bootargs "refscale.verbose_batched=$VERBOSE_BATCH_CPUS torture.verbose_sleep_frequency=8 torture.verbose_sleep_duration=$VERBOSE_BATCH_CPUS" --trust-make
mv $T/last-resdir-nodebug $T/first-resdir-nodebug || :
if test -f "$T/last-resdir-kasan"
then
@@ -520,8 +536,8 @@ for prim in $primlist
do
if test -n "$firsttime"
then
- torture_bootargs="rcuscale.scale_type="$prim" rcuscale.nwriters=$HALF_ALLOTED_CPUS rcuscale.holdoff=20 torture.disable_onoff_at_boot"
- torture_set "rcuscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --trust-make
+ torture_bootargs="rcuscale.scale_type="$prim" rcuscale.nwriters=$SCALE_ALLOTED_CPUS rcuscale.holdoff=20 torture.disable_onoff_at_boot"
+ torture_set "rcuscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$SCALE_ALLOTED_CPUS" --trust-make
mv $T/last-resdir-nodebug $T/first-resdir-nodebug || :
if test -f "$T/last-resdir-kasan"
then
@@ -559,7 +575,7 @@ do_kcsan="$do_kcsan_save"
if test "$do_kvfree" = "yes"
then
torture_bootargs="rcuscale.kfree_rcu_test=1 rcuscale.kfree_nthreads=16 rcuscale.holdoff=20 rcuscale.kfree_loops=10000 torture.disable_onoff_at_boot"
- torture_set "rcuscale-kvfree" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration $duration_rcutorture --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 2G --trust-make
+ torture_set "rcuscale-kvfree" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration $duration_rcutorture --kconfig "CONFIG_NR_CPUS=$SCALE_ALLOTED_CPUS" --memory 2G --trust-make
fi
if test "$do_clocksourcewd" = "yes"
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon
index 0e92d85313aa..217597e84905 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon
+++ b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon
@@ -1,7 +1,5 @@
CONFIG_RCU_TORTURE_TEST=y
CONFIG_PRINTK_TIME=y
-CONFIG_HYPERVISOR_GUEST=y
CONFIG_PARAVIRT=y
-CONFIG_KVM_GUEST=y
CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n
CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon.i686 b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon.i686
new file mode 100644
index 000000000000..d8b2f555686f
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon.i686
@@ -0,0 +1,2 @@
+CONFIG_HYPERVISOR_GUEST=y
+CONFIG_KVM_GUEST=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon.ppc64le b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon.ppc64le
new file mode 100644
index 000000000000..133da04247ee
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon.ppc64le
@@ -0,0 +1 @@
+CONFIG_KVM_GUEST=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon.x86_64 b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon.x86_64
new file mode 100644
index 000000000000..d8b2f555686f
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon.x86_64
@@ -0,0 +1,2 @@
+CONFIG_HYPERVISOR_GUEST=y
+CONFIG_KVM_GUEST=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot
index 979edbf4c820..55ce305b2a3d 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot
@@ -2,3 +2,4 @@ nohz_full=2-9
rcutorture.stall_cpu=14
rcutorture.stall_cpu_holdoff=90
rcutorture.fwd_progress=0
+rcutree.nohz_full_patience_delay=1000
diff --git a/tools/testing/selftests/rcutorture/configs/refscale/TINY b/tools/testing/selftests/rcutorture/configs/refscale/TINY
new file mode 100644
index 000000000000..759343980b80
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/refscale/TINY
@@ -0,0 +1,20 @@
+CONFIG_SMP=n
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
+#CHECK#CONFIG_PREEMPT_RCU=n
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_HOTPLUG_CPU=n
+CONFIG_SUSPEND=n
+CONFIG_HIBERNATION=n
+CONFIG_RCU_NOCB_CPU=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_PROVE_LOCKING=n
+CONFIG_RCU_BOOST=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
+CONFIG_KPROBES=n
+CONFIG_FTRACE=n
diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c
index 742782438ca3..94cfdba5308d 100644
--- a/tools/testing/selftests/resctrl/cat_test.c
+++ b/tools/testing/selftests/resctrl/cat_test.c
@@ -290,12 +290,12 @@ static int cat_run_test(const struct resctrl_test *test, const struct user_param
static bool arch_supports_noncont_cat(const struct resctrl_test *test)
{
- unsigned int eax, ebx, ecx, edx;
-
/* AMD always supports non-contiguous CBM. */
if (get_vendor() == ARCH_AMD)
return true;
+#if defined(__i386__) || defined(__x86_64__) /* arch */
+ unsigned int eax, ebx, ecx, edx;
/* Intel support for non-contiguous CBM needs to be discovered. */
if (!strcmp(test->resource, "L3"))
__cpuid_count(0x10, 1, eax, ebx, ecx, edx);
@@ -305,6 +305,9 @@ static bool arch_supports_noncont_cat(const struct resctrl_test *test)
return false;
return ((ecx >> 3) & 1);
+#endif /* end arch */
+
+ return false;
}
static int noncont_cat_run_test(const struct resctrl_test *test,
diff --git a/tools/testing/selftests/riscv/mm/mmap_bottomup.c b/tools/testing/selftests/riscv/mm/mmap_bottomup.c
index 7f7d3eb8b9c9..f9ccae50349b 100644
--- a/tools/testing/selftests/riscv/mm/mmap_bottomup.c
+++ b/tools/testing/selftests/riscv/mm/mmap_bottomup.c
@@ -7,8 +7,6 @@
TEST(infinite_rlimit)
{
EXPECT_EQ(BOTTOM_UP, memory_layout());
-
- TEST_MMAPS;
}
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/riscv/mm/mmap_default.c b/tools/testing/selftests/riscv/mm/mmap_default.c
index 2ba3ec990006..3f53b6ecc326 100644
--- a/tools/testing/selftests/riscv/mm/mmap_default.c
+++ b/tools/testing/selftests/riscv/mm/mmap_default.c
@@ -7,8 +7,6 @@
TEST(default_rlimit)
{
EXPECT_EQ(TOP_DOWN, memory_layout());
-
- TEST_MMAPS;
}
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/riscv/mm/mmap_test.h b/tools/testing/selftests/riscv/mm/mmap_test.h
index 3b29ca3bb3d4..75918d15919f 100644
--- a/tools/testing/selftests/riscv/mm/mmap_test.h
+++ b/tools/testing/selftests/riscv/mm/mmap_test.h
@@ -10,76 +10,9 @@
#define TOP_DOWN 0
#define BOTTOM_UP 1
-#if __riscv_xlen == 64
-uint64_t random_addresses[] = {
- 0x19764f0d73b3a9f0, 0x016049584cecef59, 0x3580bdd3562f4acd,
- 0x1164219f20b17da0, 0x07d97fcb40ff2373, 0x76ec528921272ee7,
- 0x4dd48c38a3de3f70, 0x2e11415055f6997d, 0x14b43334ac476c02,
- 0x375a60795aff19f6, 0x47f3051725b8ee1a, 0x4e697cf240494a9f,
- 0x456b59b5c2f9e9d1, 0x101724379d63cb96, 0x7fe9ad31619528c1,
- 0x2f417247c495c2ea, 0x329a5a5b82943a5e, 0x06d7a9d6adcd3827,
- 0x327b0b9ee37f62d5, 0x17c7b1851dfd9b76, 0x006ebb6456ec2cd9,
- 0x00836cd14146a134, 0x00e5c4dcde7126db, 0x004c29feadf75753,
- 0x00d8b20149ed930c, 0x00d71574c269387a, 0x0006ebe4a82acb7a,
- 0x0016135df51f471b, 0x00758bdb55455160, 0x00d0bdd949b13b32,
- 0x00ecea01e7c5f54b, 0x00e37b071b9948b1, 0x0011fdd00ff57ab3,
- 0x00e407294b52f5ea, 0x00567748c200ed20, 0x000d073084651046,
- 0x00ac896f4365463c, 0x00eb0d49a0b26216, 0x0066a2564a982a31,
- 0x002e0d20237784ae, 0x0000554ff8a77a76, 0x00006ce07a54c012,
- 0x000009570516d799, 0x00000954ca15b84d, 0x0000684f0d453379,
- 0x00002ae5816302b5, 0x0000042403fb54bf, 0x00004bad7392bf30,
- 0x00003e73bfa4b5e3, 0x00005442c29978e0, 0x00002803f11286b6,
- 0x000073875d745fc6, 0x00007cede9cb8240, 0x000027df84cc6a4f,
- 0x00006d7e0e74242a, 0x00004afd0b836e02, 0x000047d0e837cd82,
- 0x00003b42405efeda, 0x00001531bafa4c95, 0x00007172cae34ac4,
-};
-#else
-uint32_t random_addresses[] = {
- 0x8dc302e0, 0x929ab1e0, 0xb47683ba, 0xea519c73, 0xa19f1c90, 0xc49ba213,
- 0x8f57c625, 0xadfe5137, 0x874d4d95, 0xaa20f09d, 0xcf21ebfc, 0xda7737f1,
- 0xcedf392a, 0x83026c14, 0xccedca52, 0xc6ccf826, 0xe0cd9415, 0x997472ca,
- 0xa21a44c1, 0xe82196f5, 0xa23fd66b, 0xc28d5590, 0xd009cdce, 0xcf0be646,
- 0x8fc8c7ff, 0xe2a85984, 0xa3d3236b, 0x89a0619d, 0xc03db924, 0xb5d4cc1b,
- 0xb96ee04c, 0xd191da48, 0xb432a000, 0xaa2bebbc, 0xa2fcb289, 0xb0cca89b,
- 0xb0c18d6a, 0x88f58deb, 0xa4d42d1c, 0xe4d74e86, 0x99902b09, 0x8f786d31,
- 0xbec5e381, 0x9a727e65, 0xa9a65040, 0xa880d789, 0x8f1b335e, 0xfc821c1e,
- 0x97e34be4, 0xbbef84ed, 0xf447d197, 0xfd7ceee2, 0xe632348d, 0xee4590f4,
- 0x958992a5, 0xd57e05d6, 0xfd240970, 0xc5b0dcff, 0xd96da2c2, 0xa7ae041d,
-};
-#endif
-
-// Only works on 64 bit
-#if __riscv_xlen == 64
#define PROT (PROT_READ | PROT_WRITE)
#define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS)
-/* mmap must return a value that doesn't use more bits than the hint address. */
-static inline unsigned long get_max_value(unsigned long input)
-{
- unsigned long max_bit = (1UL << (((sizeof(unsigned long) * 8) - 1 -
- __builtin_clzl(input))));
-
- return max_bit + (max_bit - 1);
-}
-
-#define TEST_MMAPS \
- ({ \
- void *mmap_addr; \
- for (int i = 0; i < ARRAY_SIZE(random_addresses); i++) { \
- mmap_addr = mmap((void *)random_addresses[i], \
- 5 * sizeof(int), PROT, FLAGS, 0, 0); \
- EXPECT_NE(MAP_FAILED, mmap_addr); \
- EXPECT_GE((void *)get_max_value(random_addresses[i]), \
- mmap_addr); \
- mmap_addr = mmap((void *)random_addresses[i], \
- 5 * sizeof(int), PROT, FLAGS, 0, 0); \
- EXPECT_NE(MAP_FAILED, mmap_addr); \
- EXPECT_GE((void *)get_max_value(random_addresses[i]), \
- mmap_addr); \
- } \
- })
-#endif /* __riscv_xlen == 64 */
-
static inline int memory_layout(void)
{
void *value1 = mmap(NULL, sizeof(int), PROT, FLAGS, 0, 0);
diff --git a/tools/testing/selftests/rtc/rtctest.c b/tools/testing/selftests/rtc/rtctest.c
index 63ce02d1d5cc..9647b14b47c5 100644
--- a/tools/testing/selftests/rtc/rtctest.c
+++ b/tools/testing/selftests/rtc/rtctest.c
@@ -410,13 +410,6 @@ TEST_F_TIMEOUT(rtc, alarm_wkalm_set_minute, 65) {
ASSERT_EQ(new, secs);
}
-static void __attribute__((constructor))
-__constructor_order_last(void)
-{
- if (!__constructor_order)
- __constructor_order = _CONSTRUCTOR_ORDER_BACKWARD;
-}
-
int main(int argc, char **argv)
{
switch (argc) {
diff --git a/tools/testing/selftests/rust/config b/tools/testing/selftests/rust/config
index b4002acd40bc..5f942b5c8c17 100644
--- a/tools/testing/selftests/rust/config
+++ b/tools/testing/selftests/rust/config
@@ -1,5 +1,6 @@
+# CONFIG_GCC_PLUGINS is not set
CONFIG_RUST=y
CONFIG_SAMPLES=y
CONFIG_SAMPLES_RUST=y
CONFIG_SAMPLE_RUST_MINIMAL=m
-CONFIG_SAMPLE_RUST_PRINT=m \ No newline at end of file
+CONFIG_SAMPLE_RUST_PRINT=m
diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py
index ee349187636f..4f255cec0c22 100755
--- a/tools/testing/selftests/tc-testing/tdc.py
+++ b/tools/testing/selftests/tc-testing/tdc.py
@@ -143,7 +143,6 @@ class PluginMgr:
except Exception as ee:
print('exception {} in call to pre_case for {} plugin'.
format(ee, pgn_inst.__class__))
- print('test_ordinal is {}'.format(test_ordinal))
print('testid is {}'.format(caseinfo['id']))
raise
diff --git a/tools/testing/selftests/timers/change_skew.c b/tools/testing/selftests/timers/change_skew.c
index 4421cd562c24..18e794a46c23 100644
--- a/tools/testing/selftests/timers/change_skew.c
+++ b/tools/testing/selftests/timers/change_skew.c
@@ -30,9 +30,6 @@
#include <time.h>
#include "../kselftest.h"
-#define NSEC_PER_SEC 1000000000LL
-
-
int change_skew_test(int ppm)
{
struct timex tx;
diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c
index 07c81c0093c0..16bd49492efa 100644
--- a/tools/testing/selftests/timers/posix_timers.c
+++ b/tools/testing/selftests/timers/posix_timers.c
@@ -6,10 +6,13 @@
*
* Kernel loop code stolen from Steven Rostedt <srostedt@redhat.com>
*/
-
+#define _GNU_SOURCE
#include <sys/time.h>
+#include <sys/types.h>
#include <stdio.h>
#include <signal.h>
+#include <stdint.h>
+#include <string.h>
#include <unistd.h>
#include <time.h>
#include <pthread.h>
@@ -18,6 +21,21 @@
#define DELAY 2
#define USECS_PER_SEC 1000000
+#define NSECS_PER_SEC 1000000000
+
+/*
+ * Abort the test run with a message built from the calling test's name
+ * (@test), an optional timer/clock @name, the failing operation @what and
+ * the text describing the current errno.
+ *
+ * strerror() is used instead of strerror_r(): this file defines
+ * _GNU_SOURCE, and the GNU strerror_r() returns the message pointer and
+ * may leave the caller supplied buffer untouched, so printing that buffer
+ * is not reliable.
+ */
+static void __fatal_error(const char *test, const char *name, const char *what)
+{
+	/* Snapshot errno before any other library call can change it */
+	int err = errno;
+	const char *msg = strerror(err);
+
+	if (name && strlen(name))
+		ksft_exit_fail_msg("%s %s %s %s\n", test, name, what, msg);
+	else
+		ksft_exit_fail_msg("%s %s %s\n", test, what, msg);
+}
+
+#define fatal_error(name, what) __fatal_error(__func__, name, what)
static volatile int done;
@@ -74,24 +92,13 @@ static int check_diff(struct timeval start, struct timeval end)
return 0;
}
-static int check_itimer(int which)
+static void check_itimer(int which, const char *name)
{
- const char *name;
- int err;
struct timeval start, end;
struct itimerval val = {
.it_value.tv_sec = DELAY,
};
- if (which == ITIMER_VIRTUAL)
- name = "ITIMER_VIRTUAL";
- else if (which == ITIMER_PROF)
- name = "ITIMER_PROF";
- else if (which == ITIMER_REAL)
- name = "ITIMER_REAL";
- else
- return -1;
-
done = 0;
if (which == ITIMER_VIRTUAL)
@@ -101,17 +108,11 @@ static int check_itimer(int which)
else if (which == ITIMER_REAL)
signal(SIGALRM, sig_handler);
- err = gettimeofday(&start, NULL);
- if (err < 0) {
- ksft_perror("Can't call gettimeofday()");
- return -1;
- }
+ if (gettimeofday(&start, NULL) < 0)
+ fatal_error(name, "gettimeofday()");
- err = setitimer(which, &val, NULL);
- if (err < 0) {
- ksft_perror("Can't set timer");
- return -1;
- }
+ if (setitimer(which, &val, NULL) < 0)
+ fatal_error(name, "setitimer()");
if (which == ITIMER_VIRTUAL)
user_loop();
@@ -120,68 +121,41 @@ static int check_itimer(int which)
else if (which == ITIMER_REAL)
idle_loop();
- err = gettimeofday(&end, NULL);
- if (err < 0) {
- ksft_perror("Can't call gettimeofday()");
- return -1;
- }
+ if (gettimeofday(&end, NULL) < 0)
+ fatal_error(name, "gettimeofday()");
ksft_test_result(check_diff(start, end) == 0, "%s\n", name);
-
- return 0;
}
-static int check_timer_create(int which)
+static void check_timer_create(int which, const char *name)
{
- const char *type;
- int err;
- timer_t id;
struct timeval start, end;
struct itimerspec val = {
.it_value.tv_sec = DELAY,
};
-
- if (which == CLOCK_THREAD_CPUTIME_ID) {
- type = "thread";
- } else if (which == CLOCK_PROCESS_CPUTIME_ID) {
- type = "process";
- } else {
- ksft_print_msg("Unknown timer_create() type %d\n", which);
- return -1;
- }
+ timer_t id;
done = 0;
- err = timer_create(which, NULL, &id);
- if (err < 0) {
- ksft_perror("Can't create timer");
- return -1;
- }
- signal(SIGALRM, sig_handler);
- err = gettimeofday(&start, NULL);
- if (err < 0) {
- ksft_perror("Can't call gettimeofday()");
- return -1;
- }
+ if (timer_create(which, NULL, &id) < 0)
+ fatal_error(name, "timer_create()");
- err = timer_settime(id, 0, &val, NULL);
- if (err < 0) {
- ksft_perror("Can't set timer");
- return -1;
- }
+ if (signal(SIGALRM, sig_handler) == SIG_ERR)
+ fatal_error(name, "signal()");
+
+ if (gettimeofday(&start, NULL) < 0)
+ fatal_error(name, "gettimeofday()");
+
+ if (timer_settime(id, 0, &val, NULL) < 0)
+ fatal_error(name, "timer_settime()");
user_loop();
- err = gettimeofday(&end, NULL);
- if (err < 0) {
- ksft_perror("Can't call gettimeofday()");
- return -1;
- }
+ if (gettimeofday(&end, NULL) < 0)
+ fatal_error(name, "gettimeofday()");
ksft_test_result(check_diff(start, end) == 0,
- "timer_create() per %s\n", type);
-
- return 0;
+ "timer_create() per %s\n", name);
}
static pthread_t ctd_thread;
@@ -209,15 +183,14 @@ static void *ctd_thread_func(void *arg)
ctd_count = 100;
if (timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id))
- return "Can't create timer\n";
+ fatal_error(NULL, "timer_create()");
if (timer_settime(id, 0, &val, NULL))
- return "Can't set timer\n";
-
+ fatal_error(NULL, "timer_settime()");
while (ctd_count > 0 && !ctd_failed)
;
if (timer_delete(id))
- return "Can't delete timer\n";
+ fatal_error(NULL, "timer_delete()");
return NULL;
}
@@ -225,19 +198,16 @@ static void *ctd_thread_func(void *arg)
/*
* Test that only the running thread receives the timer signal.
*/
-static int check_timer_distribution(void)
+static void check_timer_distribution(void)
{
- const char *errmsg;
+ if (signal(SIGALRM, ctd_sighandler) == SIG_ERR)
+ fatal_error(NULL, "signal()");
- signal(SIGALRM, ctd_sighandler);
-
- errmsg = "Can't create thread\n";
if (pthread_create(&ctd_thread, NULL, ctd_thread_func, NULL))
- goto err;
+ fatal_error(NULL, "pthread_create()");
- errmsg = "Can't join thread\n";
- if (pthread_join(ctd_thread, (void **)&errmsg) || errmsg)
- goto err;
+ if (pthread_join(ctd_thread, NULL))
+ fatal_error(NULL, "pthread_join()");
if (!ctd_failed)
ksft_test_result_pass("check signal distribution\n");
@@ -245,31 +215,399 @@ static int check_timer_distribution(void)
ksft_test_result_fail("check signal distribution\n");
else
ksft_test_result_skip("check signal distribution (old kernel)\n");
- return 0;
-err:
- ksft_print_msg("%s", errmsg);
- return -1;
+}
+
+/* Counters filled in by siginfo_handler() for one timer under test */
+struct tmrsig {
+	int signals;	/* handler invocations that carried this struct */
+	int overruns;	/* sum of si_overrun over those invocations */
+};
+
+/*
+ * SA_SIGINFO handler. When the signal carries a non-NULL si_ptr it is
+ * treated as a struct tmrsig: one signal is counted and the reported
+ * overrun count is accumulated. Signals without siginfo or without a
+ * pointer payload are ignored.
+ */
+static void siginfo_handler(int sig, siginfo_t *si, void *uc)
+{
+	struct tmrsig *counters;
+
+	if (!si)
+		return;
+
+	counters = si->si_ptr;
+	if (!counters)
+		return;
+
+	counters->signals++;
+	counters->overruns += si->si_overrun;
+}
+
+/*
+ * Helper thread for the SIGEV_THREAD_ID variant of check_sig_ign():
+ * blocks SIGUSR1, stores its own thread id through @arg (an
+ * unsigned int *) and then sleeps. The unblock below is only reached if
+ * the sleep is not cut short by the thread being cancelled.
+ */
+static void *ignore_thread(void *arg)
+{
+	unsigned int *tid_out = arg;
+	sigset_t blocked;
+
+	sigemptyset(&blocked);
+	sigaddset(&blocked, SIGUSR1);
+
+	if (sigprocmask(SIG_BLOCK, &blocked, NULL) != 0)
+		fatal_error(NULL, "sigprocmask(SIG_BLOCK)");
+
+	*tid_out = gettid();
+	sleep(100);
+
+	if (sigprocmask(SIG_UNBLOCK, &blocked, NULL) != 0)
+		fatal_error(NULL, "sigprocmask(SIG_UNBLOCK)");
+
+	return NULL;
+}
+
+/*
+ * Check timer signal handling while SIGUSR1 is temporarily set to SIG_IGN.
+ *
+ * @thread: 0 directs the timer signal at the process (SIGEV_SIGNAL),
+ *          non-zero directs it at a helper thread (SIGEV_THREAD_ID) which
+ *          keeps SIGUSR1 blocked.
+ *
+ * A 100ms periodic timer is started with the signal blocked and a handler
+ * installed. After one second the signal is set to SIG_IGN, after another
+ * second the handler is restored and after a third second the signal is
+ * unblocked. The test expects one delivery with 29 overruns for
+ * SIGEV_SIGNAL, and no delivery at all for SIGEV_THREAD_ID since the
+ * target thread is stopped before the signal is unblocked.
+ */
+static void check_sig_ign(int thread)
+{
+	struct tmrsig tsig = { };
+	struct itimerspec its;
+	unsigned int tid = 0;
+	struct sigaction sa;
+	struct sigevent sev;
+	pthread_t pthread;
+	timer_t timerid;
+	sigset_t set;
+
+	if (thread) {
+		if (pthread_create(&pthread, NULL, ignore_thread, &tid))
+			fatal_error(NULL, "pthread_create()");
+		/* Give the helper thread time to store its thread id in tid */
+		sleep(1);
+	}
+
+	sa.sa_flags = SA_SIGINFO;
+	sa.sa_sigaction = siginfo_handler;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(SIGUSR1, &sa, NULL))
+		fatal_error(NULL, "sigaction()");
+
+	/* Block the signal */
+	sigemptyset(&set);
+	sigaddset(&set, SIGUSR1);
+	if (sigprocmask(SIG_BLOCK, &set, NULL))
+		fatal_error(NULL, "sigprocmask(SIG_BLOCK)");
+
+	memset(&sev, 0, sizeof(sev));
+	sev.sigev_notify = SIGEV_SIGNAL;
+	sev.sigev_signo = SIGUSR1;
+	sev.sigev_value.sival_ptr = &tsig;
+	if (thread) {
+		sev.sigev_notify = SIGEV_THREAD_ID;
+		sev._sigev_un._tid = tid;
+	}
+
+	if (timer_create(CLOCK_MONOTONIC, &sev, &timerid))
+		fatal_error(NULL, "timer_create()");
+
+	/* Start the timer to expire in 100ms and 100ms intervals */
+	its.it_value.tv_sec = 0;
+	its.it_value.tv_nsec = 100000000;
+	its.it_interval.tv_sec = 0;
+	its.it_interval.tv_nsec = 100000000;
+	/* An unarmed timer would make the result checks below meaningless */
+	if (timer_settime(timerid, 0, &its, NULL))
+		fatal_error(NULL, "timer_settime()");
+
+	sleep(1);
+
+	/* Set the signal to be ignored */
+	if (signal(SIGUSR1, SIG_IGN) == SIG_ERR)
+		fatal_error(NULL, "signal(SIG_IGN)");
+
+	sleep(1);
+
+	if (thread) {
+		/* Stop the thread first. No signal should be delivered to it */
+		if (pthread_cancel(pthread))
+			fatal_error(NULL, "pthread_cancel()");
+		if (pthread_join(pthread, NULL))
+			fatal_error(NULL, "pthread_join()");
+	}
+
+	/* Restore the handler */
+	if (sigaction(SIGUSR1, &sa, NULL))
+		fatal_error(NULL, "sigaction()");
+
+	sleep(1);
+
+	/* Unblock it, which should deliver the signal in the !thread case */
+	if (sigprocmask(SIG_UNBLOCK, &set, NULL))
+		fatal_error(NULL, "sigprocmask(SIG_UNBLOCK)");
+
+	if (timer_delete(timerid))
+		fatal_error(NULL, "timer_delete()");
+
+	if (!thread) {
+		ksft_test_result(tsig.signals == 1 && tsig.overruns == 29,
+				 "check_sig_ign SIGEV_SIGNAL\n");
+	} else {
+		ksft_test_result(tsig.signals == 0 && tsig.overruns == 0,
+				 "check_sig_ign SIGEV_THREAD_ID\n");
+	}
+}
+
+/*
+ * Arm a 100ms periodic SIGUSR1 timer while the signal is blocked, let it
+ * run for one second, then reprogram it as a 10 second one-shot before
+ * unblocking the signal and deleting the timer.
+ * The test passes only if the handler never ran (tsig.signals == 0).
+ */
+static void check_rearm(void)
+{
+ struct tmrsig tsig = { };
+ struct itimerspec its;
+ struct sigaction sa;
+ struct sigevent sev;
+ timer_t timerid;
+ sigset_t set;
+
+ sa.sa_flags = SA_SIGINFO;
+ sa.sa_sigaction = siginfo_handler;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(SIGUSR1, &sa, NULL))
+ fatal_error(NULL, "sigaction()");
+
+ /* Block the signal */
+ sigemptyset(&set);
+ sigaddset(&set, SIGUSR1);
+ if (sigprocmask(SIG_BLOCK, &set, NULL))
+ fatal_error(NULL, "sigprocmask(SIG_BLOCK)");
+
+ memset(&sev, 0, sizeof(sev));
+ sev.sigev_notify = SIGEV_SIGNAL;
+ sev.sigev_signo = SIGUSR1;
+ sev.sigev_value.sival_ptr = &tsig;
+ if (timer_create(CLOCK_MONOTONIC, &sev, &timerid))
+ fatal_error(NULL, "timer_create()");
+
+ /* Start the timer to expire in 100ms and 100ms intervals */
+ its.it_value.tv_sec = 0;
+ its.it_value.tv_nsec = 100000000;
+ its.it_interval.tv_sec = 0;
+ its.it_interval.tv_nsec = 100000000;
+ if (timer_settime(timerid, 0, &its, NULL))
+ fatal_error(NULL, "timer_settime()");
+
+ sleep(1);
+
+ /* Reprogram the timer to single shot */
+ its.it_value.tv_sec = 10;
+ its.it_value.tv_nsec = 0;
+ its.it_interval.tv_sec = 0;
+ its.it_interval.tv_nsec = 0;
+ if (timer_settime(timerid, 0, &its, NULL))
+ fatal_error(NULL, "timer_settime()");
+
+ /* Unblock it, which should not deliver a signal */
+ if (sigprocmask(SIG_UNBLOCK, &set, NULL))
+ fatal_error(NULL, "sigprocmask(SIG_UNBLOCK)");
+
+ if (timer_delete(timerid))
+ fatal_error(NULL, "timer_delete()");
+
+ ksft_test_result(!tsig.signals, "check_rearm\n");
+}
+
+/*
+ * Arm a 100ms periodic SIGUSR1 timer while the signal is blocked, let it
+ * run for one second, delete the timer and only then unblock the signal.
+ * The test passes only if the handler never ran (tsig.signals == 0).
+ */
+static void check_delete(void)
+{
+ struct tmrsig tsig = { };
+ struct itimerspec its;
+ struct sigaction sa;
+ struct sigevent sev;
+ timer_t timerid;
+ sigset_t set;
+
+ sa.sa_flags = SA_SIGINFO;
+ sa.sa_sigaction = siginfo_handler;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(SIGUSR1, &sa, NULL))
+ fatal_error(NULL, "sigaction()");
+
+ /* Block the signal */
+ sigemptyset(&set);
+ sigaddset(&set, SIGUSR1);
+ if (sigprocmask(SIG_BLOCK, &set, NULL))
+ fatal_error(NULL, "sigprocmask(SIG_BLOCK)");
+
+ memset(&sev, 0, sizeof(sev));
+ sev.sigev_notify = SIGEV_SIGNAL;
+ sev.sigev_signo = SIGUSR1;
+ sev.sigev_value.sival_ptr = &tsig;
+ if (timer_create(CLOCK_MONOTONIC, &sev, &timerid))
+ fatal_error(NULL, "timer_create()");
+
+ /* Start the timer to expire in 100ms and 100ms intervals */
+ its.it_value.tv_sec = 0;
+ its.it_value.tv_nsec = 100000000;
+ its.it_interval.tv_sec = 0;
+ its.it_interval.tv_nsec = 100000000;
+ if (timer_settime(timerid, 0, &its, NULL))
+ fatal_error(NULL, "timer_settime()");
+
+ sleep(1);
+
+ if (timer_delete(timerid))
+ fatal_error(NULL, "timer_delete()");
+
+ /* Unblock it, which should not deliver a signal */
+ if (sigprocmask(SIG_UNBLOCK, &set, NULL))
+ fatal_error(NULL, "sigprocmask(SIG_UNBLOCK)");
+
+ ksft_test_result(!tsig.signals, "check_delete\n");
+}
+
+/*
+ * Return t1 - t2 in nanoseconds.
+ *
+ * Both fields are widened to int64_t before subtracting so that no
+ * seconds value is truncated to int and the subtraction cannot overflow
+ * a 32-bit intermediate.
+ */
+static inline int64_t calcdiff_ns(struct timespec t1, struct timespec t2)
+{
+	int64_t sec = (int64_t)t1.tv_sec - (int64_t)t2.tv_sec;
+	int64_t nsec = (int64_t)t1.tv_nsec - (int64_t)t2.tv_nsec;
+
+	return sec * NSECS_PER_SEC + nsec;
+}
+
+/*
+ * Check a SIGEV_NONE timer on clock @which (@name is used for reporting).
+ *
+ * A 100ms periodic timer is armed, the function spins until one second of
+ * that clock has elapsed and then reads the timer back. The test passes
+ * when timer_gettime() still reports a non-zero time until expiry.
+ */
+static void check_sigev_none(int which, const char *name)
+{
+	struct timespec start, now;
+	struct itimerspec its;
+	struct sigevent sev;
+	timer_t timerid;
+
+	memset(&sev, 0, sizeof(sev));
+	sev.sigev_notify = SIGEV_NONE;
+
+	if (timer_create(which, &sev, &timerid))
+		fatal_error(name, "timer_create()");
+
+	/* Start the timer to expire in 100ms and 100ms intervals */
+	its.it_value.tv_sec = 0;
+	its.it_value.tv_nsec = 100000000;
+	its.it_interval.tv_sec = 0;
+	its.it_interval.tv_nsec = 100000000;
+	/* A timer that failed to arm would read back as zero and look like a bug */
+	if (timer_settime(timerid, 0, &its, NULL))
+		fatal_error(name, "timer_settime()");
+
+	if (clock_gettime(which, &start))
+		fatal_error(name, "clock_gettime()");
+
+	do {
+		if (clock_gettime(which, &now))
+			fatal_error(name, "clock_gettime()");
+	} while (calcdiff_ns(now, start) < NSECS_PER_SEC);
+
+	if (timer_gettime(timerid, &its))
+		fatal_error(name, "timer_gettime()");
+
+	if (timer_delete(timerid))
+		fatal_error(name, "timer_delete()");
+
+	ksft_test_result(its.it_value.tv_sec || its.it_value.tv_nsec,
+			 "check_sigev_none %s\n", name);
+}
+
+/*
+ * With SIGUSR1 blocked, arm a 100ms periodic timer on clock @which and
+ * poll timer_gettime() until one second of that clock has elapsed. Every
+ * sample whose remaining nanoseconds are larger than in the previous
+ * sample is counted as a wrap; the test passes when more than one wrap
+ * was observed. @name is used for reporting only.
+ */
+static void check_gettime(int which, const char *name)
+{
+ struct itimerspec its, prev;
+ struct timespec start, now;
+ struct sigevent sev;
+ timer_t timerid;
+ int wraps = 0;
+ sigset_t set;
+
+ /* Block the signal */
+ sigemptyset(&set);
+ sigaddset(&set, SIGUSR1);
+ if (sigprocmask(SIG_BLOCK, &set, NULL))
+ fatal_error(name, "sigprocmask(SIG_BLOCK)");
+
+ memset(&sev, 0, sizeof(sev));
+ sev.sigev_notify = SIGEV_SIGNAL;
+ sev.sigev_signo = SIGUSR1;
+
+ if (timer_create(which, &sev, &timerid))
+ fatal_error(name, "timer_create()");
+
+ /* Start the timer to expire in 100ms and 100ms intervals */
+ its.it_value.tv_sec = 0;
+ its.it_value.tv_nsec = 100000000;
+ its.it_interval.tv_sec = 0;
+ its.it_interval.tv_nsec = 100000000;
+ if (timer_settime(timerid, 0, &its, NULL))
+ fatal_error(name, "timer_settime()");
+
+ if (timer_gettime(timerid, &prev))
+ fatal_error(name, "timer_gettime()");
+
+ if (clock_gettime(which, &start))
+ fatal_error(name, "clock_gettime()");
+
+ do {
+ if (clock_gettime(which, &now))
+ fatal_error(name, "clock_gettime()");
+ if (timer_gettime(timerid, &its))
+ fatal_error(name, "timer_gettime()");
+ if (its.it_value.tv_nsec > prev.it_value.tv_nsec)
+ wraps++;
+ prev = its;
+
+ } while (calcdiff_ns(now, start) < NSECS_PER_SEC);
+
+ if (timer_delete(timerid))
+ fatal_error(name, "timer_delete()");
+
+ ksft_test_result(wraps > 1, "check_gettime %s\n", name);
+}
+
+/*
+ * Arm a 100ms periodic SIGUSR1 timer on clock @which with the signal
+ * blocked, spin until one second of that clock has elapsed, then unblock
+ * the signal. The test expects exactly one handler invocation reporting
+ * nine overruns. @name is used for reporting only.
+ */
+static void check_overrun(int which, const char *name)
+{
+ struct timespec start, now;
+ struct tmrsig tsig = { };
+ struct itimerspec its;
+ struct sigaction sa;
+ struct sigevent sev;
+ timer_t timerid;
+ sigset_t set;
+
+ sa.sa_flags = SA_SIGINFO;
+ sa.sa_sigaction = siginfo_handler;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(SIGUSR1, &sa, NULL))
+ fatal_error(name, "sigaction()");
+
+ /* Block the signal */
+ sigemptyset(&set);
+ sigaddset(&set, SIGUSR1);
+ if (sigprocmask(SIG_BLOCK, &set, NULL))
+ fatal_error(name, "sigprocmask(SIG_BLOCK)");
+
+ memset(&sev, 0, sizeof(sev));
+ sev.sigev_notify = SIGEV_SIGNAL;
+ sev.sigev_signo = SIGUSR1;
+ sev.sigev_value.sival_ptr = &tsig;
+ if (timer_create(which, &sev, &timerid))
+ fatal_error(name, "timer_create()");
+
+ /* Start the timer to expire in 100ms and 100ms intervals */
+ its.it_value.tv_sec = 0;
+ its.it_value.tv_nsec = 100000000;
+ its.it_interval.tv_sec = 0;
+ its.it_interval.tv_nsec = 100000000;
+ if (timer_settime(timerid, 0, &its, NULL))
+ fatal_error(name, "timer_settime()");
+
+ if (clock_gettime(which, &start))
+ fatal_error(name, "clock_gettime()");
+
+ do {
+ if (clock_gettime(which, &now))
+ fatal_error(name, "clock_gettime()");
+ } while (calcdiff_ns(now, start) < NSECS_PER_SEC);
+
+ /* Unblock it, which should deliver a signal */
+ if (sigprocmask(SIG_UNBLOCK, &set, NULL))
+ fatal_error(name, "sigprocmask(SIG_UNBLOCK)");
+
+ if (timer_delete(timerid))
+ fatal_error(name, "timer_delete()");
+
+ ksft_test_result(tsig.signals == 1 && tsig.overruns == 9,
+ "check_overrun %s\n", name);
}
int main(int argc, char **argv)
{
ksft_print_header();
- ksft_set_plan(6);
+ ksft_set_plan(18);
ksft_print_msg("Testing posix timers. False negative may happen on CPU execution \n");
ksft_print_msg("based timers if other threads run on the CPU...\n");
- if (check_itimer(ITIMER_VIRTUAL) < 0)
- ksft_exit_fail();
-
- if (check_itimer(ITIMER_PROF) < 0)
- ksft_exit_fail();
-
- if (check_itimer(ITIMER_REAL) < 0)
- ksft_exit_fail();
-
- if (check_timer_create(CLOCK_THREAD_CPUTIME_ID) < 0)
- ksft_exit_fail();
+ check_itimer(ITIMER_VIRTUAL, "ITIMER_VIRTUAL");
+ check_itimer(ITIMER_PROF, "ITIMER_PROF");
+ check_itimer(ITIMER_REAL, "ITIMER_REAL");
+ check_timer_create(CLOCK_THREAD_CPUTIME_ID, "CLOCK_THREAD_CPUTIME_ID");
/*
* It's unfortunately hard to reliably test a timer expiration
@@ -280,11 +618,21 @@ int main(int argc, char **argv)
* to ensure true parallelism. So test only one thread until we
* find a better solution.
*/
- if (check_timer_create(CLOCK_PROCESS_CPUTIME_ID) < 0)
- ksft_exit_fail();
-
- if (check_timer_distribution() < 0)
- ksft_exit_fail();
+ check_timer_create(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID");
+ check_timer_distribution();
+
+ check_sig_ign(0);
+ check_sig_ign(1);
+ check_rearm();
+ check_delete();
+ check_sigev_none(CLOCK_MONOTONIC, "CLOCK_MONOTONIC");
+ check_sigev_none(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID");
+ check_gettime(CLOCK_MONOTONIC, "CLOCK_MONOTONIC");
+ check_gettime(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID");
+ check_gettime(CLOCK_THREAD_CPUTIME_ID, "CLOCK_THREAD_CPUTIME_ID");
+ check_overrun(CLOCK_MONOTONIC, "CLOCK_MONOTONIC");
+ check_overrun(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID");
+ check_overrun(CLOCK_THREAD_CPUTIME_ID, "CLOCK_THREAD_CPUTIME_ID");
ksft_finished();
}
diff --git a/tools/testing/selftests/timers/skew_consistency.c b/tools/testing/selftests/timers/skew_consistency.c
index c8e6bffe4e0a..83450145fe65 100644
--- a/tools/testing/selftests/timers/skew_consistency.c
+++ b/tools/testing/selftests/timers/skew_consistency.c
@@ -36,8 +36,6 @@
#include <sys/wait.h>
#include "../kselftest.h"
-#define NSEC_PER_SEC 1000000000LL
-
int main(int argc, char **argv)
{
struct timex tx;
diff --git a/tools/testing/selftests/timers/threadtest.c b/tools/testing/selftests/timers/threadtest.c
index 76b38e41d9c7..d5564bbf0e50 100644
--- a/tools/testing/selftests/timers/threadtest.c
+++ b/tools/testing/selftests/timers/threadtest.c
@@ -38,10 +38,10 @@ struct timespec global_list[LISTSIZE];
int listcount = 0;
-void checklist(struct timespec *list, int size)
+void checklist(const struct timespec *list, int size)
{
int i, j;
- struct timespec *a, *b;
+ const struct timespec *a, *b;
/* scan the list */
for (i = 0; i < size-1; i++) {
diff --git a/tools/testing/selftests/tpm2/test_async.sh b/tools/testing/selftests/tpm2/test_async.sh
index 43bf5bd772fd..cf5a9c826097 100755
--- a/tools/testing/selftests/tpm2/test_async.sh
+++ b/tools/testing/selftests/tpm2/test_async.sh
@@ -7,4 +7,4 @@ ksft_skip=4
[ -e /dev/tpm0 ] || exit $ksft_skip
[ -e /dev/tpmrm0 ] || exit $ksft_skip
-python3 -m unittest -v tpm2_tests.AsyncTest
+python3 -m unittest -v tpm2_tests.AsyncTest 2>&1
diff --git a/tools/testing/selftests/tpm2/test_smoke.sh b/tools/testing/selftests/tpm2/test_smoke.sh
index 58af963e5b55..20fa70f970a9 100755
--- a/tools/testing/selftests/tpm2/test_smoke.sh
+++ b/tools/testing/selftests/tpm2/test_smoke.sh
@@ -6,4 +6,4 @@ ksft_skip=4
[ -e /dev/tpm0 ] || exit $ksft_skip
-python3 -m unittest -v tpm2_tests.SmokeTest
+python3 -m unittest -v tpm2_tests.SmokeTest 2>&1
diff --git a/tools/testing/selftests/tpm2/test_space.sh b/tools/testing/selftests/tpm2/test_space.sh
index 04c47b13fe8a..93894cbc89a8 100755
--- a/tools/testing/selftests/tpm2/test_space.sh
+++ b/tools/testing/selftests/tpm2/test_space.sh
@@ -6,4 +6,4 @@ ksft_skip=4
[ -e /dev/tpmrm0 ] || exit $ksft_skip
-python3 -m unittest -v tpm2_tests.SpaceTest
+python3 -m unittest -v tpm2_tests.SpaceTest 2>&1
diff --git a/tools/testing/selftests/user/Makefile b/tools/testing/selftests/user/Makefile
deleted file mode 100644
index 640a40f9b72b..000000000000
--- a/tools/testing/selftests/user/Makefile
+++ /dev/null
@@ -1,9 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-# Makefile for user memory selftests
-
-# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
-all:
-
-TEST_PROGS := test_user_copy.sh
-
-include ../lib.mk
diff --git a/tools/testing/selftests/user/config b/tools/testing/selftests/user/config
deleted file mode 100644
index 784ed8416324..000000000000
--- a/tools/testing/selftests/user/config
+++ /dev/null
@@ -1 +0,0 @@
-CONFIG_TEST_USER_COPY=m
diff --git a/tools/testing/selftests/user/test_user_copy.sh b/tools/testing/selftests/user/test_user_copy.sh
deleted file mode 100755
index f9b31a57439b..000000000000
--- a/tools/testing/selftests/user/test_user_copy.sh
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-# Runs copy_to/from_user infrastructure using test_user_copy kernel module
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-
-if ! /sbin/modprobe -q -n test_user_copy; then
- echo "user: module test_user_copy is not found [SKIP]"
- exit $ksft_skip
-fi
-if /sbin/modprobe -q test_user_copy; then
- /sbin/modprobe -q -r test_user_copy
- echo "user_copy: ok"
-else
- echo "user_copy: [FAIL]"
- exit 1
-fi
diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
index 3de8e7e052ae..af9cedbf5357 100644
--- a/tools/testing/selftests/vDSO/Makefile
+++ b/tools/testing/selftests/vDSO/Makefile
@@ -1,7 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-uname_M := $(shell uname -m 2>/dev/null || echo not)
-ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
-SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null)
+include ../../../scripts/Makefile.arch
TEST_GEN_PROGS := vdso_test_gettimeofday
TEST_GEN_PROGS += vdso_test_getcpu
@@ -11,14 +9,12 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64))
TEST_GEN_PROGS += vdso_standalone_test_x86
endif
TEST_GEN_PROGS += vdso_test_correctness
-ifeq ($(uname_M),x86_64)
+ifeq ($(ARCH)$(CONFIG_X86_32),$(filter $(ARCH)$(CONFIG_X86_32),x86 x86_64 loongarch arm64 powerpc s390))
TEST_GEN_PROGS += vdso_test_getrandom
-ifneq ($(SODIUM),)
TEST_GEN_PROGS += vdso_test_chacha
endif
-endif
-CFLAGS := -std=gnu99
+CFLAGS := -std=gnu99 -O2
ifeq ($(CONFIG_X86_32),y)
LDLIBS += -lgcc_s
@@ -38,11 +34,12 @@ $(OUTPUT)/vdso_test_correctness: LDFLAGS += -ldl
$(OUTPUT)/vdso_test_getrandom: parse_vdso.c
$(OUTPUT)/vdso_test_getrandom: CFLAGS += -isystem $(top_srcdir)/tools/include \
+ $(KHDR_INCLUDES) \
-isystem $(top_srcdir)/include/uapi
-$(OUTPUT)/vdso_test_chacha: $(top_srcdir)/arch/$(ARCH)/entry/vdso/vgetrandom-chacha.S
+$(OUTPUT)/vdso_test_chacha: $(top_srcdir)/tools/arch/$(SRCARCH)/vdso/vgetrandom-chacha.S
$(OUTPUT)/vdso_test_chacha: CFLAGS += -idirafter $(top_srcdir)/tools/include \
- -isystem $(top_srcdir)/arch/$(ARCH)/include \
- -isystem $(top_srcdir)/include \
- -D__ASSEMBLY__ -DBULID_VDSO -DCONFIG_FUNCTION_ALIGNMENT=0 \
- -Wa,--noexecstack $(SODIUM)
+ -idirafter $(top_srcdir)/tools/include/generated \
+ -idirafter $(top_srcdir)/arch/$(SRCARCH)/include \
+ -idirafter $(top_srcdir)/include \
+ -D__ASSEMBLY__ -Wa,--noexecstack
diff --git a/tools/testing/selftests/vDSO/parse_vdso.c b/tools/testing/selftests/vDSO/parse_vdso.c
index 4ae417372e9e..7dd5668ea8a6 100644
--- a/tools/testing/selftests/vDSO/parse_vdso.c
+++ b/tools/testing/selftests/vDSO/parse_vdso.c
@@ -36,6 +36,12 @@
#define ELF_BITS_XFORM(bits, x) ELF_BITS_XFORM2(bits, x)
#define ELF(x) ELF_BITS_XFORM(ELF_BITS, x)
+#ifdef __s390x__
+#define ELF_HASH_ENTRY ELF(Xword)
+#else
+#define ELF_HASH_ENTRY ELF(Word)
+#endif
+
static struct vdso_info
{
bool valid;
@@ -47,8 +53,8 @@ static struct vdso_info
/* Symbol table */
ELF(Sym) *symtab;
const char *symstrings;
- ELF(Word) *bucket, *chain;
- ELF(Word) nbucket, nchain;
+ ELF_HASH_ENTRY *bucket, *chain;
+ ELF_HASH_ENTRY nbucket, nchain;
/* Version table */
ELF(Versym) *versym;
@@ -115,7 +121,7 @@ void vdso_init_from_sysinfo_ehdr(uintptr_t base)
/*
* Fish out the useful bits of the dynamic table.
*/
- ELF(Word) *hash = 0;
+ ELF_HASH_ENTRY *hash = 0;
vdso_info.symstrings = 0;
vdso_info.symtab = 0;
vdso_info.versym = 0;
@@ -133,7 +139,7 @@ void vdso_init_from_sysinfo_ehdr(uintptr_t base)
+ vdso_info.load_offset);
break;
case DT_HASH:
- hash = (ELF(Word) *)
+ hash = (ELF_HASH_ENTRY *)
((uintptr_t)dyn[i].d_un.d_ptr
+ vdso_info.load_offset);
break;
@@ -216,7 +222,8 @@ void *vdso_sym(const char *version, const char *name)
ELF(Sym) *sym = &vdso_info.symtab[chain];
/* Check for a defined global or weak function w/ right name. */
- if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
+ if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC &&
+ ELF64_ST_TYPE(sym->st_info) != STT_NOTYPE)
continue;
if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL &&
ELF64_ST_BIND(sym->st_info) != STB_WEAK)
diff --git a/tools/testing/selftests/vDSO/vdso_call.h b/tools/testing/selftests/vDSO/vdso_call.h
new file mode 100644
index 000000000000..bb237d771051
--- /dev/null
+++ b/tools/testing/selftests/vDSO/vdso_call.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Macro to call vDSO functions
+ *
+ * Copyright (C) 2024 Christophe Leroy <christophe.leroy@csgroup.eu>, CS GROUP France
+ */
+#ifndef __VDSO_CALL_H__
+#define __VDSO_CALL_H__
+
+#ifdef __powerpc__
+
+/*
+ * LOADARGS_<n>(fn, args...): store the function pointer in _r0 and the
+ * n arguments, cast to long, in _r3 onwards. These names are the
+ * register-pinned locals declared by VDSO_CALL() below, so the macros are
+ * only usable from inside it.
+ */
+#define LOADARGS_1(fn, __arg1) do { \
+ _r0 = fn; \
+ _r3 = (long)__arg1; \
+} while (0)
+
+#define LOADARGS_2(fn, __arg1, __arg2) do { \
+ _r0 = fn; \
+ _r3 = (long)__arg1; \
+ _r4 = (long)__arg2; \
+} while (0)
+
+#define LOADARGS_3(fn, __arg1, __arg2, __arg3) do { \
+ _r0 = fn; \
+ _r3 = (long)__arg1; \
+ _r4 = (long)__arg2; \
+ _r5 = (long)__arg3; \
+} while (0)
+
+#define LOADARGS_5(fn, __arg1, __arg2, __arg3, __arg4, __arg5) do { \
+ _r0 = fn; \
+ _r3 = (long)__arg1; \
+ _r4 = (long)__arg2; \
+ _r5 = (long)__arg3; \
+ _r6 = (long)__arg4; \
+ _r7 = (long)__arg5; \
+} while (0)
+
+/*
+ * VDSO_CALL(fn, nr, args...): call the vDSO function @fn with @nr
+ * arguments on powerpc and evaluate to the value left in r3.
+ *
+ * The asm moves @fn into CTR and branches to it with bctrl. On return,
+ * "bns+ 1f" skips the following instruction when the summary-overflow bit
+ * is clear; otherwise "neg 3, 3" negates r3.
+ * NOTE(review): this presumably turns a positive error code flagged via
+ * CR0[SO] into the negative value the C callers test for - confirm
+ * against the powerpc vDSO calling convention.
+ * Only LOADARGS_1, _2, _3 and _5 exist, so @nr must be one of those.
+ */
+#define VDSO_CALL(fn, nr, args...) ({ \
+ register void *_r0 asm ("r0"); \
+ register long _r3 asm ("r3"); \
+ register long _r4 asm ("r4"); \
+ register long _r5 asm ("r5"); \
+ register long _r6 asm ("r6"); \
+ register long _r7 asm ("r7"); \
+ register long _r8 asm ("r8"); \
+ register long _rval asm ("r3"); \
+ \
+ LOADARGS_##nr(fn, args); \
+ \
+ asm volatile( \
+ " mtctr %0\n" \
+ " bctrl\n" \
+ " bns+ 1f\n" \
+ " neg 3, 3\n" \
+ "1:" \
+ : "+r" (_r0), "=r" (_r3), "+r" (_r4), "+r" (_r5), \
+ "+r" (_r6), "+r" (_r7), "+r" (_r8) \
+ : "r" (_rval) \
+ : "r9", "r10", "r11", "r12", "cr0", "cr1", "cr5", \
+ "cr6", "cr7", "xer", "lr", "ctr", "memory" \
+ ); \
+ _rval; \
+})
+
+#else
+#define VDSO_CALL(fn, nr, args...) fn(args)
+#endif
+
+#endif
diff --git a/tools/testing/selftests/vDSO/vdso_config.h b/tools/testing/selftests/vDSO/vdso_config.h
index 7b543e7f04d7..722260f97561 100644
--- a/tools/testing/selftests/vDSO/vdso_config.h
+++ b/tools/testing/selftests/vDSO/vdso_config.h
@@ -18,18 +18,18 @@
#elif defined(__aarch64__)
#define VDSO_VERSION 3
#define VDSO_NAMES 0
-#elif defined(__powerpc__)
+#elif defined(__powerpc64__)
#define VDSO_VERSION 1
#define VDSO_NAMES 0
-#define VDSO_32BIT 1
-#elif defined(__powerpc64__)
+#elif defined(__powerpc__)
#define VDSO_VERSION 1
#define VDSO_NAMES 0
-#elif defined (__s390__)
+#define VDSO_32BIT 1
+#elif defined (__s390__) && !defined(__s390x__)
#define VDSO_VERSION 2
#define VDSO_NAMES 0
#define VDSO_32BIT 1
-#elif defined (__s390X__)
+#elif defined (__s390x__)
#define VDSO_VERSION 2
#define VDSO_NAMES 0
#elif defined(__mips__)
@@ -68,16 +68,15 @@ static const char *versions[7] = {
"LINUX_5.10"
};
-static const char *names[2][6] = {
+static const char *names[2][7] = {
{
"__kernel_gettimeofday",
"__kernel_clock_gettime",
"__kernel_time",
"__kernel_clock_getres",
"__kernel_getcpu",
-#if defined(VDSO_32BIT)
"__kernel_clock_gettime64",
-#endif
+ "__kernel_getrandom",
},
{
"__vdso_gettimeofday",
@@ -85,9 +84,8 @@ static const char *names[2][6] = {
"__vdso_time",
"__vdso_clock_getres",
"__vdso_getcpu",
-#if defined(VDSO_32BIT)
"__vdso_clock_gettime64",
-#endif
+ "__vdso_getrandom",
},
};
diff --git a/tools/testing/selftests/vDSO/vdso_test_abi.c b/tools/testing/selftests/vDSO/vdso_test_abi.c
index 96d32fd65b42..a54424e2336f 100644
--- a/tools/testing/selftests/vDSO/vdso_test_abi.c
+++ b/tools/testing/selftests/vDSO/vdso_test_abi.c
@@ -20,10 +20,8 @@
#include "../kselftest.h"
#include "vdso_config.h"
-
-extern void *vdso_sym(const char *version, const char *name);
-extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
-extern void vdso_init_from_auxv(void *auxv);
+#include "vdso_call.h"
+#include "parse_vdso.h"
static const char *version;
static const char **name;
@@ -61,7 +59,7 @@ static void vdso_test_gettimeofday(void)
}
struct timeval tv;
- long ret = vdso_gettimeofday(&tv, 0);
+ long ret = VDSO_CALL(vdso_gettimeofday, 2, &tv, 0);
if (ret == 0) {
ksft_print_msg("The time is %lld.%06lld\n",
@@ -86,7 +84,7 @@ static void vdso_test_clock_gettime(clockid_t clk_id)
}
struct timespec ts;
- long ret = vdso_clock_gettime(clk_id, &ts);
+ long ret = VDSO_CALL(vdso_clock_gettime, 2, clk_id, &ts);
if (ret == 0) {
ksft_print_msg("The time is %lld.%06lld\n",
@@ -111,7 +109,7 @@ static void vdso_test_time(void)
return;
}
- long ret = vdso_time(NULL);
+ long ret = VDSO_CALL(vdso_time, 1, NULL);
if (ret > 0) {
ksft_print_msg("The time in hours since January 1, 1970 is %lld\n",
@@ -138,7 +136,7 @@ static void vdso_test_clock_getres(clockid_t clk_id)
}
struct timespec ts, sys_ts;
- long ret = vdso_clock_getres(clk_id, &ts);
+ long ret = VDSO_CALL(vdso_clock_getres, 2, clk_id, &ts);
if (ret == 0) {
ksft_print_msg("The vdso resolution is %lld %lld\n",
diff --git a/tools/testing/selftests/vDSO/vdso_test_chacha.c b/tools/testing/selftests/vDSO/vdso_test_chacha.c
index e38f44e5f803..b1ea532c5996 100644
--- a/tools/testing/selftests/vDSO/vdso_test_chacha.c
+++ b/tools/testing/selftests/vDSO/vdso_test_chacha.c
@@ -3,23 +3,90 @@
* Copyright (C) 2022-2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
-#include <sodium/crypto_stream_chacha20.h>
+#include <tools/le_byteshift.h>
#include <sys/random.h>
+#include <sys/auxv.h>
#include <string.h>
#include <stdint.h>
+#include <stdbool.h>
#include "../kselftest.h"
-extern void __arch_chacha20_blocks_nostack(uint8_t *dst_bytes, const uint8_t *key, uint32_t *counter, size_t nblocks);
+/*
+ * Report whether the running CPU advertises the hwcap bit this test checks
+ * for on the current architecture: HWCAP_ASIMD on arm64, HWCAP_S390_VXRS on
+ * s390x. Every other architecture is treated as always capable.
+ */
+static bool cpu_has_capabilities(void)
+{
+#if defined(__aarch64__)
+	return getauxval(AT_HWCAP) & HWCAP_ASIMD;
+#elif defined(__s390x__)
+	return getauxval(AT_HWCAP) & HWCAP_S390_VXRS;
+#else
+	return true;
+#endif
+}
+
+/*
+ * Rotate the 32-bit @word left by @shift bits. Only the low five bits of
+ * @shift are used, so a shift of 0 or 32 returns @word unchanged.
+ */
+static uint32_t rol32(uint32_t word, unsigned int shift)
+{
+	const unsigned int left = shift & 31;
+	const unsigned int right = -shift & 31;
+
+	return (word << left) | (word >> right);
+}
+
+/*
+ * Plain C ChaCha20 block generator; main() compares its output against
+ * __arch_chacha20_blocks_nostack().
+ *
+ * @dst_bytes: receives @nblocks blocks of 16 little-endian 32-bit words
+ *             (64 bytes per block).
+ * @key:       eight 32-bit key words.
+ * @counter:   two 32-bit words holding the block counter, low word first;
+ *             updated to the value following the last generated block.
+ * @nblocks:   number of blocks to produce.
+ *
+ * State words 14 and 15 are always zero.
+ */
+static void reference_chacha20_blocks(uint8_t *dst_bytes, const uint32_t *key, uint32_t *counter, size_t nblocks)
+{
+	enum { STATE_WORDS = 16, ROUNDS = 20 };
+	uint32_t state[STATE_WORDS] = {
+		/* "expand 32-byte k" as four little-endian words. */
+		0x61707865U, 0x3320646eU, 0x79622d32U, 0x6b206574U,
+		key[0], key[1], key[2], key[3], key[4], key[5], key[6], key[7],
+		counter[0], counter[1], 0, 0
+	};
+	uint32_t work[STATE_WORDS];
+
+	/* Quarter round on four words of the working state. */
+	#define QR(a, b, c, d) ( \
+		work[a] += work[b], \
+		work[d] = rol32(work[d] ^ work[a], 16), \
+		work[c] += work[d], \
+		work[b] = rol32(work[b] ^ work[c], 12), \
+		work[a] += work[b], \
+		work[d] = rol32(work[d] ^ work[a], 8), \
+		work[c] += work[d], \
+		work[b] = rol32(work[b] ^ work[c], 7))
+
+	for (; nblocks; --nblocks) {
+		memcpy(work, state, sizeof(work));
+
+		/* Each pass performs one column round and one diagonal round. */
+		for (unsigned int round = 0; round < ROUNDS; round += 2) {
+			QR(0, 4, 8, 12);
+			QR(1, 5, 9, 13);
+			QR(2, 6, 10, 14);
+			QR(3, 7, 11, 15);
+
+			QR(0, 5, 10, 15);
+			QR(1, 6, 11, 12);
+			QR(2, 7, 8, 13);
+			QR(3, 4, 9, 14);
+		}
+
+		/* Feed-forward: add the input state and emit the block. */
+		for (unsigned int i = 0; i < STATE_WORDS; ++i) {
+			put_unaligned_le32(work[i] + state[i], dst_bytes);
+			dst_bytes += sizeof(uint32_t);
+		}
+
+		/* Carry from the low counter word into the high one on wrap. */
+		if (++state[12] == 0)
+			++state[13];
+	}
+
+	counter[0] = state[12];
+	counter[1] = state[13];
+}
+
+typedef uint8_t u8; /* kernel-style aliases; presumably required by <vdso/getrandom.h> below -- TODO confirm */
+typedef uint32_t u32;
+typedef uint64_t u64;
+#include <vdso/getrandom.h>
int main(int argc, char *argv[])
{
enum { TRIALS = 1000, BLOCKS = 128, BLOCK_SIZE = 64 };
- static const uint8_t nonce[8] = { 0 };
- uint32_t counter[2];
- uint8_t key[32];
+ uint32_t key[8], counter1[2], counter2[2];
uint8_t output1[BLOCK_SIZE * BLOCKS], output2[BLOCK_SIZE * BLOCKS];
ksft_print_header();
+ if (!cpu_has_capabilities())
+ ksft_exit_skip("Required CPU capabilities missing\n");
ksft_set_plan(1);
for (unsigned int trial = 0; trial < TRIALS; ++trial) {
@@ -27,17 +94,33 @@ int main(int argc, char *argv[])
printf("getrandom() failed!\n");
return KSFT_SKIP;
}
- crypto_stream_chacha20(output1, sizeof(output1), nonce, key);
+ memset(counter1, 0, sizeof(counter1));
+ reference_chacha20_blocks(output1, key, counter1, BLOCKS);
for (unsigned int split = 0; split < BLOCKS; ++split) {
memset(output2, 'X', sizeof(output2));
- memset(counter, 0, sizeof(counter));
+ memset(counter2, 0, sizeof(counter2));
if (split)
- __arch_chacha20_blocks_nostack(output2, key, counter, split);
- __arch_chacha20_blocks_nostack(output2 + split * BLOCK_SIZE, key, counter, BLOCKS - split);
- if (memcmp(output1, output2, sizeof(output1)))
+ __arch_chacha20_blocks_nostack(output2, key, counter2, split);
+ __arch_chacha20_blocks_nostack(output2 + split * BLOCK_SIZE, key, counter2, BLOCKS - split);
+ if (memcmp(output1, output2, sizeof(output1)) || memcmp(counter1, counter2, sizeof(counter1)))
return KSFT_FAIL;
}
}
+ memset(counter1, 0, sizeof(counter1));
+ counter1[0] = (uint32_t)-BLOCKS + 2;
+ memset(counter2, 0, sizeof(counter2));
+ counter2[0] = (uint32_t)-BLOCKS + 2;
+
+ reference_chacha20_blocks(output1, key, counter1, BLOCKS);
+ __arch_chacha20_blocks_nostack(output2, key, counter2, BLOCKS);
+ if (memcmp(output1, output2, sizeof(output1)) || memcmp(counter1, counter2, sizeof(counter1)))
+ return KSFT_FAIL;
+
+ reference_chacha20_blocks(output1, key, counter1, BLOCKS);
+ __arch_chacha20_blocks_nostack(output2, key, counter2, BLOCKS);
+ if (memcmp(output1, output2, sizeof(output1)) || memcmp(counter1, counter2, sizeof(counter1)))
+ return KSFT_FAIL;
+
ksft_test_result_pass("chacha: PASS\n");
return KSFT_PASS;
}
diff --git a/tools/testing/selftests/vDSO/vdso_test_correctness.c b/tools/testing/selftests/vDSO/vdso_test_correctness.c
index e691a3cf1491..5fb97ad67eea 100644
--- a/tools/testing/selftests/vDSO/vdso_test_correctness.c
+++ b/tools/testing/selftests/vDSO/vdso_test_correctness.c
@@ -20,6 +20,7 @@
#include <limits.h>
#include "vdso_config.h"
+#include "vdso_call.h"
#include "../kselftest.h"
static const char **name;
@@ -114,6 +115,12 @@ static void fill_function_pointers()
if (!vdso)
vdso = dlopen("linux-gate.so.1",
RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+ if (!vdso)
+ vdso = dlopen("linux-vdso32.so.1",
+ RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+ if (!vdso)
+ vdso = dlopen("linux-vdso64.so.1",
+ RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
if (!vdso) {
printf("[WARN]\tfailed to find vDSO\n");
return;
@@ -180,7 +187,7 @@ static void test_getcpu(void)
ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0);
if (vdso_getcpu)
- ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0);
+ ret_vdso = VDSO_CALL(vdso_getcpu, 3, &cpu_vdso, &node_vdso, 0);
if (vgetcpu)
ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0);
@@ -263,7 +270,7 @@ static void test_one_clock_gettime(int clock, const char *name)
if (sys_clock_gettime(clock, &start) < 0) {
if (errno == EINVAL) {
- vdso_ret = vdso_clock_gettime(clock, &vdso);
+ vdso_ret = VDSO_CALL(vdso_clock_gettime, 2, clock, &vdso);
if (vdso_ret == -EINVAL) {
printf("[OK]\tNo such clock.\n");
} else {
@@ -276,7 +283,7 @@ static void test_one_clock_gettime(int clock, const char *name)
return;
}
- vdso_ret = vdso_clock_gettime(clock, &vdso);
+ vdso_ret = VDSO_CALL(vdso_clock_gettime, 2, clock, &vdso);
end_ret = sys_clock_gettime(clock, &end);
if (vdso_ret != 0 || end_ret != 0) {
@@ -325,7 +332,7 @@ static void test_one_clock_gettime64(int clock, const char *name)
if (sys_clock_gettime64(clock, &start) < 0) {
if (errno == EINVAL) {
- vdso_ret = vdso_clock_gettime64(clock, &vdso);
+ vdso_ret = VDSO_CALL(vdso_clock_gettime64, 2, clock, &vdso);
if (vdso_ret == -EINVAL) {
printf("[OK]\tNo such clock.\n");
} else {
@@ -338,7 +345,7 @@ static void test_one_clock_gettime64(int clock, const char *name)
return;
}
- vdso_ret = vdso_clock_gettime64(clock, &vdso);
+ vdso_ret = VDSO_CALL(vdso_clock_gettime64, 2, clock, &vdso);
end_ret = sys_clock_gettime64(clock, &end);
if (vdso_ret != 0 || end_ret != 0) {
@@ -395,7 +402,7 @@ static void test_gettimeofday(void)
return;
}
- vdso_ret = vdso_gettimeofday(&vdso, &vdso_tz);
+ vdso_ret = VDSO_CALL(vdso_gettimeofday, 2, &vdso, &vdso_tz);
end_ret = sys_gettimeofday(&end, NULL);
if (vdso_ret != 0 || end_ret != 0) {
@@ -425,7 +432,7 @@ static void test_gettimeofday(void)
}
/* And make sure that passing NULL for tz doesn't crash. */
- vdso_gettimeofday(&vdso, NULL);
+ VDSO_CALL(vdso_gettimeofday, 2, &vdso, NULL);
}
int main(int argc, char **argv)
diff --git a/tools/testing/selftests/vDSO/vdso_test_getcpu.c b/tools/testing/selftests/vDSO/vdso_test_getcpu.c
index b758f68c6c9c..cdeaed45fb26 100644
--- a/tools/testing/selftests/vDSO/vdso_test_getcpu.c
+++ b/tools/testing/selftests/vDSO/vdso_test_getcpu.c
@@ -14,6 +14,7 @@
#include "../kselftest.h"
#include "parse_vdso.h"
#include "vdso_config.h"
+#include "vdso_call.h"
struct getcpu_cache;
typedef long (*getcpu_t)(unsigned int *, unsigned int *,
@@ -42,7 +43,7 @@ int main(int argc, char **argv)
return KSFT_SKIP;
}
- ret = get_cpu(&cpu, &node, 0);
+ ret = VDSO_CALL(get_cpu, 3, &cpu, &node, 0);
if (ret == 0) {
printf("Running on CPU %u node %u\n", cpu, node);
} else {
diff --git a/tools/testing/selftests/vDSO/vdso_test_getrandom.c b/tools/testing/selftests/vDSO/vdso_test_getrandom.c
index 05122425a873..72a1d9b43a84 100644
--- a/tools/testing/selftests/vDSO/vdso_test_getrandom.c
+++ b/tools/testing/selftests/vDSO/vdso_test_getrandom.c
@@ -16,11 +16,17 @@
#include <sys/mman.h>
#include <sys/random.h>
#include <sys/syscall.h>
+#include <sys/ptrace.h>
+#include <sys/wait.h>
#include <sys/types.h>
#include <linux/random.h>
+#include <linux/compiler.h>
+#include <linux/ptrace.h>
#include "../kselftest.h"
#include "parse_vdso.h"
+#include "vdso_config.h"
+#include "vdso_call.h"
#ifndef timespecsub
#define timespecsub(tsp, usp, vsp) \
@@ -38,50 +44,43 @@ static struct {
pthread_mutex_t lock;
void **states;
size_t len, cap;
-} grnd_allocator = {
- .lock = PTHREAD_MUTEX_INITIALIZER
-};
-
-static struct {
ssize_t(*fn)(void *, size_t, unsigned long, void *, size_t);
- pthread_key_t key;
- pthread_once_t initialized;
struct vgetrandom_opaque_params params;
-} grnd_ctx = {
- .initialized = PTHREAD_ONCE_INIT
+} vgrnd = {
+ .lock = PTHREAD_MUTEX_INITIALIZER
};
static void *vgetrandom_get_state(void)
{
void *state = NULL;
- pthread_mutex_lock(&grnd_allocator.lock);
- if (!grnd_allocator.len) {
+ pthread_mutex_lock(&vgrnd.lock);
+ if (!vgrnd.len) {
size_t page_size = getpagesize();
size_t new_cap;
size_t alloc_size, num = sysconf(_SC_NPROCESSORS_ONLN); /* Just a decent heuristic. */
void *new_block, *new_states;
- alloc_size = (num * grnd_ctx.params.size_of_opaque_state + page_size - 1) & (~(page_size - 1));
- num = (page_size / grnd_ctx.params.size_of_opaque_state) * (alloc_size / page_size);
- new_block = mmap(0, alloc_size, grnd_ctx.params.mmap_prot, grnd_ctx.params.mmap_flags, -1, 0);
+ alloc_size = (num * vgrnd.params.size_of_opaque_state + page_size - 1) & (~(page_size - 1));
+ num = (page_size / vgrnd.params.size_of_opaque_state) * (alloc_size / page_size);
+ new_block = mmap(0, alloc_size, vgrnd.params.mmap_prot, vgrnd.params.mmap_flags, -1, 0);
if (new_block == MAP_FAILED)
goto out;
- new_cap = grnd_allocator.cap + num;
- new_states = reallocarray(grnd_allocator.states, new_cap, sizeof(*grnd_allocator.states));
+ new_cap = vgrnd.cap + num;
+ new_states = reallocarray(vgrnd.states, new_cap, sizeof(*vgrnd.states));
if (!new_states)
goto unmap;
- grnd_allocator.cap = new_cap;
- grnd_allocator.states = new_states;
+ vgrnd.cap = new_cap;
+ vgrnd.states = new_states;
for (size_t i = 0; i < num; ++i) {
- if (((uintptr_t)new_block & (page_size - 1)) + grnd_ctx.params.size_of_opaque_state > page_size)
+ if (((uintptr_t)new_block & (page_size - 1)) + vgrnd.params.size_of_opaque_state > page_size)
new_block = (void *)(((uintptr_t)new_block + page_size - 1) & (~(page_size - 1)));
- grnd_allocator.states[i] = new_block;
- new_block += grnd_ctx.params.size_of_opaque_state;
+ vgrnd.states[i] = new_block;
+ new_block += vgrnd.params.size_of_opaque_state;
}
- grnd_allocator.len = num;
+ vgrnd.len = num;
goto success;
unmap:
@@ -89,10 +88,10 @@ static void *vgetrandom_get_state(void)
goto out;
}
success:
- state = grnd_allocator.states[--grnd_allocator.len];
+ state = vgrnd.states[--vgrnd.len];
out:
- pthread_mutex_unlock(&grnd_allocator.lock);
+ pthread_mutex_unlock(&vgrnd.lock);
return state;
}
@@ -100,27 +99,33 @@ static void vgetrandom_put_state(void *state)
{
if (!state)
return;
- pthread_mutex_lock(&grnd_allocator.lock);
- grnd_allocator.states[grnd_allocator.len++] = state;
- pthread_mutex_unlock(&grnd_allocator.lock);
+ pthread_mutex_lock(&vgrnd.lock);
+ vgrnd.states[vgrnd.len++] = state;
+ pthread_mutex_unlock(&vgrnd.lock);
}
static void vgetrandom_init(void)
{
- if (pthread_key_create(&grnd_ctx.key, vgetrandom_put_state) != 0)
- return;
+ const char *version = versions[VDSO_VERSION];
+ const char *name = names[VDSO_NAMES][6];
unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR);
+ size_t ret;
+
if (!sysinfo_ehdr) {
printf("AT_SYSINFO_EHDR is not present!\n");
exit(KSFT_SKIP);
}
vdso_init_from_sysinfo_ehdr(sysinfo_ehdr);
- grnd_ctx.fn = (__typeof__(grnd_ctx.fn))vdso_sym("LINUX_2.6", "__vdso_getrandom");
- if (!grnd_ctx.fn) {
- printf("__vdso_getrandom is missing!\n");
+ vgrnd.fn = (__typeof__(vgrnd.fn))vdso_sym(version, name);
+ if (!vgrnd.fn) {
+ printf("%s is missing!\n", name);
exit(KSFT_FAIL);
}
- if (grnd_ctx.fn(NULL, 0, 0, &grnd_ctx.params, ~0UL) != 0) {
+ ret = VDSO_CALL(vgrnd.fn, 5, NULL, 0, 0, &vgrnd.params, ~0UL);
+ if (ret == -ENOSYS) {
+ printf("unsupported architecture\n");
+ exit(KSFT_SKIP);
+ } else if (ret) {
printf("failed to fetch vgetrandom params!\n");
exit(KSFT_FAIL);
}
@@ -128,27 +133,21 @@ static void vgetrandom_init(void)
static ssize_t vgetrandom(void *buf, size_t len, unsigned long flags)
{
- void *state;
+ static __thread void *state;
- pthread_once(&grnd_ctx.initialized, vgetrandom_init);
- state = pthread_getspecific(grnd_ctx.key);
if (!state) {
state = vgetrandom_get_state();
- if (pthread_setspecific(grnd_ctx.key, state) != 0) {
- vgetrandom_put_state(state);
- state = NULL;
- }
if (!state) {
printf("vgetrandom_get_state failed!\n");
exit(KSFT_FAIL);
}
}
- return grnd_ctx.fn(buf, len, flags, state, grnd_ctx.params.size_of_opaque_state);
+ return VDSO_CALL(vgrnd.fn, 5, buf, len, flags, state, vgrnd.params.size_of_opaque_state);
}
enum { TRIALS = 25000000, THREADS = 256 };
-static void *test_vdso_getrandom(void *)
+static void *test_vdso_getrandom(void *ctx)
{
for (size_t i = 0; i < TRIALS; ++i) {
unsigned int val;
@@ -158,7 +157,7 @@ static void *test_vdso_getrandom(void *)
return NULL;
}
-static void *test_libc_getrandom(void *)
+static void *test_libc_getrandom(void *ctx)
{
for (size_t i = 0; i < TRIALS; ++i) {
unsigned int val;
@@ -168,7 +167,7 @@ static void *test_libc_getrandom(void *)
return NULL;
}
-static void *test_syscall_getrandom(void *)
+static void *test_syscall_getrandom(void *ctx)
{
for (size_t i = 0; i < TRIALS; ++i) {
unsigned int val;
@@ -244,9 +243,10 @@ static void fill(void)
static void kselftest(void)
{
uint8_t weird_size[1263];
+ pid_t child;
ksft_print_header();
- ksft_set_plan(1);
+ ksft_set_plan(2);
for (size_t i = 0; i < 1000; ++i) {
ssize_t ret = vgetrandom(weird_size, sizeof(weird_size), 0);
@@ -255,6 +255,42 @@ static void kselftest(void)
}
ksft_test_result_pass("getrandom: PASS\n");
+
+ unshare(CLONE_NEWUSER);
+ assert(unshare(CLONE_NEWTIME) == 0);
+ child = fork();
+ assert(child >= 0);
+ if (!child) {
+ vgetrandom_init();
+ child = getpid();
+ assert(ptrace(PTRACE_TRACEME, 0, NULL, NULL) == 0);
+ assert(kill(child, SIGSTOP) == 0);
+ assert(vgetrandom(weird_size, sizeof(weird_size), 0) == sizeof(weird_size));
+ _exit(0);
+ }
+ for (;;) {
+ struct ptrace_syscall_info info = { 0 };
+ int status, ret;
+ assert(waitpid(child, &status, 0) >= 0);
+ if (WIFEXITED(status)) {
+ if (WEXITSTATUS(status) != 0)
+ exit(KSFT_FAIL);
+ break;
+ }
+ assert(WIFSTOPPED(status));
+ if (WSTOPSIG(status) == SIGSTOP)
+ assert(ptrace(PTRACE_SETOPTIONS, child, 0, PTRACE_O_TRACESYSGOOD) == 0);
+ else if (WSTOPSIG(status) == (SIGTRAP | 0x80)) {
+ assert(ptrace(PTRACE_GET_SYSCALL_INFO, child, sizeof(info), &info) > 0);
+ if (info.op == PTRACE_SYSCALL_INFO_ENTRY && info.entry.nr == __NR_getrandom &&
+ info.entry.args[0] == (uintptr_t)weird_size && info.entry.args[1] == sizeof(weird_size))
+ exit(KSFT_FAIL);
+ }
+ assert(ptrace(PTRACE_SYSCALL, child, 0, 0) == 0);
+ }
+
+ ksft_test_result_pass("getrandom timens: PASS\n");
+
exit(KSFT_PASS);
}
@@ -265,6 +301,8 @@ static void usage(const char *argv0)
int main(int argc, char *argv[])
{
+ vgetrandom_init();
+
if (argc == 1) {
kselftest();
return 0;
diff --git a/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c b/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c
index ee4f1ca56a71..e31b18ffae33 100644
--- a/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c
+++ b/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c
@@ -19,6 +19,7 @@
#include "../kselftest.h"
#include "parse_vdso.h"
#include "vdso_config.h"
+#include "vdso_call.h"
int main(int argc, char **argv)
{
@@ -43,7 +44,7 @@ int main(int argc, char **argv)
}
struct timeval tv;
- long ret = gtod(&tv, 0);
+ long ret = VDSO_CALL(gtod, 2, &tv, 0);
if (ret == 0) {
printf("The time is %lld.%06lld\n",
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 5c8757a25998..d51249f14e2f 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -77,7 +77,7 @@ all_32: $(BINARIES_32)
all_64: $(BINARIES_64)
-EXTRA_CLEAN := $(BINARIES_32) $(BINARIES_64)
+EXTRA_CLEAN := $(BINARIES_32) $(BINARIES_64) srso
$(BINARIES_32): $(OUTPUT)/%_32: %.c helpers.h
$(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $< $(EXTRA_FILES) -lrt -ldl -lm
diff --git a/tools/testing/selftests/x86/srso.c b/tools/testing/selftests/x86/srso.c
new file mode 100644
index 000000000000..394ec8bdeb00
--- /dev/null
+++ b/tools/testing/selftests/x86/srso.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/perf_event.h>
+#include <cpuid.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+/*
+ * Read one counter value from the perf event @fd. Exits with EXIT_FAILURE and
+ * a diagnostic prefixed by @what if the read fails or returns fewer bytes than
+ * a full value, so callers never see an uninitialized count.
+ */
+static long long read_counter(int fd, const char *what)
+{
+	long long value = 0;
+	ssize_t nread = read(fd, &value, sizeof(value));
+
+	if (nread < 0) {
+		perror(what);
+		exit(EXIT_FAILURE);
+	}
+	if (nread != (ssize_t)sizeof(value)) {
+		fprintf(stderr, "%s: short read (%zd bytes)\n", what, nread);
+		exit(EXIT_FAILURE);
+	}
+	return value;
+}
+
+/*
+ * Count retired RETs (raw event 0xc8) and mispredicted retired RETs (raw
+ * event 0xc9) for this process over a 10 second sleep, with user-mode and
+ * hypervisor execution excluded, and print both totals.
+ *
+ * Exits with EXIT_FAILURE when CPUID(1).EAX is outside the range accepted
+ * below, when either perf event cannot be opened, or when a counter cannot be
+ * read back in full. Returns 0 otherwise.
+ */
+int main(void)
+{
+	struct perf_event_attr ret_attr, mret_attr;
+	long long count_rets, count_rets_mispred;
+	int rrets_fd, mrrets_fd;
+	unsigned int cpuid1_eax, b, c, d;
+
+	__cpuid(1, cpuid1_eax, b, c, d);
+
+	if (cpuid1_eax < 0x00800f00 ||
+	    cpuid1_eax > 0x00afffff) {
+		fprintf(stderr, "This needs to run on a Zen[1-4] machine (CPUID(1).EAX: 0x%x). Exiting...\n", cpuid1_eax);
+		exit(EXIT_FAILURE);
+	}
+
+	memset(&ret_attr, 0, sizeof(struct perf_event_attr));
+	memset(&mret_attr, 0, sizeof(struct perf_event_attr));
+
+	ret_attr.type = mret_attr.type = PERF_TYPE_RAW;
+	ret_attr.size = mret_attr.size = sizeof(struct perf_event_attr);
+	ret_attr.config = 0xc8;
+	mret_attr.config = 0xc9;
+	/* Start disabled; both counters are enabled together further down. */
+	ret_attr.disabled = mret_attr.disabled = 1;
+	ret_attr.exclude_user = mret_attr.exclude_user = 1;
+	ret_attr.exclude_hv = mret_attr.exclude_hv = 1;
+
+	rrets_fd = syscall(SYS_perf_event_open, &ret_attr, 0, -1, -1, 0);
+	if (rrets_fd == -1) {
+		perror("opening retired RETs fd");
+		exit(EXIT_FAILURE);
+	}
+
+	mrrets_fd = syscall(SYS_perf_event_open, &mret_attr, 0, -1, -1, 0);
+	if (mrrets_fd == -1) {
+		perror("opening retired mispredicted RETs fd");
+		exit(EXIT_FAILURE);
+	}
+
+	ioctl(rrets_fd, PERF_EVENT_IOC_RESET, 0);
+	ioctl(mrrets_fd, PERF_EVENT_IOC_RESET, 0);
+
+	ioctl(rrets_fd, PERF_EVENT_IOC_ENABLE, 0);
+	ioctl(mrrets_fd, PERF_EVENT_IOC_ENABLE, 0);
+
+	printf("Sleeping for 10 seconds\n");
+	sleep(10);
+
+	ioctl(rrets_fd, PERF_EVENT_IOC_DISABLE, 0);
+	ioctl(mrrets_fd, PERF_EVENT_IOC_DISABLE, 0);
+
+	/* A failed or short read would otherwise print stack garbage. */
+	count_rets = read_counter(rrets_fd, "reading retired RETs count");
+	count_rets_mispred = read_counter(mrrets_fd, "reading retired mispredicted RETs count");
+
+	printf("RETs: (%lld retired <-> %lld mispredicted)\n",
+		count_rets, count_rets_mispred);
+	printf("SRSO Safe-RET mitigation works correctly if both counts are almost equal.\n");
+
+	return 0;
+}
diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c
index 554b290fefdc..a3d448a075e3 100644
--- a/tools/testing/vsock/util.c
+++ b/tools/testing/vsock/util.c
@@ -139,7 +139,7 @@ int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_po
}
/* Connect to <cid, port> and return the file descriptor. */
-static int vsock_connect(unsigned int cid, unsigned int port, int type)
+int vsock_connect(unsigned int cid, unsigned int port, int type)
{
union {
struct sockaddr sa;
@@ -226,8 +226,8 @@ static int vsock_listen(unsigned int cid, unsigned int port, int type)
/* Listen on <cid, port> and return the first incoming connection. The remote
* address is stored to clientaddrp. clientaddrp may be NULL.
*/
-static int vsock_accept(unsigned int cid, unsigned int port,
- struct sockaddr_vm *clientaddrp, int type)
+int vsock_accept(unsigned int cid, unsigned int port,
+ struct sockaddr_vm *clientaddrp, int type)
{
union {
struct sockaddr sa;
diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h
index e95e62485959..fff22d4a14c0 100644
--- a/tools/testing/vsock/util.h
+++ b/tools/testing/vsock/util.h
@@ -39,6 +39,9 @@ struct test_case {
void init_signals(void);
unsigned int parse_cid(const char *str);
unsigned int parse_port(const char *str);
+int vsock_connect(unsigned int cid, unsigned int port, int type);
+int vsock_accept(unsigned int cid, unsigned int port,
+ struct sockaddr_vm *clientaddrp, int type);
int vsock_stream_connect(unsigned int cid, unsigned int port);
int vsock_bind_connect(unsigned int cid, unsigned int port,
unsigned int bind_port, int type);
diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index f851f8961247..8d38dbf8f41f 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -20,6 +20,8 @@
#include <sys/mman.h>
#include <poll.h>
#include <signal.h>
+#include <sys/ioctl.h>
+#include <linux/sockios.h>
#include "vsock_test_zerocopy.h"
#include "timeout.h"
@@ -1238,6 +1240,79 @@ static void test_double_bind_connect_client(const struct test_opts *opts)
}
}
+#define MSG_BUF_IOCTL_LEN 64
+/*
+ * Server side of the SIOCOUTQ test: accept one connection of socket type
+ * @type on opts->peer_port, receive MSG_BUF_IOCTL_LEN bytes from it, emit
+ * "RECEIVED" through control_writeln() and close the connection.
+ * Exits with EXIT_FAILURE if the accept fails.
+ */
+static void test_unsent_bytes_server(const struct test_opts *opts, int type)
+{
+	unsigned char rx_buf[MSG_BUF_IOCTL_LEN];
+	int conn_fd = vsock_accept(VMADDR_CID_ANY, opts->peer_port, NULL, type);
+
+	if (conn_fd < 0) {
+		perror("accept");
+		exit(EXIT_FAILURE);
+	}
+
+	recv_buf(conn_fd, rx_buf, sizeof(rx_buf), 0, sizeof(rx_buf));
+	control_writeln("RECEIVED");
+
+	close(conn_fd);
+}
+
+/*
+ * Client side of the SIOCOUTQ test: connect to <opts->peer_cid,
+ * opts->peer_port> with socket type @type, send MSG_BUF_IOCTL_LEN random
+ * bytes, wait for the server's "RECEIVED" line and then require
+ * ioctl(SIOCOUTQ) to report 0 unsent bytes.
+ *
+ * EOPNOTSUPP from the ioctl is reported on stderr as a skip. Any other ioctl
+ * error exits with EXIT_FAILURE; a counter that never reaches 0 is left to
+ * timeout_check() to report.
+ */
+static void test_unsent_bytes_client(const struct test_opts *opts, int type)
+{
+	unsigned char buf[MSG_BUF_IOCTL_LEN];
+	int ret, fd, sock_bytes_unsent;
+
+	fd = vsock_connect(opts->peer_cid, opts->peer_port, type);
+	if (fd < 0) {
+		perror("connect");
+		exit(EXIT_FAILURE);
+	}
+
+	/* size_t index: sizeof() is unsigned, avoid a signed/unsigned compare. */
+	for (size_t i = 0; i < sizeof(buf); i++)
+		buf[i] = rand() & 0xFF;
+
+	send_buf(fd, buf, sizeof(buf), 0, sizeof(buf));
+	control_expectln("RECEIVED");
+
+	/*
+	 * "RECEIVED" only shows that the peer has read the data; the local
+	 * unsent-bytes counter may be updated slightly later, so a single
+	 * SIOCOUTQ sample can spuriously be non-zero. Poll until it reaches
+	 * 0, bounded by the test timeout.
+	 * NOTE(review): relies on timeout_begin()/timeout_check()/
+	 * timeout_end() and TIMEOUT from "timeout.h" -- confirm.
+	 */
+	timeout_begin(TIMEOUT);
+	do {
+		ret = ioctl(fd, SIOCOUTQ, &sock_bytes_unsent);
+		if (ret < 0) {
+			if (errno == EOPNOTSUPP) {
+				fprintf(stderr, "Test skipped, SIOCOUTQ not supported.\n");
+				break;
+			}
+			perror("ioctl");
+			exit(EXIT_FAILURE);
+		}
+		timeout_check("SIOCOUTQ");
+	} while (sock_bytes_unsent != 0);
+	timeout_end();
+
+	close(fd);
+}
+
+static void test_stream_unsent_bytes_client(const struct test_opts *opts)
+{
+ test_unsent_bytes_client(opts, SOCK_STREAM); /* SOCK_STREAM variant of the shared SIOCOUTQ client */
+}
+
+static void test_stream_unsent_bytes_server(const struct test_opts *opts)
+{
+ test_unsent_bytes_server(opts, SOCK_STREAM); /* SOCK_STREAM variant of the shared SIOCOUTQ server */
+}
+
+static void test_seqpacket_unsent_bytes_client(const struct test_opts *opts)
+{
+ test_unsent_bytes_client(opts, SOCK_SEQPACKET); /* SOCK_SEQPACKET variant of the shared SIOCOUTQ client */
+}
+
+static void test_seqpacket_unsent_bytes_server(const struct test_opts *opts)
+{
+ test_unsent_bytes_server(opts, SOCK_SEQPACKET); /* SOCK_SEQPACKET variant of the shared SIOCOUTQ server */
+}
+
#define RCVLOWAT_CREDIT_UPD_BUF_SIZE (1024 * 128)
/* This define is the same as in 'include/linux/virtio_vsock.h':
* it is used to decide when to send credit update message during
@@ -1523,6 +1598,16 @@ static struct test_case test_cases[] = {
.run_client = test_stream_rcvlowat_def_cred_upd_client,
.run_server = test_stream_cred_upd_on_low_rx_bytes,
},
+ {
+ .name = "SOCK_STREAM ioctl(SIOCOUTQ) 0 unsent bytes",
+ .run_client = test_stream_unsent_bytes_client,
+ .run_server = test_stream_unsent_bytes_server,
+ },
+ {
+ .name = "SOCK_SEQPACKET ioctl(SIOCOUTQ) 0 unsent bytes",
+ .run_client = test_seqpacket_unsent_bytes_client,
+ .run_server = test_seqpacket_unsent_bytes_server,
+ },
{},
};