summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/Makefile13
-rw-r--r--tools/accounting/getdelays.c67
-rw-r--r--tools/accounting/procacct.c5
-rw-r--r--tools/arch/arm/include/uapi/asm/kvm.h1
-rw-r--r--tools/arch/arm64/include/asm/sysreg.h440
-rw-r--r--tools/arch/arm64/include/uapi/asm/kvm.h12
-rw-r--r--tools/arch/arm64/tools/Makefile6
-rw-r--r--tools/arch/x86/include/asm/amd-ibs.h3
-rw-r--r--tools/arch/x86/include/asm/asm.h8
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h9
-rw-r--r--tools/arch/x86/include/asm/disabled-features.h161
-rw-r--r--tools/arch/x86/include/asm/msr-index.h2
-rw-r--r--tools/arch/x86/include/asm/nops.h2
-rw-r--r--tools/arch/x86/include/asm/orc_types.h4
-rw-r--r--tools/arch/x86/include/asm/pvclock-abi.h4
-rw-r--r--tools/arch/x86/include/asm/required-features.h105
-rw-r--r--tools/bootconfig/main.c4
-rw-r--r--tools/bpf/Makefile6
-rw-r--r--tools/bpf/bpftool/Documentation/Makefile6
-rw-r--r--tools/bpf/bpftool/Makefile13
-rw-r--r--tools/bpf/bpftool/btf.c14
-rw-r--r--tools/bpf/bpftool/btf_dumper.c2
-rw-r--r--tools/bpf/bpftool/cgroup.c2
-rw-r--r--tools/bpf/bpftool/common.c7
-rw-r--r--tools/bpf/bpftool/gen.c12
-rw-r--r--tools/bpf/bpftool/jit_disasm.c3
-rw-r--r--tools/bpf/bpftool/link.c14
-rw-r--r--tools/bpf/bpftool/main.c8
-rw-r--r--tools/bpf/bpftool/map.c14
-rw-r--r--tools/bpf/bpftool/map_perf_ring.c6
-rw-r--r--tools/bpf/bpftool/net.c4
-rw-r--r--tools/bpf/bpftool/netlink_dumper.c6
-rw-r--r--tools/bpf/bpftool/prog.c13
-rw-r--r--tools/bpf/bpftool/tracelog.c2
-rw-r--r--tools/bpf/bpftool/xlated_dumper.c6
-rw-r--r--tools/bpf/resolve_btfids/Makefile2
-rw-r--r--tools/bpf/runqslower/Makefile8
-rw-r--r--tools/build/Makefile8
-rw-r--r--tools/build/Makefile.feature3
-rw-r--r--tools/build/feature/Makefile3
-rwxr-xr-xtools/debugging/kernel-chktaint8
-rw-r--r--tools/include/linux/bits.h2
-rw-r--r--tools/include/linux/kasan-tags.h15
-rw-r--r--tools/include/nolibc/Makefile2
-rw-r--r--tools/include/nolibc/arch-mips.h1
-rw-r--r--tools/include/nolibc/arch-s390.h9
-rw-r--r--tools/include/nolibc/arch.h2
-rw-r--r--tools/include/nolibc/crt.h2
-rw-r--r--tools/include/nolibc/dirent.h98
-rw-r--r--tools/include/nolibc/errno.h2
-rw-r--r--tools/include/nolibc/limits.h7
-rw-r--r--tools/include/nolibc/nolibc.h4
-rw-r--r--tools/include/nolibc/signal.h1
-rw-r--r--tools/include/nolibc/stackprotector.h2
-rw-r--r--tools/include/nolibc/stdio.h98
-rw-r--r--tools/include/nolibc/stdlib.h1
-rw-r--r--tools/include/nolibc/string.h4
-rw-r--r--tools/include/nolibc/sys.h83
-rw-r--r--tools/include/uapi/asm-generic/socket.h21
-rw-r--r--tools/include/uapi/linux/bpf.h40
-rw-r--r--tools/include/uapi/linux/btf.h3
-rw-r--r--tools/include/uapi/linux/const.h2
-rw-r--r--tools/include/uapi/linux/elf.h524
-rw-r--r--tools/include/uapi/linux/if_xdp.h10
-rw-r--r--tools/include/uapi/linux/netdev.h16
-rw-r--r--tools/lib/bpf/Makefile13
-rw-r--r--tools/lib/bpf/bpf.c3
-rw-r--r--tools/lib/bpf/bpf.h3
-rw-r--r--tools/lib/bpf/btf.c105
-rw-r--r--tools/lib/bpf/btf.h3
-rw-r--r--tools/lib/bpf/btf_dump.c5
-rw-r--r--tools/lib/bpf/libbpf.c237
-rw-r--r--tools/lib/bpf/libbpf.h13
-rw-r--r--tools/lib/bpf/libbpf.map3
-rw-r--r--tools/lib/bpf/libbpf_internal.h1
-rw-r--r--tools/lib/bpf/linker.c2
-rw-r--r--tools/lib/bpf/relo_core.c24
-rw-r--r--tools/lib/bpf/str_error.c2
-rw-r--r--tools/lib/bpf/str_error.h7
-rw-r--r--tools/lib/bpf/usdt.bpf.h32
-rw-r--r--tools/lib/perf/Makefile13
-rw-r--r--tools/lib/thermal/Makefile13
-rw-r--r--tools/memory-model/Documentation/glossary.txt32
-rw-r--r--tools/memory-model/Documentation/herd-representation.txt49
-rw-r--r--tools/memory-model/README4
-rw-r--r--tools/memory-model/linux-kernel.bell33
-rw-r--r--tools/memory-model/linux-kernel.cat10
-rw-r--r--tools/memory-model/linux-kernel.cfg1
-rw-r--r--tools/memory-model/linux-kernel.def169
-rw-r--r--tools/mm/page-types.c4
-rw-r--r--tools/net/ynl/Makefile.deps5
-rw-r--r--tools/net/ynl/lib/ynl.c2
-rw-r--r--tools/net/ynl/pyynl/lib/ynl.py46
-rwxr-xr-xtools/net/ynl/pyynl/ynl_gen_c.py36
-rw-r--r--tools/objtool/Documentation/objtool.txt105
-rw-r--r--tools/objtool/Makefile8
-rw-r--r--tools/objtool/arch/loongarch/decode.c28
-rw-r--r--tools/objtool/arch/loongarch/include/arch/elf.h7
-rw-r--r--tools/objtool/arch/loongarch/special.c159
-rw-r--r--tools/objtool/arch/powerpc/decode.c24
-rw-r--r--tools/objtool/arch/x86/decode.c14
-rw-r--r--tools/objtool/builtin-check.c208
-rw-r--r--tools/objtool/check.c112
-rw-r--r--tools/objtool/elf.c3
-rw-r--r--tools/objtool/include/objtool/arch.h3
-rw-r--r--tools/objtool/include/objtool/builtin.h3
-rw-r--r--tools/objtool/include/objtool/elf.h2
-rw-r--r--tools/objtool/include/objtool/special.h2
-rw-r--r--tools/objtool/include/objtool/warn.h20
-rw-r--r--tools/objtool/noreturns.h4
-rw-r--r--tools/objtool/objtool.c78
-rw-r--r--tools/objtool/orc_dump.c7
-rw-r--r--tools/pci/Build1
-rw-r--r--tools/pci/Makefile58
-rw-r--r--tools/pci/pcitest.c250
-rw-r--r--tools/pci/pcitest.sh72
-rw-r--r--tools/perf/Makefile.perf41
-rw-r--r--tools/perf/builtin-trace.c6
-rwxr-xr-xtools/perf/check-headers.sh2
-rwxr-xr-xtools/perf/tests/shell/trace_btf_enum.sh8
-rw-r--r--tools/perf/util/annotate.c76
-rw-r--r--tools/perf/util/annotate.h15
-rw-r--r--tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c11
-rw-r--r--tools/perf/util/cpumap.c4
-rw-r--r--tools/perf/util/disasm.c83
-rw-r--r--tools/power/cpupower/Makefile19
-rw-r--r--tools/power/cpupower/bench/parse.c4
-rw-r--r--tools/power/cpupower/lib/cpupower.c48
-rw-r--r--tools/power/cpupower/lib/cpupower.h3
-rw-r--r--tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c48
-rw-r--r--tools/power/pm-graph/config/custom-timeline-functions.cfg4
-rwxr-xr-xtools/power/pm-graph/sleepgraph.py6
-rw-r--r--tools/power/x86/intel-speed-select/Makefile2
-rw-r--r--tools/power/x86/intel-speed-select/isst-config.c22
-rw-r--r--tools/power/x86/intel-speed-select/isst-display.c11
-rw-r--r--tools/power/x86/turbostat/turbostat.832
-rw-r--r--tools/power/x86/turbostat/turbostat.c654
-rw-r--r--tools/scripts/Makefile.include33
-rwxr-xr-xtools/sound/dapm-graph2
-rw-r--r--tools/testing/crypto/chacha20-s390/test-cipher.c4
-rw-r--r--tools/testing/cxl/Kbuild1
-rw-r--r--tools/testing/cxl/test/cxl.c2
-rw-r--r--tools/testing/cxl/test/mem.c208
-rw-r--r--tools/testing/cxl/test/mock.c6
-rwxr-xr-xtools/testing/ktest/ktest.pl8
-rw-r--r--tools/testing/kunit/configs/all_tests.config5
-rw-r--r--tools/testing/kunit/kunit_kernel.py4
-rw-r--r--tools/testing/kunit/kunit_parser.py9
-rwxr-xr-xtools/testing/kunit/kunit_tool_test.py11
-rw-r--r--tools/testing/kunit/qemu_configs/sparc.py5
-rw-r--r--tools/testing/kunit/qemu_configs/x86_64.py4
-rw-r--r--tools/testing/radix-tree/multiorder.c4
-rw-r--r--tools/testing/selftests/Makefile4
-rw-r--r--tools/testing/selftests/arm64/fp/kernel-test.c1
-rw-r--r--tools/testing/selftests/arm64/mte/check_hugetlb_options.c19
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.aarch649
-rw-r--r--tools/testing/selftests/bpf/Makefile28
-rw-r--r--tools/testing/selftests/bpf/Makefile.docs6
-rw-r--r--tools/testing/selftests/bpf/bpf_arena_spin_lock.h533
-rw-r--r--tools/testing/selftests/bpf/bpf_atomic.h140
-rw-r--r--tools/testing/selftests/bpf/bpf_experimental.h15
-rw-r--r--tools/testing/selftests/bpf/bpf_kfuncs.h5
-rw-r--r--tools/testing/selftests/bpf/cap_helpers.c8
-rw-r--r--tools/testing/selftests/bpf/cap_helpers.h1
-rw-r--r--tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c62
-rw-r--r--tools/testing/selftests/bpf/network_helpers.c139
-rw-r--r--tools/testing/selftests/bpf/network_helpers.h24
-rw-r--r--tools/testing/selftests/bpf/prog_tests/align.c11
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arena_atomics.c66
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c108
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_iter.c68
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_nf.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf.c23
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_dump.c147
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c128
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c13
-rw-r--r--tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c107
-rw-r--r--tools/testing/selftests/bpf/prog_tests/compute_live_registers.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_reloc.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cpumask.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/dynptr.c21
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fd_array.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_noreturns.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c162
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kernel_flag.c43
-rw-r--r--tools/testing/selftests/bpf/prog_tests/lwt_helpers.h29
-rw-r--r--tools/testing/selftests/bpf/prog_tests/lwt_ip_encap.c540
-rw-r--r--tools/testing/selftests/bpf/prog_tests/lwt_seg6local.c176
-rw-r--r--tools/testing/selftests/bpf/prog_tests/net_timestamping.c239
-rw-r--r--tools/testing/selftests/bpf/prog_tests/netns_cookie.c21
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c47
-rw-r--r--tools/testing/selftests/bpf/prog_tests/prepare.c99
-rw-r--r--tools/testing/selftests/bpf/prog_tests/pro_epilogue.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/read_vsyscall.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/res_spin_lock.c98
-rw-r--r--tools/testing/selftests/bpf/prog_tests/setget_sockopt.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_basic.c129
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_strp.c454
-rw-r--r--tools/testing/selftests/bpf/prog_tests/spin_lock.c3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/summarization.c144
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tailcalls.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_links.c28
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_opts.c40
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_struct_ops_kptr_return.c16
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_struct_ops_refcounted.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_tunnel.c633
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_veristat.c139
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c638
-rw-r--r--tools/testing/selftests/bpf/prog_tests/token.c97
-rw-r--r--tools/testing/selftests/bpf/prog_tests/usdt.c11
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verifier.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c145
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_vlan.c175
-rw-r--r--tools/testing/selftests/bpf/progs/arena_atomics.c121
-rw-r--r--tools/testing/selftests/bpf/progs/arena_spin_lock.c51
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_tasks.c110
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c4
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c4
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_misc.h22
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_tracing_net.h5
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_bad_signed_arr_elem_sz.c3
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_preorder.c41
-rw-r--r--tools/testing/selftests/bpf/progs/changes_pkt_data.c39
-rw-r--r--tools/testing/selftests/bpf/progs/compute_live_registers.c424
-rw-r--r--tools/testing/selftests/bpf/progs/connect4_dropper.c4
-rw-r--r--tools/testing/selftests/bpf/progs/core_reloc_types.h10
-rw-r--r--tools/testing/selftests/bpf/progs/cpumask_common.h1
-rw-r--r--tools/testing/selftests/bpf/progs/cpumask_failure.c38
-rw-r--r--tools/testing/selftests/bpf/progs/cpumask_success.c120
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_success.c123
-rw-r--r--tools/testing/selftests/bpf/progs/fexit_noreturns.c15
-rw-r--r--tools/testing/selftests/bpf/progs/find_vma.c2
-rw-r--r--tools/testing/selftests/bpf/progs/irq.c124
-rw-r--r--tools/testing/selftests/bpf/progs/iters.c139
-rw-r--r--tools/testing/selftests/bpf/progs/net_timestamping.c248
-rw-r--r--tools/testing/selftests/bpf/progs/netns_cookie_prog.c9
-rw-r--r--tools/testing/selftests/bpf/progs/preempt_lock.c68
-rw-r--r--tools/testing/selftests/bpf/progs/prepare.c28
-rw-r--r--tools/testing/selftests/bpf/progs/priv_freplace_prog.c13
-rw-r--r--tools/testing/selftests/bpf/progs/priv_prog.c6
-rw-r--r--tools/testing/selftests/bpf/progs/pro_epilogue_with_kfunc.c88
-rw-r--r--tools/testing/selftests/bpf/progs/rcu_read_lock.c61
-rw-r--r--tools/testing/selftests/bpf/progs/read_vsyscall.c11
-rw-r--r--tools/testing/selftests/bpf/progs/res_spin_lock.c143
-rw-r--r--tools/testing/selftests/bpf/progs/res_spin_lock_fail.c244
-rw-r--r--tools/testing/selftests/bpf/progs/set_global_vars.c47
-rw-r--r--tools/testing/selftests/bpf/progs/setget_sockopt.c3
-rw-r--r--tools/testing/selftests/bpf/progs/strncmp_bench.c5
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c30
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__invalid_scalar.c26
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__local_kptr.c34
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__nonzero_offset.c25
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__wrong_type.c30
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_refcounted.c31
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__global_subprog.c39
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__ref_leak.c22
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__tail_call.c36
-rw-r--r--tools/testing/selftests/bpf/progs/summarization.c78
-rw-r--r--tools/testing/selftests/bpf/progs/summarization_freplace.c (renamed from tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c)17
-rw-r--r--tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c5
-rw-r--r--tools/testing/selftests/bpf/progs/test_get_xattr.c28
-rw-r--r--tools/testing/selftests/bpf/progs/test_kernel_flag.c28
-rw-r--r--tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_lookup_key.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_ptr_untrusted.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c1
-rw-r--r--tools/testing/selftests/bpf/progs/test_set_remove_xattr.c133
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_strp.c53
-rw-r--r--tools/testing/selftests/bpf/progs/test_spin_lock_fail.c69
-rw-r--r--tools/testing/selftests/bpf/progs/test_task_under_cgroup.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_usdt.c14
-rw-r--r--tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_meta.c53
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_vlan.c20
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_array_access.c15
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c58
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_gotol.c6
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c6
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_load_acquire.c218
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_may_goto_1.c34
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_precision.c49
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_stack_ptr.c52
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_store_release.c286
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_redirect_map.c88
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c41
-rw-r--r--tools/testing/selftests/bpf/test_btf.h6
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_testmod.c108
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_testmod.h6
-rw-r--r--tools/testing/selftests/bpf/test_loader.c32
-rwxr-xr-xtools/testing/selftests/bpf/test_lwt_ip_encap.sh476
-rwxr-xr-xtools/testing/selftests/bpf/test_lwt_seg6local.sh156
-rw-r--r--tools/testing/selftests/bpf/test_maps.c9
-rw-r--r--tools/testing/selftests/bpf/test_progs.c72
-rw-r--r--tools/testing/selftests/bpf/test_progs.h8
-rwxr-xr-xtools/testing/selftests/bpf/test_tunnel.sh645
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_redirect_multi.sh214
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_vlan.sh233
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_vlan_mode_generic.sh9
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_vlan_mode_native.sh9
-rw-r--r--tools/testing/selftests/bpf/veristat.c367
-rwxr-xr-xtools/testing/selftests/bpf/with_addr.sh54
-rwxr-xr-xtools/testing/selftests/bpf/with_tunnels.sh36
-rw-r--r--tools/testing/selftests/bpf/xdp_hw_metadata.c168
-rw-r--r--tools/testing/selftests/bpf/xdp_redirect_multi.c226
-rwxr-xr-xtools/testing/selftests/cgroup/test_cpuset_v1_hp.sh2
-rw-r--r--tools/testing/selftests/damon/.gitignore3
-rw-r--r--tools/testing/selftests/damon/Makefile11
-rw-r--r--tools/testing/selftests/damon/config1
-rwxr-xr-xtools/testing/selftests/damon/damon_nr_regions.py2
-rwxr-xr-xtools/testing/selftests/damon/damos_quota.py9
-rwxr-xr-xtools/testing/selftests/damon/damos_quota_goal.py3
-rwxr-xr-xtools/testing/selftests/damon/debugfs_attrs.sh17
-rwxr-xr-xtools/testing/selftests/damon/debugfs_duplicate_context_creation.sh27
-rwxr-xr-xtools/testing/selftests/damon/debugfs_empty_targets.sh21
-rwxr-xr-xtools/testing/selftests/damon/debugfs_huge_count_read_write.sh22
-rwxr-xr-xtools/testing/selftests/damon/debugfs_rm_non_contexts.sh19
-rwxr-xr-xtools/testing/selftests/damon/debugfs_schemes.sh19
-rwxr-xr-xtools/testing/selftests/damon/debugfs_target_ids.sh19
-rw-r--r--tools/testing/selftests/damon/debugfs_target_ids_pid_leak.c68
-rwxr-xr-xtools/testing/selftests/damon/debugfs_target_ids_pid_leak.sh22
-rw-r--r--tools/testing/selftests/damon/debugfs_target_ids_read_before_terminate_race.c80
-rwxr-xr-xtools/testing/selftests/damon/debugfs_target_ids_read_before_terminate_race.sh14
-rw-r--r--tools/testing/selftests/damon/huge_count_read_write.c46
-rw-r--r--tools/testing/selftests/drivers/net/.gitignore2
-rw-r--r--tools/testing/selftests/drivers/net/Makefile5
-rw-r--r--tools/testing/selftests/drivers/net/README.rst4
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond_options.sh4
-rw-r--r--tools/testing/selftests/drivers/net/config1
-rwxr-xr-xtools/testing/selftests/drivers/net/hds.py144
-rw-r--r--tools/testing/selftests/drivers/net/hw/.gitignore2
-rw-r--r--tools/testing/selftests/drivers/net/hw/Makefile11
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/csum.py50
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/devmem.py6
-rw-r--r--tools/testing/selftests/drivers/net/hw/iou-zcrx.c457
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/iou-zcrx.py87
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/irq.py99
-rw-r--r--tools/testing/selftests/drivers/net/hw/ncdevmem.c1
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_ctx.py57
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_input_xfrm.py87
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/tso.py241
-rw-r--r--tools/testing/selftests/drivers/net/hw/xdp_dummy.bpf.c13
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/env.py139
-rw-r--r--tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh24
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_fragmented_msg.sh122
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_sysdata.sh242
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh16
-rwxr-xr-xtools/testing/selftests/drivers/net/ping.py208
-rwxr-xr-xtools/testing/selftests/drivers/net/queues.py52
-rw-r--r--tools/testing/selftests/drivers/net/xdp_helper.c151
-rw-r--r--tools/testing/selftests/drivers/ntsync/.gitignore1
-rw-r--r--tools/testing/selftests/drivers/ntsync/Makefile7
-rw-r--r--tools/testing/selftests/drivers/ntsync/config1
-rw-r--r--tools/testing/selftests/drivers/ntsync/ntsync.c1343
-rwxr-xr-xtools/testing/selftests/efivarfs/efivarfs.sh168
-rw-r--r--tools/testing/selftests/exec/check-exec.c11
-rw-r--r--tools/testing/selftests/filesystems/mount-notify/.gitignore2
-rw-r--r--tools/testing/selftests/filesystems/mount-notify/Makefile6
-rw-r--r--tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c516
-rw-r--r--tools/testing/selftests/filesystems/nsfs/iterate_mntns.c14
-rw-r--r--tools/testing/selftests/filesystems/overlayfs/Makefile11
-rw-r--r--tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c507
-rw-r--r--tools/testing/selftests/filesystems/overlayfs/wrappers.h17
-rw-r--r--tools/testing/selftests/filesystems/statmount/statmount.h2
-rw-r--r--tools/testing/selftests/filesystems/statmount/statmount_test.c35
-rw-r--r--tools/testing/selftests/filesystems/utils.c501
-rw-r--r--tools/testing/selftests/filesystems/utils.h45
-rw-r--r--tools/testing/selftests/ftrace/.gitignore1
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc58
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe.tc14
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/add_remove_uprobe.tc10
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc42
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc1
-rw-r--r--tools/testing/selftests/ftrace/test.d/functions8
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc1
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc3
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc3
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-expressions.tc1
-rwxr-xr-xtools/testing/selftests/gpio/gpio-sim.sh31
-rw-r--r--tools/testing/selftests/hid/Makefile2
-rw-r--r--tools/testing/selftests/kselftest.h5
-rwxr-xr-xtools/testing/selftests/kselftest/module.sh2
-rw-r--r--tools/testing/selftests/kvm/.gitignore1
-rw-r--r--tools/testing/selftests/kvm/Makefile345
-rw-r--r--tools/testing/selftests/kvm/Makefile.kvm331
-rw-r--r--tools/testing/selftests/kvm/access_tracking_perf_test.c2
-rw-r--r--tools/testing/selftests/kvm/arm64/aarch32_id_regs.c (renamed from tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c)12
-rw-r--r--tools/testing/selftests/kvm/arm64/arch_timer.c (renamed from tools/testing/selftests/kvm/aarch64/arch_timer.c)0
-rw-r--r--tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c (renamed from tools/testing/selftests/kvm/aarch64/arch_timer_edge_cases.c)0
-rw-r--r--tools/testing/selftests/kvm/arm64/debug-exceptions.c (renamed from tools/testing/selftests/kvm/aarch64/debug-exceptions.c)4
-rw-r--r--tools/testing/selftests/kvm/arm64/get-reg-list.c (renamed from tools/testing/selftests/kvm/aarch64/get-reg-list.c)1
-rw-r--r--tools/testing/selftests/kvm/arm64/hypercalls.c (renamed from tools/testing/selftests/kvm/aarch64/hypercalls.c)50
-rw-r--r--tools/testing/selftests/kvm/arm64/mmio_abort.c (renamed from tools/testing/selftests/kvm/aarch64/mmio_abort.c)0
-rw-r--r--tools/testing/selftests/kvm/arm64/no-vgic-v3.c (renamed from tools/testing/selftests/kvm/aarch64/no-vgic-v3.c)2
-rw-r--r--tools/testing/selftests/kvm/arm64/page_fault_test.c (renamed from tools/testing/selftests/kvm/aarch64/page_fault_test.c)0
-rw-r--r--tools/testing/selftests/kvm/arm64/psci_test.c (renamed from tools/testing/selftests/kvm/aarch64/psci_test.c)8
-rw-r--r--tools/testing/selftests/kvm/arm64/set_id_regs.c (renamed from tools/testing/selftests/kvm/aarch64/set_id_regs.c)62
-rw-r--r--tools/testing/selftests/kvm/arm64/smccc_filter.c (renamed from tools/testing/selftests/kvm/aarch64/smccc_filter.c)0
-rw-r--r--tools/testing/selftests/kvm/arm64/vcpu_width_config.c (renamed from tools/testing/selftests/kvm/aarch64/vcpu_width_config.c)0
-rw-r--r--tools/testing/selftests/kvm/arm64/vgic_init.c (renamed from tools/testing/selftests/kvm/aarch64/vgic_init.c)0
-rw-r--r--tools/testing/selftests/kvm/arm64/vgic_irq.c (renamed from tools/testing/selftests/kvm/aarch64/vgic_irq.c)0
-rw-r--r--tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c (renamed from tools/testing/selftests/kvm/aarch64/vgic_lpi_stress.c)0
-rw-r--r--tools/testing/selftests/kvm/arm64/vpmu_counter_access.c (renamed from tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c)19
-rw-r--r--tools/testing/selftests/kvm/dirty_log_perf_test.c2
-rw-r--r--tools/testing/selftests/kvm/dirty_log_test.c523
-rw-r--r--tools/testing/selftests/kvm/include/arm64/arch_timer.h (renamed from tools/testing/selftests/kvm/include/aarch64/arch_timer.h)0
-rw-r--r--tools/testing/selftests/kvm/include/arm64/delay.h (renamed from tools/testing/selftests/kvm/include/aarch64/delay.h)0
-rw-r--r--tools/testing/selftests/kvm/include/arm64/gic.h (renamed from tools/testing/selftests/kvm/include/aarch64/gic.h)0
-rw-r--r--tools/testing/selftests/kvm/include/arm64/gic_v3.h (renamed from tools/testing/selftests/kvm/include/aarch64/gic_v3.h)0
-rw-r--r--tools/testing/selftests/kvm/include/arm64/gic_v3_its.h (renamed from tools/testing/selftests/kvm/include/aarch64/gic_v3_its.h)0
-rw-r--r--tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h (renamed from tools/testing/selftests/kvm/include/aarch64/kvm_util_arch.h)0
-rw-r--r--tools/testing/selftests/kvm/include/arm64/processor.h (renamed from tools/testing/selftests/kvm/include/aarch64/processor.h)0
-rw-r--r--tools/testing/selftests/kvm/include/arm64/spinlock.h (renamed from tools/testing/selftests/kvm/include/aarch64/spinlock.h)0
-rw-r--r--tools/testing/selftests/kvm/include/arm64/ucall.h (renamed from tools/testing/selftests/kvm/include/aarch64/ucall.h)0
-rw-r--r--tools/testing/selftests/kvm/include/arm64/vgic.h (renamed from tools/testing/selftests/kvm/include/aarch64/vgic.h)0
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h43
-rw-r--r--tools/testing/selftests/kvm/include/s390/debug_print.h (renamed from tools/testing/selftests/kvm/include/s390x/debug_print.h)0
-rw-r--r--tools/testing/selftests/kvm/include/s390/diag318_test_handler.h (renamed from tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h)0
-rw-r--r--tools/testing/selftests/kvm/include/s390/facility.h (renamed from tools/testing/selftests/kvm/include/s390x/facility.h)0
-rw-r--r--tools/testing/selftests/kvm/include/s390/kvm_util_arch.h (renamed from tools/testing/selftests/kvm/include/s390x/kvm_util_arch.h)0
-rw-r--r--tools/testing/selftests/kvm/include/s390/processor.h (renamed from tools/testing/selftests/kvm/include/s390x/processor.h)0
-rw-r--r--tools/testing/selftests/kvm/include/s390/sie.h (renamed from tools/testing/selftests/kvm/include/s390x/sie.h)0
-rw-r--r--tools/testing/selftests/kvm/include/s390/ucall.h (renamed from tools/testing/selftests/kvm/include/s390x/ucall.h)0
-rw-r--r--tools/testing/selftests/kvm/include/test_util.h2
-rw-r--r--tools/testing/selftests/kvm/include/x86/apic.h (renamed from tools/testing/selftests/kvm/include/x86_64/apic.h)2
-rw-r--r--tools/testing/selftests/kvm/include/x86/evmcs.h (renamed from tools/testing/selftests/kvm/include/x86_64/evmcs.h)3
-rw-r--r--tools/testing/selftests/kvm/include/x86/hyperv.h (renamed from tools/testing/selftests/kvm/include/x86_64/hyperv.h)3
-rw-r--r--tools/testing/selftests/kvm/include/x86/kvm_util_arch.h (renamed from tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h)0
-rw-r--r--tools/testing/selftests/kvm/include/x86/mce.h (renamed from tools/testing/selftests/kvm/include/x86_64/mce.h)2
-rw-r--r--tools/testing/selftests/kvm/include/x86/pmu.h (renamed from tools/testing/selftests/kvm/include/x86_64/pmu.h)0
-rw-r--r--tools/testing/selftests/kvm/include/x86/processor.h (renamed from tools/testing/selftests/kvm/include/x86_64/processor.h)77
-rw-r--r--tools/testing/selftests/kvm/include/x86/sev.h (renamed from tools/testing/selftests/kvm/include/x86_64/sev.h)0
-rw-r--r--tools/testing/selftests/kvm/include/x86/svm.h (renamed from tools/testing/selftests/kvm/include/x86_64/svm.h)6
-rw-r--r--tools/testing/selftests/kvm/include/x86/svm_util.h (renamed from tools/testing/selftests/kvm/include/x86_64/svm_util.h)3
-rw-r--r--tools/testing/selftests/kvm/include/x86/ucall.h (renamed from tools/testing/selftests/kvm/include/x86_64/ucall.h)0
-rw-r--r--tools/testing/selftests/kvm/include/x86/vmx.h (renamed from tools/testing/selftests/kvm/include/x86_64/vmx.h)2
-rw-r--r--tools/testing/selftests/kvm/kvm_create_max_vcpus.c28
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/gic.c (renamed from tools/testing/selftests/kvm/lib/aarch64/gic.c)0
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/gic_private.h (renamed from tools/testing/selftests/kvm/lib/aarch64/gic_private.h)0
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/gic_v3.c (renamed from tools/testing/selftests/kvm/lib/aarch64/gic_v3.c)0
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c (renamed from tools/testing/selftests/kvm/lib/aarch64/gic_v3_its.c)0
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/handlers.S (renamed from tools/testing/selftests/kvm/lib/aarch64/handlers.S)0
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/processor.c (renamed from tools/testing/selftests/kvm/lib/aarch64/processor.c)8
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/spinlock.c (renamed from tools/testing/selftests/kvm/lib/aarch64/spinlock.c)0
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/ucall.c (renamed from tools/testing/selftests/kvm/lib/aarch64/ucall.c)0
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/vgic.c (renamed from tools/testing/selftests/kvm/lib/aarch64/vgic.c)0
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c117
-rw-r--r--tools/testing/selftests/kvm/lib/riscv/processor.c66
-rw-r--r--tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c (renamed from tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c)0
-rw-r--r--tools/testing/selftests/kvm/lib/s390/facility.c (renamed from tools/testing/selftests/kvm/lib/s390x/facility.c)0
-rw-r--r--tools/testing/selftests/kvm/lib/s390/processor.c (renamed from tools/testing/selftests/kvm/lib/s390x/processor.c)0
-rw-r--r--tools/testing/selftests/kvm/lib/s390/ucall.c (renamed from tools/testing/selftests/kvm/lib/s390x/ucall.c)0
-rw-r--r--tools/testing/selftests/kvm/lib/userfaultfd_util.c2
-rw-r--r--tools/testing/selftests/kvm/lib/x86/apic.c (renamed from tools/testing/selftests/kvm/lib/x86_64/apic.c)0
-rw-r--r--tools/testing/selftests/kvm/lib/x86/handlers.S (renamed from tools/testing/selftests/kvm/lib/x86_64/handlers.S)0
-rw-r--r--tools/testing/selftests/kvm/lib/x86/hyperv.c (renamed from tools/testing/selftests/kvm/lib/x86_64/hyperv.c)0
-rw-r--r--tools/testing/selftests/kvm/lib/x86/memstress.c (renamed from tools/testing/selftests/kvm/lib/x86_64/memstress.c)2
-rw-r--r--tools/testing/selftests/kvm/lib/x86/pmu.c (renamed from tools/testing/selftests/kvm/lib/x86_64/pmu.c)0
-rw-r--r--tools/testing/selftests/kvm/lib/x86/processor.c (renamed from tools/testing/selftests/kvm/lib/x86_64/processor.c)2
-rw-r--r--tools/testing/selftests/kvm/lib/x86/sev.c (renamed from tools/testing/selftests/kvm/lib/x86_64/sev.c)0
-rw-r--r--tools/testing/selftests/kvm/lib/x86/svm.c (renamed from tools/testing/selftests/kvm/lib/x86_64/svm.c)1
-rw-r--r--tools/testing/selftests/kvm/lib/x86/ucall.c (renamed from tools/testing/selftests/kvm/lib/x86_64/ucall.c)0
-rw-r--r--tools/testing/selftests/kvm/lib/x86/vmx.c (renamed from tools/testing/selftests/kvm/lib/x86_64/vmx.c)2
-rw-r--r--tools/testing/selftests/kvm/mmu_stress_test.c (renamed from tools/testing/selftests/kvm/max_guest_memory_test.c)167
-rw-r--r--tools/testing/selftests/kvm/riscv/arch_timer.c2
-rw-r--r--tools/testing/selftests/kvm/riscv/ebreak_test.c2
-rw-r--r--tools/testing/selftests/kvm/riscv/get-reg-list.c18
-rw-r--r--tools/testing/selftests/kvm/riscv/sbi_pmu_test.c83
-rw-r--r--tools/testing/selftests/kvm/s390/cmma_test.c (renamed from tools/testing/selftests/kvm/s390x/cmma_test.c)4
-rw-r--r--tools/testing/selftests/kvm/s390/config (renamed from tools/testing/selftests/kvm/s390x/config)0
-rw-r--r--tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c (renamed from tools/testing/selftests/kvm/s390x/cpumodel_subfuncs_test.c)0
-rw-r--r--tools/testing/selftests/kvm/s390/debug_test.c (renamed from tools/testing/selftests/kvm/s390x/debug_test.c)0
-rw-r--r--tools/testing/selftests/kvm/s390/memop.c (renamed from tools/testing/selftests/kvm/s390x/memop.c)0
-rw-r--r--tools/testing/selftests/kvm/s390/resets.c (renamed from tools/testing/selftests/kvm/s390x/resets.c)2
-rw-r--r--tools/testing/selftests/kvm/s390/shared_zeropage_test.c (renamed from tools/testing/selftests/kvm/s390x/shared_zeropage_test.c)0
-rw-r--r--tools/testing/selftests/kvm/s390/sync_regs_test.c (renamed from tools/testing/selftests/kvm/s390x/sync_regs_test.c)0
-rw-r--r--tools/testing/selftests/kvm/s390/tprot.c (renamed from tools/testing/selftests/kvm/s390x/tprot.c)0
-rw-r--r--tools/testing/selftests/kvm/s390/ucontrol_test.c (renamed from tools/testing/selftests/kvm/s390x/ucontrol_test.c)32
-rw-r--r--tools/testing/selftests/kvm/set_memory_region_test.c59
-rw-r--r--tools/testing/selftests/kvm/steal_time.c3
-rw-r--r--tools/testing/selftests/kvm/x86/amx_test.c (renamed from tools/testing/selftests/kvm/x86_64/amx_test.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/apic_bus_clock_test.c (renamed from tools/testing/selftests/kvm/x86_64/apic_bus_clock_test.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/cpuid_test.c (renamed from tools/testing/selftests/kvm/x86_64/cpuid_test.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/cr4_cpuid_sync_test.c (renamed from tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/debug_regs.c (renamed from tools/testing/selftests/kvm/x86_64/debug_regs.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c (renamed from tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c)6
-rw-r--r--tools/testing/selftests/kvm/x86/exit_on_emulation_failure_test.c (renamed from tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/feature_msrs_test.c (renamed from tools/testing/selftests/kvm/x86_64/feature_msrs_test.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/fix_hypercall_test.c (renamed from tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/flds_emulation.h (renamed from tools/testing/selftests/kvm/x86_64/flds_emulation.h)0
-rw-r--r--tools/testing/selftests/kvm/x86/hwcr_msr_test.c (renamed from tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_clock.c (renamed from tools/testing/selftests/kvm/x86_64/hyperv_clock.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_cpuid.c (renamed from tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c)47
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_evmcs.c (renamed from tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c (renamed from tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_features.c (renamed from tools/testing/selftests/kvm/x86_64/hyperv_features.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_ipi.c (renamed from tools/testing/selftests/kvm/x86_64/hyperv_ipi.c)6
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_svm_test.c (renamed from tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c (renamed from tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/kvm_clock_test.c (renamed from tools/testing/selftests/kvm/x86_64/kvm_clock_test.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/kvm_pv_test.c (renamed from tools/testing/selftests/kvm/x86_64/kvm_pv_test.c)38
-rw-r--r--tools/testing/selftests/kvm/x86/max_vcpuid_cap_test.c (renamed from tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/monitor_mwait_test.c (renamed from tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/nested_emulation_test.c146
-rw-r--r--tools/testing/selftests/kvm/x86/nested_exceptions_test.c (renamed from tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c)2
-rw-r--r--tools/testing/selftests/kvm/x86/nx_huge_pages_test.c (renamed from tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c)4
-rwxr-xr-xtools/testing/selftests/kvm/x86/nx_huge_pages_test.sh (renamed from tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh)0
-rw-r--r--tools/testing/selftests/kvm/x86/platform_info_test.c (renamed from tools/testing/selftests/kvm/x86_64/platform_info_test.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/pmu_counters_test.c (renamed from tools/testing/selftests/kvm/x86_64/pmu_counters_test.c)158
-rw-r--r--tools/testing/selftests/kvm/x86/pmu_event_filter_test.c (renamed from tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/private_mem_conversions_test.c (renamed from tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c (renamed from tools/testing/selftests/kvm/x86_64/private_mem_kvm_exits_test.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/recalc_apic_map_test.c (renamed from tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/set_boot_cpu_id.c (renamed from tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/set_sregs_test.c (renamed from tools/testing/selftests/kvm/x86_64/set_sregs_test.c)63
-rw-r--r--tools/testing/selftests/kvm/x86/sev_init2_tests.c (renamed from tools/testing/selftests/kvm/x86_64/sev_init2_tests.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/sev_migrate_tests.c (renamed from tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/sev_smoke_test.c (renamed from tools/testing/selftests/kvm/x86_64/sev_smoke_test.c)5
-rw-r--r--tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c (renamed from tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/smm_test.c (renamed from tools/testing/selftests/kvm/x86_64/smm_test.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/state_test.c (renamed from tools/testing/selftests/kvm/x86_64/state_test.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/svm_int_ctl_test.c (renamed from tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c)5
-rw-r--r--tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c (renamed from tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c (renamed from tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/svm_vmcall_test.c (renamed from tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/sync_regs_test.c (renamed from tools/testing/selftests/kvm/x86_64/sync_regs_test.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/triple_fault_event_test.c (renamed from tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/tsc_msrs_test.c (renamed from tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/tsc_scaling_sync.c (renamed from tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/ucna_injection_test.c (renamed from tools/testing/selftests/kvm/x86_64/ucna_injection_test.c)2
-rw-r--r--tools/testing/selftests/kvm/x86/userspace_io_test.c (renamed from tools/testing/selftests/kvm/x86_64/userspace_io_test.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c (renamed from tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_apic_access_test.c (renamed from tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c (renamed from tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c (renamed from tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c (renamed from tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c (renamed from tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_msrs_test.c (renamed from tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c (renamed from tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c (renamed from tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c (renamed from tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c (renamed from tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c (renamed from tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/xapic_ipi_test.c (renamed from tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c)16
-rw-r--r--tools/testing/selftests/kvm/x86/xapic_state_test.c (renamed from tools/testing/selftests/kvm/x86_64/xapic_state_test.c)4
-rw-r--r--tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c (renamed from tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/xen_shinfo_test.c (renamed from tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c)5
-rw-r--r--tools/testing/selftests/kvm/x86/xen_vmcall_test.c (renamed from tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c)0
-rw-r--r--tools/testing/selftests/kvm/x86/xss_msr_test.c (renamed from tools/testing/selftests/kvm/x86_64/xss_msr_test.c)0
-rw-r--r--tools/testing/selftests/landlock/.gitignore3
-rw-r--r--tools/testing/selftests/landlock/Makefile6
-rw-r--r--tools/testing/selftests/landlock/audit.h472
-rw-r--r--tools/testing/selftests/landlock/audit_test.c551
-rw-r--r--tools/testing/selftests/landlock/base_test.c130
-rw-r--r--tools/testing/selftests/landlock/common.h21
-rw-r--r--tools/testing/selftests/landlock/config4
-rw-r--r--tools/testing/selftests/landlock/fs_test.c604
-rw-r--r--tools/testing/selftests/landlock/net_test.c256
-rw-r--r--tools/testing/selftests/landlock/ptrace_test.c140
-rw-r--r--tools/testing/selftests/landlock/scoped_abstract_unix_test.c111
-rw-r--r--tools/testing/selftests/landlock/scoped_signal_test.c108
-rw-r--r--tools/testing/selftests/landlock/wait-pipe-sandbox.c131
-rw-r--r--tools/testing/selftests/lib.mk5
-rw-r--r--tools/testing/selftests/lib/Makefile2
-rw-r--r--tools/testing/selftests/lib/config3
-rwxr-xr-xtools/testing/selftests/lib/prime_numbers.sh4
-rwxr-xr-xtools/testing/selftests/lib/printf.sh4
-rwxr-xr-xtools/testing/selftests/lib/scanf.sh4
-rw-r--r--tools/testing/selftests/livepatch/functions.sh52
-rwxr-xr-xtools/testing/selftests/livepatch/test-ftrace.sh34
-rwxr-xr-xtools/testing/selftests/livepatch/test-kprobe.sh2
-rw-r--r--tools/testing/selftests/memfd/memfd_test.c2
-rw-r--r--tools/testing/selftests/mm/.gitignore5
-rw-r--r--tools/testing/selftests/mm/Makefile16
-rw-r--r--tools/testing/selftests/mm/config1
-rw-r--r--tools/testing/selftests/mm/cow.c10
-rw-r--r--tools/testing/selftests/mm/guard-pages.c89
-rw-r--r--tools/testing/selftests/mm/hugepage-mremap.c2
-rw-r--r--tools/testing/selftests/mm/ksm_functional_tests.c8
-rw-r--r--tools/testing/selftests/mm/ksm_tests.c2
-rw-r--r--tools/testing/selftests/mm/memfd_secret.c14
-rw-r--r--tools/testing/selftests/mm/migration.c99
-rw-r--r--tools/testing/selftests/mm/mkdirty.c9
-rw-r--r--tools/testing/selftests/mm/mlock2.h1
-rw-r--r--tools/testing/selftests/mm/mremap_test.c17
-rw-r--r--tools/testing/selftests/mm/mseal_test.c14
-rw-r--r--tools/testing/selftests/mm/pagemap_ioctl.c118
-rw-r--r--tools/testing/selftests/mm/pkey-arm64.h6
-rw-r--r--tools/testing/selftests/mm/pkey-helpers.h64
-rw-r--r--tools/testing/selftests/mm/pkey-powerpc.h4
-rw-r--r--tools/testing/selftests/mm/pkey-x86.h6
-rw-r--r--tools/testing/selftests/mm/pkey_sighandler_tests.c32
-rw-r--r--tools/testing/selftests/mm/pkey_util.c40
-rw-r--r--tools/testing/selftests/mm/protection_keys.c216
-rwxr-xr-xtools/testing/selftests/mm/run_vmtests.sh12
-rw-r--r--tools/testing/selftests/mm/seal_elf.c137
-rw-r--r--tools/testing/selftests/mm/soft-dirty.c2
-rw-r--r--tools/testing/selftests/mm/split_huge_page_test.c46
-rw-r--r--tools/testing/selftests/mm/thp_settings.c4
-rw-r--r--tools/testing/selftests/mm/thp_settings.h4
-rw-r--r--tools/testing/selftests/mm/uffd-common.c4
-rw-r--r--tools/testing/selftests/mm/uffd-stress.c15
-rw-r--r--tools/testing/selftests/mm/uffd-unit-tests.c18
-rw-r--r--tools/testing/selftests/mm/uffd-wp-mremap.c380
-rw-r--r--tools/testing/selftests/mm/virtual_address_range.c41
-rw-r--r--tools/testing/selftests/mm/vm_util.c68
-rw-r--r--tools/testing/selftests/mm/vm_util.h1
-rw-r--r--tools/testing/selftests/mm/write_to_hugetlbfs.c2
-rw-r--r--tools/testing/selftests/mount_setattr/mount_setattr_test.c652
-rw-r--r--tools/testing/selftests/net/.gitignore2
-rw-r--r--tools/testing/selftests/net/Makefile10
-rwxr-xr-xtools/testing/selftests/net/bpf_offload.py19
-rwxr-xr-xtools/testing/selftests/net/cmsg_ip.sh187
-rwxr-xr-xtools/testing/selftests/net/cmsg_ipv6.sh154
-rw-r--r--tools/testing/selftests/net/cmsg_sender.c114
-rw-r--r--tools/testing/selftests/net/config10
-rwxr-xr-xtools/testing/selftests/net/fcnal-test.sh4
-rwxr-xr-xtools/testing/selftests/net/fdb_flush.sh2
-rwxr-xr-xtools/testing/selftests/net/fib_nexthops.sh9
-rwxr-xr-xtools/testing/selftests/net/fib_rule_tests.sh74
-rw-r--r--tools/testing/selftests/net/forwarding/README2
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_mdb.sh2
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh10
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_flower_port_range.sh46
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh10
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh15
-rw-r--r--tools/testing/selftests/net/gro.c8
-rwxr-xr-xtools/testing/selftests/net/gro.sh7
-rwxr-xr-xtools/testing/selftests/net/ip_local_port_range.sh4
-rw-r--r--tools/testing/selftests/net/lib.sh19
-rw-r--r--tools/testing/selftests/net/lib/Makefile5
-rw-r--r--tools/testing/selftests/net/lib/py/__init__.py4
-rw-r--r--tools/testing/selftests/net/lib/py/ksft.py7
-rw-r--r--tools/testing/selftests/net/lib/py/netns.py18
-rw-r--r--tools/testing/selftests/net/lib/py/utils.py89
-rw-r--r--tools/testing/selftests/net/lib/py/ynl.py4
-rw-r--r--tools/testing/selftests/net/lib/xdp_dummy.bpf.c19
-rwxr-xr-xtools/testing/selftests/net/link_netns.py141
-rwxr-xr-xtools/testing/selftests/net/lwt_dst_cache_ref_loop.sh246
-rw-r--r--tools/testing/selftests/net/mptcp/Makefile4
-rwxr-xr-xtools/testing/selftests/net/mptcp/diag.sh27
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_connect.c2
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_diag.c272
-rwxr-xr-xtools/testing/selftests/net/mptcp/simult_flows.sh2
-rwxr-xr-xtools/testing/selftests/net/mptcp/userspace_pm.sh30
-rwxr-xr-xtools/testing/selftests/net/netfilter/br_netfilter.sh7
-rwxr-xr-xtools/testing/selftests/net/netfilter/br_netfilter_queue.sh7
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_queue.sh1
-rwxr-xr-xtools/testing/selftests/net/netns-name.sh10
-rwxr-xr-xtools/testing/selftests/net/nl_netdev.py18
-rw-r--r--tools/testing/selftests/net/openvswitch/Makefile2
-rwxr-xr-xtools/testing/selftests/net/openvswitch/openvswitch.sh11
-rwxr-xr-xtools/testing/selftests/net/packetdrill/ksft_runner.sh4
-rw-r--r--tools/testing/selftests/net/proc_net_pktgen.c690
-rw-r--r--tools/testing/selftests/net/psock_tpacket.c2
-rw-r--r--tools/testing/selftests/net/reuseaddr_ports_exhausted.c2
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.py30
-rw-r--r--tools/testing/selftests/net/setup_veth.sh3
-rw-r--r--tools/testing/selftests/net/so_rcv_listener.c168
-rw-r--r--tools/testing/selftests/net/tcp_ao/connect-deny.c58
-rw-r--r--tools/testing/selftests/net/tcp_ao/connect.c22
-rw-r--r--tools/testing/selftests/net/tcp_ao/icmps-discard.c17
-rw-r--r--tools/testing/selftests/net/tcp_ao/key-management.c76
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/aolib.h114
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c7
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/sock.c315
-rw-r--r--tools/testing/selftests/net/tcp_ao/restore.c75
-rw-r--r--tools/testing/selftests/net/tcp_ao/rst.c47
-rw-r--r--tools/testing/selftests/net/tcp_ao/self-connect.c18
-rw-r--r--tools/testing/selftests/net/tcp_ao/seq-ext.c30
-rw-r--r--tools/testing/selftests/net/tcp_ao/unsigned-md5.c118
-rwxr-xr-xtools/testing/selftests/net/test_blackhole_dev.sh11
-rwxr-xr-xtools/testing/selftests/net/test_so_rcv.sh73
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_fdb_changelink.sh111
-rw-r--r--tools/testing/selftests/net/udpgso.c26
-rw-r--r--tools/testing/selftests/net/ynl.mk3
-rw-r--r--tools/testing/selftests/nolibc/Makefile30
-rw-r--r--tools/testing/selftests/nolibc/nolibc-test-linkage.c6
-rw-r--r--tools/testing/selftests/nolibc/nolibc-test.c138
-rwxr-xr-xtools/testing/selftests/nolibc/run-tests.sh26
-rw-r--r--tools/testing/selftests/pci_endpoint/.gitignore2
-rw-r--r--tools/testing/selftests/pci_endpoint/Makefile7
-rw-r--r--tools/testing/selftests/pci_endpoint/config4
-rw-r--r--tools/testing/selftests/pci_endpoint/pci_endpoint_test.c232
-rw-r--r--tools/testing/selftests/pcie_bwctrl/Makefile2
-rw-r--r--tools/testing/selftests/pidfd/.gitignore2
-rw-r--r--tools/testing/selftests/pidfd/Makefile4
-rw-r--r--tools/testing/selftests/pidfd/pidfd.h109
-rw-r--r--tools/testing/selftests/pidfd/pidfd_exec_helper.c12
-rw-r--r--tools/testing/selftests/pidfd/pidfd_fdinfo_test.c1
-rw-r--r--tools/testing/selftests/pidfd/pidfd_info_test.c692
-rw-r--r--tools/testing/selftests/pidfd/pidfd_open_test.c30
-rw-r--r--tools/testing/selftests/pidfd/pidfd_setns_test.c45
-rw-r--r--tools/testing/selftests/pidfd/pidfd_test.c78
-rw-r--r--tools/testing/selftests/powerpc/include/pkeys.h5
-rw-r--r--tools/testing/selftests/powerpc/mm/pkey_exec_prot.c2
-rw-r--r--tools/testing/selftests/powerpc/mm/pkey_siginfo.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/event_alternatives_tests_p10.c3
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/generic_events_valid_test.c3
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_l2l3_sel_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_radix_scope_qual_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_cmp_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/invalid_event_code_test.c4
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/reserved_bits_mmcra_sample_elig_mode_test.c5
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile3
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/bhrb_filter_map_test.c17
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/check_extended_reg_test.c35
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c20
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h12
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_cond_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_no_branch_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_ind_call_test.c2
-rw-r--r--tools/testing/selftests/powerpc/ptrace/core-pkey.c6
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c6
-rw-r--r--tools/testing/selftests/ptp/testptp.c37
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/srcu_lockdep.sh2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot6
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE073
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE103
-rw-r--r--tools/testing/selftests/riscv/vector/.gitignore3
-rw-r--r--tools/testing/selftests/riscv/vector/Makefile17
-rw-r--r--tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c94
-rw-r--r--tools/testing/selftests/riscv/vector/v_helpers.c68
-rw-r--r--tools/testing/selftests/riscv/vector/v_helpers.h8
-rw-r--r--tools/testing/selftests/riscv/vector/v_initval.c22
-rw-r--r--tools/testing/selftests/riscv/vector/v_initval_nolibc.c72
-rw-r--r--tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c20
-rw-r--r--tools/testing/selftests/riscv/vector/vstate_prctl.c305
-rw-r--r--tools/testing/selftests/rseq/.gitignore1
-rw-r--r--tools/testing/selftests/rseq/Makefile9
-rw-r--r--tools/testing/selftests/rseq/param_test.c24
-rw-r--r--tools/testing/selftests/rseq/rseq-or1k-bits.h412
-rw-r--r--tools/testing/selftests/rseq/rseq-or1k-thread-pointer.h13
-rw-r--r--tools/testing/selftests/rseq/rseq-or1k.h181
-rw-r--r--tools/testing/selftests/rseq/rseq-riscv-bits.h6
-rw-r--r--tools/testing/selftests/rseq/rseq-riscv.h2
-rw-r--r--tools/testing/selftests/rseq/rseq-thread-pointer.h2
-rw-r--r--tools/testing/selftests/rseq/rseq.c27
-rw-r--r--tools/testing/selftests/rseq/rseq.h7
-rwxr-xr-xtools/testing/selftests/rseq/run_syscall_errors_test.sh5
-rw-r--r--tools/testing/selftests/rseq/syscall_errors_test.c124
-rw-r--r--tools/testing/selftests/sched/config2
-rw-r--r--tools/testing/selftests/sched_ext/config1
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c205
-rwxr-xr-xtools/testing/selftests/sysctl/sysctl.sh10
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/police.json10
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json34
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json25
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json23
-rw-r--r--tools/testing/selftests/timers/posix_timers.c73
-rw-r--r--tools/testing/selftests/timers/skew_consistency.c2
-rw-r--r--tools/testing/selftests/ublk/.gitignore3
-rw-r--r--tools/testing/selftests/ublk/Makefile27
-rw-r--r--tools/testing/selftests/ublk/common.c55
-rw-r--r--tools/testing/selftests/ublk/config1
-rw-r--r--tools/testing/selftests/ublk/file_backed.c169
-rw-r--r--tools/testing/selftests/ublk/kublk.c1138
-rw-r--r--tools/testing/selftests/ublk/kublk.h370
-rw-r--r--tools/testing/selftests/ublk/null.c106
-rw-r--r--tools/testing/selftests/ublk/stripe.c318
-rwxr-xr-xtools/testing/selftests/ublk/test_common.sh246
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_01.sh44
-rwxr-xr-xtools/testing/selftests/ublk/test_loop_01.sh32
-rwxr-xr-xtools/testing/selftests/ublk/test_loop_02.sh22
-rwxr-xr-xtools/testing/selftests/ublk/test_loop_03.sh31
-rwxr-xr-xtools/testing/selftests/ublk/test_loop_04.sh22
-rwxr-xr-xtools/testing/selftests/ublk/test_null_01.sh20
-rwxr-xr-xtools/testing/selftests/ublk/test_null_02.sh20
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_01.sh47
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_02.sh47
-rwxr-xr-xtools/testing/selftests/ublk/test_stripe_01.sh34
-rwxr-xr-xtools/testing/selftests/ublk/test_stripe_02.sh24
-rw-r--r--tools/testing/selftests/ublk/trace/seq_io.bt25
-rw-r--r--tools/testing/selftests/ublk/ublk_dep.h18
-rw-r--r--tools/testing/selftests/user_events/dyn_test.c2
-rw-r--r--tools/testing/selftests/vDSO/Makefile11
-rw-r--r--tools/testing/selftests/vDSO/parse_vdso.c29
-rw-r--r--tools/testing/selftests/vDSO/parse_vdso.h1
-rw-r--r--tools/testing/selftests/vDSO/vdso_standalone_test_x86.c150
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_gettimeofday.c4
-rw-r--r--tools/testing/selftests/wireguard/qemu/debug.config1
-rw-r--r--tools/testing/selftests/x86/Makefile6
-rw-r--r--tools/testing/selftests/x86/amx.c442
-rw-r--r--tools/testing/selftests/x86/avx.c12
-rw-r--r--tools/testing/selftests/x86/corrupt_xstate_header.c14
-rw-r--r--tools/testing/selftests/x86/entry_from_vm86.c24
-rw-r--r--tools/testing/selftests/x86/fsgsbase.c24
-rw-r--r--tools/testing/selftests/x86/helpers.h28
-rw-r--r--tools/testing/selftests/x86/ioperm.c25
-rw-r--r--tools/testing/selftests/x86/iopl.c25
-rw-r--r--tools/testing/selftests/x86/lam.c153
-rw-r--r--tools/testing/selftests/x86/ldt_gdt.c18
-rw-r--r--tools/testing/selftests/x86/mov_ss_trap.c14
-rw-r--r--tools/testing/selftests/x86/ptrace_syscall.c24
-rw-r--r--tools/testing/selftests/x86/sigaltstack.c26
-rw-r--r--tools/testing/selftests/x86/sigreturn.c24
-rw-r--r--tools/testing/selftests/x86/single_step_syscall.c22
-rw-r--r--tools/testing/selftests/x86/syscall_arg_fault.c12
-rw-r--r--tools/testing/selftests/x86/syscall_nt.c12
-rw-r--r--tools/testing/selftests/x86/syscall_numbering.c3
-rw-r--r--tools/testing/selftests/x86/sysret_rip.c24
-rw-r--r--tools/testing/selftests/x86/test_vsyscall.c13
-rw-r--r--tools/testing/selftests/x86/unwind_vdso.c12
-rw-r--r--tools/testing/selftests/x86/xstate.c477
-rw-r--r--tools/testing/selftests/x86/xstate.h195
-rw-r--r--tools/testing/vma/vma.c64
-rw-r--r--tools/testing/vma/vma_internal.h220
-rw-r--r--tools/testing/vsock/util.c88
-rw-r--r--tools/testing/vsock/util.h2
-rw-r--r--tools/testing/vsock/vsock_test.c163
-rw-r--r--tools/thermal/lib/Makefile13
-rw-r--r--tools/tracing/latency/Makefile6
-rw-r--r--tools/tracing/rtla/.gitignore1
-rw-r--r--tools/tracing/rtla/Makefile30
-rw-r--r--tools/tracing/rtla/Makefile.config42
-rw-r--r--tools/tracing/rtla/Makefile.rtla17
-rw-r--r--tools/tracing/rtla/src/Build1
-rw-r--r--tools/tracing/rtla/src/osnoise.c124
-rw-r--r--tools/tracing/rtla/src/osnoise.h52
-rw-r--r--tools/tracing/rtla/src/osnoise_hist.c129
-rw-r--r--tools/tracing/rtla/src/osnoise_top.c131
-rw-r--r--tools/tracing/rtla/src/timerlat.bpf.c149
-rw-r--r--tools/tracing/rtla/src/timerlat.c106
-rw-r--r--tools/tracing/rtla/src/timerlat.h54
-rw-r--r--tools/tracing/rtla/src/timerlat_aa.c2
-rw-r--r--tools/tracing/rtla/src/timerlat_bpf.c166
-rw-r--r--tools/tracing/rtla/src/timerlat_bpf.h59
-rw-r--r--tools/tracing/rtla/src/timerlat_hist.c373
-rw-r--r--tools/tracing/rtla/src/timerlat_top.c502
-rw-r--r--tools/tracing/rtla/src/trace.c69
-rw-r--r--tools/tracing/rtla/src/trace.h4
-rw-r--r--tools/tracing/rtla/tests/engine.sh114
-rw-r--r--tools/tracing/rtla/tests/hwnoise.t21
-rw-r--r--tools/tracing/rtla/tests/osnoise.t25
-rw-r--r--tools/tracing/rtla/tests/timerlat.t41
-rw-r--r--tools/verification/dot2/automata.py36
-rw-r--r--tools/verification/dot2/dot2c.py4
-rw-r--r--tools/verification/dot2/dot2k42
-rw-r--r--tools/verification/dot2/dot2k.py290
-rw-r--r--tools/verification/dot2/dot2k_templates/Kconfig9
-rw-r--r--tools/verification/dot2/dot2k_templates/main.c91
-rw-r--r--tools/verification/dot2/dot2k_templates/main_container.c38
-rw-r--r--tools/verification/dot2/dot2k_templates/main_container.h3
-rw-r--r--tools/verification/dot2/dot2k_templates/main_global.c91
-rw-r--r--tools/verification/dot2/dot2k_templates/main_per_cpu.c91
-rw-r--r--tools/verification/dot2/dot2k_templates/main_per_task.c91
-rw-r--r--tools/verification/dot2/dot2k_templates/trace.h13
-rw-r--r--tools/verification/models/sched/sco.dot18
-rw-r--r--tools/verification/models/sched/scpd.dot18
-rw-r--r--tools/verification/models/sched/sncid.dot18
-rw-r--r--tools/verification/models/sched/snep.dot18
-rw-r--r--tools/verification/models/sched/snroc.dot18
-rw-r--r--tools/verification/models/sched/tss.dot18
-rw-r--r--tools/verification/rv/Makefile6
-rw-r--r--tools/verification/rv/Makefile.rv2
-rw-r--r--tools/verification/rv/include/in_kernel.h2
-rw-r--r--tools/verification/rv/include/rv.h3
-rw-r--r--tools/verification/rv/src/in_kernel.c256
-rw-r--r--tools/verification/rv/src/rv.c38
866 files changed, 37375 insertions, 9890 deletions
diff --git a/tools/Makefile b/tools/Makefile
index 278d24723b74..5e1254eb66de 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -25,7 +25,6 @@ help:
@echo ' leds - LEDs tools'
@echo ' nolibc - nolibc headers testing and installation'
@echo ' objtool - an ELF object analysis tool'
- @echo ' pci - PCI tools'
@echo ' perf - Linux performance measurement and analysis tool'
@echo ' selftests - various kernel selftests'
@echo ' sched_ext - sched_ext example schedulers'
@@ -69,7 +68,7 @@ acpi: FORCE
cpupower: FORCE
$(call descend,power/$@)
-counter firewire hv guest bootconfig spi usb virtio mm bpf iio gpio objtool leds wmi pci firmware debugging tracing: FORCE
+counter firewire hv guest bootconfig spi usb virtio mm bpf iio gpio objtool leds wmi firmware debugging tracing: FORCE
$(call descend,$@)
bpf/%: FORCE
@@ -123,7 +122,7 @@ all: acpi counter cpupower gpio hv firewire \
perf selftests bootconfig spi turbostat usb \
virtio mm bpf x86_energy_perf_policy \
tmon freefall iio objtool kvm_stat wmi \
- pci debugging tracing thermal thermometer thermal-engine
+ debugging tracing thermal thermometer thermal-engine
acpi_install:
$(call descend,power/$(@:_install=),install)
@@ -131,7 +130,7 @@ acpi_install:
cpupower_install:
$(call descend,power/$(@:_install=),install)
-counter_install firewire_install gpio_install hv_install iio_install perf_install bootconfig_install spi_install usb_install virtio_install mm_install bpf_install objtool_install wmi_install pci_install debugging_install tracing_install:
+counter_install firewire_install gpio_install hv_install iio_install perf_install bootconfig_install spi_install usb_install virtio_install mm_install bpf_install objtool_install wmi_install debugging_install tracing_install:
$(call descend,$(@:_install=),install)
selftests_install:
@@ -163,7 +162,7 @@ install: acpi_install counter_install cpupower_install gpio_install \
perf_install selftests_install turbostat_install usb_install \
virtio_install mm_install bpf_install x86_energy_perf_policy_install \
tmon_install freefall_install objtool_install kvm_stat_install \
- wmi_install pci_install debugging_install intel-speed-select_install \
+ wmi_install debugging_install intel-speed-select_install \
tracing_install thermometer_install thermal-engine_install
acpi_clean:
@@ -172,7 +171,7 @@ acpi_clean:
cpupower_clean:
$(call descend,power/cpupower,clean)
-counter_clean hv_clean firewire_clean bootconfig_clean spi_clean usb_clean virtio_clean mm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean pci_clean firmware_clean debugging_clean tracing_clean:
+counter_clean hv_clean firewire_clean bootconfig_clean spi_clean usb_clean virtio_clean mm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean firmware_clean debugging_clean tracing_clean:
$(call descend,$(@:_clean=),clean)
libapi_clean:
@@ -219,7 +218,7 @@ clean: acpi_clean counter_clean cpupower_clean hv_clean firewire_clean \
perf_clean selftests_clean turbostat_clean bootconfig_clean spi_clean usb_clean virtio_clean \
mm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \
freefall_clean build_clean libbpf_clean libsubcmd_clean \
- gpio_clean objtool_clean leds_clean wmi_clean pci_clean firmware_clean debugging_clean \
+ gpio_clean objtool_clean leds_clean wmi_clean firmware_clean debugging_clean \
intel-speed-select_clean tracing_clean thermal_clean thermometer_clean thermal-engine_clean \
sched_ext_clean
diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c
index 1334214546d7..3feac0482fe9 100644
--- a/tools/accounting/getdelays.c
+++ b/tools/accounting/getdelays.c
@@ -192,60 +192,77 @@ static int get_family_id(int sd)
}
#define average_ms(t, c) (t / 1000000ULL / (c ? c : 1))
+#define delay_ms(t) (t / 1000000ULL)
static void print_delayacct(struct taskstats *t)
{
- printf("\n\nCPU %15s%15s%15s%15s%15s\n"
- " %15llu%15llu%15llu%15llu%15.3fms\n"
- "IO %15s%15s%15s\n"
- " %15llu%15llu%15.3fms\n"
- "SWAP %15s%15s%15s\n"
- " %15llu%15llu%15.3fms\n"
- "RECLAIM %12s%15s%15s\n"
- " %15llu%15llu%15.3fms\n"
- "THRASHING%12s%15s%15s\n"
- " %15llu%15llu%15.3fms\n"
- "COMPACT %12s%15s%15s\n"
- " %15llu%15llu%15.3fms\n"
- "WPCOPY %12s%15s%15s\n"
- " %15llu%15llu%15.3fms\n"
- "IRQ %15s%15s%15s\n"
- " %15llu%15llu%15.3fms\n",
+ printf("\n\nCPU %15s%15s%15s%15s%15s%15s%15s\n"
+ " %15llu%15llu%15llu%15llu%15.3fms%13.6fms%13.6fms\n"
+ "IO %15s%15s%15s%15s%15s\n"
+ " %15llu%15llu%15.3fms%13.6fms%13.6fms\n"
+ "SWAP %15s%15s%15s%15s%15s\n"
+ " %15llu%15llu%15.3fms%13.6fms%13.6fms\n"
+ "RECLAIM %12s%15s%15s%15s%15s\n"
+ " %15llu%15llu%15.3fms%13.6fms%13.6fms\n"
+ "THRASHING%12s%15s%15s%15s%15s\n"
+ " %15llu%15llu%15.3fms%13.6fms%13.6fms\n"
+ "COMPACT %12s%15s%15s%15s%15s\n"
+ " %15llu%15llu%15.3fms%13.6fms%13.6fms\n"
+ "WPCOPY %12s%15s%15s%15s%15s\n"
+ " %15llu%15llu%15.3fms%13.6fms%13.6fms\n"
+ "IRQ %15s%15s%15s%15s%15s\n"
+ " %15llu%15llu%15.3fms%13.6fms%13.6fms\n",
"count", "real total", "virtual total",
- "delay total", "delay average",
+ "delay total", "delay average", "delay max", "delay min",
(unsigned long long)t->cpu_count,
(unsigned long long)t->cpu_run_real_total,
(unsigned long long)t->cpu_run_virtual_total,
(unsigned long long)t->cpu_delay_total,
average_ms((double)t->cpu_delay_total, t->cpu_count),
- "count", "delay total", "delay average",
+ delay_ms((double)t->cpu_delay_max),
+ delay_ms((double)t->cpu_delay_min),
+ "count", "delay total", "delay average", "delay max", "delay min",
(unsigned long long)t->blkio_count,
(unsigned long long)t->blkio_delay_total,
average_ms((double)t->blkio_delay_total, t->blkio_count),
- "count", "delay total", "delay average",
+ delay_ms((double)t->blkio_delay_max),
+ delay_ms((double)t->blkio_delay_min),
+ "count", "delay total", "delay average", "delay max", "delay min",
(unsigned long long)t->swapin_count,
(unsigned long long)t->swapin_delay_total,
average_ms((double)t->swapin_delay_total, t->swapin_count),
- "count", "delay total", "delay average",
+ delay_ms((double)t->swapin_delay_max),
+ delay_ms((double)t->swapin_delay_min),
+ "count", "delay total", "delay average", "delay max", "delay min",
(unsigned long long)t->freepages_count,
(unsigned long long)t->freepages_delay_total,
average_ms((double)t->freepages_delay_total, t->freepages_count),
- "count", "delay total", "delay average",
+ delay_ms((double)t->freepages_delay_max),
+ delay_ms((double)t->freepages_delay_min),
+ "count", "delay total", "delay average", "delay max", "delay min",
(unsigned long long)t->thrashing_count,
(unsigned long long)t->thrashing_delay_total,
average_ms((double)t->thrashing_delay_total, t->thrashing_count),
- "count", "delay total", "delay average",
+ delay_ms((double)t->thrashing_delay_max),
+ delay_ms((double)t->thrashing_delay_min),
+ "count", "delay total", "delay average", "delay max", "delay min",
(unsigned long long)t->compact_count,
(unsigned long long)t->compact_delay_total,
average_ms((double)t->compact_delay_total, t->compact_count),
- "count", "delay total", "delay average",
+ delay_ms((double)t->compact_delay_max),
+ delay_ms((double)t->compact_delay_min),
+ "count", "delay total", "delay average", "delay max", "delay min",
(unsigned long long)t->wpcopy_count,
(unsigned long long)t->wpcopy_delay_total,
average_ms((double)t->wpcopy_delay_total, t->wpcopy_count),
- "count", "delay total", "delay average",
+ delay_ms((double)t->wpcopy_delay_max),
+ delay_ms((double)t->wpcopy_delay_min),
+ "count", "delay total", "delay average", "delay max", "delay min",
(unsigned long long)t->irq_count,
(unsigned long long)t->irq_delay_total,
- average_ms((double)t->irq_delay_total, t->irq_count));
+ average_ms((double)t->irq_delay_total, t->irq_count),
+ delay_ms((double)t->irq_delay_max),
+ delay_ms((double)t->irq_delay_min));
}
static void task_context_switch_counts(struct taskstats *t)
diff --git a/tools/accounting/procacct.c b/tools/accounting/procacct.c
index 90c4a37f53d9..e8dee05a6264 100644
--- a/tools/accounting/procacct.c
+++ b/tools/accounting/procacct.c
@@ -274,12 +274,11 @@ int main(int argc, char *argv[])
int maskset = 0;
char *logfile = NULL;
int cfd = 0;
- int forking = 0;
struct msgtemplate msg;
- while (!forking) {
- c = getopt(argc, argv, "m:vr:");
+ while (1) {
+ c = getopt(argc, argv, "m:vr:w:");
if (c < 0)
break;
diff --git a/tools/arch/arm/include/uapi/asm/kvm.h b/tools/arch/arm/include/uapi/asm/kvm.h
index 03cd7c19a683..d5dd96902817 100644
--- a/tools/arch/arm/include/uapi/asm/kvm.h
+++ b/tools/arch/arm/include/uapi/asm/kvm.h
@@ -246,6 +246,7 @@ struct kvm_vcpu_events {
#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7
#define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8
+#define KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ 9
#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
(0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
diff --git a/tools/arch/arm64/include/asm/sysreg.h b/tools/arch/arm64/include/asm/sysreg.h
index cd8420e8c3ad..b6c5ece4fdee 100644
--- a/tools/arch/arm64/include/asm/sysreg.h
+++ b/tools/arch/arm64/include/asm/sysreg.h
@@ -11,6 +11,7 @@
#include <linux/bits.h>
#include <linux/stringify.h>
+#include <linux/kasan-tags.h>
#include <asm/gpr-num.h>
@@ -108,6 +109,9 @@
#define set_pstate_ssbs(x) asm volatile(SET_PSTATE_SSBS(x))
#define set_pstate_dit(x) asm volatile(SET_PSTATE_DIT(x))
+/* Register-based PAN access, for save/restore purposes */
+#define SYS_PSTATE_PAN sys_reg(3, 0, 4, 2, 3)
+
#define __SYS_BARRIER_INSN(CRm, op2, Rt) \
__emit_inst(0xd5000000 | sys_insn(0, 3, 3, (CRm), (op2)) | ((Rt) & 0x1f))
@@ -123,6 +127,37 @@
#define SYS_DC_CIGSW sys_insn(1, 0, 7, 14, 4)
#define SYS_DC_CIGDSW sys_insn(1, 0, 7, 14, 6)
+#define SYS_IC_IALLUIS sys_insn(1, 0, 7, 1, 0)
+#define SYS_IC_IALLU sys_insn(1, 0, 7, 5, 0)
+#define SYS_IC_IVAU sys_insn(1, 3, 7, 5, 1)
+
+#define SYS_DC_IVAC sys_insn(1, 0, 7, 6, 1)
+#define SYS_DC_IGVAC sys_insn(1, 0, 7, 6, 3)
+#define SYS_DC_IGDVAC sys_insn(1, 0, 7, 6, 5)
+
+#define SYS_DC_CVAC sys_insn(1, 3, 7, 10, 1)
+#define SYS_DC_CGVAC sys_insn(1, 3, 7, 10, 3)
+#define SYS_DC_CGDVAC sys_insn(1, 3, 7, 10, 5)
+
+#define SYS_DC_CVAU sys_insn(1, 3, 7, 11, 1)
+
+#define SYS_DC_CVAP sys_insn(1, 3, 7, 12, 1)
+#define SYS_DC_CGVAP sys_insn(1, 3, 7, 12, 3)
+#define SYS_DC_CGDVAP sys_insn(1, 3, 7, 12, 5)
+
+#define SYS_DC_CVADP sys_insn(1, 3, 7, 13, 1)
+#define SYS_DC_CGVADP sys_insn(1, 3, 7, 13, 3)
+#define SYS_DC_CGDVADP sys_insn(1, 3, 7, 13, 5)
+
+#define SYS_DC_CIVAC sys_insn(1, 3, 7, 14, 1)
+#define SYS_DC_CIGVAC sys_insn(1, 3, 7, 14, 3)
+#define SYS_DC_CIGDVAC sys_insn(1, 3, 7, 14, 5)
+
+/* Data cache zero operations */
+#define SYS_DC_ZVA sys_insn(1, 3, 7, 4, 1)
+#define SYS_DC_GVA sys_insn(1, 3, 7, 4, 3)
+#define SYS_DC_GZVA sys_insn(1, 3, 7, 4, 4)
+
/*
* Automatically generated definitions for system registers, the
* manual encodings below are in the process of being converted to
@@ -162,6 +197,84 @@
#define SYS_DBGDTRTX_EL0 sys_reg(2, 3, 0, 5, 0)
#define SYS_DBGVCR32_EL2 sys_reg(2, 4, 0, 7, 0)
+#define SYS_BRBINF_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 0))
+#define SYS_BRBINFINJ_EL1 sys_reg(2, 1, 9, 1, 0)
+#define SYS_BRBSRC_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 1))
+#define SYS_BRBSRCINJ_EL1 sys_reg(2, 1, 9, 1, 1)
+#define SYS_BRBTGT_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 2))
+#define SYS_BRBTGTINJ_EL1 sys_reg(2, 1, 9, 1, 2)
+#define SYS_BRBTS_EL1 sys_reg(2, 1, 9, 0, 2)
+
+#define SYS_BRBCR_EL1 sys_reg(2, 1, 9, 0, 0)
+#define SYS_BRBFCR_EL1 sys_reg(2, 1, 9, 0, 1)
+#define SYS_BRBIDR0_EL1 sys_reg(2, 1, 9, 2, 0)
+
+#define SYS_TRCITECR_EL1 sys_reg(3, 0, 1, 2, 3)
+#define SYS_TRCACATR(m) sys_reg(2, 1, 2, ((m & 7) << 1), (2 | (m >> 3)))
+#define SYS_TRCACVR(m) sys_reg(2, 1, 2, ((m & 7) << 1), (0 | (m >> 3)))
+#define SYS_TRCAUTHSTATUS sys_reg(2, 1, 7, 14, 6)
+#define SYS_TRCAUXCTLR sys_reg(2, 1, 0, 6, 0)
+#define SYS_TRCBBCTLR sys_reg(2, 1, 0, 15, 0)
+#define SYS_TRCCCCTLR sys_reg(2, 1, 0, 14, 0)
+#define SYS_TRCCIDCCTLR0 sys_reg(2, 1, 3, 0, 2)
+#define SYS_TRCCIDCCTLR1 sys_reg(2, 1, 3, 1, 2)
+#define SYS_TRCCIDCVR(m) sys_reg(2, 1, 3, ((m & 7) << 1), 0)
+#define SYS_TRCCLAIMCLR sys_reg(2, 1, 7, 9, 6)
+#define SYS_TRCCLAIMSET sys_reg(2, 1, 7, 8, 6)
+#define SYS_TRCCNTCTLR(m) sys_reg(2, 1, 0, (4 | (m & 3)), 5)
+#define SYS_TRCCNTRLDVR(m) sys_reg(2, 1, 0, (0 | (m & 3)), 5)
+#define SYS_TRCCNTVR(m) sys_reg(2, 1, 0, (8 | (m & 3)), 5)
+#define SYS_TRCCONFIGR sys_reg(2, 1, 0, 4, 0)
+#define SYS_TRCDEVARCH sys_reg(2, 1, 7, 15, 6)
+#define SYS_TRCDEVID sys_reg(2, 1, 7, 2, 7)
+#define SYS_TRCEVENTCTL0R sys_reg(2, 1, 0, 8, 0)
+#define SYS_TRCEVENTCTL1R sys_reg(2, 1, 0, 9, 0)
+#define SYS_TRCEXTINSELR(m) sys_reg(2, 1, 0, (8 | (m & 3)), 4)
+#define SYS_TRCIDR0 sys_reg(2, 1, 0, 8, 7)
+#define SYS_TRCIDR10 sys_reg(2, 1, 0, 2, 6)
+#define SYS_TRCIDR11 sys_reg(2, 1, 0, 3, 6)
+#define SYS_TRCIDR12 sys_reg(2, 1, 0, 4, 6)
+#define SYS_TRCIDR13 sys_reg(2, 1, 0, 5, 6)
+#define SYS_TRCIDR1 sys_reg(2, 1, 0, 9, 7)
+#define SYS_TRCIDR2 sys_reg(2, 1, 0, 10, 7)
+#define SYS_TRCIDR3 sys_reg(2, 1, 0, 11, 7)
+#define SYS_TRCIDR4 sys_reg(2, 1, 0, 12, 7)
+#define SYS_TRCIDR5 sys_reg(2, 1, 0, 13, 7)
+#define SYS_TRCIDR6 sys_reg(2, 1, 0, 14, 7)
+#define SYS_TRCIDR7 sys_reg(2, 1, 0, 15, 7)
+#define SYS_TRCIDR8 sys_reg(2, 1, 0, 0, 6)
+#define SYS_TRCIDR9 sys_reg(2, 1, 0, 1, 6)
+#define SYS_TRCIMSPEC(m) sys_reg(2, 1, 0, (m & 7), 7)
+#define SYS_TRCITEEDCR sys_reg(2, 1, 0, 2, 1)
+#define SYS_TRCOSLSR sys_reg(2, 1, 1, 1, 4)
+#define SYS_TRCPRGCTLR sys_reg(2, 1, 0, 1, 0)
+#define SYS_TRCQCTLR sys_reg(2, 1, 0, 1, 1)
+#define SYS_TRCRSCTLR(m) sys_reg(2, 1, 1, (m & 15), (0 | (m >> 4)))
+#define SYS_TRCRSR sys_reg(2, 1, 0, 10, 0)
+#define SYS_TRCSEQEVR(m) sys_reg(2, 1, 0, (m & 3), 4)
+#define SYS_TRCSEQRSTEVR sys_reg(2, 1, 0, 6, 4)
+#define SYS_TRCSEQSTR sys_reg(2, 1, 0, 7, 4)
+#define SYS_TRCSSCCR(m) sys_reg(2, 1, 1, (m & 7), 2)
+#define SYS_TRCSSCSR(m) sys_reg(2, 1, 1, (8 | (m & 7)), 2)
+#define SYS_TRCSSPCICR(m) sys_reg(2, 1, 1, (m & 7), 3)
+#define SYS_TRCSTALLCTLR sys_reg(2, 1, 0, 11, 0)
+#define SYS_TRCSTATR sys_reg(2, 1, 0, 3, 0)
+#define SYS_TRCSYNCPR sys_reg(2, 1, 0, 13, 0)
+#define SYS_TRCTRACEIDR sys_reg(2, 1, 0, 0, 1)
+#define SYS_TRCTSCTLR sys_reg(2, 1, 0, 12, 0)
+#define SYS_TRCVICTLR sys_reg(2, 1, 0, 0, 2)
+#define SYS_TRCVIIECTLR sys_reg(2, 1, 0, 1, 2)
+#define SYS_TRCVIPCSSCTLR sys_reg(2, 1, 0, 3, 2)
+#define SYS_TRCVISSCTLR sys_reg(2, 1, 0, 2, 2)
+#define SYS_TRCVMIDCCTLR0 sys_reg(2, 1, 3, 2, 2)
+#define SYS_TRCVMIDCCTLR1 sys_reg(2, 1, 3, 3, 2)
+#define SYS_TRCVMIDCVR(m) sys_reg(2, 1, 3, ((m & 7) << 1), 1)
+
+/* ETM */
+#define SYS_TRCOSLAR sys_reg(2, 1, 1, 0, 4)
+
+#define SYS_BRBCR_EL2 sys_reg(2, 4, 9, 0, 0)
+
#define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0)
#define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5)
#define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6)
@@ -170,8 +283,6 @@
#define SYS_RGSR_EL1 sys_reg(3, 0, 1, 0, 5)
#define SYS_GCR_EL1 sys_reg(3, 0, 1, 0, 6)
-#define SYS_TRFCR_EL1 sys_reg(3, 0, 1, 2, 1)
-
#define SYS_TCR_EL1 sys_reg(3, 0, 2, 0, 2)
#define SYS_APIAKEYLO_EL1 sys_reg(3, 0, 2, 1, 0)
@@ -202,15 +313,38 @@
#define SYS_ERXCTLR_EL1 sys_reg(3, 0, 5, 4, 1)
#define SYS_ERXSTATUS_EL1 sys_reg(3, 0, 5, 4, 2)
#define SYS_ERXADDR_EL1 sys_reg(3, 0, 5, 4, 3)
+#define SYS_ERXPFGF_EL1 sys_reg(3, 0, 5, 4, 4)
+#define SYS_ERXPFGCTL_EL1 sys_reg(3, 0, 5, 4, 5)
+#define SYS_ERXPFGCDN_EL1 sys_reg(3, 0, 5, 4, 6)
#define SYS_ERXMISC0_EL1 sys_reg(3, 0, 5, 5, 0)
#define SYS_ERXMISC1_EL1 sys_reg(3, 0, 5, 5, 1)
+#define SYS_ERXMISC2_EL1 sys_reg(3, 0, 5, 5, 2)
+#define SYS_ERXMISC3_EL1 sys_reg(3, 0, 5, 5, 3)
#define SYS_TFSR_EL1 sys_reg(3, 0, 5, 6, 0)
#define SYS_TFSRE0_EL1 sys_reg(3, 0, 5, 6, 1)
#define SYS_PAR_EL1 sys_reg(3, 0, 7, 4, 0)
#define SYS_PAR_EL1_F BIT(0)
+/* When PAR_EL1.F == 1 */
#define SYS_PAR_EL1_FST GENMASK(6, 1)
+#define SYS_PAR_EL1_PTW BIT(8)
+#define SYS_PAR_EL1_S BIT(9)
+#define SYS_PAR_EL1_AssuredOnly BIT(12)
+#define SYS_PAR_EL1_TopLevel BIT(13)
+#define SYS_PAR_EL1_Overlay BIT(14)
+#define SYS_PAR_EL1_DirtyBit BIT(15)
+#define SYS_PAR_EL1_F1_IMPDEF GENMASK_ULL(63, 48)
+#define SYS_PAR_EL1_F1_RES0 (BIT(7) | BIT(10) | GENMASK_ULL(47, 16))
+#define SYS_PAR_EL1_RES1 BIT(11)
+/* When PAR_EL1.F == 0 */
+#define SYS_PAR_EL1_SH GENMASK_ULL(8, 7)
+#define SYS_PAR_EL1_NS BIT(9)
+#define SYS_PAR_EL1_F0_IMPDEF BIT(10)
+#define SYS_PAR_EL1_NSE BIT(11)
+#define SYS_PAR_EL1_PA GENMASK_ULL(51, 12)
+#define SYS_PAR_EL1_ATTR GENMASK_ULL(63, 56)
+#define SYS_PAR_EL1_F0_RES0 (GENMASK_ULL(6, 1) | GENMASK_ULL(55, 52))
/*** Statistical Profiling Extension ***/
#define PMSEVFR_EL1_RES0_IMP \
@@ -274,6 +408,8 @@
#define SYS_ICC_IGRPEN0_EL1 sys_reg(3, 0, 12, 12, 6)
#define SYS_ICC_IGRPEN1_EL1 sys_reg(3, 0, 12, 12, 7)
+#define SYS_ACCDATA_EL1 sys_reg(3, 0, 13, 0, 5)
+
#define SYS_CNTKCTL_EL1 sys_reg(3, 0, 14, 1, 0)
#define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7)
@@ -286,7 +422,6 @@
#define SYS_PMCNTENCLR_EL0 sys_reg(3, 3, 9, 12, 2)
#define SYS_PMOVSCLR_EL0 sys_reg(3, 3, 9, 12, 3)
#define SYS_PMSWINC_EL0 sys_reg(3, 3, 9, 12, 4)
-#define SYS_PMSELR_EL0 sys_reg(3, 3, 9, 12, 5)
#define SYS_PMCEID0_EL0 sys_reg(3, 3, 9, 12, 6)
#define SYS_PMCEID1_EL0 sys_reg(3, 3, 9, 12, 7)
#define SYS_PMCCNTR_EL0 sys_reg(3, 3, 9, 13, 0)
@@ -369,6 +504,7 @@
#define SYS_SCTLR_EL2 sys_reg(3, 4, 1, 0, 0)
#define SYS_ACTLR_EL2 sys_reg(3, 4, 1, 0, 1)
+#define SYS_SCTLR2_EL2 sys_reg(3, 4, 1, 0, 3)
#define SYS_HCR_EL2 sys_reg(3, 4, 1, 1, 0)
#define SYS_MDCR_EL2 sys_reg(3, 4, 1, 1, 1)
#define SYS_CPTR_EL2 sys_reg(3, 4, 1, 1, 2)
@@ -381,13 +517,15 @@
#define SYS_VTTBR_EL2 sys_reg(3, 4, 2, 1, 0)
#define SYS_VTCR_EL2 sys_reg(3, 4, 2, 1, 2)
-#define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1)
-#define SYS_HDFGRTR_EL2 sys_reg(3, 4, 3, 1, 4)
-#define SYS_HDFGWTR_EL2 sys_reg(3, 4, 3, 1, 5)
+#define SYS_VNCR_EL2 sys_reg(3, 4, 2, 2, 0)
#define SYS_HAFGRTR_EL2 sys_reg(3, 4, 3, 1, 6)
#define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0)
#define SYS_ELR_EL2 sys_reg(3, 4, 4, 0, 1)
#define SYS_SP_EL1 sys_reg(3, 4, 4, 1, 0)
+#define SYS_SPSR_irq sys_reg(3, 4, 4, 3, 0)
+#define SYS_SPSR_abt sys_reg(3, 4, 4, 3, 1)
+#define SYS_SPSR_und sys_reg(3, 4, 4, 3, 2)
+#define SYS_SPSR_fiq sys_reg(3, 4, 4, 3, 3)
#define SYS_IFSR32_EL2 sys_reg(3, 4, 5, 0, 1)
#define SYS_AFSR0_EL2 sys_reg(3, 4, 5, 1, 0)
#define SYS_AFSR1_EL2 sys_reg(3, 4, 5, 1, 1)
@@ -420,9 +558,6 @@
#define SYS_ICH_VSEIR_EL2 sys_reg(3, 4, 12, 9, 4)
#define SYS_ICC_SRE_EL2 sys_reg(3, 4, 12, 9, 5)
-#define SYS_ICH_HCR_EL2 sys_reg(3, 4, 12, 11, 0)
-#define SYS_ICH_VTR_EL2 sys_reg(3, 4, 12, 11, 1)
-#define SYS_ICH_MISR_EL2 sys_reg(3, 4, 12, 11, 2)
#define SYS_ICH_EISR_EL2 sys_reg(3, 4, 12, 11, 3)
#define SYS_ICH_ELRSR_EL2 sys_reg(3, 4, 12, 11, 5)
#define SYS_ICH_VMCR_EL2 sys_reg(3, 4, 12, 11, 7)
@@ -449,24 +584,49 @@
#define SYS_CONTEXTIDR_EL2 sys_reg(3, 4, 13, 0, 1)
#define SYS_TPIDR_EL2 sys_reg(3, 4, 13, 0, 2)
+#define SYS_SCXTNUM_EL2 sys_reg(3, 4, 13, 0, 7)
+
+#define __AMEV_op2(m) (m & 0x7)
+#define __AMEV_CRm(n, m) (n | ((m & 0x8) >> 3))
+#define __SYS__AMEVCNTVOFF0n_EL2(m) sys_reg(3, 4, 13, __AMEV_CRm(0x8, m), __AMEV_op2(m))
+#define SYS_AMEVCNTVOFF0n_EL2(m) __SYS__AMEVCNTVOFF0n_EL2(m)
+#define __SYS__AMEVCNTVOFF1n_EL2(m) sys_reg(3, 4, 13, __AMEV_CRm(0xA, m), __AMEV_op2(m))
+#define SYS_AMEVCNTVOFF1n_EL2(m) __SYS__AMEVCNTVOFF1n_EL2(m)
#define SYS_CNTVOFF_EL2 sys_reg(3, 4, 14, 0, 3)
#define SYS_CNTHCTL_EL2 sys_reg(3, 4, 14, 1, 0)
+#define SYS_CNTHP_TVAL_EL2 sys_reg(3, 4, 14, 2, 0)
+#define SYS_CNTHP_CTL_EL2 sys_reg(3, 4, 14, 2, 1)
+#define SYS_CNTHP_CVAL_EL2 sys_reg(3, 4, 14, 2, 2)
+#define SYS_CNTHV_TVAL_EL2 sys_reg(3, 4, 14, 3, 0)
+#define SYS_CNTHV_CTL_EL2 sys_reg(3, 4, 14, 3, 1)
+#define SYS_CNTHV_CVAL_EL2 sys_reg(3, 4, 14, 3, 2)
/* VHE encodings for architectural EL0/1 system registers */
+#define SYS_BRBCR_EL12 sys_reg(2, 5, 9, 0, 0)
#define SYS_SCTLR_EL12 sys_reg(3, 5, 1, 0, 0)
+#define SYS_CPACR_EL12 sys_reg(3, 5, 1, 0, 2)
+#define SYS_SCTLR2_EL12 sys_reg(3, 5, 1, 0, 3)
+#define SYS_ZCR_EL12 sys_reg(3, 5, 1, 2, 0)
+#define SYS_TRFCR_EL12 sys_reg(3, 5, 1, 2, 1)
+#define SYS_SMCR_EL12 sys_reg(3, 5, 1, 2, 6)
#define SYS_TTBR0_EL12 sys_reg(3, 5, 2, 0, 0)
#define SYS_TTBR1_EL12 sys_reg(3, 5, 2, 0, 1)
#define SYS_TCR_EL12 sys_reg(3, 5, 2, 0, 2)
+#define SYS_TCR2_EL12 sys_reg(3, 5, 2, 0, 3)
#define SYS_SPSR_EL12 sys_reg(3, 5, 4, 0, 0)
#define SYS_ELR_EL12 sys_reg(3, 5, 4, 0, 1)
#define SYS_AFSR0_EL12 sys_reg(3, 5, 5, 1, 0)
#define SYS_AFSR1_EL12 sys_reg(3, 5, 5, 1, 1)
#define SYS_ESR_EL12 sys_reg(3, 5, 5, 2, 0)
#define SYS_TFSR_EL12 sys_reg(3, 5, 5, 6, 0)
+#define SYS_FAR_EL12 sys_reg(3, 5, 6, 0, 0)
+#define SYS_PMSCR_EL12 sys_reg(3, 5, 9, 9, 0)
#define SYS_MAIR_EL12 sys_reg(3, 5, 10, 2, 0)
#define SYS_AMAIR_EL12 sys_reg(3, 5, 10, 3, 0)
#define SYS_VBAR_EL12 sys_reg(3, 5, 12, 0, 0)
+#define SYS_CONTEXTIDR_EL12 sys_reg(3, 5, 13, 0, 1)
+#define SYS_SCXTNUM_EL12 sys_reg(3, 5, 13, 0, 7)
#define SYS_CNTKCTL_EL12 sys_reg(3, 5, 14, 1, 0)
#define SYS_CNTP_TVAL_EL02 sys_reg(3, 5, 14, 2, 0)
#define SYS_CNTP_CTL_EL02 sys_reg(3, 5, 14, 2, 1)
@@ -477,6 +637,183 @@
#define SYS_SP_EL2 sys_reg(3, 6, 4, 1, 0)
+/* AT instructions */
+#define AT_Op0 1
+#define AT_CRn 7
+
+#define OP_AT_S1E1R sys_insn(AT_Op0, 0, AT_CRn, 8, 0)
+#define OP_AT_S1E1W sys_insn(AT_Op0, 0, AT_CRn, 8, 1)
+#define OP_AT_S1E0R sys_insn(AT_Op0, 0, AT_CRn, 8, 2)
+#define OP_AT_S1E0W sys_insn(AT_Op0, 0, AT_CRn, 8, 3)
+#define OP_AT_S1E1RP sys_insn(AT_Op0, 0, AT_CRn, 9, 0)
+#define OP_AT_S1E1WP sys_insn(AT_Op0, 0, AT_CRn, 9, 1)
+#define OP_AT_S1E1A sys_insn(AT_Op0, 0, AT_CRn, 9, 2)
+#define OP_AT_S1E2R sys_insn(AT_Op0, 4, AT_CRn, 8, 0)
+#define OP_AT_S1E2W sys_insn(AT_Op0, 4, AT_CRn, 8, 1)
+#define OP_AT_S12E1R sys_insn(AT_Op0, 4, AT_CRn, 8, 4)
+#define OP_AT_S12E1W sys_insn(AT_Op0, 4, AT_CRn, 8, 5)
+#define OP_AT_S12E0R sys_insn(AT_Op0, 4, AT_CRn, 8, 6)
+#define OP_AT_S12E0W sys_insn(AT_Op0, 4, AT_CRn, 8, 7)
+#define OP_AT_S1E2A sys_insn(AT_Op0, 4, AT_CRn, 9, 2)
+
+/* TLBI instructions */
+#define TLBI_Op0 1
+
+#define TLBI_Op1_EL1 0 /* Accessible from EL1 or higher */
+#define TLBI_Op1_EL2 4 /* Accessible from EL2 or higher */
+
+#define TLBI_CRn_XS 8 /* Extra Slow (the common one) */
+#define TLBI_CRn_nXS 9 /* not Extra Slow (which nobody uses)*/
+
+#define TLBI_CRm_IPAIS 0 /* S2 Inner-Shareable */
+#define TLBI_CRm_nROS 1 /* non-Range, Outer-Sharable */
+#define TLBI_CRm_RIS 2 /* Range, Inner-Sharable */
+#define TLBI_CRm_nRIS 3 /* non-Range, Inner-Sharable */
+#define TLBI_CRm_IPAONS 4 /* S2 Outer and Non-Shareable */
+#define TLBI_CRm_ROS 5 /* Range, Outer-Sharable */
+#define TLBI_CRm_RNS 6 /* Range, Non-Sharable */
+#define TLBI_CRm_nRNS 7 /* non-Range, Non-Sharable */
+
+#define OP_TLBI_VMALLE1OS sys_insn(1, 0, 8, 1, 0)
+#define OP_TLBI_VAE1OS sys_insn(1, 0, 8, 1, 1)
+#define OP_TLBI_ASIDE1OS sys_insn(1, 0, 8, 1, 2)
+#define OP_TLBI_VAAE1OS sys_insn(1, 0, 8, 1, 3)
+#define OP_TLBI_VALE1OS sys_insn(1, 0, 8, 1, 5)
+#define OP_TLBI_VAALE1OS sys_insn(1, 0, 8, 1, 7)
+#define OP_TLBI_RVAE1IS sys_insn(1, 0, 8, 2, 1)
+#define OP_TLBI_RVAAE1IS sys_insn(1, 0, 8, 2, 3)
+#define OP_TLBI_RVALE1IS sys_insn(1, 0, 8, 2, 5)
+#define OP_TLBI_RVAALE1IS sys_insn(1, 0, 8, 2, 7)
+#define OP_TLBI_VMALLE1IS sys_insn(1, 0, 8, 3, 0)
+#define OP_TLBI_VAE1IS sys_insn(1, 0, 8, 3, 1)
+#define OP_TLBI_ASIDE1IS sys_insn(1, 0, 8, 3, 2)
+#define OP_TLBI_VAAE1IS sys_insn(1, 0, 8, 3, 3)
+#define OP_TLBI_VALE1IS sys_insn(1, 0, 8, 3, 5)
+#define OP_TLBI_VAALE1IS sys_insn(1, 0, 8, 3, 7)
+#define OP_TLBI_RVAE1OS sys_insn(1, 0, 8, 5, 1)
+#define OP_TLBI_RVAAE1OS sys_insn(1, 0, 8, 5, 3)
+#define OP_TLBI_RVALE1OS sys_insn(1, 0, 8, 5, 5)
+#define OP_TLBI_RVAALE1OS sys_insn(1, 0, 8, 5, 7)
+#define OP_TLBI_RVAE1 sys_insn(1, 0, 8, 6, 1)
+#define OP_TLBI_RVAAE1 sys_insn(1, 0, 8, 6, 3)
+#define OP_TLBI_RVALE1 sys_insn(1, 0, 8, 6, 5)
+#define OP_TLBI_RVAALE1 sys_insn(1, 0, 8, 6, 7)
+#define OP_TLBI_VMALLE1 sys_insn(1, 0, 8, 7, 0)
+#define OP_TLBI_VAE1 sys_insn(1, 0, 8, 7, 1)
+#define OP_TLBI_ASIDE1 sys_insn(1, 0, 8, 7, 2)
+#define OP_TLBI_VAAE1 sys_insn(1, 0, 8, 7, 3)
+#define OP_TLBI_VALE1 sys_insn(1, 0, 8, 7, 5)
+#define OP_TLBI_VAALE1 sys_insn(1, 0, 8, 7, 7)
+#define OP_TLBI_VMALLE1OSNXS sys_insn(1, 0, 9, 1, 0)
+#define OP_TLBI_VAE1OSNXS sys_insn(1, 0, 9, 1, 1)
+#define OP_TLBI_ASIDE1OSNXS sys_insn(1, 0, 9, 1, 2)
+#define OP_TLBI_VAAE1OSNXS sys_insn(1, 0, 9, 1, 3)
+#define OP_TLBI_VALE1OSNXS sys_insn(1, 0, 9, 1, 5)
+#define OP_TLBI_VAALE1OSNXS sys_insn(1, 0, 9, 1, 7)
+#define OP_TLBI_RVAE1ISNXS sys_insn(1, 0, 9, 2, 1)
+#define OP_TLBI_RVAAE1ISNXS sys_insn(1, 0, 9, 2, 3)
+#define OP_TLBI_RVALE1ISNXS sys_insn(1, 0, 9, 2, 5)
+#define OP_TLBI_RVAALE1ISNXS sys_insn(1, 0, 9, 2, 7)
+#define OP_TLBI_VMALLE1ISNXS sys_insn(1, 0, 9, 3, 0)
+#define OP_TLBI_VAE1ISNXS sys_insn(1, 0, 9, 3, 1)
+#define OP_TLBI_ASIDE1ISNXS sys_insn(1, 0, 9, 3, 2)
+#define OP_TLBI_VAAE1ISNXS sys_insn(1, 0, 9, 3, 3)
+#define OP_TLBI_VALE1ISNXS sys_insn(1, 0, 9, 3, 5)
+#define OP_TLBI_VAALE1ISNXS sys_insn(1, 0, 9, 3, 7)
+#define OP_TLBI_RVAE1OSNXS sys_insn(1, 0, 9, 5, 1)
+#define OP_TLBI_RVAAE1OSNXS sys_insn(1, 0, 9, 5, 3)
+#define OP_TLBI_RVALE1OSNXS sys_insn(1, 0, 9, 5, 5)
+#define OP_TLBI_RVAALE1OSNXS sys_insn(1, 0, 9, 5, 7)
+#define OP_TLBI_RVAE1NXS sys_insn(1, 0, 9, 6, 1)
+#define OP_TLBI_RVAAE1NXS sys_insn(1, 0, 9, 6, 3)
+#define OP_TLBI_RVALE1NXS sys_insn(1, 0, 9, 6, 5)
+#define OP_TLBI_RVAALE1NXS sys_insn(1, 0, 9, 6, 7)
+#define OP_TLBI_VMALLE1NXS sys_insn(1, 0, 9, 7, 0)
+#define OP_TLBI_VAE1NXS sys_insn(1, 0, 9, 7, 1)
+#define OP_TLBI_ASIDE1NXS sys_insn(1, 0, 9, 7, 2)
+#define OP_TLBI_VAAE1NXS sys_insn(1, 0, 9, 7, 3)
+#define OP_TLBI_VALE1NXS sys_insn(1, 0, 9, 7, 5)
+#define OP_TLBI_VAALE1NXS sys_insn(1, 0, 9, 7, 7)
+#define OP_TLBI_IPAS2E1IS sys_insn(1, 4, 8, 0, 1)
+#define OP_TLBI_RIPAS2E1IS sys_insn(1, 4, 8, 0, 2)
+#define OP_TLBI_IPAS2LE1IS sys_insn(1, 4, 8, 0, 5)
+#define OP_TLBI_RIPAS2LE1IS sys_insn(1, 4, 8, 0, 6)
+#define OP_TLBI_ALLE2OS sys_insn(1, 4, 8, 1, 0)
+#define OP_TLBI_VAE2OS sys_insn(1, 4, 8, 1, 1)
+#define OP_TLBI_ALLE1OS sys_insn(1, 4, 8, 1, 4)
+#define OP_TLBI_VALE2OS sys_insn(1, 4, 8, 1, 5)
+#define OP_TLBI_VMALLS12E1OS sys_insn(1, 4, 8, 1, 6)
+#define OP_TLBI_RVAE2IS sys_insn(1, 4, 8, 2, 1)
+#define OP_TLBI_RVALE2IS sys_insn(1, 4, 8, 2, 5)
+#define OP_TLBI_ALLE2IS sys_insn(1, 4, 8, 3, 0)
+#define OP_TLBI_VAE2IS sys_insn(1, 4, 8, 3, 1)
+#define OP_TLBI_ALLE1IS sys_insn(1, 4, 8, 3, 4)
+#define OP_TLBI_VALE2IS sys_insn(1, 4, 8, 3, 5)
+#define OP_TLBI_VMALLS12E1IS sys_insn(1, 4, 8, 3, 6)
+#define OP_TLBI_IPAS2E1OS sys_insn(1, 4, 8, 4, 0)
+#define OP_TLBI_IPAS2E1 sys_insn(1, 4, 8, 4, 1)
+#define OP_TLBI_RIPAS2E1 sys_insn(1, 4, 8, 4, 2)
+#define OP_TLBI_RIPAS2E1OS sys_insn(1, 4, 8, 4, 3)
+#define OP_TLBI_IPAS2LE1OS sys_insn(1, 4, 8, 4, 4)
+#define OP_TLBI_IPAS2LE1 sys_insn(1, 4, 8, 4, 5)
+#define OP_TLBI_RIPAS2LE1 sys_insn(1, 4, 8, 4, 6)
+#define OP_TLBI_RIPAS2LE1OS sys_insn(1, 4, 8, 4, 7)
+#define OP_TLBI_RVAE2OS sys_insn(1, 4, 8, 5, 1)
+#define OP_TLBI_RVALE2OS sys_insn(1, 4, 8, 5, 5)
+#define OP_TLBI_RVAE2 sys_insn(1, 4, 8, 6, 1)
+#define OP_TLBI_RVALE2 sys_insn(1, 4, 8, 6, 5)
+#define OP_TLBI_ALLE2 sys_insn(1, 4, 8, 7, 0)
+#define OP_TLBI_VAE2 sys_insn(1, 4, 8, 7, 1)
+#define OP_TLBI_ALLE1 sys_insn(1, 4, 8, 7, 4)
+#define OP_TLBI_VALE2 sys_insn(1, 4, 8, 7, 5)
+#define OP_TLBI_VMALLS12E1 sys_insn(1, 4, 8, 7, 6)
+#define OP_TLBI_IPAS2E1ISNXS sys_insn(1, 4, 9, 0, 1)
+#define OP_TLBI_RIPAS2E1ISNXS sys_insn(1, 4, 9, 0, 2)
+#define OP_TLBI_IPAS2LE1ISNXS sys_insn(1, 4, 9, 0, 5)
+#define OP_TLBI_RIPAS2LE1ISNXS sys_insn(1, 4, 9, 0, 6)
+#define OP_TLBI_ALLE2OSNXS sys_insn(1, 4, 9, 1, 0)
+#define OP_TLBI_VAE2OSNXS sys_insn(1, 4, 9, 1, 1)
+#define OP_TLBI_ALLE1OSNXS sys_insn(1, 4, 9, 1, 4)
+#define OP_TLBI_VALE2OSNXS sys_insn(1, 4, 9, 1, 5)
+#define OP_TLBI_VMALLS12E1OSNXS sys_insn(1, 4, 9, 1, 6)
+#define OP_TLBI_RVAE2ISNXS sys_insn(1, 4, 9, 2, 1)
+#define OP_TLBI_RVALE2ISNXS sys_insn(1, 4, 9, 2, 5)
+#define OP_TLBI_ALLE2ISNXS sys_insn(1, 4, 9, 3, 0)
+#define OP_TLBI_VAE2ISNXS sys_insn(1, 4, 9, 3, 1)
+#define OP_TLBI_ALLE1ISNXS sys_insn(1, 4, 9, 3, 4)
+#define OP_TLBI_VALE2ISNXS sys_insn(1, 4, 9, 3, 5)
+#define OP_TLBI_VMALLS12E1ISNXS sys_insn(1, 4, 9, 3, 6)
+#define OP_TLBI_IPAS2E1OSNXS sys_insn(1, 4, 9, 4, 0)
+#define OP_TLBI_IPAS2E1NXS sys_insn(1, 4, 9, 4, 1)
+#define OP_TLBI_RIPAS2E1NXS sys_insn(1, 4, 9, 4, 2)
+#define OP_TLBI_RIPAS2E1OSNXS sys_insn(1, 4, 9, 4, 3)
+#define OP_TLBI_IPAS2LE1OSNXS sys_insn(1, 4, 9, 4, 4)
+#define OP_TLBI_IPAS2LE1NXS sys_insn(1, 4, 9, 4, 5)
+#define OP_TLBI_RIPAS2LE1NXS sys_insn(1, 4, 9, 4, 6)
+#define OP_TLBI_RIPAS2LE1OSNXS sys_insn(1, 4, 9, 4, 7)
+#define OP_TLBI_RVAE2OSNXS sys_insn(1, 4, 9, 5, 1)
+#define OP_TLBI_RVALE2OSNXS sys_insn(1, 4, 9, 5, 5)
+#define OP_TLBI_RVAE2NXS sys_insn(1, 4, 9, 6, 1)
+#define OP_TLBI_RVALE2NXS sys_insn(1, 4, 9, 6, 5)
+#define OP_TLBI_ALLE2NXS sys_insn(1, 4, 9, 7, 0)
+#define OP_TLBI_VAE2NXS sys_insn(1, 4, 9, 7, 1)
+#define OP_TLBI_ALLE1NXS sys_insn(1, 4, 9, 7, 4)
+#define OP_TLBI_VALE2NXS sys_insn(1, 4, 9, 7, 5)
+#define OP_TLBI_VMALLS12E1NXS sys_insn(1, 4, 9, 7, 6)
+
+/* Misc instructions */
+#define OP_GCSPUSHX sys_insn(1, 0, 7, 7, 4)
+#define OP_GCSPOPCX sys_insn(1, 0, 7, 7, 5)
+#define OP_GCSPOPX sys_insn(1, 0, 7, 7, 6)
+#define OP_GCSPUSHM sys_insn(1, 3, 7, 7, 0)
+
+#define OP_BRB_IALL sys_insn(1, 1, 7, 2, 4)
+#define OP_BRB_INJ sys_insn(1, 1, 7, 2, 5)
+#define OP_CFP_RCTX sys_insn(1, 3, 7, 3, 4)
+#define OP_DVP_RCTX sys_insn(1, 3, 7, 3, 5)
+#define OP_COSP_RCTX sys_insn(1, 3, 7, 3, 6)
+#define OP_CPP_RCTX sys_insn(1, 3, 7, 3, 7)
+
/* Common SCTLR_ELx flags. */
#define SCTLR_ELx_ENTP2 (BIT(60))
#define SCTLR_ELx_DSSBS (BIT(44))
@@ -555,16 +892,14 @@
/* Position the attr at the correct index */
#define MAIR_ATTRIDX(attr, idx) ((attr) << ((idx) * 8))
-/* id_aa64pfr0 */
-#define ID_AA64PFR0_EL1_ELx_64BIT_ONLY 0x1
-#define ID_AA64PFR0_EL1_ELx_32BIT_64BIT 0x2
-
/* id_aa64mmfr0 */
#define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN 0x0
+#define ID_AA64MMFR0_EL1_TGRAN4_LPA2 ID_AA64MMFR0_EL1_TGRAN4_52_BIT
#define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX 0x7
#define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MIN 0x0
#define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MAX 0x7
#define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN 0x1
+#define ID_AA64MMFR0_EL1_TGRAN16_LPA2 ID_AA64MMFR0_EL1_TGRAN16_52_BIT
#define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX 0xf
#define ARM64_MIN_PARANGE_BITS 32
@@ -572,6 +907,7 @@
#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_DEFAULT 0x0
#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_NONE 0x1
#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MIN 0x2
+#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2 0x3
#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MAX 0x7
#ifdef CONFIG_ARM64_PA_BITS_52
@@ -582,11 +918,13 @@
#if defined(CONFIG_ARM64_4K_PAGES)
#define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN4_SHIFT
+#define ID_AA64MMFR0_EL1_TGRAN_LPA2 ID_AA64MMFR0_EL1_TGRAN4_52_BIT
#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN
#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX
#define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN4_2_SHIFT
#elif defined(CONFIG_ARM64_16K_PAGES)
#define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN16_SHIFT
+#define ID_AA64MMFR0_EL1_TGRAN_LPA2 ID_AA64MMFR0_EL1_TGRAN16_52_BIT
#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN
#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX
#define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN16_2_SHIFT
@@ -610,6 +948,19 @@
#define SYS_GCR_EL1_RRND (BIT(16))
#define SYS_GCR_EL1_EXCL_MASK 0xffffUL
+#ifdef CONFIG_KASAN_HW_TAGS
+/*
+ * KASAN always uses a whole byte for its tags. With CONFIG_KASAN_HW_TAGS it
+ * only uses tags in the range 0xF0-0xFF, which we map to MTE tags 0x0-0xF.
+ */
+#define __MTE_TAG_MIN (KASAN_TAG_MIN & 0xf)
+#define __MTE_TAG_MAX (KASAN_TAG_MAX & 0xf)
+#define __MTE_TAG_INCL GENMASK(__MTE_TAG_MAX, __MTE_TAG_MIN)
+#define KERNEL_GCR_EL1_EXCL (SYS_GCR_EL1_EXCL_MASK & ~__MTE_TAG_INCL)
+#else
+#define KERNEL_GCR_EL1_EXCL SYS_GCR_EL1_EXCL_MASK
+#endif
+
#define KERNEL_GCR_EL1 (SYS_GCR_EL1_RRND | KERNEL_GCR_EL1_EXCL)
/* RGSR_EL1 Definitions */
@@ -626,20 +977,7 @@
/* Safe value for MPIDR_EL1: Bit31:RES1, Bit30:U:0, Bit24:MT:0 */
#define SYS_MPIDR_SAFE_VAL (BIT(31))
-#define TRFCR_ELx_TS_SHIFT 5
-#define TRFCR_ELx_TS_MASK ((0x3UL) << TRFCR_ELx_TS_SHIFT)
-#define TRFCR_ELx_TS_VIRTUAL ((0x1UL) << TRFCR_ELx_TS_SHIFT)
-#define TRFCR_ELx_TS_GUEST_PHYSICAL ((0x2UL) << TRFCR_ELx_TS_SHIFT)
-#define TRFCR_ELx_TS_PHYSICAL ((0x3UL) << TRFCR_ELx_TS_SHIFT)
-#define TRFCR_EL2_CX BIT(3)
-#define TRFCR_ELx_ExTRE BIT(1)
-#define TRFCR_ELx_E0TRE BIT(0)
-
/* GIC Hypervisor interface registers */
-/* ICH_MISR_EL2 bit definitions */
-#define ICH_MISR_EOI (1 << 0)
-#define ICH_MISR_U (1 << 1)
-
/* ICH_LR*_EL2 bit definitions */
#define ICH_LR_VIRTUAL_ID_MASK ((1ULL << 32) - 1)
@@ -654,17 +992,6 @@
#define ICH_LR_PRIORITY_SHIFT 48
#define ICH_LR_PRIORITY_MASK (0xffULL << ICH_LR_PRIORITY_SHIFT)
-/* ICH_HCR_EL2 bit definitions */
-#define ICH_HCR_EN (1 << 0)
-#define ICH_HCR_UIE (1 << 1)
-#define ICH_HCR_NPIE (1 << 3)
-#define ICH_HCR_TC (1 << 10)
-#define ICH_HCR_TALL0 (1 << 11)
-#define ICH_HCR_TALL1 (1 << 12)
-#define ICH_HCR_TDIR (1 << 14)
-#define ICH_HCR_EOIcount_SHIFT 27
-#define ICH_HCR_EOIcount_MASK (0x1f << ICH_HCR_EOIcount_SHIFT)
-
/* ICH_VMCR_EL2 bit definitions */
#define ICH_VMCR_ACK_CTL_SHIFT 2
#define ICH_VMCR_ACK_CTL_MASK (1 << ICH_VMCR_ACK_CTL_SHIFT)
@@ -685,18 +1012,6 @@
#define ICH_VMCR_ENG1_SHIFT 1
#define ICH_VMCR_ENG1_MASK (1 << ICH_VMCR_ENG1_SHIFT)
-/* ICH_VTR_EL2 bit definitions */
-#define ICH_VTR_PRI_BITS_SHIFT 29
-#define ICH_VTR_PRI_BITS_MASK (7 << ICH_VTR_PRI_BITS_SHIFT)
-#define ICH_VTR_ID_BITS_SHIFT 23
-#define ICH_VTR_ID_BITS_MASK (7 << ICH_VTR_ID_BITS_SHIFT)
-#define ICH_VTR_SEIS_SHIFT 22
-#define ICH_VTR_SEIS_MASK (1 << ICH_VTR_SEIS_SHIFT)
-#define ICH_VTR_A3V_SHIFT 21
-#define ICH_VTR_A3V_MASK (1 << ICH_VTR_A3V_SHIFT)
-#define ICH_VTR_TDS_SHIFT 19
-#define ICH_VTR_TDS_MASK (1 << ICH_VTR_TDS_SHIFT)
-
/*
* Permission Indirection Extension (PIE) permission encodings.
* Encodings with the _O suffix, have overlays applied (Permission Overlay Extension).
@@ -716,6 +1031,22 @@
#define PIRx_ELx_PERM(idx, perm) ((perm) << ((idx) * 4))
+/*
+ * Permission Overlay Extension (POE) permission encodings.
+ */
+#define POE_NONE UL(0x0)
+#define POE_R UL(0x1)
+#define POE_X UL(0x2)
+#define POE_RX UL(0x3)
+#define POE_W UL(0x4)
+#define POE_RW UL(0x5)
+#define POE_XW UL(0x6)
+#define POE_RXW UL(0x7)
+#define POE_MASK UL(0xf)
+
+/* Initial value for Permission Overlay Extension for EL0 */
+#define POR_EL0_INIT POE_RXW
+
#define ARM64_FEATURE_FIELD_BITS 4
/* Defined for compatibility only, do not add new users. */
@@ -789,15 +1120,21 @@
/*
* For registers without architectural names, or simply unsupported by
* GAS.
+ *
+ * __check_r forces warnings to be generated by the compiler when
+ * evaluating r which wouldn't normally happen due to being passed to
+ * the assembler via __stringify(r).
*/
#define read_sysreg_s(r) ({ \
u64 __val; \
+ u32 __maybe_unused __check_r = (u32)(r); \
asm volatile(__mrs_s("%0", r) : "=r" (__val)); \
__val; \
})
#define write_sysreg_s(v, r) do { \
u64 __val = (u64)(v); \
+ u32 __maybe_unused __check_r = (u32)(r); \
asm volatile(__msr_s(r, "%x0") : : "rZ" (__val)); \
} while (0)
@@ -827,6 +1164,8 @@
par; \
})
+#define SYS_FIELD_VALUE(reg, field, val) reg##_##field##_##val
+
#define SYS_FIELD_GET(reg, field, val) \
FIELD_GET(reg##_##field##_MASK, val)
@@ -834,7 +1173,8 @@
FIELD_PREP(reg##_##field##_MASK, val)
#define SYS_FIELD_PREP_ENUM(reg, field, val) \
- FIELD_PREP(reg##_##field##_MASK, reg##_##field##_##val)
+ FIELD_PREP(reg##_##field##_MASK, \
+ SYS_FIELD_VALUE(reg, field, val))
#endif
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h
index 66736ff04011..6d44f8c8a18f 100644
--- a/tools/arch/arm64/include/uapi/asm/kvm.h
+++ b/tools/arch/arm64/include/uapi/asm/kvm.h
@@ -374,6 +374,7 @@ enum {
#endif
};
+/* Vendor hyper call function numbers 0-63 */
#define KVM_REG_ARM_VENDOR_HYP_BMAP KVM_REG_ARM_FW_FEAT_BMAP_REG(2)
enum {
@@ -384,6 +385,17 @@ enum {
#endif
};
+/* Vendor hyper call function numbers 64-127 */
+#define KVM_REG_ARM_VENDOR_HYP_BMAP_2 KVM_REG_ARM_FW_FEAT_BMAP_REG(3)
+
+enum {
+ KVM_REG_ARM_VENDOR_HYP_BIT_DISCOVER_IMPL_VER = 0,
+ KVM_REG_ARM_VENDOR_HYP_BIT_DISCOVER_IMPL_CPUS = 1,
+#ifdef __KERNEL__
+ KVM_REG_ARM_VENDOR_HYP_BMAP_2_BIT_COUNT,
+#endif
+};
+
/* Device Control API on vm fd */
#define KVM_ARM_VM_SMCCC_CTRL 0
#define KVM_ARM_VM_SMCCC_FILTER 0
diff --git a/tools/arch/arm64/tools/Makefile b/tools/arch/arm64/tools/Makefile
index 7b42feedf647..de4f1b66ef01 100644
--- a/tools/arch/arm64/tools/Makefile
+++ b/tools/arch/arm64/tools/Makefile
@@ -13,12 +13,6 @@ AWK ?= awk
MKDIR ?= mkdir
RM ?= rm
-ifeq ($(V),1)
-Q =
-else
-Q = @
-endif
-
arm64_tools_dir = $(top_srcdir)/arch/arm64/tools
arm64_sysreg_tbl = $(arm64_tools_dir)/sysreg
arm64_gen_sysreg = $(arm64_tools_dir)/gen-sysreg.awk
diff --git a/tools/arch/x86/include/asm/amd-ibs.h b/tools/arch/x86/include/asm/amd-ibs.h
index 93807b437e4d..cb1740bc3da2 100644
--- a/tools/arch/x86/include/asm/amd-ibs.h
+++ b/tools/arch/x86/include/asm/amd-ibs.h
@@ -64,7 +64,8 @@ union ibs_op_ctl {
opmaxcnt_ext:7, /* 20-26: upper 7 bits of periodic op maximum count */
reserved0:5, /* 27-31: reserved */
opcurcnt:27, /* 32-58: periodic op counter current count */
- reserved1:5; /* 59-63: reserved */
+ ldlat_thrsh:4, /* 59-62: Load Latency threshold */
+ ldlat_en:1; /* 63: Load Latency enabled */
};
};
diff --git a/tools/arch/x86/include/asm/asm.h b/tools/arch/x86/include/asm/asm.h
index 3ad3da9a7d97..dbe39b44256b 100644
--- a/tools/arch/x86/include/asm/asm.h
+++ b/tools/arch/x86/include/asm/asm.h
@@ -2,7 +2,7 @@
#ifndef _ASM_X86_ASM_H
#define _ASM_X86_ASM_H
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
# define __ASM_FORM(x, ...) x,## __VA_ARGS__
# define __ASM_FORM_RAW(x, ...) x,## __VA_ARGS__
# define __ASM_FORM_COMMA(x, ...) x,## __VA_ARGS__,
@@ -123,7 +123,7 @@
#ifdef __KERNEL__
/* Exception table entry */
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
# define _ASM_EXTABLE_HANDLE(from, to, handler) \
.pushsection "__ex_table","a" ; \
.balign 4 ; \
@@ -154,7 +154,7 @@
# define _ASM_NOKPROBE(entry)
# endif
-#else /* ! __ASSEMBLY__ */
+#else /* ! __ASSEMBLER__ */
# define _EXPAND_EXTABLE_HANDLE(x) #x
# define _ASM_EXTABLE_HANDLE(from, to, handler) \
" .pushsection \"__ex_table\",\"a\"\n" \
@@ -186,7 +186,7 @@
*/
register unsigned long current_stack_pointer asm(_ASM_SP);
#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __KERNEL__ */
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 17b6590748c0..9e3fa7942e7d 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -2,14 +2,6 @@
#ifndef _ASM_X86_CPUFEATURES_H
#define _ASM_X86_CPUFEATURES_H
-#ifndef _ASM_X86_REQUIRED_FEATURES_H
-#include <asm/required-features.h>
-#endif
-
-#ifndef _ASM_X86_DISABLED_FEATURES_H
-#include <asm/disabled-features.h>
-#endif
-
/*
* Defines x86 CPU feature bits
*/
@@ -210,7 +202,6 @@
#define X86_FEATURE_MBA ( 7*32+18) /* "mba" Memory Bandwidth Allocation */
#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */
#define X86_FEATURE_PERFMON_V2 ( 7*32+20) /* "perfmon_v2" AMD Performance Monitoring Version 2 */
-#define X86_FEATURE_USE_IBPB ( 7*32+21) /* Indirect Branch Prediction Barrier enabled */
#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* Use IBRS during runtime firmware calls */
#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* Disable Speculative Store Bypass. */
#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* AMD SSBD implementation via LS_CFG MSR */
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
deleted file mode 100644
index c492bdc97b05..000000000000
--- a/tools/arch/x86/include/asm/disabled-features.h
+++ /dev/null
@@ -1,161 +0,0 @@
-#ifndef _ASM_X86_DISABLED_FEATURES_H
-#define _ASM_X86_DISABLED_FEATURES_H
-
-/* These features, although they might be available in a CPU
- * will not be used because the compile options to support
- * them are not present.
- *
- * This code allows them to be checked and disabled at
- * compile time without an explicit #ifdef. Use
- * cpu_feature_enabled().
- */
-
-#ifdef CONFIG_X86_UMIP
-# define DISABLE_UMIP 0
-#else
-# define DISABLE_UMIP (1<<(X86_FEATURE_UMIP & 31))
-#endif
-
-#ifdef CONFIG_X86_64
-# define DISABLE_VME (1<<(X86_FEATURE_VME & 31))
-# define DISABLE_K6_MTRR (1<<(X86_FEATURE_K6_MTRR & 31))
-# define DISABLE_CYRIX_ARR (1<<(X86_FEATURE_CYRIX_ARR & 31))
-# define DISABLE_CENTAUR_MCR (1<<(X86_FEATURE_CENTAUR_MCR & 31))
-# define DISABLE_PCID 0
-#else
-# define DISABLE_VME 0
-# define DISABLE_K6_MTRR 0
-# define DISABLE_CYRIX_ARR 0
-# define DISABLE_CENTAUR_MCR 0
-# define DISABLE_PCID (1<<(X86_FEATURE_PCID & 31))
-#endif /* CONFIG_X86_64 */
-
-#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
-# define DISABLE_PKU 0
-# define DISABLE_OSPKE 0
-#else
-# define DISABLE_PKU (1<<(X86_FEATURE_PKU & 31))
-# define DISABLE_OSPKE (1<<(X86_FEATURE_OSPKE & 31))
-#endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */
-
-#ifdef CONFIG_X86_5LEVEL
-# define DISABLE_LA57 0
-#else
-# define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31))
-#endif
-
-#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
-# define DISABLE_PTI 0
-#else
-# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
-#endif
-
-#ifdef CONFIG_MITIGATION_RETPOLINE
-# define DISABLE_RETPOLINE 0
-#else
-# define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \
- (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)))
-#endif
-
-#ifdef CONFIG_MITIGATION_RETHUNK
-# define DISABLE_RETHUNK 0
-#else
-# define DISABLE_RETHUNK (1 << (X86_FEATURE_RETHUNK & 31))
-#endif
-
-#ifdef CONFIG_MITIGATION_UNRET_ENTRY
-# define DISABLE_UNRET 0
-#else
-# define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31))
-#endif
-
-#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING
-# define DISABLE_CALL_DEPTH_TRACKING 0
-#else
-# define DISABLE_CALL_DEPTH_TRACKING (1 << (X86_FEATURE_CALL_DEPTH & 31))
-#endif
-
-#ifdef CONFIG_ADDRESS_MASKING
-# define DISABLE_LAM 0
-#else
-# define DISABLE_LAM (1 << (X86_FEATURE_LAM & 31))
-#endif
-
-#ifdef CONFIG_INTEL_IOMMU_SVM
-# define DISABLE_ENQCMD 0
-#else
-# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31))
-#endif
-
-#ifdef CONFIG_X86_SGX
-# define DISABLE_SGX 0
-#else
-# define DISABLE_SGX (1 << (X86_FEATURE_SGX & 31))
-#endif
-
-#ifdef CONFIG_XEN_PV
-# define DISABLE_XENPV 0
-#else
-# define DISABLE_XENPV (1 << (X86_FEATURE_XENPV & 31))
-#endif
-
-#ifdef CONFIG_INTEL_TDX_GUEST
-# define DISABLE_TDX_GUEST 0
-#else
-# define DISABLE_TDX_GUEST (1 << (X86_FEATURE_TDX_GUEST & 31))
-#endif
-
-#ifdef CONFIG_X86_USER_SHADOW_STACK
-#define DISABLE_USER_SHSTK 0
-#else
-#define DISABLE_USER_SHSTK (1 << (X86_FEATURE_USER_SHSTK & 31))
-#endif
-
-#ifdef CONFIG_X86_KERNEL_IBT
-#define DISABLE_IBT 0
-#else
-#define DISABLE_IBT (1 << (X86_FEATURE_IBT & 31))
-#endif
-
-#ifdef CONFIG_X86_FRED
-# define DISABLE_FRED 0
-#else
-# define DISABLE_FRED (1 << (X86_FEATURE_FRED & 31))
-#endif
-
-#ifdef CONFIG_KVM_AMD_SEV
-#define DISABLE_SEV_SNP 0
-#else
-#define DISABLE_SEV_SNP (1 << (X86_FEATURE_SEV_SNP & 31))
-#endif
-
-/*
- * Make sure to add features to the correct mask
- */
-#define DISABLED_MASK0 (DISABLE_VME)
-#define DISABLED_MASK1 0
-#define DISABLED_MASK2 0
-#define DISABLED_MASK3 (DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR)
-#define DISABLED_MASK4 (DISABLE_PCID)
-#define DISABLED_MASK5 0
-#define DISABLED_MASK6 0
-#define DISABLED_MASK7 (DISABLE_PTI)
-#define DISABLED_MASK8 (DISABLE_XENPV|DISABLE_TDX_GUEST)
-#define DISABLED_MASK9 (DISABLE_SGX)
-#define DISABLED_MASK10 0
-#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \
- DISABLE_CALL_DEPTH_TRACKING|DISABLE_USER_SHSTK)
-#define DISABLED_MASK12 (DISABLE_FRED|DISABLE_LAM)
-#define DISABLED_MASK13 0
-#define DISABLED_MASK14 0
-#define DISABLED_MASK15 0
-#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP| \
- DISABLE_ENQCMD)
-#define DISABLED_MASK17 0
-#define DISABLED_MASK18 (DISABLE_IBT)
-#define DISABLED_MASK19 (DISABLE_SEV_SNP)
-#define DISABLED_MASK20 0
-#define DISABLED_MASK21 0
-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22)
-
-#endif /* _ASM_X86_DISABLED_FEATURES_H */
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index 3ae84c3b8e6d..dc1c1057f26e 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -25,6 +25,7 @@
#define _EFER_SVME 12 /* Enable virtualization */
#define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */
#define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */
+#define _EFER_TCE 15 /* Enable Translation Cache Extensions */
#define _EFER_AUTOIBRS 21 /* Enable Automatic IBRS */
#define EFER_SCE (1<<_EFER_SCE)
@@ -34,6 +35,7 @@
#define EFER_SVME (1<<_EFER_SVME)
#define EFER_LMSLE (1<<_EFER_LMSLE)
#define EFER_FFXSR (1<<_EFER_FFXSR)
+#define EFER_TCE (1<<_EFER_TCE)
#define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS)
/*
diff --git a/tools/arch/x86/include/asm/nops.h b/tools/arch/x86/include/asm/nops.h
index 1c1b7550fa55..cd94221d8335 100644
--- a/tools/arch/x86/include/asm/nops.h
+++ b/tools/arch/x86/include/asm/nops.h
@@ -82,7 +82,7 @@
#define ASM_NOP7 _ASM_BYTES(BYTES_NOP7)
#define ASM_NOP8 _ASM_BYTES(BYTES_NOP8)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
extern const unsigned char * const x86_nops[];
#endif
diff --git a/tools/arch/x86/include/asm/orc_types.h b/tools/arch/x86/include/asm/orc_types.h
index 46d7e06763c9..e0125afa53fb 100644
--- a/tools/arch/x86/include/asm/orc_types.h
+++ b/tools/arch/x86/include/asm/orc_types.h
@@ -45,7 +45,7 @@
#define ORC_TYPE_REGS 3
#define ORC_TYPE_REGS_PARTIAL 4
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/byteorder.h>
/*
@@ -73,6 +73,6 @@ struct orc_entry {
#endif
} __packed;
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ORC_TYPES_H */
diff --git a/tools/arch/x86/include/asm/pvclock-abi.h b/tools/arch/x86/include/asm/pvclock-abi.h
index 1436226efe3e..b9fece5fc96d 100644
--- a/tools/arch/x86/include/asm/pvclock-abi.h
+++ b/tools/arch/x86/include/asm/pvclock-abi.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_PVCLOCK_ABI_H
#define _ASM_X86_PVCLOCK_ABI_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* These structs MUST NOT be changed.
@@ -44,5 +44,5 @@ struct pvclock_wall_clock {
#define PVCLOCK_GUEST_STOPPED (1 << 1)
/* PVCLOCK_COUNTS_FROM_ZERO broke ABI and can't be used anymore. */
#define PVCLOCK_COUNTS_FROM_ZERO (1 << 2)
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_PVCLOCK_ABI_H */
diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h
deleted file mode 100644
index e9187ddd3d1f..000000000000
--- a/tools/arch/x86/include/asm/required-features.h
+++ /dev/null
@@ -1,105 +0,0 @@
-#ifndef _ASM_X86_REQUIRED_FEATURES_H
-#define _ASM_X86_REQUIRED_FEATURES_H
-
-/* Define minimum CPUID feature set for kernel These bits are checked
- really early to actually display a visible error message before the
- kernel dies. Make sure to assign features to the proper mask!
-
- Some requirements that are not in CPUID yet are also in the
- CONFIG_X86_MINIMUM_CPU_FAMILY which is checked too.
-
- The real information is in arch/x86/Kconfig.cpu, this just converts
- the CONFIGs into a bitmask */
-
-#ifndef CONFIG_MATH_EMULATION
-# define NEED_FPU (1<<(X86_FEATURE_FPU & 31))
-#else
-# define NEED_FPU 0
-#endif
-
-#if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
-# define NEED_PAE (1<<(X86_FEATURE_PAE & 31))
-#else
-# define NEED_PAE 0
-#endif
-
-#ifdef CONFIG_X86_CMPXCHG64
-# define NEED_CX8 (1<<(X86_FEATURE_CX8 & 31))
-#else
-# define NEED_CX8 0
-#endif
-
-#if defined(CONFIG_X86_CMOV) || defined(CONFIG_X86_64)
-# define NEED_CMOV (1<<(X86_FEATURE_CMOV & 31))
-#else
-# define NEED_CMOV 0
-#endif
-
-# define NEED_3DNOW 0
-
-#if defined(CONFIG_X86_P6_NOP) || defined(CONFIG_X86_64)
-# define NEED_NOPL (1<<(X86_FEATURE_NOPL & 31))
-#else
-# define NEED_NOPL 0
-#endif
-
-#ifdef CONFIG_MATOM
-# define NEED_MOVBE (1<<(X86_FEATURE_MOVBE & 31))
-#else
-# define NEED_MOVBE 0
-#endif
-
-#ifdef CONFIG_X86_64
-#ifdef CONFIG_PARAVIRT_XXL
-/* Paravirtualized systems may not have PSE or PGE available */
-#define NEED_PSE 0
-#define NEED_PGE 0
-#else
-#define NEED_PSE (1<<(X86_FEATURE_PSE) & 31)
-#define NEED_PGE (1<<(X86_FEATURE_PGE) & 31)
-#endif
-#define NEED_MSR (1<<(X86_FEATURE_MSR & 31))
-#define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31))
-#define NEED_XMM (1<<(X86_FEATURE_XMM & 31))
-#define NEED_XMM2 (1<<(X86_FEATURE_XMM2 & 31))
-#define NEED_LM (1<<(X86_FEATURE_LM & 31))
-#else
-#define NEED_PSE 0
-#define NEED_MSR 0
-#define NEED_PGE 0
-#define NEED_FXSR 0
-#define NEED_XMM 0
-#define NEED_XMM2 0
-#define NEED_LM 0
-#endif
-
-#define REQUIRED_MASK0 (NEED_FPU|NEED_PSE|NEED_MSR|NEED_PAE|\
- NEED_CX8|NEED_PGE|NEED_FXSR|NEED_CMOV|\
- NEED_XMM|NEED_XMM2)
-#define SSE_MASK (NEED_XMM|NEED_XMM2)
-
-#define REQUIRED_MASK1 (NEED_LM|NEED_3DNOW)
-
-#define REQUIRED_MASK2 0
-#define REQUIRED_MASK3 (NEED_NOPL)
-#define REQUIRED_MASK4 (NEED_MOVBE)
-#define REQUIRED_MASK5 0
-#define REQUIRED_MASK6 0
-#define REQUIRED_MASK7 0
-#define REQUIRED_MASK8 0
-#define REQUIRED_MASK9 0
-#define REQUIRED_MASK10 0
-#define REQUIRED_MASK11 0
-#define REQUIRED_MASK12 0
-#define REQUIRED_MASK13 0
-#define REQUIRED_MASK14 0
-#define REQUIRED_MASK15 0
-#define REQUIRED_MASK16 0
-#define REQUIRED_MASK17 0
-#define REQUIRED_MASK18 0
-#define REQUIRED_MASK19 0
-#define REQUIRED_MASK20 0
-#define REQUIRED_MASK21 0
-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22)
-
-#endif /* _ASM_X86_REQUIRED_FEATURES_H */
diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c
index 156b62a163c5..8a48cc2536f5 100644
--- a/tools/bootconfig/main.c
+++ b/tools/bootconfig/main.c
@@ -226,7 +226,7 @@ static int load_xbc_from_initrd(int fd, char **buf)
/* Wrong Checksum */
rcsum = xbc_calc_checksum(*buf, size);
if (csum != rcsum) {
- pr_err("checksum error: %d != %d\n", csum, rcsum);
+ pr_err("checksum error: %u != %u\n", csum, rcsum);
return -EINVAL;
}
@@ -395,7 +395,7 @@ static int apply_xbc(const char *path, const char *xbc_path)
xbc_get_info(&ret, NULL);
printf("\tNumber of nodes: %d\n", ret);
printf("\tSize: %u bytes\n", (unsigned int)size);
- printf("\tChecksum: %d\n", (unsigned int)csum);
+ printf("\tChecksum: %u\n", (unsigned int)csum);
/* TODO: Check the options by schema */
xbc_exit();
diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile
index 243b79f2b451..062bbd6cd048 100644
--- a/tools/bpf/Makefile
+++ b/tools/bpf/Makefile
@@ -27,12 +27,6 @@ srctree := $(patsubst %/,%,$(dir $(CURDIR)))
srctree := $(patsubst %/,%,$(dir $(srctree)))
endif
-ifeq ($(V),1)
- Q =
-else
- Q = @
-endif
-
FEATURE_USER = .bpf
FEATURE_TESTS = libbfd disassembler-four-args disassembler-init-styled
FEATURE_DISPLAY = libbfd
diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile
index 4315652678b9..bf843f328812 100644
--- a/tools/bpf/bpftool/Documentation/Makefile
+++ b/tools/bpf/bpftool/Documentation/Makefile
@@ -5,12 +5,6 @@ INSTALL ?= install
RM ?= rm -f
RMDIR ?= rmdir --ignore-fail-on-non-empty
-ifeq ($(V),1)
- Q =
-else
- Q = @
-endif
-
prefix ?= /usr/local
mandir ?= $(prefix)/man
man8dir = $(mandir)/man8
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index dd9f3ec84201..9e9a5f006cd2 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -7,12 +7,6 @@ srctree := $(patsubst %/,%,$(dir $(srctree)))
srctree := $(patsubst %/,%,$(dir $(srctree)))
endif
-ifeq ($(V),1)
- Q =
-else
- Q = @
-endif
-
BPF_DIR = $(srctree)/tools/lib/bpf
ifneq ($(OUTPUT),)
@@ -71,7 +65,12 @@ prefix ?= /usr/local
bash_compdir ?= /usr/share/bash-completion/completions
CFLAGS += -O2
-CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers
+CFLAGS += -W
+CFLAGS += -Wall
+CFLAGS += -Wextra
+CFLAGS += -Wformat-signedness
+CFLAGS += -Wno-unused-parameter
+CFLAGS += -Wno-missing-field-initializers
CFLAGS += $(filter-out -Wswitch-enum -Wnested-externs,$(EXTRA_WARNINGS))
CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \
-I$(or $(OUTPUT),.) \
diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index 2636655ac180..6b14cbfa58aa 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -253,7 +253,7 @@ static int dump_btf_type(const struct btf *btf, __u32 id,
if (btf_kflag(t))
printf("\n\t'%s' val=%d", name, v->val);
else
- printf("\n\t'%s' val=%u", name, v->val);
+ printf("\n\t'%s' val=%u", name, (__u32)v->val);
}
}
if (json_output)
@@ -1022,7 +1022,7 @@ static int do_dump(int argc, char **argv)
for (i = 0; i < root_type_cnt; i++) {
if (root_type_ids[i] == root_id) {
err = -EINVAL;
- p_err("duplicate root_id %d supplied", root_id);
+ p_err("duplicate root_id %u supplied", root_id);
goto done;
}
}
@@ -1132,7 +1132,7 @@ build_btf_type_table(struct hashmap *tab, enum bpf_obj_type type,
break;
default:
err = -1;
- p_err("unexpected object type: %d", type);
+ p_err("unexpected object type: %u", type);
goto err_free;
}
if (err) {
@@ -1155,7 +1155,7 @@ build_btf_type_table(struct hashmap *tab, enum bpf_obj_type type,
break;
default:
err = -1;
- p_err("unexpected object type: %d", type);
+ p_err("unexpected object type: %u", type);
goto err_free;
}
if (fd < 0) {
@@ -1188,7 +1188,7 @@ build_btf_type_table(struct hashmap *tab, enum bpf_obj_type type,
break;
default:
err = -1;
- p_err("unexpected object type: %d", type);
+ p_err("unexpected object type: %u", type);
goto err_free;
}
if (!btf_id)
@@ -1254,12 +1254,12 @@ show_btf_plain(struct bpf_btf_info *info, int fd,
n = 0;
hashmap__for_each_key_entry(btf_prog_table, entry, info->id) {
- printf("%s%lu", n++ == 0 ? " prog_ids " : ",", entry->value);
+ printf("%s%lu", n++ == 0 ? " prog_ids " : ",", (unsigned long)entry->value);
}
n = 0;
hashmap__for_each_key_entry(btf_map_table, entry, info->id) {
- printf("%s%lu", n++ == 0 ? " map_ids " : ",", entry->value);
+ printf("%s%lu", n++ == 0 ? " map_ids " : ",", (unsigned long)entry->value);
}
emit_obj_refs_plain(refs_table, info->id, "\n\tpids ");
diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
index 527fe867a8fb..4e896d8a2416 100644
--- a/tools/bpf/bpftool/btf_dumper.c
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -653,7 +653,7 @@ static int __btf_dumper_type_only(const struct btf *btf, __u32 type_id,
case BTF_KIND_ARRAY:
array = (struct btf_array *)(t + 1);
BTF_PRINT_TYPE(array->type);
- BTF_PRINT_ARG("[%d]", array->nelems);
+ BTF_PRINT_ARG("[%u]", array->nelems);
break;
case BTF_KIND_PTR:
BTF_PRINT_TYPE(t->type);
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index 9af426d43299..93b139bfb988 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -191,7 +191,7 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type,
if (attach_btf_name)
printf(" %-15s", attach_btf_name);
else if (info.attach_btf_id)
- printf(" attach_btf_obj_id=%d attach_btf_id=%d",
+ printf(" attach_btf_obj_id=%u attach_btf_id=%u",
info.attach_btf_obj_id, info.attach_btf_id);
printf("\n");
}
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index 9b75639434b8..ecfa790adc13 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -461,10 +461,11 @@ int get_fd_type(int fd)
p_err("can't read link type: %s", strerror(errno));
return -1;
}
- if (n == sizeof(path)) {
+ if (n == sizeof(buf)) {
p_err("can't read link type: path too long!");
return -1;
}
+ buf[n] = '\0';
if (strstr(buf, "bpf-map"))
return BPF_OBJ_MAP;
@@ -713,7 +714,7 @@ ifindex_to_arch(__u32 ifindex, __u64 ns_dev, __u64 ns_ino, const char **opt)
int vendor_id;
if (!ifindex_to_name_ns(ifindex, ns_dev, ns_ino, devname)) {
- p_err("Can't get net device name for ifindex %d: %s", ifindex,
+ p_err("Can't get net device name for ifindex %u: %s", ifindex,
strerror(errno));
return NULL;
}
@@ -738,7 +739,7 @@ ifindex_to_arch(__u32 ifindex, __u64 ns_dev, __u64 ns_ino, const char **opt)
/* No NFP support in LLVM, we have no valid triple to return. */
default:
p_err("Can't get arch name for device vendor id 0x%04x",
- vendor_id);
+ (unsigned int)vendor_id);
return NULL;
}
}
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 5a4d3240689e..67a60114368f 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -670,7 +670,7 @@ static void codegen_destroy(struct bpf_object *obj, const char *obj_name)
continue;
if (bpf_map__is_internal(map) &&
(bpf_map__map_flags(map) & BPF_F_MMAPABLE))
- printf("\tskel_free_map_data(skel->%1$s, skel->maps.%1$s.initial_value, %2$zd);\n",
+ printf("\tskel_free_map_data(skel->%1$s, skel->maps.%1$s.initial_value, %2$zu);\n",
ident, bpf_map_mmap_sz(map));
codegen("\
\n\
@@ -984,7 +984,7 @@ static int walk_st_ops_shadow_vars(struct btf *btf, const char *ident,
offset = m->offset / 8;
if (next_offset < offset)
- printf("\t\t\tchar __padding_%d[%d];\n", i, offset - next_offset);
+ printf("\t\t\tchar __padding_%d[%u];\n", i, offset - next_offset);
switch (btf_kind(member_type)) {
case BTF_KIND_INT:
@@ -1052,7 +1052,7 @@ static int walk_st_ops_shadow_vars(struct btf *btf, const char *ident,
/* Cannot fail since it must be a struct type */
size = btf__resolve_size(btf, map_type_id);
if (next_offset < (__u32)size)
- printf("\t\t\tchar __padding_end[%d];\n", size - next_offset);
+ printf("\t\t\tchar __padding_end[%u];\n", size - next_offset);
out:
btf_dump__free(d);
@@ -2095,7 +2095,7 @@ btfgen_mark_type(struct btfgen_info *info, unsigned int type_id, bool follow_poi
break;
/* tells if some other type needs to be handled */
default:
- p_err("unsupported kind: %s (%d)", btf_kind_str(btf_type), type_id);
+ p_err("unsupported kind: %s (%u)", btf_kind_str(btf_type), type_id);
return -EINVAL;
}
@@ -2147,7 +2147,7 @@ static int btfgen_record_field_relo(struct btfgen_info *info, struct bpf_core_sp
btf_type = btf__type_by_id(btf, type_id);
break;
default:
- p_err("unsupported kind: %s (%d)",
+ p_err("unsupported kind: %s (%u)",
btf_kind_str(btf_type), btf_type->type);
return -EINVAL;
}
@@ -2246,7 +2246,7 @@ static int btfgen_mark_type_match(struct btfgen_info *info, __u32 type_id, bool
}
/* tells if some other type needs to be handled */
default:
- p_err("unsupported kind: %s (%d)", btf_kind_str(btf_type), type_id);
+ p_err("unsupported kind: %s (%u)", btf_kind_str(btf_type), type_id);
return -EINVAL;
}
diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c
index c032d2c6ab6d..8895b4e1f690 100644
--- a/tools/bpf/bpftool/jit_disasm.c
+++ b/tools/bpf/bpftool/jit_disasm.c
@@ -343,7 +343,8 @@ int disasm_print_insn(unsigned char *image, ssize_t len, int opcodes,
{
const struct bpf_line_info *linfo = NULL;
unsigned int nr_skip = 0;
- int count, i, pc = 0;
+ int count, i;
+ unsigned int pc = 0;
disasm_ctx_t ctx;
if (!len)
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index 5cd503b763d7..52fd2c9fac56 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -107,7 +107,7 @@ static int link_parse_fd(int *argc, char ***argv)
fd = bpf_link_get_fd_by_id(id);
if (fd < 0)
- p_err("failed to get link with ID %d: %s", id, strerror(errno));
+ p_err("failed to get link with ID %u: %s", id, strerror(errno));
return fd;
} else if (is_prefix(**argv, "pinned")) {
char *path;
@@ -404,7 +404,7 @@ static char *perf_config_hw_cache_str(__u64 config)
if (hw_cache)
snprintf(str, PERF_HW_CACHE_LEN, "%s-", hw_cache);
else
- snprintf(str, PERF_HW_CACHE_LEN, "%lld-", config & 0xff);
+ snprintf(str, PERF_HW_CACHE_LEN, "%llu-", config & 0xff);
op = perf_event_name(evsel__hw_cache_op, (config >> 8) & 0xff);
if (op)
@@ -412,7 +412,7 @@ static char *perf_config_hw_cache_str(__u64 config)
"%s-", op);
else
snprintf(str + strlen(str), PERF_HW_CACHE_LEN - strlen(str),
- "%lld-", (config >> 8) & 0xff);
+ "%llu-", (config >> 8) & 0xff);
result = perf_event_name(evsel__hw_cache_result, config >> 16);
if (result)
@@ -420,7 +420,7 @@ static char *perf_config_hw_cache_str(__u64 config)
"%s", result);
else
snprintf(str + strlen(str), PERF_HW_CACHE_LEN - strlen(str),
- "%lld", config >> 16);
+ "%llu", config >> 16);
return str;
}
@@ -623,7 +623,7 @@ static void show_link_ifindex_plain(__u32 ifindex)
else
snprintf(devname, sizeof(devname), "(detached)");
if (ret)
- snprintf(devname, sizeof(devname), "%s(%d)",
+ snprintf(devname, sizeof(devname), "%s(%u)",
tmpname, ifindex);
printf("ifindex %s ", devname);
}
@@ -699,7 +699,7 @@ void netfilter_dump_plain(const struct bpf_link_info *info)
if (pfname)
printf("\n\t%s", pfname);
else
- printf("\n\tpf: %d", pf);
+ printf("\n\tpf: %u", pf);
if (hookname)
printf(" %s", hookname);
@@ -773,7 +773,7 @@ static void show_uprobe_multi_plain(struct bpf_link_info *info)
printf("func_cnt %u ", info->uprobe_multi.count);
if (info->uprobe_multi.pid)
- printf("pid %d ", info->uprobe_multi.pid);
+ printf("pid %u ", info->uprobe_multi.pid);
printf("\n\t%-16s %-16s %-16s", "offset", "ref_ctr_offset", "cookies");
for (i = 0; i < info->uprobe_multi.count; i++) {
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 08d0ac543c67..cd5963cb6058 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -152,7 +152,7 @@ static int do_version(int argc, char **argv)
BPFTOOL_MINOR_VERSION, BPFTOOL_PATCH_VERSION);
#endif
jsonw_name(json_wtr, "libbpf_version");
- jsonw_printf(json_wtr, "\"%d.%d\"",
+ jsonw_printf(json_wtr, "\"%u.%u\"",
libbpf_major_version(), libbpf_minor_version());
jsonw_name(json_wtr, "features");
@@ -370,7 +370,7 @@ static int do_batch(int argc, char **argv)
while ((cp = strstr(buf, "\\\n")) != NULL) {
if (!fgets(contline, sizeof(contline), fp) ||
strlen(contline) == 0) {
- p_err("missing continuation line on command %d",
+ p_err("missing continuation line on command %u",
lines);
err = -1;
goto err_close;
@@ -381,7 +381,7 @@ static int do_batch(int argc, char **argv)
*cp = '\0';
if (strlen(buf) + strlen(contline) + 1 > sizeof(buf)) {
- p_err("command %d is too long", lines);
+ p_err("command %u is too long", lines);
err = -1;
goto err_close;
}
@@ -423,7 +423,7 @@ static int do_batch(int argc, char **argv)
err = -1;
} else {
if (!json_output)
- printf("processed %d commands\n", lines);
+ printf("processed %u commands\n", lines);
}
err_close:
if (fp != stdin)
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index b89bd792c1d5..81cc668b4b05 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -285,7 +285,7 @@ static void print_entry_plain(struct bpf_map_info *info, unsigned char *key,
}
if (info->value_size) {
for (i = 0; i < n; i++) {
- printf("value (CPU %02d):%c",
+ printf("value (CPU %02u):%c",
i, info->value_size > 16 ? '\n' : ' ');
fprint_hex(stdout, value + i * step,
info->value_size, " ");
@@ -316,7 +316,7 @@ static char **parse_bytes(char **argv, const char *name, unsigned char *val,
}
if (i != n) {
- p_err("%s expected %d bytes got %d", name, n, i);
+ p_err("%s expected %u bytes got %u", name, n, i);
return NULL;
}
@@ -462,7 +462,7 @@ static void show_map_header_json(struct bpf_map_info *info, json_writer_t *wtr)
jsonw_string_field(wtr, "name", info->name);
jsonw_name(wtr, "flags");
- jsonw_printf(wtr, "%d", info->map_flags);
+ jsonw_printf(wtr, "%u", info->map_flags);
}
static int show_map_close_json(int fd, struct bpf_map_info *info)
@@ -588,7 +588,7 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info)
if (prog_type_str)
printf("owner_prog_type %s ", prog_type_str);
else
- printf("owner_prog_type %d ", prog_type);
+ printf("owner_prog_type %u ", prog_type);
}
if (owner_jited)
printf("owner%s jited",
@@ -615,7 +615,7 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info)
printf("\n\t");
if (info->btf_id)
- printf("btf_id %d", info->btf_id);
+ printf("btf_id %u", info->btf_id);
if (frozen)
printf("%sfrozen", info->btf_id ? " " : "");
@@ -1270,6 +1270,10 @@ static int do_create(int argc, char **argv)
} else if (is_prefix(*argv, "name")) {
NEXT_ARG();
map_name = GET_ARG();
+ if (strlen(map_name) > BPF_OBJ_NAME_LEN - 1) {
+ p_info("Warning: map name is longer than %u characters, it will be truncated.",
+ BPF_OBJ_NAME_LEN - 1);
+ }
} else if (is_prefix(*argv, "key")) {
if (parse_u32_arg(&argc, &argv, &key_size,
"key size"))
diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c
index 21d7d447e1f3..552b4ca40c27 100644
--- a/tools/bpf/bpftool/map_perf_ring.c
+++ b/tools/bpf/bpftool/map_perf_ring.c
@@ -91,15 +91,15 @@ print_bpf_output(void *private_data, int cpu, struct perf_event_header *event)
jsonw_end_object(json_wtr);
} else {
if (e->header.type == PERF_RECORD_SAMPLE) {
- printf("== @%lld.%09lld CPU: %d index: %d =====\n",
+ printf("== @%llu.%09llu CPU: %d index: %d =====\n",
e->time / 1000000000ULL, e->time % 1000000000ULL,
cpu, idx);
fprint_hex(stdout, e->data, e->size, " ");
printf("\n");
} else if (e->header.type == PERF_RECORD_LOST) {
- printf("lost %lld events\n", lost->lost);
+ printf("lost %llu events\n", lost->lost);
} else {
- printf("unknown event type=%d size=%d\n",
+ printf("unknown event type=%u size=%u\n",
e->header.type, e->header.size);
}
}
diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c
index d2242d9f8441..64f958f437b0 100644
--- a/tools/bpf/bpftool/net.c
+++ b/tools/bpf/bpftool/net.c
@@ -476,7 +476,7 @@ static void __show_dev_tc_bpf(const struct ip_devname_ifindex *dev,
for (i = 0; i < optq.count; i++) {
NET_START_OBJECT;
NET_DUMP_STR("devname", "%s", dev->devname);
- NET_DUMP_UINT("ifindex", "(%u)", dev->ifindex);
+ NET_DUMP_UINT("ifindex", "(%u)", (unsigned int)dev->ifindex);
NET_DUMP_STR("kind", " %s", attach_loc_strings[loc]);
ret = __show_dev_tc_bpf_name(prog_ids[i], prog_name,
sizeof(prog_name));
@@ -831,7 +831,7 @@ static void show_link_netfilter(void)
if (err) {
if (errno == ENOENT)
break;
- p_err("can't get next link: %s (id %d)", strerror(errno), id);
+ p_err("can't get next link: %s (id %u)", strerror(errno), id);
break;
}
diff --git a/tools/bpf/bpftool/netlink_dumper.c b/tools/bpf/bpftool/netlink_dumper.c
index 5f65140b003b..0a3c7e96c797 100644
--- a/tools/bpf/bpftool/netlink_dumper.c
+++ b/tools/bpf/bpftool/netlink_dumper.c
@@ -45,7 +45,7 @@ static int do_xdp_dump_one(struct nlattr *attr, unsigned int ifindex,
NET_START_OBJECT;
if (name)
NET_DUMP_STR("devname", "%s", name);
- NET_DUMP_UINT("ifindex", "(%d)", ifindex);
+ NET_DUMP_UINT("ifindex", "(%u)", ifindex);
if (mode == XDP_ATTACHED_MULTI) {
if (json_output) {
@@ -74,7 +74,7 @@ int do_xdp_dump(struct ifinfomsg *ifinfo, struct nlattr **tb)
if (!tb[IFLA_XDP])
return 0;
- return do_xdp_dump_one(tb[IFLA_XDP], ifinfo->ifi_index,
+ return do_xdp_dump_one(tb[IFLA_XDP], (unsigned int)ifinfo->ifi_index,
libbpf_nla_getattr_str(tb[IFLA_IFNAME]));
}
@@ -168,7 +168,7 @@ int do_filter_dump(struct tcmsg *info, struct nlattr **tb, const char *kind,
NET_START_OBJECT;
if (devname[0] != '\0')
NET_DUMP_STR("devname", "%s", devname);
- NET_DUMP_UINT("ifindex", "(%u)", ifindex);
+ NET_DUMP_UINT("ifindex", "(%u)", (unsigned int)ifindex);
NET_DUMP_STR("kind", " %s", kind);
ret = do_bpf_filter_dump(tb[TCA_OPTIONS]);
NET_END_OBJECT_FINAL;
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index e71be67f1d86..f010295350be 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -521,10 +521,10 @@ static void print_prog_header_plain(struct bpf_prog_info *info, int fd)
print_dev_plain(info->ifindex, info->netns_dev, info->netns_ino);
printf("%s", info->gpl_compatible ? " gpl" : "");
if (info->run_time_ns)
- printf(" run_time_ns %lld run_cnt %lld",
+ printf(" run_time_ns %llu run_cnt %llu",
info->run_time_ns, info->run_cnt);
if (info->recursion_misses)
- printf(" recursion_misses %lld", info->recursion_misses);
+ printf(" recursion_misses %llu", info->recursion_misses);
printf("\n");
}
@@ -569,7 +569,7 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd, bool orphaned)
}
if (info->btf_id)
- printf("\n\tbtf_id %d", info->btf_id);
+ printf("\n\tbtf_id %u", info->btf_id);
emit_obj_refs_plain(refs_table, info->id, "\n\tpids ");
@@ -1164,7 +1164,7 @@ static int get_run_data(const char *fname, void **data_ptr, unsigned int *size)
}
if (nb_read > buf_size - block_size) {
if (buf_size == UINT32_MAX) {
- p_err("data_in/ctx_in is too long (max: %d)",
+ p_err("data_in/ctx_in is too long (max: %u)",
UINT32_MAX);
goto err_free;
}
@@ -1928,6 +1928,7 @@ static int do_loader(int argc, char **argv)
obj = bpf_object__open_file(file, &open_opts);
if (!obj) {
+ err = -1;
p_err("failed to open object file");
goto err_close_obj;
}
@@ -2251,7 +2252,7 @@ static char *profile_target_name(int tgt_fd)
t = btf__type_by_id(btf, func_info.type_id);
if (!t) {
- p_err("btf %d doesn't have type %d",
+ p_err("btf %u doesn't have type %u",
info.btf_id, func_info.type_id);
goto out;
}
@@ -2329,7 +2330,7 @@ static int profile_open_perf_events(struct profiler_bpf *obj)
continue;
for (cpu = 0; cpu < obj->rodata->num_cpu; cpu++) {
if (profile_open_perf_event(m, cpu, map_fd)) {
- p_err("failed to create event %s on cpu %d",
+ p_err("failed to create event %s on cpu %u",
metrics[m].name, cpu);
return -1;
}
diff --git a/tools/bpf/bpftool/tracelog.c b/tools/bpf/bpftool/tracelog.c
index bf1f02212797..31d806e3bdaa 100644
--- a/tools/bpf/bpftool/tracelog.c
+++ b/tools/bpf/bpftool/tracelog.c
@@ -78,7 +78,7 @@ static bool get_tracefs_pipe(char *mnt)
return false;
/* Allow room for NULL terminating byte and pipe file name */
- snprintf(format, sizeof(format), "%%*s %%%zds %%99s %%*s %%*d %%*d\\n",
+ snprintf(format, sizeof(format), "%%*s %%%zus %%99s %%*s %%*d %%*d\\n",
PATH_MAX - strlen(pipe_name) - 1);
while (fscanf(fp, format, mnt, type) == 2)
if (strcmp(type, fstype) == 0) {
diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c
index d0094345fb2b..5e7cb8b36fef 100644
--- a/tools/bpf/bpftool/xlated_dumper.c
+++ b/tools/bpf/bpftool/xlated_dumper.c
@@ -199,13 +199,13 @@ static const char *print_imm(void *private_data,
if (insn->src_reg == BPF_PSEUDO_MAP_FD)
snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
- "map[id:%u]", insn->imm);
+ "map[id:%d]", insn->imm);
else if (insn->src_reg == BPF_PSEUDO_MAP_VALUE)
snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
- "map[id:%u][0]+%u", insn->imm, (insn + 1)->imm);
+ "map[id:%d][0]+%d", insn->imm, (insn + 1)->imm);
else if (insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE)
snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
- "map[idx:%u]+%u", insn->imm, (insn + 1)->imm);
+ "map[idx:%d]+%d", insn->imm, (insn + 1)->imm);
else if (insn->src_reg == BPF_PSEUDO_FUNC)
snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
"subprog[%+d]", insn->imm);
diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile
index 4b8079f294f6..afbddea3a39c 100644
--- a/tools/bpf/resolve_btfids/Makefile
+++ b/tools/bpf/resolve_btfids/Makefile
@@ -5,10 +5,8 @@ include ../../scripts/Makefile.arch
srctree := $(abspath $(CURDIR)/../../../)
ifeq ($(V),1)
- Q =
msg =
else
- Q = @
ifeq ($(silent),1)
msg =
else
diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile
index c4f1f1735af6..78a436c4072e 100644
--- a/tools/bpf/runqslower/Makefile
+++ b/tools/bpf/runqslower/Makefile
@@ -6,6 +6,7 @@ OUTPUT ?= $(abspath .output)/
BPFTOOL_OUTPUT := $(OUTPUT)bpftool/
DEFAULT_BPFTOOL := $(BPFTOOL_OUTPUT)bootstrap/bpftool
BPFTOOL ?= $(DEFAULT_BPFTOOL)
+BPF_TARGET_ENDIAN ?= --target=bpf
LIBBPF_SRC := $(abspath ../../lib/bpf)
BPFOBJ_OUTPUT := $(OUTPUT)libbpf/
BPFOBJ := $(BPFOBJ_OUTPUT)libbpf.a
@@ -26,10 +27,7 @@ VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux) \
VMLINUX_BTF_PATH := $(or $(VMLINUX_BTF),$(firstword \
$(wildcard $(VMLINUX_BTF_PATHS))))
-ifeq ($(V),1)
-Q =
-else
-Q = @
+ifneq ($(V),1)
MAKEFLAGS += --no-print-directory
submake_extras := feature_display=0
endif
@@ -63,7 +61,7 @@ $(OUTPUT)/%.skel.h: $(OUTPUT)/%.bpf.o | $(BPFTOOL)
$(QUIET_GEN)$(BPFTOOL) gen skeleton $< > $@
$(OUTPUT)/%.bpf.o: %.bpf.c $(BPFOBJ) | $(OUTPUT)
- $(QUIET_GEN)$(CLANG) -g -O2 --target=bpf $(INCLUDES) \
+ $(QUIET_GEN)$(CLANG) -g -O2 $(BPF_TARGET_ENDIAN) $(INCLUDES) \
-c $(filter %.c,$^) -o $@ && \
$(LLVM_STRIP) -g $@
diff --git a/tools/build/Makefile b/tools/build/Makefile
index 18ad131f6ea7..63ef21878761 100644
--- a/tools/build/Makefile
+++ b/tools/build/Makefile
@@ -17,13 +17,7 @@ $(call allow-override,LD,$(CROSS_COMPILE)ld)
export HOSTCC HOSTLD HOSTAR
-ifeq ($(V),1)
- Q =
-else
- Q = @
-endif
-
-export Q srctree CC LD
+export srctree CC LD
MAKEFLAGS := --no-print-directory
build := -f $(srctree)/tools/build/Makefile.build dir=. obj
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index 1931b6321314..1f44ca677ad3 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -135,7 +135,8 @@ FEATURE_TESTS_EXTRA := \
libbpf-bpf_create_map \
libpfm4 \
libdebuginfod \
- clang-bpf-co-re
+ clang-bpf-co-re \
+ bpftool-skeletons
FEATURE_TESTS ?= $(FEATURE_TESTS_BASIC)
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index cb1e3e2feedf..b8b5fb183dd4 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -418,6 +418,9 @@ $(OUTPUT)test-file-handle.bin:
$(OUTPUT)test-libpfm4.bin:
$(BUILD) -lpfm
+$(OUTPUT)test-bpftool-skeletons.bin:
+ $(SYSTEM_BPFTOOL) version | grep '^features:.*skeletons' \
+ > $(@:.bin=.make.output) 2>&1
###############################
clean:
diff --git a/tools/debugging/kernel-chktaint b/tools/debugging/kernel-chktaint
index 279be06332be..e7da0909d097 100755
--- a/tools/debugging/kernel-chktaint
+++ b/tools/debugging/kernel-chktaint
@@ -204,6 +204,14 @@ else
echo " * an in-kernel test (such as a KUnit test) has been run (#18)"
fi
+T=`expr $T / 2`
+if [ `expr $T % 2` -eq 0 ]; then
+ addout " "
+else
+ addout "J"
+ echo " * fwctl's mutating debug interface was used (#19)"
+fi
+
echo "For a more detailed explanation of the various taint flags see"
echo " Documentation/admin-guide/tainted-kernels.rst in the Linux kernel sources"
echo " or https://kernel.org/doc/html/latest/admin-guide/tainted-kernels.html"
diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h
index 60044b608817..8de2914e6510 100644
--- a/tools/include/linux/bits.h
+++ b/tools/include/linux/bits.h
@@ -41,7 +41,7 @@
* Missing asm support
*
* __GENMASK_U128() depends on _BIT128() which would not work
- * in the asm code, as it shifts an 'unsigned __init128' data
+ * in the asm code, as it shifts an 'unsigned __int128' data
* type instead of direct representation of 128 bit constants
* such as long and unsigned long. The fundamental problem is
* that a 128 bit constant will get silently truncated by the
diff --git a/tools/include/linux/kasan-tags.h b/tools/include/linux/kasan-tags.h
new file mode 100644
index 000000000000..4f85f562512c
--- /dev/null
+++ b/tools/include/linux/kasan-tags.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_KASAN_TAGS_H
+#define _LINUX_KASAN_TAGS_H
+
+#define KASAN_TAG_KERNEL 0xFF /* native kernel pointers tag */
+#define KASAN_TAG_INVALID 0xFE /* inaccessible memory tag */
+#define KASAN_TAG_MAX 0xFD /* maximum value for random tags */
+
+#ifdef CONFIG_KASAN_HW_TAGS
+#define KASAN_TAG_MIN 0xF0 /* minimum value for random tags */
+#else
+#define KASAN_TAG_MIN 0x00 /* minimum value for random tags */
+#endif
+
+#endif /* LINUX_KASAN_TAGS_H */
diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile
index a1f55fb24bb3..f9702877ac21 100644
--- a/tools/include/nolibc/Makefile
+++ b/tools/include/nolibc/Makefile
@@ -29,7 +29,9 @@ all_files := \
compiler.h \
crt.h \
ctype.h \
+ dirent.h \
errno.h \
+ limits.h \
nolibc.h \
signal.h \
stackprotector.h \
diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h
index 1791a8ce58da..753a8ed2cf69 100644
--- a/tools/include/nolibc/arch-mips.h
+++ b/tools/include/nolibc/arch-mips.h
@@ -179,6 +179,7 @@
})
/* startup code, note that it's called __start on MIPS */
+void __start(void);
void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector __start(void)
{
__asm__ volatile (
diff --git a/tools/include/nolibc/arch-s390.h b/tools/include/nolibc/arch-s390.h
index f9ab83a219b8..df4c3cc713ac 100644
--- a/tools/include/nolibc/arch-s390.h
+++ b/tools/include/nolibc/arch-s390.h
@@ -5,8 +5,8 @@
#ifndef _NOLIBC_ARCH_S390_H
#define _NOLIBC_ARCH_S390_H
-#include <asm/signal.h>
-#include <asm/unistd.h>
+#include <linux/signal.h>
+#include <linux/unistd.h>
#include "compiler.h"
#include "crt.h"
@@ -143,8 +143,13 @@
void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
{
__asm__ volatile (
+#ifdef __s390x__
"lgr %r2, %r15\n" /* save stack pointer to %r2, as arg1 of _start_c */
"aghi %r15, -160\n" /* allocate new stackframe */
+#else
+ "lr %r2, %r15\n"
+ "ahi %r15, -96\n"
+#endif
"xc 0(8,%r15), 0(%r15)\n" /* clear backchain */
"brasl %r14, _start_c\n" /* transfer to c runtime */
);
diff --git a/tools/include/nolibc/arch.h b/tools/include/nolibc/arch.h
index c8f4e5d3add9..8a2c143c0fba 100644
--- a/tools/include/nolibc/arch.h
+++ b/tools/include/nolibc/arch.h
@@ -29,7 +29,7 @@
#include "arch-powerpc.h"
#elif defined(__riscv)
#include "arch-riscv.h"
-#elif defined(__s390x__)
+#elif defined(__s390x__) || defined(__s390__)
#include "arch-s390.h"
#elif defined(__loongarch__)
#include "arch-loongarch.h"
diff --git a/tools/include/nolibc/crt.h b/tools/include/nolibc/crt.h
index bbcd5fd09806..c4b10103bbec 100644
--- a/tools/include/nolibc/crt.h
+++ b/tools/include/nolibc/crt.h
@@ -10,6 +10,7 @@
char **environ __attribute__((weak));
const unsigned long *_auxv __attribute__((weak));
+void _start(void);
static void __stack_chk_init(void);
static void exit(int);
@@ -22,6 +23,7 @@ extern void (*const __init_array_end[])(int, char **, char**) __attribute__((wea
extern void (*const __fini_array_start[])(void) __attribute__((weak));
extern void (*const __fini_array_end[])(void) __attribute__((weak));
+void _start_c(long *sp);
__attribute__((weak,used))
void _start_c(long *sp)
{
diff --git a/tools/include/nolibc/dirent.h b/tools/include/nolibc/dirent.h
new file mode 100644
index 000000000000..c5c30d0dd680
--- /dev/null
+++ b/tools/include/nolibc/dirent.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Directory access for NOLIBC
+ * Copyright (C) 2025 Thomas Weißschuh <linux@weissschuh.net>
+ */
+
+#ifndef _NOLIBC_DIRENT_H
+#define _NOLIBC_DIRENT_H
+
+#include "stdint.h"
+#include "types.h"
+
+#include <linux/limits.h>
+
+struct dirent {
+ ino_t d_ino;
+ char d_name[NAME_MAX + 1];
+};
+
+/* See comment of FILE in stdio.h */
+typedef struct {
+ char dummy[1];
+} DIR;
+
+static __attribute__((unused))
+DIR *fdopendir(int fd)
+{
+ if (fd < 0) {
+ SET_ERRNO(EBADF);
+ return NULL;
+ }
+ return (DIR *)(intptr_t)~fd;
+}
+
+static __attribute__((unused))
+DIR *opendir(const char *name)
+{
+ int fd;
+
+ fd = open(name, O_RDONLY);
+ if (fd == -1)
+ return NULL;
+ return fdopendir(fd);
+}
+
+static __attribute__((unused))
+int closedir(DIR *dirp)
+{
+ intptr_t i = (intptr_t)dirp;
+
+ if (i >= 0) {
+ SET_ERRNO(EBADF);
+ return -1;
+ }
+ return close(~i);
+}
+
+static __attribute__((unused))
+int readdir_r(DIR *dirp, struct dirent *entry, struct dirent **result)
+{
+ char buf[sizeof(struct linux_dirent64) + NAME_MAX + 1];
+ struct linux_dirent64 *ldir = (void *)buf;
+ intptr_t i = (intptr_t)dirp;
+ int fd, ret;
+
+ if (i >= 0)
+ return EBADF;
+
+ fd = ~i;
+
+ ret = sys_getdents64(fd, ldir, sizeof(buf));
+ if (ret < 0)
+ return -ret;
+ if (ret == 0) {
+ *result = NULL;
+ return 0;
+ }
+
+ /*
+ * getdents64() returns as many entries as fit the buffer.
+ * readdir() can only return one entry at a time.
+ * Make sure the non-returned ones are not skipped.
+ */
+ ret = lseek(fd, ldir->d_off, SEEK_SET);
+ if (ret == -1)
+ return errno;
+
+ entry->d_ino = ldir->d_ino;
+ /* the destination should always be big enough */
+ strlcpy(entry->d_name, ldir->d_name, sizeof(entry->d_name));
+ *result = entry;
+ return 0;
+}
+
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
+#endif /* _NOLIBC_DIRENT_H */
diff --git a/tools/include/nolibc/errno.h b/tools/include/nolibc/errno.h
index a44486ff0477..1d8d8033e8ff 100644
--- a/tools/include/nolibc/errno.h
+++ b/tools/include/nolibc/errno.h
@@ -7,7 +7,7 @@
#ifndef _NOLIBC_ERRNO_H
#define _NOLIBC_ERRNO_H
-#include <asm/errno.h>
+#include <linux/errno.h>
#ifndef NOLIBC_IGNORE_ERRNO
#define SET_ERRNO(v) do { errno = (v); } while (0)
diff --git a/tools/include/nolibc/limits.h b/tools/include/nolibc/limits.h
new file mode 100644
index 000000000000..306d4141f4d2
--- /dev/null
+++ b/tools/include/nolibc/limits.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Shim limits.h header for NOLIBC.
+ * Copyright (C) 2025 Thomas Weißschuh <thomas.weissschuh@linutronix.de>
+ */
+
+#include "nolibc.h"
diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h
index 92436b1e4441..70872401aca8 100644
--- a/tools/include/nolibc/nolibc.h
+++ b/tools/include/nolibc/nolibc.h
@@ -31,8 +31,7 @@
* - The third level is the libc call definition. It exposes the lower raw
* sys_<name>() calls in a way that looks like what a libc usually does,
* takes care of specific input values, and of setting errno upon error.
- * There can be minor variations compared to standard libc calls. For
- * example the open() call always takes 3 args here.
+ * There can be minor variations compared to standard libc calls.
*
* The errno variable is declared static and unused. This way it can be
* optimized away if not used. However this means that a program made of
@@ -105,6 +104,7 @@
#include "string.h"
#include "time.h"
#include "stackprotector.h"
+#include "dirent.h"
/* Used by programs to avoid std includes */
#define NOLIBC
diff --git a/tools/include/nolibc/signal.h b/tools/include/nolibc/signal.h
index 137552216e46..cdcc5904c51e 100644
--- a/tools/include/nolibc/signal.h
+++ b/tools/include/nolibc/signal.h
@@ -13,6 +13,7 @@
#include "sys.h"
/* This one is not marked static as it's needed by libgcc for divide by zero */
+int raise(int signal);
__attribute__((weak,unused,section(".text.nolibc_raise")))
int raise(int signal)
{
diff --git a/tools/include/nolibc/stackprotector.h b/tools/include/nolibc/stackprotector.h
index 1d0d5259ec41..c71a2c257177 100644
--- a/tools/include/nolibc/stackprotector.h
+++ b/tools/include/nolibc/stackprotector.h
@@ -18,6 +18,7 @@
* triggering stack protector errors themselves
*/
+void __stack_chk_fail(void);
__attribute__((weak,used,noreturn,section(".text.nolibc_stack_chk")))
void __stack_chk_fail(void)
{
@@ -28,6 +29,7 @@ void __stack_chk_fail(void)
for (;;);
}
+void __stack_chk_fail_local(void);
__attribute__((weak,noreturn,section(".text.nolibc_stack_chk")))
void __stack_chk_fail_local(void)
{
diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h
index 3892034198dd..a403351dbf60 100644
--- a/tools/include/nolibc/stdio.h
+++ b/tools/include/nolibc/stdio.h
@@ -350,6 +350,104 @@ int printf(const char *fmt, ...)
}
static __attribute__((unused))
+int vsscanf(const char *str, const char *format, va_list args)
+{
+ uintmax_t uval;
+ intmax_t ival;
+ int base;
+ char *endptr;
+ int matches;
+ int lpref;
+
+ matches = 0;
+
+ while (1) {
+ if (*format == '%') {
+ /* start of pattern */
+ lpref = 0;
+ format++;
+
+ if (*format == 'l') {
+ /* same as in printf() */
+ lpref = 1;
+ format++;
+ if (*format == 'l') {
+ lpref = 2;
+ format++;
+ }
+ }
+
+ if (*format == '%') {
+ /* literal % */
+ if ('%' != *str)
+ goto done;
+ str++;
+ format++;
+ continue;
+ } else if (*format == 'd') {
+ ival = strtoll(str, &endptr, 10);
+ if (lpref == 0)
+ *va_arg(args, int *) = ival;
+ else if (lpref == 1)
+ *va_arg(args, long *) = ival;
+ else if (lpref == 2)
+ *va_arg(args, long long *) = ival;
+ } else if (*format == 'u' || *format == 'x' || *format == 'X') {
+ base = *format == 'u' ? 10 : 16;
+ uval = strtoull(str, &endptr, base);
+ if (lpref == 0)
+ *va_arg(args, unsigned int *) = uval;
+ else if (lpref == 1)
+ *va_arg(args, unsigned long *) = uval;
+ else if (lpref == 2)
+ *va_arg(args, unsigned long long *) = uval;
+ } else if (*format == 'p') {
+ *va_arg(args, void **) = (void *)strtoul(str, &endptr, 16);
+ } else {
+ SET_ERRNO(EILSEQ);
+ goto done;
+ }
+
+ format++;
+ str = endptr;
+ matches++;
+
+ } else if (*format == '\0') {
+ goto done;
+ } else if (isspace(*format)) {
+ /* skip spaces in format and str */
+ while (isspace(*format))
+ format++;
+ while (isspace(*str))
+ str++;
+ } else if (*format == *str) {
+ /* literal match */
+ format++;
+ str++;
+ } else {
+ if (!matches)
+ matches = EOF;
+ goto done;
+ }
+ }
+
+done:
+ return matches;
+}
+
+static __attribute__((unused, format(scanf, 2, 3)))
+int sscanf(const char *str, const char *format, ...)
+{
+ va_list args;
+ int ret;
+
+ va_start(args, format);
+ ret = vsscanf(str, format, args);
+ va_end(args);
+ return ret;
+}
+
+static __attribute__((unused))
void perror(const char *msg)
{
fprintf(stderr, "%s%serrno=%d\n", (msg && *msg) ? msg : "", (msg && *msg) ? ": " : "", errno);
diff --git a/tools/include/nolibc/stdlib.h b/tools/include/nolibc/stdlib.h
index 75aa273c23a6..86ad378ab1ea 100644
--- a/tools/include/nolibc/stdlib.h
+++ b/tools/include/nolibc/stdlib.h
@@ -30,6 +30,7 @@ static __attribute__((unused)) char itoa_buffer[21];
*/
/* must be exported, as it's used by libgcc for various divide functions */
+void abort(void);
__attribute__((weak,unused,noreturn,section(".text.nolibc_abort")))
void abort(void)
{
diff --git a/tools/include/nolibc/string.h b/tools/include/nolibc/string.h
index 9ec9c24f38c0..ba84ab700e30 100644
--- a/tools/include/nolibc/string.h
+++ b/tools/include/nolibc/string.h
@@ -32,6 +32,7 @@ int memcmp(const void *s1, const void *s2, size_t n)
/* might be ignored by the compiler without -ffreestanding, then found as
* missing.
*/
+void *memmove(void *dst, const void *src, size_t len);
__attribute__((weak,unused,section(".text.nolibc_memmove")))
void *memmove(void *dst, const void *src, size_t len)
{
@@ -56,6 +57,7 @@ void *memmove(void *dst, const void *src, size_t len)
#ifndef NOLIBC_ARCH_HAS_MEMCPY
/* must be exported, as it's used by libgcc on ARM */
+void *memcpy(void *dst, const void *src, size_t len);
__attribute__((weak,unused,section(".text.nolibc_memcpy")))
void *memcpy(void *dst, const void *src, size_t len)
{
@@ -73,6 +75,7 @@ void *memcpy(void *dst, const void *src, size_t len)
/* might be ignored by the compiler without -ffreestanding, then found as
* missing.
*/
+void *memset(void *dst, int b, size_t len);
__attribute__((weak,unused,section(".text.nolibc_memset")))
void *memset(void *dst, int b, size_t len)
{
@@ -124,6 +127,7 @@ char *strcpy(char *dst, const char *src)
* thus itself, hence the asm() statement below that's meant to disable this
* confusing practice.
*/
+size_t strlen(const char *str);
__attribute__((weak,unused,section(".text.nolibc_strlen")))
size_t strlen(const char *str)
{
diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h
index d4a5c2399a66..08c1c074bec8 100644
--- a/tools/include/nolibc/sys.h
+++ b/tools/include/nolibc/sys.h
@@ -10,10 +10,10 @@
#include "std.h"
/* system includes */
-#include <asm/unistd.h>
-#include <asm/signal.h> /* for SIGCHLD */
-#include <asm/ioctls.h>
-#include <asm/mman.h>
+#include <linux/unistd.h>
+#include <linux/signal.h> /* for SIGCHLD */
+#include <linux/termios.h>
+#include <linux/mman.h>
#include <linux/fs.h>
#include <linux/loop.h>
#include <linux/time.h>
@@ -23,7 +23,6 @@
#include <linux/prctl.h>
#include <linux/resource.h>
#include <linux/utsname.h>
-#include <linux/signal.h>
#include "arch.h"
#include "errno.h"
@@ -532,20 +531,16 @@ uid_t getuid(void)
/*
- * int ioctl(int fd, unsigned long req, void *value);
+ * int ioctl(int fd, unsigned long cmd, ... arg);
*/
static __attribute__((unused))
-int sys_ioctl(int fd, unsigned long req, void *value)
+long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
{
- return my_syscall3(__NR_ioctl, fd, req, value);
+ return my_syscall3(__NR_ioctl, fd, cmd, arg);
}
-static __attribute__((unused))
-int ioctl(int fd, unsigned long req, void *value)
-{
- return __sysret(sys_ioctl(fd, req, value));
-}
+#define ioctl(fd, cmd, arg) __sysret(sys_ioctl(fd, cmd, (unsigned long)(arg)))
/*
* int kill(pid_t pid, int signal);
@@ -602,9 +597,36 @@ off_t sys_lseek(int fd, off_t offset, int whence)
}
static __attribute__((unused))
+int sys_llseek(int fd, unsigned long offset_high, unsigned long offset_low,
+ __kernel_loff_t *result, int whence)
+{
+#ifdef __NR_llseek
+ return my_syscall5(__NR_llseek, fd, offset_high, offset_low, result, whence);
+#else
+ return __nolibc_enosys(__func__, fd, offset_high, offset_low, result, whence);
+#endif
+}
+
+static __attribute__((unused))
off_t lseek(int fd, off_t offset, int whence)
{
- return __sysret(sys_lseek(fd, offset, whence));
+ __kernel_loff_t loff = 0;
+ off_t result;
+ int ret;
+
+ result = sys_lseek(fd, offset, whence);
+ if (result == -ENOSYS) {
+ /* Only exists on 32bit where nolibc off_t is also 32bit */
+ ret = sys_llseek(fd, 0, offset, &loff, whence);
+ if (ret < 0)
+ result = ret;
+ else if (loff != (off_t)loff)
+ result = -EOVERFLOW;
+ else
+ result = loff;
+ }
+
+ return __sysret(result);
}
@@ -742,6 +764,31 @@ int mount(const char *src, const char *tgt,
return __sysret(sys_mount(src, tgt, fst, flags, data));
}
+/*
+ * int openat(int dirfd, const char *path, int flags[, mode_t mode]);
+ */
+
+static __attribute__((unused))
+int sys_openat(int dirfd, const char *path, int flags, mode_t mode)
+{
+ return my_syscall4(__NR_openat, dirfd, path, flags, mode);
+}
+
+static __attribute__((unused))
+int openat(int dirfd, const char *path, int flags, ...)
+{
+ mode_t mode = 0;
+
+ if (flags & O_CREAT) {
+ va_list args;
+
+ va_start(args, flags);
+ mode = va_arg(args, mode_t);
+ va_end(args);
+ }
+
+ return __sysret(sys_openat(dirfd, path, flags, mode));
+}
/*
* int open(const char *path, int flags[, mode_t mode]);
@@ -750,13 +797,7 @@ int mount(const char *src, const char *tgt,
static __attribute__((unused))
int sys_open(const char *path, int flags, mode_t mode)
{
-#ifdef __NR_openat
return my_syscall4(__NR_openat, AT_FDCWD, path, flags, mode);
-#elif defined(__NR_open)
- return my_syscall3(__NR_open, path, flags, mode);
-#else
- return __nolibc_enosys(__func__, path, flags, mode);
-#endif
}
static __attribute__((unused))
@@ -768,7 +809,7 @@ int open(const char *path, int flags, ...)
va_list args;
va_start(args, flags);
- mode = va_arg(args, int);
+ mode = va_arg(args, mode_t);
va_end(args);
}
diff --git a/tools/include/uapi/asm-generic/socket.h b/tools/include/uapi/asm-generic/socket.h
index ffff554a5230..aa5016ff3d91 100644
--- a/tools/include/uapi/asm-generic/socket.h
+++ b/tools/include/uapi/asm-generic/socket.h
@@ -119,14 +119,31 @@
#define SO_DETACH_REUSEPORT_BPF 68
+#define SO_PREFER_BUSY_POLL 69
+#define SO_BUSY_POLL_BUDGET 70
+
+#define SO_NETNS_COOKIE 71
+
+#define SO_BUF_LOCK 72
+
+#define SO_RESERVE_MEM 73
+
+#define SO_TXREHASH 74
+
#define SO_RCVMARK 75
#define SO_PASSPIDFD 76
#define SO_PEERPIDFD 77
-#define SCM_TS_OPT_ID 78
+#define SO_DEVMEM_LINEAR 78
+#define SCM_DEVMEM_LINEAR SO_DEVMEM_LINEAR
+#define SO_DEVMEM_DMABUF 79
+#define SCM_DEVMEM_DMABUF SO_DEVMEM_DMABUF
+#define SO_DEVMEM_DONTNEED 80
+
+#define SCM_TS_OPT_ID 81
-#define SO_RCVPRIORITY 79
+#define SO_RCVPRIORITY 82
#if !defined(__KERNEL__)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 2acf9b336371..28705ae67784 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -51,6 +51,9 @@
#define BPF_XCHG (0xe0 | BPF_FETCH) /* atomic exchange */
#define BPF_CMPXCHG (0xf0 | BPF_FETCH) /* atomic compare-and-write */
+#define BPF_LOAD_ACQ 0x100 /* load-acquire */
+#define BPF_STORE_REL 0x110 /* store-release */
+
enum bpf_cond_pseudo_jmp {
BPF_MAY_GOTO = 0,
};
@@ -1207,6 +1210,7 @@ enum bpf_perf_event_type {
#define BPF_F_BEFORE (1U << 3)
#define BPF_F_AFTER (1U << 4)
#define BPF_F_ID (1U << 5)
+#define BPF_F_PREORDER (1U << 6)
#define BPF_F_LINK BPF_F_LINK /* 1 << 13 */
/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
@@ -1648,6 +1652,7 @@ union bpf_attr {
};
__u32 next_id;
__u32 open_flags;
+ __s32 fd_by_id_token_fd;
};
struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */
@@ -6019,7 +6024,10 @@ union bpf_attr {
FN(user_ringbuf_drain, 209, ##ctx) \
FN(cgrp_storage_get, 210, ##ctx) \
FN(cgrp_storage_delete, 211, ##ctx) \
- /* */
+ /* This helper list is effectively frozen. If you are trying to \
+ * add a new helper, you should add a kfunc instead which has \
+ * less stability guarantees. See Documentation/bpf/kfuncs.rst \
+ */
/* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't
* know or care about integer value that is now passed as second argument
@@ -6913,6 +6921,12 @@ enum {
BPF_SOCK_OPS_ALL_CB_FLAGS = 0x7F,
};
+enum {
+ SK_BPF_CB_TX_TIMESTAMPING = 1<<0,
+ SK_BPF_CB_MASK = (SK_BPF_CB_TX_TIMESTAMPING - 1) |
+ SK_BPF_CB_TX_TIMESTAMPING
+};
+
/* List of known BPF sock_ops operators.
* New entries can only be added at the end
*/
@@ -7025,6 +7039,29 @@ enum {
* by the kernel or the
* earlier bpf-progs.
*/
+ BPF_SOCK_OPS_TSTAMP_SCHED_CB, /* Called when skb is passing
+ * through dev layer when
+ * SK_BPF_CB_TX_TIMESTAMPING
+ * feature is on.
+ */
+ BPF_SOCK_OPS_TSTAMP_SND_SW_CB, /* Called when skb is about to send
+ * to the nic when SK_BPF_CB_TX_TIMESTAMPING
+ * feature is on.
+ */
+ BPF_SOCK_OPS_TSTAMP_SND_HW_CB, /* Called in hardware phase when
+ * SK_BPF_CB_TX_TIMESTAMPING feature
+ * is on.
+ */
+ BPF_SOCK_OPS_TSTAMP_ACK_CB, /* Called when all the skbs in the
+ * same sendmsg call are acked
+ * when SK_BPF_CB_TX_TIMESTAMPING
+ * feature is on.
+ */
+ BPF_SOCK_OPS_TSTAMP_SENDMSG_CB, /* Called when every sendmsg syscall
+ * is triggered. It's used to correlate
+ * sendmsg timestamp with corresponding
+ * tskey.
+ */
};
/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
@@ -7091,6 +7128,7 @@ enum {
TCP_BPF_SYN_IP = 1006, /* Copy the IP[46] and TCP header */
TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */
TCP_BPF_SOCK_OPS_CB_FLAGS = 1008, /* Get or Set TCP sock ops flags */
+ SK_BPF_CB_FLAGS = 1009, /* Get or set sock ops flags in socket */
};
enum {
diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h
index ec1798b6d3ff..266d4ffa6c07 100644
--- a/tools/include/uapi/linux/btf.h
+++ b/tools/include/uapi/linux/btf.h
@@ -36,7 +36,8 @@ struct btf_type {
* bits 24-28: kind (e.g. int, ptr, array...etc)
* bits 29-30: unused
* bit 31: kind_flag, currently used by
- * struct, union, enum, fwd and enum64
+ * struct, union, enum, fwd, enum64,
+ * decl_tag and type_tag
*/
__u32 info;
/* "size" is used by INT, ENUM, STRUCT, UNION, DATASEC and ENUM64.
diff --git a/tools/include/uapi/linux/const.h b/tools/include/uapi/linux/const.h
index e16be0d37746..b8f629ef135f 100644
--- a/tools/include/uapi/linux/const.h
+++ b/tools/include/uapi/linux/const.h
@@ -33,7 +33,7 @@
* Missing asm support
*
* __BIT128() would not work in the asm code, as it shifts an
- * 'unsigned __init128' data type as direct representation of
+ * 'unsigned __int128' data type as direct representation of
* 128 bit constants is not supported in the gcc compiler, as
* they get silently truncated.
*
diff --git a/tools/include/uapi/linux/elf.h b/tools/include/uapi/linux/elf.h
new file mode 100644
index 000000000000..5834b83d7f9a
--- /dev/null
+++ b/tools/include/uapi/linux/elf.h
@@ -0,0 +1,524 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _LINUX_ELF_H
+#define _LINUX_ELF_H
+
+#include <linux/types.h>
+#include <linux/elf-em.h>
+
+/* 32-bit ELF base types. */
+typedef __u32 Elf32_Addr;
+typedef __u16 Elf32_Half;
+typedef __u32 Elf32_Off;
+typedef __s32 Elf32_Sword;
+typedef __u32 Elf32_Word;
+typedef __u16 Elf32_Versym;
+
+/* 64-bit ELF base types. */
+typedef __u64 Elf64_Addr;
+typedef __u16 Elf64_Half;
+typedef __s16 Elf64_SHalf;
+typedef __u64 Elf64_Off;
+typedef __s32 Elf64_Sword;
+typedef __u32 Elf64_Word;
+typedef __u64 Elf64_Xword;
+typedef __s64 Elf64_Sxword;
+typedef __u16 Elf64_Versym;
+
+/* These constants are for the segment types stored in the image headers */
+#define PT_NULL 0
+#define PT_LOAD 1
+#define PT_DYNAMIC 2
+#define PT_INTERP 3
+#define PT_NOTE 4
+#define PT_SHLIB 5
+#define PT_PHDR 6
+#define PT_TLS 7 /* Thread local storage segment */
+#define PT_LOOS 0x60000000 /* OS-specific */
+#define PT_HIOS 0x6fffffff /* OS-specific */
+#define PT_LOPROC 0x70000000
+#define PT_HIPROC 0x7fffffff
+#define PT_GNU_EH_FRAME (PT_LOOS + 0x474e550)
+#define PT_GNU_STACK (PT_LOOS + 0x474e551)
+#define PT_GNU_RELRO (PT_LOOS + 0x474e552)
+#define PT_GNU_PROPERTY (PT_LOOS + 0x474e553)
+
+
+/* ARM MTE memory tag segment type */
+#define PT_AARCH64_MEMTAG_MTE (PT_LOPROC + 0x2)
+
+/*
+ * Extended Numbering
+ *
+ * If the real number of program header table entries is larger than
+ * or equal to PN_XNUM(0xffff), it is set to sh_info field of the
+ * section header at index 0, and PN_XNUM is set to e_phnum
+ * field. Otherwise, the section header at index 0 is zero
+ * initialized, if it exists.
+ *
+ * Specifications are available in:
+ *
+ * - Oracle: Linker and Libraries.
+ * Part No: 817–1984–19, August 2011.
+ * https://docs.oracle.com/cd/E18752_01/pdf/817-1984.pdf
+ *
+ * - System V ABI AMD64 Architecture Processor Supplement
+ * Draft Version 0.99.4,
+ * January 13, 2010.
+ * http://www.cs.washington.edu/education/courses/cse351/12wi/supp-docs/abi.pdf
+ */
+#define PN_XNUM 0xffff
+
+/* These constants define the different elf file types */
+#define ET_NONE 0
+#define ET_REL 1
+#define ET_EXEC 2
+#define ET_DYN 3
+#define ET_CORE 4
+#define ET_LOPROC 0xff00
+#define ET_HIPROC 0xffff
+
+/* This is the info that is needed to parse the dynamic section of the file */
+#define DT_NULL 0
+#define DT_NEEDED 1
+#define DT_PLTRELSZ 2
+#define DT_PLTGOT 3
+#define DT_HASH 4
+#define DT_STRTAB 5
+#define DT_SYMTAB 6
+#define DT_RELA 7
+#define DT_RELASZ 8
+#define DT_RELAENT 9
+#define DT_STRSZ 10
+#define DT_SYMENT 11
+#define DT_INIT 12
+#define DT_FINI 13
+#define DT_SONAME 14
+#define DT_RPATH 15
+#define DT_SYMBOLIC 16
+#define DT_REL 17
+#define DT_RELSZ 18
+#define DT_RELENT 19
+#define DT_PLTREL 20
+#define DT_DEBUG 21
+#define DT_TEXTREL 22
+#define DT_JMPREL 23
+#define DT_ENCODING 32
+#define OLD_DT_LOOS 0x60000000
+#define DT_LOOS 0x6000000d
+#define DT_HIOS 0x6ffff000
+#define DT_VALRNGLO 0x6ffffd00
+#define DT_VALRNGHI 0x6ffffdff
+#define DT_ADDRRNGLO 0x6ffffe00
+#define DT_GNU_HASH 0x6ffffef5
+#define DT_ADDRRNGHI 0x6ffffeff
+#define DT_VERSYM 0x6ffffff0
+#define DT_RELACOUNT 0x6ffffff9
+#define DT_RELCOUNT 0x6ffffffa
+#define DT_FLAGS_1 0x6ffffffb
+#define DT_VERDEF 0x6ffffffc
+#define DT_VERDEFNUM 0x6ffffffd
+#define DT_VERNEED 0x6ffffffe
+#define DT_VERNEEDNUM 0x6fffffff
+#define OLD_DT_HIOS 0x6fffffff
+#define DT_LOPROC 0x70000000
+#define DT_HIPROC 0x7fffffff
+
+/* This info is needed when parsing the symbol table */
+#define STB_LOCAL 0
+#define STB_GLOBAL 1
+#define STB_WEAK 2
+
+#define STN_UNDEF 0
+
+#define STT_NOTYPE 0
+#define STT_OBJECT 1
+#define STT_FUNC 2
+#define STT_SECTION 3
+#define STT_FILE 4
+#define STT_COMMON 5
+#define STT_TLS 6
+
+#define VER_FLG_BASE 0x1
+#define VER_FLG_WEAK 0x2
+
+#define ELF_ST_BIND(x) ((x) >> 4)
+#define ELF_ST_TYPE(x) ((x) & 0xf)
+#define ELF32_ST_BIND(x) ELF_ST_BIND(x)
+#define ELF32_ST_TYPE(x) ELF_ST_TYPE(x)
+#define ELF64_ST_BIND(x) ELF_ST_BIND(x)
+#define ELF64_ST_TYPE(x) ELF_ST_TYPE(x)
+
+typedef struct {
+ Elf32_Sword d_tag;
+ union {
+ Elf32_Sword d_val;
+ Elf32_Addr d_ptr;
+ } d_un;
+} Elf32_Dyn;
+
+typedef struct {
+ Elf64_Sxword d_tag; /* entry tag value */
+ union {
+ Elf64_Xword d_val;
+ Elf64_Addr d_ptr;
+ } d_un;
+} Elf64_Dyn;
+
+/* The following are used with relocations */
+#define ELF32_R_SYM(x) ((x) >> 8)
+#define ELF32_R_TYPE(x) ((x) & 0xff)
+
+#define ELF64_R_SYM(i) ((i) >> 32)
+#define ELF64_R_TYPE(i) ((i) & 0xffffffff)
+
+typedef struct elf32_rel {
+ Elf32_Addr r_offset;
+ Elf32_Word r_info;
+} Elf32_Rel;
+
+typedef struct elf64_rel {
+ Elf64_Addr r_offset; /* Location at which to apply the action */
+ Elf64_Xword r_info; /* index and type of relocation */
+} Elf64_Rel;
+
+typedef struct elf32_rela {
+ Elf32_Addr r_offset;
+ Elf32_Word r_info;
+ Elf32_Sword r_addend;
+} Elf32_Rela;
+
+typedef struct elf64_rela {
+ Elf64_Addr r_offset; /* Location at which to apply the action */
+ Elf64_Xword r_info; /* index and type of relocation */
+ Elf64_Sxword r_addend; /* Constant addend used to compute value */
+} Elf64_Rela;
+
+typedef struct elf32_sym {
+ Elf32_Word st_name;
+ Elf32_Addr st_value;
+ Elf32_Word st_size;
+ unsigned char st_info;
+ unsigned char st_other;
+ Elf32_Half st_shndx;
+} Elf32_Sym;
+
+typedef struct elf64_sym {
+ Elf64_Word st_name; /* Symbol name, index in string tbl */
+ unsigned char st_info; /* Type and binding attributes */
+ unsigned char st_other; /* No defined meaning, 0 */
+ Elf64_Half st_shndx; /* Associated section index */
+ Elf64_Addr st_value; /* Value of the symbol */
+ Elf64_Xword st_size; /* Associated symbol size */
+} Elf64_Sym;
+
+
+#define EI_NIDENT 16
+
+typedef struct elf32_hdr {
+ unsigned char e_ident[EI_NIDENT];
+ Elf32_Half e_type;
+ Elf32_Half e_machine;
+ Elf32_Word e_version;
+ Elf32_Addr e_entry; /* Entry point */
+ Elf32_Off e_phoff;
+ Elf32_Off e_shoff;
+ Elf32_Word e_flags;
+ Elf32_Half e_ehsize;
+ Elf32_Half e_phentsize;
+ Elf32_Half e_phnum;
+ Elf32_Half e_shentsize;
+ Elf32_Half e_shnum;
+ Elf32_Half e_shstrndx;
+} Elf32_Ehdr;
+
+typedef struct elf64_hdr {
+ unsigned char e_ident[EI_NIDENT]; /* ELF "magic number" */
+ Elf64_Half e_type;
+ Elf64_Half e_machine;
+ Elf64_Word e_version;
+ Elf64_Addr e_entry; /* Entry point virtual address */
+ Elf64_Off e_phoff; /* Program header table file offset */
+ Elf64_Off e_shoff; /* Section header table file offset */
+ Elf64_Word e_flags;
+ Elf64_Half e_ehsize;
+ Elf64_Half e_phentsize;
+ Elf64_Half e_phnum;
+ Elf64_Half e_shentsize;
+ Elf64_Half e_shnum;
+ Elf64_Half e_shstrndx;
+} Elf64_Ehdr;
+
+/* These constants define the permissions on sections in the program
+ header, p_flags. */
+#define PF_R 0x4
+#define PF_W 0x2
+#define PF_X 0x1
+
+typedef struct elf32_phdr {
+ Elf32_Word p_type;
+ Elf32_Off p_offset;
+ Elf32_Addr p_vaddr;
+ Elf32_Addr p_paddr;
+ Elf32_Word p_filesz;
+ Elf32_Word p_memsz;
+ Elf32_Word p_flags;
+ Elf32_Word p_align;
+} Elf32_Phdr;
+
+typedef struct elf64_phdr {
+ Elf64_Word p_type;
+ Elf64_Word p_flags;
+ Elf64_Off p_offset; /* Segment file offset */
+ Elf64_Addr p_vaddr; /* Segment virtual address */
+ Elf64_Addr p_paddr; /* Segment physical address */
+ Elf64_Xword p_filesz; /* Segment size in file */
+ Elf64_Xword p_memsz; /* Segment size in memory */
+ Elf64_Xword p_align; /* Segment alignment, file & memory */
+} Elf64_Phdr;
+
+/* sh_type */
+#define SHT_NULL 0
+#define SHT_PROGBITS 1
+#define SHT_SYMTAB 2
+#define SHT_STRTAB 3
+#define SHT_RELA 4
+#define SHT_HASH 5
+#define SHT_DYNAMIC 6
+#define SHT_NOTE 7
+#define SHT_NOBITS 8
+#define SHT_REL 9
+#define SHT_SHLIB 10
+#define SHT_DYNSYM 11
+#define SHT_NUM 12
+#define SHT_LOPROC 0x70000000
+#define SHT_HIPROC 0x7fffffff
+#define SHT_LOUSER 0x80000000
+#define SHT_HIUSER 0xffffffff
+
+/* sh_flags */
+#define SHF_WRITE 0x1
+#define SHF_ALLOC 0x2
+#define SHF_EXECINSTR 0x4
+#define SHF_RELA_LIVEPATCH 0x00100000
+#define SHF_RO_AFTER_INIT 0x00200000
+#define SHF_MASKPROC 0xf0000000
+
+/* special section indexes */
+#define SHN_UNDEF 0
+#define SHN_LORESERVE 0xff00
+#define SHN_LOPROC 0xff00
+#define SHN_HIPROC 0xff1f
+#define SHN_LIVEPATCH 0xff20
+#define SHN_ABS 0xfff1
+#define SHN_COMMON 0xfff2
+#define SHN_HIRESERVE 0xffff
+
+typedef struct elf32_shdr {
+ Elf32_Word sh_name;
+ Elf32_Word sh_type;
+ Elf32_Word sh_flags;
+ Elf32_Addr sh_addr;
+ Elf32_Off sh_offset;
+ Elf32_Word sh_size;
+ Elf32_Word sh_link;
+ Elf32_Word sh_info;
+ Elf32_Word sh_addralign;
+ Elf32_Word sh_entsize;
+} Elf32_Shdr;
+
+typedef struct elf64_shdr {
+ Elf64_Word sh_name; /* Section name, index in string tbl */
+ Elf64_Word sh_type; /* Type of section */
+ Elf64_Xword sh_flags; /* Miscellaneous section attributes */
+ Elf64_Addr sh_addr; /* Section virtual addr at execution */
+ Elf64_Off sh_offset; /* Section file offset */
+ Elf64_Xword sh_size; /* Size of section in bytes */
+ Elf64_Word sh_link; /* Index of another section */
+ Elf64_Word sh_info; /* Additional section information */
+ Elf64_Xword sh_addralign; /* Section alignment */
+ Elf64_Xword sh_entsize; /* Entry size if section holds table */
+} Elf64_Shdr;
+
+#define EI_MAG0 0 /* e_ident[] indexes */
+#define EI_MAG1 1
+#define EI_MAG2 2
+#define EI_MAG3 3
+#define EI_CLASS 4
+#define EI_DATA 5
+#define EI_VERSION 6
+#define EI_OSABI 7
+#define EI_PAD 8
+
+#define ELFMAG0 0x7f /* EI_MAG */
+#define ELFMAG1 'E'
+#define ELFMAG2 'L'
+#define ELFMAG3 'F'
+#define ELFMAG "\177ELF"
+#define SELFMAG 4
+
+#define ELFCLASSNONE 0 /* EI_CLASS */
+#define ELFCLASS32 1
+#define ELFCLASS64 2
+#define ELFCLASSNUM 3
+
+#define ELFDATANONE 0 /* e_ident[EI_DATA] */
+#define ELFDATA2LSB 1
+#define ELFDATA2MSB 2
+
+#define EV_NONE 0 /* e_version, EI_VERSION */
+#define EV_CURRENT 1
+#define EV_NUM 2
+
+#define ELFOSABI_NONE 0
+#define ELFOSABI_LINUX 3
+
+#ifndef ELF_OSABI
+#define ELF_OSABI ELFOSABI_NONE
+#endif
+
+/*
+ * Notes used in ET_CORE. Architectures export some of the arch register sets
+ * using the corresponding note types via the PTRACE_GETREGSET and
+ * PTRACE_SETREGSET requests.
+ * The note name for these types is "LINUX", except NT_PRFPREG that is named
+ * "CORE".
+ */
+#define NT_PRSTATUS 1
+#define NT_PRFPREG 2
+#define NT_PRPSINFO 3
+#define NT_TASKSTRUCT 4
+#define NT_AUXV 6
+/*
+ * Note to userspace developers: size of NT_SIGINFO note may increase
+ * in the future to accomodate more fields, don't assume it is fixed!
+ */
+#define NT_SIGINFO 0x53494749
+#define NT_FILE 0x46494c45
+#define NT_PRXFPREG 0x46e62b7f /* copied from gdb5.1/include/elf/common.h */
+#define NT_PPC_VMX 0x100 /* PowerPC Altivec/VMX registers */
+#define NT_PPC_SPE 0x101 /* PowerPC SPE/EVR registers */
+#define NT_PPC_VSX 0x102 /* PowerPC VSX registers */
+#define NT_PPC_TAR 0x103 /* Target Address Register */
+#define NT_PPC_PPR 0x104 /* Program Priority Register */
+#define NT_PPC_DSCR 0x105 /* Data Stream Control Register */
+#define NT_PPC_EBB 0x106 /* Event Based Branch Registers */
+#define NT_PPC_PMU 0x107 /* Performance Monitor Registers */
+#define NT_PPC_TM_CGPR 0x108 /* TM checkpointed GPR Registers */
+#define NT_PPC_TM_CFPR 0x109 /* TM checkpointed FPR Registers */
+#define NT_PPC_TM_CVMX 0x10a /* TM checkpointed VMX Registers */
+#define NT_PPC_TM_CVSX 0x10b /* TM checkpointed VSX Registers */
+#define NT_PPC_TM_SPR 0x10c /* TM Special Purpose Registers */
+#define NT_PPC_TM_CTAR 0x10d /* TM checkpointed Target Address Register */
+#define NT_PPC_TM_CPPR 0x10e /* TM checkpointed Program Priority Register */
+#define NT_PPC_TM_CDSCR 0x10f /* TM checkpointed Data Stream Control Register */
+#define NT_PPC_PKEY 0x110 /* Memory Protection Keys registers */
+#define NT_PPC_DEXCR 0x111 /* PowerPC DEXCR registers */
+#define NT_PPC_HASHKEYR 0x112 /* PowerPC HASHKEYR register */
+#define NT_386_TLS 0x200 /* i386 TLS slots (struct user_desc) */
+#define NT_386_IOPERM 0x201 /* x86 io permission bitmap (1=deny) */
+#define NT_X86_XSTATE 0x202 /* x86 extended state using xsave */
+/* Old binutils treats 0x203 as a CET state */
+#define NT_X86_SHSTK 0x204 /* x86 SHSTK state */
+#define NT_X86_XSAVE_LAYOUT 0x205 /* XSAVE layout description */
+#define NT_S390_HIGH_GPRS 0x300 /* s390 upper register halves */
+#define NT_S390_TIMER 0x301 /* s390 timer register */
+#define NT_S390_TODCMP 0x302 /* s390 TOD clock comparator register */
+#define NT_S390_TODPREG 0x303 /* s390 TOD programmable register */
+#define NT_S390_CTRS 0x304 /* s390 control registers */
+#define NT_S390_PREFIX 0x305 /* s390 prefix register */
+#define NT_S390_LAST_BREAK 0x306 /* s390 breaking event address */
+#define NT_S390_SYSTEM_CALL 0x307 /* s390 system call restart data */
+#define NT_S390_TDB 0x308 /* s390 transaction diagnostic block */
+#define NT_S390_VXRS_LOW 0x309 /* s390 vector registers 0-15 upper half */
+#define NT_S390_VXRS_HIGH 0x30a /* s390 vector registers 16-31 */
+#define NT_S390_GS_CB 0x30b /* s390 guarded storage registers */
+#define NT_S390_GS_BC 0x30c /* s390 guarded storage broadcast control block */
+#define NT_S390_RI_CB 0x30d /* s390 runtime instrumentation */
+#define NT_S390_PV_CPU_DATA 0x30e /* s390 protvirt cpu dump data */
+#define NT_ARM_VFP 0x400 /* ARM VFP/NEON registers */
+#define NT_ARM_TLS 0x401 /* ARM TLS register */
+#define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */
+#define NT_ARM_HW_WATCH 0x403 /* ARM hardware watchpoint registers */
+#define NT_ARM_SYSTEM_CALL 0x404 /* ARM system call number */
+#define NT_ARM_SVE 0x405 /* ARM Scalable Vector Extension registers */
+#define NT_ARM_PAC_MASK 0x406 /* ARM pointer authentication code masks */
+#define NT_ARM_PACA_KEYS 0x407 /* ARM pointer authentication address keys */
+#define NT_ARM_PACG_KEYS 0x408 /* ARM pointer authentication generic key */
+#define NT_ARM_TAGGED_ADDR_CTRL 0x409 /* arm64 tagged address control (prctl()) */
+#define NT_ARM_PAC_ENABLED_KEYS 0x40a /* arm64 ptr auth enabled keys (prctl()) */
+#define NT_ARM_SSVE 0x40b /* ARM Streaming SVE registers */
+#define NT_ARM_ZA 0x40c /* ARM SME ZA registers */
+#define NT_ARM_ZT 0x40d /* ARM SME ZT registers */
+#define NT_ARM_FPMR 0x40e /* ARM floating point mode register */
+#define NT_ARM_POE 0x40f /* ARM POE registers */
+#define NT_ARM_GCS 0x410 /* ARM GCS state */
+#define NT_ARC_V2 0x600 /* ARCv2 accumulator/extra registers */
+#define NT_VMCOREDD 0x700 /* Vmcore Device Dump Note */
+#define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */
+#define NT_MIPS_FP_MODE 0x801 /* MIPS floating-point mode */
+#define NT_MIPS_MSA 0x802 /* MIPS SIMD registers */
+#define NT_RISCV_CSR 0x900 /* RISC-V Control and Status Registers */
+#define NT_RISCV_VECTOR 0x901 /* RISC-V vector registers */
+#define NT_RISCV_TAGGED_ADDR_CTRL 0x902 /* RISC-V tagged address control (prctl()) */
+#define NT_LOONGARCH_CPUCFG 0xa00 /* LoongArch CPU config registers */
+#define NT_LOONGARCH_CSR 0xa01 /* LoongArch control and status registers */
+#define NT_LOONGARCH_LSX 0xa02 /* LoongArch Loongson SIMD Extension registers */
+#define NT_LOONGARCH_LASX 0xa03 /* LoongArch Loongson Advanced SIMD Extension registers */
+#define NT_LOONGARCH_LBT 0xa04 /* LoongArch Loongson Binary Translation registers */
+#define NT_LOONGARCH_HW_BREAK 0xa05 /* LoongArch hardware breakpoint registers */
+#define NT_LOONGARCH_HW_WATCH 0xa06 /* LoongArch hardware watchpoint registers */
+
+/* Note types with note name "GNU" */
+#define NT_GNU_PROPERTY_TYPE_0 5
+
+/* Note header in a PT_NOTE section */
+typedef struct elf32_note {
+ Elf32_Word n_namesz; /* Name size */
+ Elf32_Word n_descsz; /* Content size */
+ Elf32_Word n_type; /* Content type */
+} Elf32_Nhdr;
+
+/* Note header in a PT_NOTE section */
+typedef struct elf64_note {
+ Elf64_Word n_namesz; /* Name size */
+ Elf64_Word n_descsz; /* Content size */
+ Elf64_Word n_type; /* Content type */
+} Elf64_Nhdr;
+
+/* .note.gnu.property types for EM_AARCH64: */
+#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000
+
+/* Bits for GNU_PROPERTY_AARCH64_FEATURE_1_BTI */
+#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0)
+
+typedef struct {
+ Elf32_Half vd_version;
+ Elf32_Half vd_flags;
+ Elf32_Half vd_ndx;
+ Elf32_Half vd_cnt;
+ Elf32_Word vd_hash;
+ Elf32_Word vd_aux;
+ Elf32_Word vd_next;
+} Elf32_Verdef;
+
+typedef struct {
+ Elf64_Half vd_version;
+ Elf64_Half vd_flags;
+ Elf64_Half vd_ndx;
+ Elf64_Half vd_cnt;
+ Elf64_Word vd_hash;
+ Elf64_Word vd_aux;
+ Elf64_Word vd_next;
+} Elf64_Verdef;
+
+typedef struct {
+ Elf32_Word vda_name;
+ Elf32_Word vda_next;
+} Elf32_Verdaux;
+
+typedef struct {
+ Elf64_Word vda_name;
+ Elf64_Word vda_next;
+} Elf64_Verdaux;
+
+#endif /* _LINUX_ELF_H */
diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h
index 42ec5ddaab8d..42869770776e 100644
--- a/tools/include/uapi/linux/if_xdp.h
+++ b/tools/include/uapi/linux/if_xdp.h
@@ -127,6 +127,12 @@ struct xdp_options {
*/
#define XDP_TXMD_FLAGS_CHECKSUM (1 << 1)
+/* Request launch time hardware offload. The device will schedule the packet for
+ * transmission at a pre-determined time called launch time. The value of
+ * launch time is communicated via launch_time field of struct xsk_tx_metadata.
+ */
+#define XDP_TXMD_FLAGS_LAUNCH_TIME (1 << 2)
+
/* AF_XDP offloads request. 'request' union member is consumed by the driver
* when the packet is being transmitted. 'completion' union member is
* filled by the driver when the transmit completion arrives.
@@ -142,6 +148,10 @@ struct xsk_tx_metadata {
__u16 csum_start;
/* Offset from csum_start where checksum should be stored. */
__u16 csum_offset;
+
+ /* XDP_TXMD_FLAGS_LAUNCH_TIME */
+ /* Launch time in nanosecond against the PTP HW Clock */
+ __u64 launch_time;
} request;
struct {
diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h
index e4be227d3ad6..7600bf62dbdf 100644
--- a/tools/include/uapi/linux/netdev.h
+++ b/tools/include/uapi/linux/netdev.h
@@ -59,10 +59,13 @@ enum netdev_xdp_rx_metadata {
* by the driver.
* @NETDEV_XSK_FLAGS_TX_CHECKSUM: L3 checksum HW offload is supported by the
* driver.
+ * @NETDEV_XSK_FLAGS_TX_LAUNCH_TIME_FIFO: Launch time HW offload is supported
+ * by the driver.
*/
enum netdev_xsk_flags {
NETDEV_XSK_FLAGS_TX_TIMESTAMP = 1,
NETDEV_XSK_FLAGS_TX_CHECKSUM = 2,
+ NETDEV_XSK_FLAGS_TX_LAUNCH_TIME_FIFO = 4,
};
enum netdev_queue_type {
@@ -87,6 +90,11 @@ enum {
};
enum {
+ __NETDEV_A_IO_URING_PROVIDER_INFO_MAX,
+ NETDEV_A_IO_URING_PROVIDER_INFO_MAX = (__NETDEV_A_IO_URING_PROVIDER_INFO_MAX - 1)
+};
+
+enum {
NETDEV_A_PAGE_POOL_ID = 1,
NETDEV_A_PAGE_POOL_IFINDEX,
NETDEV_A_PAGE_POOL_NAPI_ID,
@@ -94,6 +102,7 @@ enum {
NETDEV_A_PAGE_POOL_INFLIGHT_MEM,
NETDEV_A_PAGE_POOL_DETACH_TIME,
NETDEV_A_PAGE_POOL_DMABUF,
+ NETDEV_A_PAGE_POOL_IO_URING,
__NETDEV_A_PAGE_POOL_MAX,
NETDEV_A_PAGE_POOL_MAX = (__NETDEV_A_PAGE_POOL_MAX - 1)
@@ -131,11 +140,18 @@ enum {
};
enum {
+ __NETDEV_A_XSK_INFO_MAX,
+ NETDEV_A_XSK_INFO_MAX = (__NETDEV_A_XSK_INFO_MAX - 1)
+};
+
+enum {
NETDEV_A_QUEUE_ID = 1,
NETDEV_A_QUEUE_IFINDEX,
NETDEV_A_QUEUE_TYPE,
NETDEV_A_QUEUE_NAPI_ID,
NETDEV_A_QUEUE_DMABUF,
+ NETDEV_A_QUEUE_IO_URING,
+ NETDEV_A_QUEUE_XSK,
__NETDEV_A_QUEUE_MAX,
NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1)
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 857a5f7b413d..168140f8e646 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -53,13 +53,6 @@ include $(srctree)/tools/scripts/Makefile.include
# copy a bit from Linux kbuild
-ifeq ("$(origin V)", "command line")
- VERBOSE = $(V)
-endif
-ifndef VERBOSE
- VERBOSE = 0
-endif
-
INCLUDES = -I$(or $(OUTPUT),.) \
-I$(srctree)/tools/include -I$(srctree)/tools/include/uapi \
-I$(srctree)/tools/arch/$(SRCARCH)/include
@@ -96,12 +89,6 @@ override CFLAGS += $(CLANG_CROSS_FLAGS)
# flags specific for shared library
SHLIB_FLAGS := -DSHARED -fPIC
-ifeq ($(VERBOSE),1)
- Q =
-else
- Q = @
-endif
-
# Disable command line variables (CFLAGS) override from top
# level Makefile (perf), otherwise build Makefile will get
# the same command line setup.
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 359f73ead613..a9c3e33d0f8a 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -1097,7 +1097,7 @@ int bpf_map_get_fd_by_id(__u32 id)
int bpf_btf_get_fd_by_id_opts(__u32 id,
const struct bpf_get_fd_by_id_opts *opts)
{
- const size_t attr_sz = offsetofend(union bpf_attr, open_flags);
+ const size_t attr_sz = offsetofend(union bpf_attr, fd_by_id_token_fd);
union bpf_attr attr;
int fd;
@@ -1107,6 +1107,7 @@ int bpf_btf_get_fd_by_id_opts(__u32 id,
memset(&attr, 0, attr_sz);
attr.btf_id = id;
attr.open_flags = OPTS_GET(opts, open_flags, 0);
+ attr.fd_by_id_token_fd = OPTS_GET(opts, token_fd, 0);
fd = sys_bpf_fd(BPF_BTF_GET_FD_BY_ID, &attr, attr_sz);
return libbpf_err_errno(fd);
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 435da95d2058..777627d33d25 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -487,9 +487,10 @@ LIBBPF_API int bpf_link_get_next_id(__u32 start_id, __u32 *next_id);
struct bpf_get_fd_by_id_opts {
size_t sz; /* size of this struct for forward/backward compatibility */
__u32 open_flags; /* permissions requested for the operation on fd */
+ __u32 token_fd;
size_t :0;
};
-#define bpf_get_fd_by_id_opts__last_field open_flags
+#define bpf_get_fd_by_id_opts__last_field token_fd
LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id);
LIBBPF_API int bpf_prog_get_fd_by_id_opts(__u32 id,
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 48c66f3a9200..38bc6b14b066 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1619,12 +1619,18 @@ exit_free:
return btf;
}
-struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf)
+struct btf *btf_load_from_kernel(__u32 id, struct btf *base_btf, int token_fd)
{
struct btf *btf;
int btf_fd;
+ LIBBPF_OPTS(bpf_get_fd_by_id_opts, opts);
+
+ if (token_fd) {
+ opts.open_flags |= BPF_F_TOKEN_FD;
+ opts.token_fd = token_fd;
+ }
- btf_fd = bpf_btf_get_fd_by_id(id);
+ btf_fd = bpf_btf_get_fd_by_id_opts(id, &opts);
if (btf_fd < 0)
return libbpf_err_ptr(-errno);
@@ -1634,6 +1640,11 @@ struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf)
return libbpf_ptr(btf);
}
+struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf)
+{
+ return btf_load_from_kernel(id, base_btf, 0);
+}
+
struct btf *btf__load_from_kernel_by_id(__u32 id)
{
return btf__load_from_kernel_by_id_split(id, NULL);
@@ -2090,7 +2101,7 @@ static int validate_type_id(int id)
}
/* generic append function for PTR, TYPEDEF, CONST/VOLATILE/RESTRICT */
-static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref_type_id)
+static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref_type_id, int kflag)
{
struct btf_type *t;
int sz, name_off = 0;
@@ -2113,7 +2124,7 @@ static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref
}
t->name_off = name_off;
- t->info = btf_type_info(kind, 0, 0);
+ t->info = btf_type_info(kind, 0, kflag);
t->type = ref_type_id;
return btf_commit_type(btf, sz);
@@ -2128,7 +2139,7 @@ static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref
*/
int btf__add_ptr(struct btf *btf, int ref_type_id)
{
- return btf_add_ref_kind(btf, BTF_KIND_PTR, NULL, ref_type_id);
+ return btf_add_ref_kind(btf, BTF_KIND_PTR, NULL, ref_type_id, 0);
}
/*
@@ -2506,7 +2517,7 @@ int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind)
struct btf_type *t;
int id;
- id = btf_add_ref_kind(btf, BTF_KIND_FWD, name, 0);
+ id = btf_add_ref_kind(btf, BTF_KIND_FWD, name, 0, 0);
if (id <= 0)
return id;
t = btf_type_by_id(btf, id);
@@ -2536,7 +2547,7 @@ int btf__add_typedef(struct btf *btf, const char *name, int ref_type_id)
if (!name || !name[0])
return libbpf_err(-EINVAL);
- return btf_add_ref_kind(btf, BTF_KIND_TYPEDEF, name, ref_type_id);
+ return btf_add_ref_kind(btf, BTF_KIND_TYPEDEF, name, ref_type_id, 0);
}
/*
@@ -2548,7 +2559,7 @@ int btf__add_typedef(struct btf *btf, const char *name, int ref_type_id)
*/
int btf__add_volatile(struct btf *btf, int ref_type_id)
{
- return btf_add_ref_kind(btf, BTF_KIND_VOLATILE, NULL, ref_type_id);
+ return btf_add_ref_kind(btf, BTF_KIND_VOLATILE, NULL, ref_type_id, 0);
}
/*
@@ -2560,7 +2571,7 @@ int btf__add_volatile(struct btf *btf, int ref_type_id)
*/
int btf__add_const(struct btf *btf, int ref_type_id)
{
- return btf_add_ref_kind(btf, BTF_KIND_CONST, NULL, ref_type_id);
+ return btf_add_ref_kind(btf, BTF_KIND_CONST, NULL, ref_type_id, 0);
}
/*
@@ -2572,7 +2583,7 @@ int btf__add_const(struct btf *btf, int ref_type_id)
*/
int btf__add_restrict(struct btf *btf, int ref_type_id)
{
- return btf_add_ref_kind(btf, BTF_KIND_RESTRICT, NULL, ref_type_id);
+ return btf_add_ref_kind(btf, BTF_KIND_RESTRICT, NULL, ref_type_id, 0);
}
/*
@@ -2588,7 +2599,24 @@ int btf__add_type_tag(struct btf *btf, const char *value, int ref_type_id)
if (!value || !value[0])
return libbpf_err(-EINVAL);
- return btf_add_ref_kind(btf, BTF_KIND_TYPE_TAG, value, ref_type_id);
+ return btf_add_ref_kind(btf, BTF_KIND_TYPE_TAG, value, ref_type_id, 0);
+}
+
+/*
+ * Append new BTF_KIND_TYPE_TAG type with:
+ * - *value*, non-empty/non-NULL tag value;
+ * - *ref_type_id* - referenced type ID, it might not exist yet;
+ * Set info->kflag to 1, indicating this tag is an __attribute__
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_type_attr(struct btf *btf, const char *value, int ref_type_id)
+{
+ if (!value || !value[0])
+ return libbpf_err(-EINVAL);
+
+ return btf_add_ref_kind(btf, BTF_KIND_TYPE_TAG, value, ref_type_id, 1);
}
/*
@@ -2610,7 +2638,7 @@ int btf__add_func(struct btf *btf, const char *name,
linkage != BTF_FUNC_EXTERN)
return libbpf_err(-EINVAL);
- id = btf_add_ref_kind(btf, BTF_KIND_FUNC, name, proto_type_id);
+ id = btf_add_ref_kind(btf, BTF_KIND_FUNC, name, proto_type_id, 0);
if (id > 0) {
struct btf_type *t = btf_type_by_id(btf, id);
@@ -2845,18 +2873,8 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __
return 0;
}
-/*
- * Append new BTF_KIND_DECL_TAG type with:
- * - *value* - non-empty/non-NULL string;
- * - *ref_type_id* - referenced type ID, it might not exist yet;
- * - *component_idx* - -1 for tagging reference type, otherwise struct/union
- * member or function argument index;
- * Returns:
- * - >0, type ID of newly added BTF type;
- * - <0, on error.
- */
-int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id,
- int component_idx)
+static int btf_add_decl_tag(struct btf *btf, const char *value, int ref_type_id,
+ int component_idx, int kflag)
{
struct btf_type *t;
int sz, value_off;
@@ -2880,13 +2898,46 @@ int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id,
return value_off;
t->name_off = value_off;
- t->info = btf_type_info(BTF_KIND_DECL_TAG, 0, false);
+ t->info = btf_type_info(BTF_KIND_DECL_TAG, 0, kflag);
t->type = ref_type_id;
btf_decl_tag(t)->component_idx = component_idx;
return btf_commit_type(btf, sz);
}
+/*
+ * Append new BTF_KIND_DECL_TAG type with:
+ * - *value* - non-empty/non-NULL string;
+ * - *ref_type_id* - referenced type ID, it might not exist yet;
+ * - *component_idx* - -1 for tagging reference type, otherwise struct/union
+ * member or function argument index;
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id,
+ int component_idx)
+{
+ return btf_add_decl_tag(btf, value, ref_type_id, component_idx, 0);
+}
+
+/*
+ * Append new BTF_KIND_DECL_TAG type with:
+ * - *value* - non-empty/non-NULL string;
+ * - *ref_type_id* - referenced type ID, it might not exist yet;
+ * - *component_idx* - -1 for tagging reference type, otherwise struct/union
+ * member or function argument index;
+ * Set info->kflag to 1, indicating this tag is an __attribute__
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_decl_attr(struct btf *btf, const char *value, int ref_type_id,
+ int component_idx)
+{
+ return btf_add_decl_tag(btf, value, ref_type_id, component_idx, 1);
+}
+
struct btf_ext_sec_info_param {
__u32 off;
__u32 len;
@@ -3015,8 +3066,6 @@ static int btf_ext_parse_info(struct btf_ext *btf_ext, bool is_native)
.desc = "line_info",
};
struct btf_ext_sec_info_param core_relo = {
- .off = btf_ext->hdr->core_relo_off,
- .len = btf_ext->hdr->core_relo_len,
.min_rec_size = sizeof(struct bpf_core_relo),
.ext_info = &btf_ext->core_relo_info,
.desc = "core_relo",
@@ -3034,6 +3083,8 @@ static int btf_ext_parse_info(struct btf_ext *btf_ext, bool is_native)
if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len))
return 0; /* skip core relos parsing */
+ core_relo.off = btf_ext->hdr->core_relo_off;
+ core_relo.len = btf_ext->hdr->core_relo_len;
err = btf_ext_parse_sec_info(btf_ext, &core_relo, is_native);
if (err)
return err;
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 47ee8f6ac489..4392451d634b 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -227,6 +227,7 @@ LIBBPF_API int btf__add_volatile(struct btf *btf, int ref_type_id);
LIBBPF_API int btf__add_const(struct btf *btf, int ref_type_id);
LIBBPF_API int btf__add_restrict(struct btf *btf, int ref_type_id);
LIBBPF_API int btf__add_type_tag(struct btf *btf, const char *value, int ref_type_id);
+LIBBPF_API int btf__add_type_attr(struct btf *btf, const char *value, int ref_type_id);
/* func and func_proto construction APIs */
LIBBPF_API int btf__add_func(struct btf *btf, const char *name,
@@ -243,6 +244,8 @@ LIBBPF_API int btf__add_datasec_var_info(struct btf *btf, int var_type_id,
/* tag construction API */
LIBBPF_API int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id,
int component_idx);
+LIBBPF_API int btf__add_decl_attr(struct btf *btf, const char *value, int ref_type_id,
+ int component_idx);
struct btf_dedup_opts {
size_t sz;
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index a3fc6908f6c9..460c3e57fadb 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -1494,7 +1494,10 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
case BTF_KIND_TYPE_TAG:
btf_dump_emit_mods(d, decls);
name = btf_name_of(d, t->name_off);
- btf_dump_printf(d, " __attribute__((btf_type_tag(\"%s\")))", name);
+ if (btf_kflag(t))
+ btf_dump_printf(d, " __attribute__((%s))", name);
+ else
+ btf_dump_printf(d, " __attribute__((btf_type_tag(\"%s\")))", name);
break;
case BTF_KIND_ARRAY: {
const struct btf_array *a = btf_array(t);
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 194809da5172..6b85060f07b3 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -670,11 +670,18 @@ struct elf_state {
struct usdt_manager;
+enum bpf_object_state {
+ OBJ_OPEN,
+ OBJ_PREPARED,
+ OBJ_LOADED,
+};
+
struct bpf_object {
char name[BPF_OBJ_NAME_LEN];
char license[64];
__u32 kern_version;
+ enum bpf_object_state state;
struct bpf_program *programs;
size_t nr_programs;
struct bpf_map *maps;
@@ -686,7 +693,6 @@ struct bpf_object {
int nr_extern;
int kconfig_map_idx;
- bool loaded;
bool has_subcalls;
bool has_rodata;
@@ -1511,7 +1517,7 @@ static struct bpf_object *bpf_object__new(const char *path,
obj->kconfig_map_idx = -1;
obj->kern_version = get_kernel_version();
- obj->loaded = false;
+ obj->state = OBJ_OPEN;
return obj;
}
@@ -2106,7 +2112,7 @@ static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val,
}
len = strlen(value);
- if (value[len - 1] != '"') {
+ if (len < 2 || value[len - 1] != '"') {
pr_warn("extern (kcfg) '%s': invalid string config '%s'\n",
ext->name, value);
return -EINVAL;
@@ -4845,6 +4851,11 @@ static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info)
return 0;
}
+static bool map_is_created(const struct bpf_map *map)
+{
+ return map->obj->state >= OBJ_PREPARED || map->reused;
+}
+
bool bpf_map__autocreate(const struct bpf_map *map)
{
return map->autocreate;
@@ -4852,7 +4863,7 @@ bool bpf_map__autocreate(const struct bpf_map *map)
int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate)
{
- if (map->obj->loaded)
+ if (map_is_created(map))
return libbpf_err(-EBUSY);
map->autocreate = autocreate;
@@ -4946,7 +4957,7 @@ struct bpf_map *bpf_map__inner_map(struct bpf_map *map)
int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
{
- if (map->obj->loaded)
+ if (map_is_created(map))
return libbpf_err(-EBUSY);
map->def.max_entries = max_entries;
@@ -5191,11 +5202,6 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
static void bpf_map__destroy(struct bpf_map *map);
-static bool map_is_created(const struct bpf_map *map)
-{
- return map->obj->loaded || map->reused;
-}
-
static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner)
{
LIBBPF_OPTS(bpf_map_create_opts, create_attr);
@@ -7897,13 +7903,6 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level)
for (i = 0; i < obj->nr_programs; i++) {
prog = &obj->programs[i];
- err = bpf_object__sanitize_prog(obj, prog);
- if (err)
- return err;
- }
-
- for (i = 0; i < obj->nr_programs; i++) {
- prog = &obj->programs[i];
if (prog_is_subprog(obj, prog))
continue;
if (!prog->autoload) {
@@ -7927,6 +7926,21 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level)
return 0;
}
+static int bpf_object_prepare_progs(struct bpf_object *obj)
+{
+ struct bpf_program *prog;
+ size_t i;
+ int err;
+
+ for (i = 0; i < obj->nr_programs; i++) {
+ prog = &obj->programs[i];
+ err = bpf_object__sanitize_prog(obj, prog);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
static const struct bpf_sec_def *find_sec_def(const char *sec_name);
static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object_open_opts *opts)
@@ -8543,14 +8557,77 @@ static int bpf_object_prepare_struct_ops(struct bpf_object *obj)
return 0;
}
+static void bpf_object_unpin(struct bpf_object *obj)
+{
+ int i;
+
+ /* unpin any maps that were auto-pinned during load */
+ for (i = 0; i < obj->nr_maps; i++)
+ if (obj->maps[i].pinned && !obj->maps[i].reused)
+ bpf_map__unpin(&obj->maps[i], NULL);
+}
+
+static void bpf_object_post_load_cleanup(struct bpf_object *obj)
+{
+ int i;
+
+ /* clean up fd_array */
+ zfree(&obj->fd_array);
+
+ /* clean up module BTFs */
+ for (i = 0; i < obj->btf_module_cnt; i++) {
+ close(obj->btf_modules[i].fd);
+ btf__free(obj->btf_modules[i].btf);
+ free(obj->btf_modules[i].name);
+ }
+ obj->btf_module_cnt = 0;
+ zfree(&obj->btf_modules);
+
+ /* clean up vmlinux BTF */
+ btf__free(obj->btf_vmlinux);
+ obj->btf_vmlinux = NULL;
+}
+
+static int bpf_object_prepare(struct bpf_object *obj, const char *target_btf_path)
+{
+ int err;
+
+ if (obj->state >= OBJ_PREPARED) {
+ pr_warn("object '%s': prepare loading can't be attempted twice\n", obj->name);
+ return -EINVAL;
+ }
+
+ err = bpf_object_prepare_token(obj);
+ err = err ? : bpf_object__probe_loading(obj);
+ err = err ? : bpf_object__load_vmlinux_btf(obj, false);
+ err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
+ err = err ? : bpf_object__sanitize_maps(obj);
+ err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
+ err = err ? : bpf_object_adjust_struct_ops_autoload(obj);
+ err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path);
+ err = err ? : bpf_object__sanitize_and_load_btf(obj);
+ err = err ? : bpf_object__create_maps(obj);
+ err = err ? : bpf_object_prepare_progs(obj);
+
+ if (err) {
+ bpf_object_unpin(obj);
+ bpf_object_unload(obj);
+ obj->state = OBJ_LOADED;
+ return err;
+ }
+
+ obj->state = OBJ_PREPARED;
+ return 0;
+}
+
static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path)
{
- int err, i;
+ int err;
if (!obj)
return libbpf_err(-EINVAL);
- if (obj->loaded) {
+ if (obj->state >= OBJ_LOADED) {
pr_warn("object '%s': load can't be attempted twice\n", obj->name);
return libbpf_err(-EINVAL);
}
@@ -8565,17 +8642,12 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch
return libbpf_err(-LIBBPF_ERRNO__ENDIAN);
}
- err = bpf_object_prepare_token(obj);
- err = err ? : bpf_object__probe_loading(obj);
- err = err ? : bpf_object__load_vmlinux_btf(obj, false);
- err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
- err = err ? : bpf_object__sanitize_maps(obj);
- err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
- err = err ? : bpf_object_adjust_struct_ops_autoload(obj);
- err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path);
- err = err ? : bpf_object__sanitize_and_load_btf(obj);
- err = err ? : bpf_object__create_maps(obj);
- err = err ? : bpf_object__load_progs(obj, extra_log_level);
+ if (obj->state < OBJ_PREPARED) {
+ err = bpf_object_prepare(obj, target_btf_path);
+ if (err)
+ return libbpf_err(err);
+ }
+ err = bpf_object__load_progs(obj, extra_log_level);
err = err ? : bpf_object_init_prog_arrays(obj);
err = err ? : bpf_object_prepare_struct_ops(obj);
@@ -8587,36 +8659,22 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch
err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps);
}
- /* clean up fd_array */
- zfree(&obj->fd_array);
+ bpf_object_post_load_cleanup(obj);
+ obj->state = OBJ_LOADED; /* doesn't matter if successfully or not */
- /* clean up module BTFs */
- for (i = 0; i < obj->btf_module_cnt; i++) {
- close(obj->btf_modules[i].fd);
- btf__free(obj->btf_modules[i].btf);
- free(obj->btf_modules[i].name);
+ if (err) {
+ bpf_object_unpin(obj);
+ bpf_object_unload(obj);
+ pr_warn("failed to load object '%s'\n", obj->path);
+ return libbpf_err(err);
}
- free(obj->btf_modules);
-
- /* clean up vmlinux BTF */
- btf__free(obj->btf_vmlinux);
- obj->btf_vmlinux = NULL;
-
- obj->loaded = true; /* doesn't matter if successfully or not */
-
- if (err)
- goto out;
return 0;
-out:
- /* unpin any maps that were auto-pinned during load */
- for (i = 0; i < obj->nr_maps; i++)
- if (obj->maps[i].pinned && !obj->maps[i].reused)
- bpf_map__unpin(&obj->maps[i], NULL);
+}
- bpf_object_unload(obj);
- pr_warn("failed to load object '%s'\n", obj->path);
- return libbpf_err(err);
+int bpf_object__prepare(struct bpf_object *obj)
+{
+ return libbpf_err(bpf_object_prepare(obj, NULL));
}
int bpf_object__load(struct bpf_object *obj)
@@ -8866,7 +8924,7 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
if (!obj)
return libbpf_err(-ENOENT);
- if (!obj->loaded) {
+ if (obj->state < OBJ_PREPARED) {
pr_warn("object not yet loaded; load it first\n");
return libbpf_err(-ENOENT);
}
@@ -8945,7 +9003,7 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
if (!obj)
return libbpf_err(-ENOENT);
- if (!obj->loaded) {
+ if (obj->state < OBJ_LOADED) {
pr_warn("object not yet loaded; load it first\n");
return libbpf_err(-ENOENT);
}
@@ -9064,6 +9122,13 @@ void bpf_object__close(struct bpf_object *obj)
if (IS_ERR_OR_NULL(obj))
return;
+ /*
+ * if user called bpf_object__prepare() without ever getting to
+ * bpf_object__load(), we need to clean up stuff that is normally
+ * cleaned up at the end of loading step
+ */
+ bpf_object_post_load_cleanup(obj);
+
usdt_manager_free(obj->usdt_man);
obj->usdt_man = NULL;
@@ -9132,7 +9197,7 @@ int bpf_object__btf_fd(const struct bpf_object *obj)
int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version)
{
- if (obj->loaded)
+ if (obj->state >= OBJ_LOADED)
return libbpf_err(-EINVAL);
obj->kern_version = kern_version;
@@ -9145,12 +9210,12 @@ int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts)
struct bpf_gen *gen;
if (!opts)
- return -EFAULT;
+ return libbpf_err(-EFAULT);
if (!OPTS_VALID(opts, gen_loader_opts))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
gen = calloc(sizeof(*gen), 1);
if (!gen)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
gen->opts = opts;
gen->swapped_endian = !is_native_endianness(obj);
obj->gen_loader = gen;
@@ -9229,7 +9294,7 @@ bool bpf_program__autoload(const struct bpf_program *prog)
int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
{
- if (prog->obj->loaded)
+ if (prog->obj->state >= OBJ_LOADED)
return libbpf_err(-EINVAL);
prog->autoload = autoload;
@@ -9261,14 +9326,14 @@ int bpf_program__set_insns(struct bpf_program *prog,
{
struct bpf_insn *insns;
- if (prog->obj->loaded)
- return -EBUSY;
+ if (prog->obj->state >= OBJ_LOADED)
+ return libbpf_err(-EBUSY);
insns = libbpf_reallocarray(prog->insns, new_insn_cnt, sizeof(*insns));
/* NULL is a valid return from reallocarray if the new count is zero */
if (!insns && new_insn_cnt) {
pr_warn("prog '%s': failed to realloc prog code\n", prog->name);
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
}
memcpy(insns, new_insns, new_insn_cnt * sizeof(*insns));
@@ -9304,7 +9369,7 @@ static int last_custom_sec_def_handler_id;
int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
{
- if (prog->obj->loaded)
+ if (prog->obj->state >= OBJ_LOADED)
return libbpf_err(-EBUSY);
/* if type is not changed, do nothing */
@@ -9335,7 +9400,7 @@ enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program
int bpf_program__set_expected_attach_type(struct bpf_program *prog,
enum bpf_attach_type type)
{
- if (prog->obj->loaded)
+ if (prog->obj->state >= OBJ_LOADED)
return libbpf_err(-EBUSY);
prog->expected_attach_type = type;
@@ -9349,7 +9414,7 @@ __u32 bpf_program__flags(const struct bpf_program *prog)
int bpf_program__set_flags(struct bpf_program *prog, __u32 flags)
{
- if (prog->obj->loaded)
+ if (prog->obj->state >= OBJ_LOADED)
return libbpf_err(-EBUSY);
prog->prog_flags = flags;
@@ -9363,7 +9428,7 @@ __u32 bpf_program__log_level(const struct bpf_program *prog)
int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level)
{
- if (prog->obj->loaded)
+ if (prog->obj->state >= OBJ_LOADED)
return libbpf_err(-EBUSY);
prog->log_level = log_level;
@@ -9379,11 +9444,11 @@ const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_siz
int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size)
{
if (log_size && !log_buf)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (prog->log_size > UINT_MAX)
- return -EINVAL;
- if (prog->obj->loaded)
- return -EBUSY;
+ return libbpf_err(-EINVAL);
+ if (prog->obj->state >= OBJ_LOADED)
+ return libbpf_err(-EBUSY);
prog->log_buf = log_buf;
prog->log_size = log_size;
@@ -9959,7 +10024,7 @@ int libbpf_find_vmlinux_btf_id(const char *name,
return libbpf_err(err);
}
-static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
+static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd, int token_fd)
{
struct bpf_prog_info info;
__u32 info_len = sizeof(info);
@@ -9979,7 +10044,7 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
pr_warn("The target program doesn't have BTF\n");
goto out;
}
- btf = btf__load_from_kernel_by_id(info.btf_id);
+ btf = btf_load_from_kernel(info.btf_id, NULL, token_fd);
err = libbpf_get_error(btf);
if (err) {
pr_warn("Failed to get BTF %d of the program: %s\n", info.btf_id, errstr(err));
@@ -10062,7 +10127,7 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attac
pr_warn("prog '%s': attach program FD is not set\n", prog->name);
return -EINVAL;
}
- err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd);
+ err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd, prog->obj->token_fd);
if (err < 0) {
pr_warn("prog '%s': failed to find BPF program (FD %d) BTF ID for '%s': %s\n",
prog->name, attach_prog_fd, attach_name, errstr(err));
@@ -10299,7 +10364,7 @@ static int map_btf_datasec_resize(struct bpf_map *map, __u32 size)
int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
{
- if (map->obj->loaded || map->reused)
+ if (map_is_created(map))
return libbpf_err(-EBUSY);
if (map->mmaped) {
@@ -10307,7 +10372,7 @@ int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
int err;
if (map->def.type != BPF_MAP_TYPE_ARRAY)
- return -EOPNOTSUPP;
+ return libbpf_err(-EOPNOTSUPP);
mmap_old_sz = bpf_map_mmap_sz(map);
mmap_new_sz = array_map_mmap_sz(size, map->def.max_entries);
@@ -10315,7 +10380,7 @@ int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
if (err) {
pr_warn("map '%s': failed to resize memory-mapped region: %s\n",
bpf_map__name(map), errstr(err));
- return err;
+ return libbpf_err(err);
}
err = map_btf_datasec_resize(map, size);
if (err && err != -ENOENT) {
@@ -10345,7 +10410,7 @@ int bpf_map__set_initial_value(struct bpf_map *map,
{
size_t actual_sz;
- if (map->obj->loaded || map->reused)
+ if (map_is_created(map))
return libbpf_err(-EBUSY);
if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG)
@@ -12858,7 +12923,7 @@ struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog,
if (target_fd) {
LIBBPF_OPTS(bpf_link_create_opts, target_opts);
- btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd);
+ btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd, prog->obj->token_fd);
if (btf_id < 0)
return libbpf_err_ptr(btf_id);
@@ -13070,17 +13135,17 @@ int bpf_link__update_map(struct bpf_link *link, const struct bpf_map *map)
int err;
if (!bpf_map__is_struct_ops(map))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (map->fd < 0) {
pr_warn("map '%s': can't use BPF map without FD (was it created?)\n", map->name);
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
st_ops_link = container_of(link, struct bpf_link_struct_ops, link);
/* Ensure the type of a link is correct */
if (st_ops_link->map_fd < 0)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
/* It can be EBUSY if the map has been used to create or
@@ -13666,7 +13731,7 @@ int bpf_program__set_attach_target(struct bpf_program *prog,
if (!prog || attach_prog_fd < 0)
return libbpf_err(-EINVAL);
- if (prog->obj->loaded)
+ if (prog->obj->state >= OBJ_LOADED)
return libbpf_err(-EINVAL);
if (attach_prog_fd && !attach_func_name) {
@@ -13679,7 +13744,7 @@ int bpf_program__set_attach_target(struct bpf_program *prog,
if (attach_prog_fd) {
btf_id = libbpf_find_prog_btf_id(attach_func_name,
- attach_prog_fd);
+ attach_prog_fd, prog->obj->token_fd);
if (btf_id < 0)
return libbpf_err(btf_id);
} else {
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 3020ee45303a..e0605403f977 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -242,6 +242,19 @@ bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
const struct bpf_object_open_opts *opts);
/**
+ * @brief **bpf_object__prepare()** prepares BPF object for loading:
+ * performs ELF processing, relocations, prepares final state of BPF program
+ * instructions (accessible with bpf_program__insns()), creates and
+ * (potentially) pins maps. Leaves BPF object in the state ready for program
+ * loading.
+ * @param obj Pointer to a valid BPF object instance returned by
+ * **bpf_object__open*()** API
+ * @return 0, on success; negative error code, otherwise, error code is
+ * stored in errno
+ */
+int bpf_object__prepare(struct bpf_object *obj);
+
+/**
* @brief **bpf_object__load()** loads BPF object into kernel.
* @param obj Pointer to a valid BPF object instance returned by
* **bpf_object__open*()** APIs
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index a8b2936a1646..d8b71f22f197 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -436,4 +436,7 @@ LIBBPF_1.6.0 {
bpf_linker__add_buf;
bpf_linker__add_fd;
bpf_linker__new_fd;
+ bpf_object__prepare;
+ btf__add_decl_attr;
+ btf__add_type_attr;
} LIBBPF_1.5.0;
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index de498e2dd6b0..76669c73dcd1 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -409,6 +409,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
int btf_load_into_kernel(struct btf *btf,
char *log_buf, size_t log_sz, __u32 log_level,
int token_fd);
+struct btf *btf_load_from_kernel(__u32 id, struct btf *base_btf, int token_fd);
struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf);
void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index b52f71c59616..800e0ef09c37 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -2163,7 +2163,7 @@ add_sym:
obj->sym_map[src_sym_idx] = dst_sym_idx;
- if (sym_type == STT_SECTION && dst_sym) {
+ if (sym_type == STT_SECTION && dst_sec) {
dst_sec->sec_sym_idx = dst_sym_idx;
dst_sym->st_value = 0;
}
diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c
index 7632e9d41827..2b83c98a1137 100644
--- a/tools/lib/bpf/relo_core.c
+++ b/tools/lib/bpf/relo_core.c
@@ -683,7 +683,7 @@ static int bpf_core_calc_field_relo(const char *prog_name,
{
const struct bpf_core_accessor *acc;
const struct btf_type *t;
- __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id;
+ __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id, elem_id;
const struct btf_member *m;
const struct btf_type *mt;
bool bitfield;
@@ -706,8 +706,14 @@ static int bpf_core_calc_field_relo(const char *prog_name,
if (!acc->name) {
if (relo->kind == BPF_CORE_FIELD_BYTE_OFFSET) {
*val = spec->bit_offset / 8;
- /* remember field size for load/store mem size */
- sz = btf__resolve_size(spec->btf, acc->type_id);
+ /* remember field size for load/store mem size;
+ * note, for arrays we care about individual element
+ * sizes, not the overall array size
+ */
+ t = skip_mods_and_typedefs(spec->btf, acc->type_id, &elem_id);
+ while (btf_is_array(t))
+ t = skip_mods_and_typedefs(spec->btf, btf_array(t)->type, &elem_id);
+ sz = btf__resolve_size(spec->btf, elem_id);
if (sz < 0)
return -EINVAL;
*field_sz = sz;
@@ -767,7 +773,17 @@ static int bpf_core_calc_field_relo(const char *prog_name,
case BPF_CORE_FIELD_BYTE_OFFSET:
*val = byte_off;
if (!bitfield) {
- *field_sz = byte_sz;
+ /* remember field size for load/store mem size;
+ * note, for arrays we care about individual element
+ * sizes, not the overall array size
+ */
+ t = skip_mods_and_typedefs(spec->btf, field_type_id, &elem_id);
+ while (btf_is_array(t))
+ t = skip_mods_and_typedefs(spec->btf, btf_array(t)->type, &elem_id);
+ sz = btf__resolve_size(spec->btf, elem_id);
+ if (sz < 0)
+ return -EINVAL;
+ *field_sz = sz;
*type_id = field_type_id;
}
break;
diff --git a/tools/lib/bpf/str_error.c b/tools/lib/bpf/str_error.c
index 8743049e32b7..9a541762f54c 100644
--- a/tools/lib/bpf/str_error.c
+++ b/tools/lib/bpf/str_error.c
@@ -36,7 +36,7 @@ char *libbpf_strerror_r(int err, char *dst, int len)
return dst;
}
-const char *errstr(int err)
+const char *libbpf_errstr(int err)
{
static __thread char buf[12];
diff --git a/tools/lib/bpf/str_error.h b/tools/lib/bpf/str_error.h
index 66ffebde0684..53e7fbffc13e 100644
--- a/tools/lib/bpf/str_error.h
+++ b/tools/lib/bpf/str_error.h
@@ -7,10 +7,13 @@
char *libbpf_strerror_r(int err, char *dst, int len);
/**
- * @brief **errstr()** returns string corresponding to numeric errno
+ * @brief **libbpf_errstr()** returns string corresponding to numeric errno
* @param err negative numeric errno
* @return pointer to string representation of the errno, that is invalidated
* upon the next call.
*/
-const char *errstr(int err);
+const char *libbpf_errstr(int err);
+
+#define errstr(err) libbpf_errstr(err)
+
#endif /* __LIBBPF_STR_ERROR_H */
diff --git a/tools/lib/bpf/usdt.bpf.h b/tools/lib/bpf/usdt.bpf.h
index b811f754939f..2a7865c8e3fe 100644
--- a/tools/lib/bpf/usdt.bpf.h
+++ b/tools/lib/bpf/usdt.bpf.h
@@ -108,6 +108,38 @@ int bpf_usdt_arg_cnt(struct pt_regs *ctx)
return spec->arg_cnt;
}
+/* Returns the size in bytes of the #*arg_num* (zero-indexed) USDT argument.
+ * Returns negative error if argument is not found or arg_num is invalid.
+ */
+static __always_inline
+int bpf_usdt_arg_size(struct pt_regs *ctx, __u64 arg_num)
+{
+ struct __bpf_usdt_arg_spec *arg_spec;
+ struct __bpf_usdt_spec *spec;
+ int spec_id;
+
+ spec_id = __bpf_usdt_spec_id(ctx);
+ if (spec_id < 0)
+ return -ESRCH;
+
+ spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id);
+ if (!spec)
+ return -ESRCH;
+
+ if (arg_num >= BPF_USDT_MAX_ARG_CNT)
+ return -ENOENT;
+ barrier_var(arg_num);
+ if (arg_num >= spec->arg_cnt)
+ return -ENOENT;
+
+ arg_spec = &spec->args[arg_num];
+
+ /* arg_spec->arg_bitshift = 64 - arg_sz * 8
+ * so: arg_sz = (64 - arg_spec->arg_bitshift) / 8
+ */
+ return (unsigned int)(64 - arg_spec->arg_bitshift) / 8;
+}
+
/* Fetch USDT argument #*arg_num* (zero-indexed) and put its value into *res.
* Returns 0 on success; negative error, otherwise.
* On error *res is guaranteed to be set to zero.
diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile
index 3a9b2140aa04..e9a7ac2c062e 100644
--- a/tools/lib/perf/Makefile
+++ b/tools/lib/perf/Makefile
@@ -39,19 +39,6 @@ libdir = $(prefix)/$(libdir_relative)
libdir_SQ = $(subst ','\'',$(libdir))
libdir_relative_SQ = $(subst ','\'',$(libdir_relative))
-ifeq ("$(origin V)", "command line")
- VERBOSE = $(V)
-endif
-ifndef VERBOSE
- VERBOSE = 0
-endif
-
-ifeq ($(VERBOSE),1)
- Q =
-else
- Q = @
-endif
-
TEST_ARGS := $(if $(V),-v)
# Set compile option CFLAGS
diff --git a/tools/lib/thermal/Makefile b/tools/lib/thermal/Makefile
index 8890fd57b110..a1f5e388644d 100644
--- a/tools/lib/thermal/Makefile
+++ b/tools/lib/thermal/Makefile
@@ -39,19 +39,6 @@ libdir = $(prefix)/$(libdir_relative)
libdir_SQ = $(subst ','\'',$(libdir))
libdir_relative_SQ = $(subst ','\'',$(libdir_relative))
-ifeq ("$(origin V)", "command line")
- VERBOSE = $(V)
-endif
-ifndef VERBOSE
- VERBOSE = 0
-endif
-
-ifeq ($(VERBOSE),1)
- Q =
-else
- Q = @
-endif
-
# Set compile option CFLAGS
ifdef EXTRA_CFLAGS
CFLAGS := $(EXTRA_CFLAGS)
diff --git a/tools/memory-model/Documentation/glossary.txt b/tools/memory-model/Documentation/glossary.txt
index 6f3d16dbf467..7ead94bffa4e 100644
--- a/tools/memory-model/Documentation/glossary.txt
+++ b/tools/memory-model/Documentation/glossary.txt
@@ -15,14 +15,14 @@ Address Dependency: When the address of a later memory access is computed
3 do_something(p->a);
4 rcu_read_unlock();
- In this case, because the address of "p->a" on line 3 is computed
- from the value returned by the rcu_dereference() on line 2, the
- address dependency extends from that rcu_dereference() to that
- "p->a". In rare cases, optimizing compilers can destroy address
- dependencies. Please see Documentation/RCU/rcu_dereference.rst
- for more information.
+ In this case, because the address of "p->a" on line 3 is computed
+ from the value returned by the rcu_dereference() on line 2, the
+ address dependency extends from that rcu_dereference() to that
+ "p->a". In rare cases, optimizing compilers can destroy address
+ dependencies. Please see Documentation/RCU/rcu_dereference.rst
+ for more information.
- See also "Control Dependency" and "Data Dependency".
+ See also "Control Dependency" and "Data Dependency".
Acquire: With respect to a lock, acquiring that lock, for example,
using spin_lock(). With respect to a non-lock shared variable,
@@ -59,12 +59,12 @@ Control Dependency: When a later store's execution depends on a test
1 if (READ_ONCE(x))
2 WRITE_ONCE(y, 1);
- Here, the control dependency extends from the READ_ONCE() on
- line 1 to the WRITE_ONCE() on line 2. Control dependencies are
- fragile, and can be easily destroyed by optimizing compilers.
- Please see control-dependencies.txt for more information.
+ Here, the control dependency extends from the READ_ONCE() on
+ line 1 to the WRITE_ONCE() on line 2. Control dependencies are
+ fragile, and can be easily destroyed by optimizing compilers.
+ Please see control-dependencies.txt for more information.
- See also "Address Dependency" and "Data Dependency".
+ See also "Address Dependency" and "Data Dependency".
Cycle: Memory-barrier pairing is restricted to a pair of CPUs, as the
name suggests. And in a great many cases, a pair of CPUs is all
@@ -72,10 +72,10 @@ Cycle: Memory-barrier pairing is restricted to a pair of CPUs, as the
extended to additional CPUs, and the result is called a "cycle".
In a cycle, each CPU's ordering interacts with that of the next:
- CPU 0 CPU 1 CPU 2
- WRITE_ONCE(x, 1); WRITE_ONCE(y, 1); WRITE_ONCE(z, 1);
- smp_mb(); smp_mb(); smp_mb();
- r0 = READ_ONCE(y); r1 = READ_ONCE(z); r2 = READ_ONCE(x);
+ CPU 0 CPU 1 CPU 2
+ WRITE_ONCE(x, 1); WRITE_ONCE(y, 1); WRITE_ONCE(z, 1);
+ smp_mb(); smp_mb(); smp_mb();
+ r0 = READ_ONCE(y); r1 = READ_ONCE(z); r2 = READ_ONCE(x);
CPU 0's smp_mb() interacts with that of CPU 1, which interacts
with that of CPU 2, which in turn interacts with that of CPU 0
diff --git a/tools/memory-model/Documentation/herd-representation.txt b/tools/memory-model/Documentation/herd-representation.txt
index ed988906f2b7..4e19b4f2a476 100644
--- a/tools/memory-model/Documentation/herd-representation.txt
+++ b/tools/memory-model/Documentation/herd-representation.txt
@@ -18,6 +18,11 @@
#
# By convention, a blank line in a cell means "same as the preceding line".
#
+# Note that the syntactic representation does not always match the sets and
+# relations in linux-kernel.cat, due to redefinitions in linux-kernel.bell and
+# lock.cat. For example, the po link between LKR and LKW is upgraded to an rmw
+# link, and W[ACQUIRE] are not included in the Acquire set.
+#
# Disclaimer. The table includes representations of "add" and "and" operations;
# corresponding/identical representations of "sub", "inc", "dec" and "or", "xor",
# "andnot" operations are omitted.
@@ -27,16 +32,16 @@
------------------------------------------------------------------------------
| Non-RMW ops | |
------------------------------------------------------------------------------
- | READ_ONCE | R[once] |
+ | READ_ONCE | R[ONCE] |
| atomic_read | |
- | WRITE_ONCE | W[once] |
+ | WRITE_ONCE | W[ONCE] |
| atomic_set | |
- | smp_load_acquire | R[acquire] |
+ | smp_load_acquire | R[ACQUIRE] |
| atomic_read_acquire | |
- | smp_store_release | W[release] |
+ | smp_store_release | W[RELEASE] |
| atomic_set_release | |
- | smp_store_mb | W[once] ->po F[mb] |
- | smp_mb | F[mb] |
+ | smp_store_mb | W[ONCE] ->po F[MB] |
+ | smp_mb | F[MB] |
| smp_rmb | F[rmb] |
| smp_wmb | F[wmb] |
| smp_mb__before_atomic | F[before-atomic] |
@@ -49,8 +54,8 @@
| rcu_read_lock | F[rcu-lock] |
| rcu_read_unlock | F[rcu-unlock] |
| synchronize_rcu | F[sync-rcu] |
- | rcu_dereference | R[once] |
- | rcu_assign_pointer | W[release] |
+ | rcu_dereference | R[ONCE] |
+ | rcu_assign_pointer | W[RELEASE] |
| srcu_read_lock | R[srcu-lock] |
| srcu_down_read | |
| srcu_read_unlock | W[srcu-unlock] |
@@ -60,32 +65,31 @@
------------------------------------------------------------------------------
| RMW ops w/o return value | |
------------------------------------------------------------------------------
- | atomic_add | R*[noreturn] ->rmw W*[once] |
+ | atomic_add | R*[NORETURN] ->rmw W*[NORETURN] |
| atomic_and | |
| spin_lock | LKR ->po LKW |
------------------------------------------------------------------------------
| RMW ops w/ return value | |
------------------------------------------------------------------------------
- | atomic_add_return | F[mb] ->po R*[once] |
- | | ->rmw W*[once] ->po F[mb] |
+ | atomic_add_return | R*[MB] ->rmw W*[MB] |
| atomic_fetch_add | |
| atomic_fetch_and | |
| atomic_xchg | |
| xchg | |
| atomic_add_negative | |
- | atomic_add_return_relaxed | R*[once] ->rmw W*[once] |
+ | atomic_add_return_relaxed | R*[ONCE] ->rmw W*[ONCE] |
| atomic_fetch_add_relaxed | |
| atomic_fetch_and_relaxed | |
| atomic_xchg_relaxed | |
| xchg_relaxed | |
| atomic_add_negative_relaxed | |
- | atomic_add_return_acquire | R*[acquire] ->rmw W*[once] |
+ | atomic_add_return_acquire | R*[ACQUIRE] ->rmw W*[ACQUIRE] |
| atomic_fetch_add_acquire | |
| atomic_fetch_and_acquire | |
| atomic_xchg_acquire | |
| xchg_acquire | |
| atomic_add_negative_acquire | |
- | atomic_add_return_release | R*[once] ->rmw W*[release] |
+ | atomic_add_return_release | R*[RELEASE] ->rmw W*[RELEASE] |
| atomic_fetch_add_release | |
| atomic_fetch_and_release | |
| atomic_xchg_release | |
@@ -94,17 +98,16 @@
------------------------------------------------------------------------------
| Conditional RMW ops | |
------------------------------------------------------------------------------
- | atomic_cmpxchg | On success: F[mb] ->po R*[once] |
- | | ->rmw W*[once] ->po F[mb] |
- | | On failure: R*[once] |
+ | atomic_cmpxchg | On success: R*[MB] ->rmw W*[MB] |
+ | | On failure: R*[MB] |
| cmpxchg | |
| atomic_add_unless | |
- | atomic_cmpxchg_relaxed | On success: R*[once] ->rmw W*[once] |
- | | On failure: R*[once] |
- | atomic_cmpxchg_acquire | On success: R*[acquire] ->rmw W*[once] |
- | | On failure: R*[once] |
- | atomic_cmpxchg_release | On success: R*[once] ->rmw W*[release] |
- | | On failure: R*[once] |
+ | atomic_cmpxchg_relaxed | On success: R*[ONCE] ->rmw W*[ONCE] |
+ | | On failure: R*[ONCE] |
+ | atomic_cmpxchg_acquire | On success: R*[ACQUIRE] ->rmw W*[ACQUIRE] |
+ | | On failure: R*[ACQUIRE] |
+ | atomic_cmpxchg_release | On success: R*[RELEASE] ->rmw W*[RELEASE] |
+ | | On failure: R*[RELEASE] |
| spin_trylock | On success: LKR ->po LKW |
| | On failure: LF |
------------------------------------------------------------------------------
diff --git a/tools/memory-model/README b/tools/memory-model/README
index dab38904206a..64c860863aa9 100644
--- a/tools/memory-model/README
+++ b/tools/memory-model/README
@@ -20,7 +20,7 @@ that litmus test to be exercised within the Linux kernel.
REQUIREMENTS
============
-Version 7.52 or higher of the "herd7" and "klitmus7" tools must be
+Version 7.58 or higher of the "herd7" and "klitmus7" tools must be
downloaded separately:
https://github.com/herd/herdtools7
@@ -79,7 +79,7 @@ Several thousand more example litmus tests are available here:
https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/herd
https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/litmus
-Documentation describing litmus tests and now to use them may be found
+Documentation describing litmus tests and how to use them may be found
here:
tools/memory-model/Documentation/litmus-tests.txt
diff --git a/tools/memory-model/linux-kernel.bell b/tools/memory-model/linux-kernel.bell
index ce068700939c..fe65998002b9 100644
--- a/tools/memory-model/linux-kernel.bell
+++ b/tools/memory-model/linux-kernel.bell
@@ -13,17 +13,18 @@
"Linux-kernel memory consistency model"
-enum Accesses = 'once (*READ_ONCE,WRITE_ONCE*) ||
- 'release (*smp_store_release*) ||
- 'acquire (*smp_load_acquire*) ||
- 'noreturn (* R of non-return RMW *)
-instructions R[{'once,'acquire,'noreturn}]
-instructions W[{'once,'release}]
-instructions RMW[{'once,'acquire,'release}]
+enum Accesses = 'ONCE (*READ_ONCE,WRITE_ONCE*) ||
+ 'RELEASE (*smp_store_release*) ||
+ 'ACQUIRE (*smp_load_acquire*) ||
+ 'NORETURN (* R of non-return RMW *) ||
+ 'MB (*xchg(),cmpxchg(),...*)
+instructions R[Accesses]
+instructions W[Accesses]
+instructions RMW[Accesses]
enum Barriers = 'wmb (*smp_wmb*) ||
'rmb (*smp_rmb*) ||
- 'mb (*smp_mb*) ||
+ 'MB (*smp_mb*) ||
'barrier (*barrier*) ||
'rcu-lock (*rcu_read_lock*) ||
'rcu-unlock (*rcu_read_unlock*) ||
@@ -35,6 +36,17 @@ enum Barriers = 'wmb (*smp_wmb*) ||
'after-srcu-read-unlock (*smp_mb__after_srcu_read_unlock*)
instructions F[Barriers]
+
+(*
+ * Filter out syntactic annotations that do not provide the corresponding
+ * semantic ordering, such as Acquire on a store or Mb on a failed RMW.
+ *)
+let FailedRMW = RMW \ (domain(rmw) | range(rmw))
+let Acquire = ACQUIRE \ W \ FailedRMW
+let Release = RELEASE \ R \ FailedRMW
+let Mb = MB \ FailedRMW
+let Noreturn = NORETURN \ W
+
(* SRCU *)
enum SRCU = 'srcu-lock || 'srcu-unlock || 'sync-srcu
instructions SRCU[SRCU]
@@ -73,7 +85,7 @@ flag ~empty rcu-rscs & (po ; [Sync-srcu] ; po) as invalid-sleep
flag ~empty different-values(srcu-rscs) as srcu-bad-value-match
(* Compute marked and plain memory accesses *)
-let Marked = (~M) | IW | Once | Release | Acquire | domain(rmw) | range(rmw) |
+let Marked = (~M) | IW | ONCE | RELEASE | ACQUIRE | MB | RMW |
LKR | LKW | UL | LF | RL | RU | Srcu-lock | Srcu-unlock
let Plain = M \ Marked
@@ -82,3 +94,6 @@ let carry-dep = (data ; [~ Srcu-unlock] ; rfi)*
let addr = carry-dep ; addr
let ctrl = carry-dep ; ctrl
let data = carry-dep ; data
+
+flag ~empty (if "lkmmv2" then 0 else _)
+ as this-model-requires-variant-higher-than-lkmmv1
diff --git a/tools/memory-model/linux-kernel.cat b/tools/memory-model/linux-kernel.cat
index adf3c4f41229..d7e7bf13c831 100644
--- a/tools/memory-model/linux-kernel.cat
+++ b/tools/memory-model/linux-kernel.cat
@@ -34,6 +34,16 @@ let R4rmb = R \ Noreturn (* Reads for which rmb works *)
let rmb = [R4rmb] ; fencerel(Rmb) ; [R4rmb]
let wmb = [W] ; fencerel(Wmb) ; [W]
let mb = ([M] ; fencerel(Mb) ; [M]) |
+ (*
+ * full-barrier RMWs (successful cmpxchg(), xchg(), etc.) act as
+ * though there were enclosed by smp_mb().
+ * The effect of these virtual smp_mb() is formalized by adding
+ * Mb tags to the read and write of the operation, and providing
+ * the same ordering as though there were additional po edges
+ * between the Mb tag and the read resp. write.
+ *)
+ ([M] ; po ; [Mb & R]) |
+ ([Mb & W] ; po ; [M]) |
([M] ; fencerel(Before-atomic) ; [RMW] ; po? ; [M]) |
([M] ; po? ; [RMW] ; fencerel(After-atomic) ; [M]) |
([M] ; po? ; [LKW] ; fencerel(After-spinlock) ; [M]) |
diff --git a/tools/memory-model/linux-kernel.cfg b/tools/memory-model/linux-kernel.cfg
index 3c8098e99f41..69b04f3aad73 100644
--- a/tools/memory-model/linux-kernel.cfg
+++ b/tools/memory-model/linux-kernel.cfg
@@ -1,6 +1,7 @@
macros linux-kernel.def
bell linux-kernel.bell
model linux-kernel.cat
+variant lkmmv2
graph columns
squished true
showevents noregs
diff --git a/tools/memory-model/linux-kernel.def b/tools/memory-model/linux-kernel.def
index 88a39601f525..49e402782e49 100644
--- a/tools/memory-model/linux-kernel.def
+++ b/tools/memory-model/linux-kernel.def
@@ -6,18 +6,18 @@
// which appeared in ASPLOS 2018.
// ONCE
-READ_ONCE(X) __load{once}(X)
-WRITE_ONCE(X,V) { __store{once}(X,V); }
+READ_ONCE(X) __load{ONCE}(X)
+WRITE_ONCE(X,V) { __store{ONCE}(X,V); }
// Release Acquire and friends
-smp_store_release(X,V) { __store{release}(*X,V); }
-smp_load_acquire(X) __load{acquire}(*X)
-rcu_assign_pointer(X,V) { __store{release}(X,V); }
-rcu_dereference(X) __load{once}(X)
-smp_store_mb(X,V) { __store{once}(X,V); __fence{mb}; }
+smp_store_release(X,V) { __store{RELEASE}(*X,V); }
+smp_load_acquire(X) __load{ACQUIRE}(*X)
+rcu_assign_pointer(X,V) { __store{RELEASE}(X,V); }
+rcu_dereference(X) __load{ONCE}(X)
+smp_store_mb(X,V) { __store{ONCE}(X,V); __fence{MB}; }
// Fences
-smp_mb() { __fence{mb}; }
+smp_mb() { __fence{MB}; }
smp_rmb() { __fence{rmb}; }
smp_wmb() { __fence{wmb}; }
smp_mb__before_atomic() { __fence{before-atomic}; }
@@ -28,14 +28,14 @@ smp_mb__after_srcu_read_unlock() { __fence{after-srcu-read-unlock}; }
barrier() { __fence{barrier}; }
// Exchange
-xchg(X,V) __xchg{mb}(X,V)
-xchg_relaxed(X,V) __xchg{once}(X,V)
-xchg_release(X,V) __xchg{release}(X,V)
-xchg_acquire(X,V) __xchg{acquire}(X,V)
-cmpxchg(X,V,W) __cmpxchg{mb}(X,V,W)
-cmpxchg_relaxed(X,V,W) __cmpxchg{once}(X,V,W)
-cmpxchg_acquire(X,V,W) __cmpxchg{acquire}(X,V,W)
-cmpxchg_release(X,V,W) __cmpxchg{release}(X,V,W)
+xchg(X,V) __xchg{MB}(X,V)
+xchg_relaxed(X,V) __xchg{ONCE}(X,V)
+xchg_release(X,V) __xchg{RELEASE}(X,V)
+xchg_acquire(X,V) __xchg{ACQUIRE}(X,V)
+cmpxchg(X,V,W) __cmpxchg{MB}(X,V,W)
+cmpxchg_relaxed(X,V,W) __cmpxchg{ONCE}(X,V,W)
+cmpxchg_acquire(X,V,W) __cmpxchg{ACQUIRE}(X,V,W)
+cmpxchg_release(X,V,W) __cmpxchg{RELEASE}(X,V,W)
// Spinlocks
spin_lock(X) { __lock(X); }
@@ -63,57 +63,86 @@ atomic_set(X,V) { WRITE_ONCE(*X,V); }
atomic_read_acquire(X) smp_load_acquire(X)
atomic_set_release(X,V) { smp_store_release(X,V); }
-atomic_add(V,X) { __atomic_op(X,+,V); }
-atomic_sub(V,X) { __atomic_op(X,-,V); }
-atomic_inc(X) { __atomic_op(X,+,1); }
-atomic_dec(X) { __atomic_op(X,-,1); }
-
-atomic_add_return(V,X) __atomic_op_return{mb}(X,+,V)
-atomic_add_return_relaxed(V,X) __atomic_op_return{once}(X,+,V)
-atomic_add_return_acquire(V,X) __atomic_op_return{acquire}(X,+,V)
-atomic_add_return_release(V,X) __atomic_op_return{release}(X,+,V)
-atomic_fetch_add(V,X) __atomic_fetch_op{mb}(X,+,V)
-atomic_fetch_add_relaxed(V,X) __atomic_fetch_op{once}(X,+,V)
-atomic_fetch_add_acquire(V,X) __atomic_fetch_op{acquire}(X,+,V)
-atomic_fetch_add_release(V,X) __atomic_fetch_op{release}(X,+,V)
-
-atomic_inc_return(X) __atomic_op_return{mb}(X,+,1)
-atomic_inc_return_relaxed(X) __atomic_op_return{once}(X,+,1)
-atomic_inc_return_acquire(X) __atomic_op_return{acquire}(X,+,1)
-atomic_inc_return_release(X) __atomic_op_return{release}(X,+,1)
-atomic_fetch_inc(X) __atomic_fetch_op{mb}(X,+,1)
-atomic_fetch_inc_relaxed(X) __atomic_fetch_op{once}(X,+,1)
-atomic_fetch_inc_acquire(X) __atomic_fetch_op{acquire}(X,+,1)
-atomic_fetch_inc_release(X) __atomic_fetch_op{release}(X,+,1)
-
-atomic_sub_return(V,X) __atomic_op_return{mb}(X,-,V)
-atomic_sub_return_relaxed(V,X) __atomic_op_return{once}(X,-,V)
-atomic_sub_return_acquire(V,X) __atomic_op_return{acquire}(X,-,V)
-atomic_sub_return_release(V,X) __atomic_op_return{release}(X,-,V)
-atomic_fetch_sub(V,X) __atomic_fetch_op{mb}(X,-,V)
-atomic_fetch_sub_relaxed(V,X) __atomic_fetch_op{once}(X,-,V)
-atomic_fetch_sub_acquire(V,X) __atomic_fetch_op{acquire}(X,-,V)
-atomic_fetch_sub_release(V,X) __atomic_fetch_op{release}(X,-,V)
-
-atomic_dec_return(X) __atomic_op_return{mb}(X,-,1)
-atomic_dec_return_relaxed(X) __atomic_op_return{once}(X,-,1)
-atomic_dec_return_acquire(X) __atomic_op_return{acquire}(X,-,1)
-atomic_dec_return_release(X) __atomic_op_return{release}(X,-,1)
-atomic_fetch_dec(X) __atomic_fetch_op{mb}(X,-,1)
-atomic_fetch_dec_relaxed(X) __atomic_fetch_op{once}(X,-,1)
-atomic_fetch_dec_acquire(X) __atomic_fetch_op{acquire}(X,-,1)
-atomic_fetch_dec_release(X) __atomic_fetch_op{release}(X,-,1)
-
-atomic_xchg(X,V) __xchg{mb}(X,V)
-atomic_xchg_relaxed(X,V) __xchg{once}(X,V)
-atomic_xchg_release(X,V) __xchg{release}(X,V)
-atomic_xchg_acquire(X,V) __xchg{acquire}(X,V)
-atomic_cmpxchg(X,V,W) __cmpxchg{mb}(X,V,W)
-atomic_cmpxchg_relaxed(X,V,W) __cmpxchg{once}(X,V,W)
-atomic_cmpxchg_acquire(X,V,W) __cmpxchg{acquire}(X,V,W)
-atomic_cmpxchg_release(X,V,W) __cmpxchg{release}(X,V,W)
-
-atomic_sub_and_test(V,X) __atomic_op_return{mb}(X,-,V) == 0
-atomic_dec_and_test(X) __atomic_op_return{mb}(X,-,1) == 0
-atomic_inc_and_test(X) __atomic_op_return{mb}(X,+,1) == 0
-atomic_add_negative(V,X) __atomic_op_return{mb}(X,+,V) < 0
+atomic_add(V,X) { __atomic_op{NORETURN}(X,+,V); }
+atomic_sub(V,X) { __atomic_op{NORETURN}(X,-,V); }
+atomic_and(V,X) { __atomic_op{NORETURN}(X,&,V); }
+atomic_or(V,X) { __atomic_op{NORETURN}(X,|,V); }
+atomic_xor(V,X) { __atomic_op{NORETURN}(X,^,V); }
+atomic_inc(X) { __atomic_op{NORETURN}(X,+,1); }
+atomic_dec(X) { __atomic_op{NORETURN}(X,-,1); }
+atomic_andnot(V,X) { __atomic_op{NORETURN}(X,&~,V); }
+
+atomic_add_return(V,X) __atomic_op_return{MB}(X,+,V)
+atomic_add_return_relaxed(V,X) __atomic_op_return{ONCE}(X,+,V)
+atomic_add_return_acquire(V,X) __atomic_op_return{ACQUIRE}(X,+,V)
+atomic_add_return_release(V,X) __atomic_op_return{RELEASE}(X,+,V)
+atomic_fetch_add(V,X) __atomic_fetch_op{MB}(X,+,V)
+atomic_fetch_add_relaxed(V,X) __atomic_fetch_op{ONCE}(X,+,V)
+atomic_fetch_add_acquire(V,X) __atomic_fetch_op{ACQUIRE}(X,+,V)
+atomic_fetch_add_release(V,X) __atomic_fetch_op{RELEASE}(X,+,V)
+
+atomic_fetch_and(V,X) __atomic_fetch_op{MB}(X,&,V)
+atomic_fetch_and_relaxed(V,X) __atomic_fetch_op{ONCE}(X,&,V)
+atomic_fetch_and_acquire(V,X) __atomic_fetch_op{ACQUIRE}(X,&,V)
+atomic_fetch_and_release(V,X) __atomic_fetch_op{RELEASE}(X,&,V)
+
+atomic_fetch_or(V,X) __atomic_fetch_op{MB}(X,|,V)
+atomic_fetch_or_relaxed(V,X) __atomic_fetch_op{ONCE}(X,|,V)
+atomic_fetch_or_acquire(V,X) __atomic_fetch_op{ACQUIRE}(X,|,V)
+atomic_fetch_or_release(V,X) __atomic_fetch_op{RELEASE}(X,|,V)
+
+atomic_fetch_xor(V,X) __atomic_fetch_op{MB}(X,^,V)
+atomic_fetch_xor_relaxed(V,X) __atomic_fetch_op{ONCE}(X,^,V)
+atomic_fetch_xor_acquire(V,X) __atomic_fetch_op{ACQUIRE}(X,^,V)
+atomic_fetch_xor_release(V,X) __atomic_fetch_op{RELEASE}(X,^,V)
+
+atomic_inc_return(X) __atomic_op_return{MB}(X,+,1)
+atomic_inc_return_relaxed(X) __atomic_op_return{ONCE}(X,+,1)
+atomic_inc_return_acquire(X) __atomic_op_return{ACQUIRE}(X,+,1)
+atomic_inc_return_release(X) __atomic_op_return{RELEASE}(X,+,1)
+atomic_fetch_inc(X) __atomic_fetch_op{MB}(X,+,1)
+atomic_fetch_inc_relaxed(X) __atomic_fetch_op{ONCE}(X,+,1)
+atomic_fetch_inc_acquire(X) __atomic_fetch_op{ACQUIRE}(X,+,1)
+atomic_fetch_inc_release(X) __atomic_fetch_op{RELEASE}(X,+,1)
+
+atomic_sub_return(V,X) __atomic_op_return{MB}(X,-,V)
+atomic_sub_return_relaxed(V,X) __atomic_op_return{ONCE}(X,-,V)
+atomic_sub_return_acquire(V,X) __atomic_op_return{ACQUIRE}(X,-,V)
+atomic_sub_return_release(V,X) __atomic_op_return{RELEASE}(X,-,V)
+atomic_fetch_sub(V,X) __atomic_fetch_op{MB}(X,-,V)
+atomic_fetch_sub_relaxed(V,X) __atomic_fetch_op{ONCE}(X,-,V)
+atomic_fetch_sub_acquire(V,X) __atomic_fetch_op{ACQUIRE}(X,-,V)
+atomic_fetch_sub_release(V,X) __atomic_fetch_op{RELEASE}(X,-,V)
+
+atomic_dec_return(X) __atomic_op_return{MB}(X,-,1)
+atomic_dec_return_relaxed(X) __atomic_op_return{ONCE}(X,-,1)
+atomic_dec_return_acquire(X) __atomic_op_return{ACQUIRE}(X,-,1)
+atomic_dec_return_release(X) __atomic_op_return{RELEASE}(X,-,1)
+atomic_fetch_dec(X) __atomic_fetch_op{MB}(X,-,1)
+atomic_fetch_dec_relaxed(X) __atomic_fetch_op{ONCE}(X,-,1)
+atomic_fetch_dec_acquire(X) __atomic_fetch_op{ACQUIRE}(X,-,1)
+atomic_fetch_dec_release(X) __atomic_fetch_op{RELEASE}(X,-,1)
+
+atomic_xchg(X,V) __xchg{MB}(X,V)
+atomic_xchg_relaxed(X,V) __xchg{ONCE}(X,V)
+atomic_xchg_release(X,V) __xchg{RELEASE}(X,V)
+atomic_xchg_acquire(X,V) __xchg{ACQUIRE}(X,V)
+atomic_cmpxchg(X,V,W) __cmpxchg{MB}(X,V,W)
+atomic_cmpxchg_relaxed(X,V,W) __cmpxchg{ONCE}(X,V,W)
+atomic_cmpxchg_acquire(X,V,W) __cmpxchg{ACQUIRE}(X,V,W)
+atomic_cmpxchg_release(X,V,W) __cmpxchg{RELEASE}(X,V,W)
+
+atomic_sub_and_test(V,X) __atomic_op_return{MB}(X,-,V) == 0
+atomic_dec_and_test(X) __atomic_op_return{MB}(X,-,1) == 0
+atomic_inc_and_test(X) __atomic_op_return{MB}(X,+,1) == 0
+atomic_add_negative(V,X) __atomic_op_return{MB}(X,+,V) < 0
+atomic_add_negative_relaxed(V,X) __atomic_op_return{ONCE}(X,+,V) < 0
+atomic_add_negative_acquire(V,X) __atomic_op_return{ACQUIRE}(X,+,V) < 0
+atomic_add_negative_release(V,X) __atomic_op_return{RELEASE}(X,+,V) < 0
+
+atomic_fetch_andnot(V,X) __atomic_fetch_op{MB}(X,&~,V)
+atomic_fetch_andnot_acquire(V,X) __atomic_fetch_op{ACQUIRE}(X,&~,V)
+atomic_fetch_andnot_release(V,X) __atomic_fetch_op{RELEASE}(X,&~,V)
+atomic_fetch_andnot_relaxed(V,X) __atomic_fetch_op{ONCE}(X,&~,V)
+
+atomic_add_unless(X,V,W) __atomic_add_unless{MB}(X,V,W)
diff --git a/tools/mm/page-types.c b/tools/mm/page-types.c
index bcac7ebfb51f..d7e5e8902af8 100644
--- a/tools/mm/page-types.c
+++ b/tools/mm/page-types.c
@@ -24,8 +24,8 @@
#include <signal.h>
#include <inttypes.h>
#include <sys/types.h>
-#include <sys/errno.h>
-#include <sys/fcntl.h>
+#include <errno.h>
+#include <fcntl.h>
#include <sys/mount.h>
#include <sys/statfs.h>
#include <sys/mman.h>
diff --git a/tools/net/ynl/Makefile.deps b/tools/net/ynl/Makefile.deps
index 0712b5e82eb7..f3269ce39e5b 100644
--- a/tools/net/ynl/Makefile.deps
+++ b/tools/net/ynl/Makefile.deps
@@ -17,10 +17,13 @@ get_hdr_inc=-D$(1) -include $(UAPI_PATH)/linux/$(2)
CFLAGS_devlink:=$(call get_hdr_inc,_LINUX_DEVLINK_H_,devlink.h)
CFLAGS_dpll:=$(call get_hdr_inc,_LINUX_DPLL_H,dpll.h)
CFLAGS_ethtool:=$(call get_hdr_inc,_LINUX_ETHTOOL_H,ethtool.h) \
- $(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_H_,ethtool_netlink.h)
+ $(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_H_,ethtool_netlink.h) \
+ $(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_GENERATED_H,ethtool_netlink_generated.h)
CFLAGS_handshake:=$(call get_hdr_inc,_LINUX_HANDSHAKE_H,handshake.h)
CFLAGS_mptcp_pm:=$(call get_hdr_inc,_LINUX_MPTCP_PM_H,mptcp_pm.h)
+CFLAGS_net_shaper:=$(call get_hdr_inc,_LINUX_NET_SHAPER_H,net_shaper.h)
CFLAGS_netdev:=$(call get_hdr_inc,_LINUX_NETDEV_H,netdev.h)
+CFLAGS_nl80211:=$(call get_hdr_inc,__LINUX_NL802121_H,nl80211.h)
CFLAGS_nlctrl:=$(call get_hdr_inc,__LINUX_GENERIC_NETLINK_H,genetlink.h)
CFLAGS_nfsd:=$(call get_hdr_inc,_LINUX_NFSD_NETLINK_H,nfsd_netlink.h)
CFLAGS_ovs_datapath:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h)
diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c
index e16cef160bc2..ce32cb35007d 100644
--- a/tools/net/ynl/lib/ynl.c
+++ b/tools/net/ynl/lib/ynl.c
@@ -95,7 +95,7 @@ ynl_err_walk(struct ynl_sock *ys, void *start, void *end, unsigned int off,
ynl_attr_for_each_payload(start, data_len, attr) {
astart_off = (char *)attr - (char *)start;
- aend_off = astart_off + ynl_attr_data_len(attr);
+ aend_off = (char *)ynl_attr_data_end(attr) - (char *)start;
if (aend_off <= off)
continue;
diff --git a/tools/net/ynl/pyynl/lib/ynl.py b/tools/net/ynl/pyynl/lib/ynl.py
index 08f8bf89cfc2..dcc2c6b298d6 100644
--- a/tools/net/ynl/pyynl/lib/ynl.py
+++ b/tools/net/ynl/pyynl/lib/ynl.py
@@ -536,9 +536,11 @@ class YnlFamily(SpecFamily):
try:
return int(value)
except (ValueError, TypeError) as e:
- if 'enum' not in attr_spec:
- raise e
- return self._encode_enum(attr_spec, value)
+ if 'enum' in attr_spec:
+ return self._encode_enum(attr_spec, value)
+ if attr_spec.display_hint:
+ return self._from_string(value, attr_spec)
+ raise e
def _add_attr(self, space, name, value, search_attrs):
try:
@@ -571,7 +573,10 @@ class YnlFamily(SpecFamily):
if isinstance(value, bytes):
attr_payload = value
elif isinstance(value, str):
- attr_payload = bytes.fromhex(value)
+ if attr.display_hint:
+ attr_payload = self._from_string(value, attr)
+ else:
+ attr_payload = bytes.fromhex(value)
elif isinstance(value, dict) and attr.struct_name:
attr_payload = self._encode_struct(attr.struct_name, value)
else:
@@ -627,6 +632,11 @@ class YnlFamily(SpecFamily):
decoded = self._decode_struct(attr.raw, attr_spec.struct_name)
elif attr_spec.sub_type:
decoded = attr.as_c_array(attr_spec.sub_type)
+ if 'enum' in attr_spec:
+ decoded = [ self._decode_enum(x, attr_spec) for x in decoded ]
+ elif attr_spec.display_hint:
+ decoded = [ self._formatted_string(x, attr_spec.display_hint)
+ for x in decoded ]
else:
decoded = attr.as_bin()
if attr_spec.display_hint:
@@ -644,15 +654,17 @@ class YnlFamily(SpecFamily):
subattrs = self._decode(NlAttrs(item.raw), attr_spec['nested-attributes'])
decoded.append({ item.type: subattrs })
elif attr_spec["sub-type"] == 'binary':
- subattrs = item.as_bin()
+ subattr = item.as_bin()
if attr_spec.display_hint:
- subattrs = self._formatted_string(subattrs, attr_spec.display_hint)
- decoded.append(subattrs)
+ subattr = self._formatted_string(subattr, attr_spec.display_hint)
+ decoded.append(subattr)
elif attr_spec["sub-type"] in NlAttr.type_formats:
- subattrs = item.as_scalar(attr_spec['sub-type'], attr_spec.byte_order)
- if attr_spec.display_hint:
- subattrs = self._formatted_string(subattrs, attr_spec.display_hint)
- decoded.append(subattrs)
+ subattr = item.as_scalar(attr_spec['sub-type'], attr_spec.byte_order)
+ if 'enum' in attr_spec:
+ subattr = self._decode_enum(subattr, attr_spec)
+ elif attr_spec.display_hint:
+ subattr = self._formatted_string(subattr, attr_spec.display_hint)
+ decoded.append(subattr)
else:
raise Exception(f'Unknown {attr_spec["sub-type"]} with name {attr_spec["name"]}')
return decoded
@@ -899,6 +911,18 @@ class YnlFamily(SpecFamily):
formatted = raw
return formatted
+ def _from_string(self, string, attr_spec):
+ if attr_spec.display_hint in ['ipv4', 'ipv6']:
+ ip = ipaddress.ip_address(string)
+ if attr_spec['type'] == 'binary':
+ raw = ip.packed
+ else:
+ raw = int(ip)
+ else:
+ raise Exception(f"Display hint '{attr_spec.display_hint}' not implemented"
+ f" when parsing '{attr_spec['name']}'")
+ return raw
+
def handle_ntf(self, decoded):
msg = dict()
if self.include_raw:
diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py
index c2eabc90dce8..a1427c537030 100755
--- a/tools/net/ynl/pyynl/ynl_gen_c.py
+++ b/tools/net/ynl/pyynl/ynl_gen_c.py
@@ -74,6 +74,8 @@ class Type(SpecAttr):
self.c_name = c_lower(self.name)
if self.c_name in _C_KW:
self.c_name += '_'
+ if self.c_name[0].isdigit():
+ self.c_name = '_' + self.c_name
# Added by resolve():
self.enum_name = None
@@ -100,7 +102,7 @@ class Type(SpecAttr):
if isinstance(value, int):
return value
if value in self.family.consts:
- raise Exception("Resolving family constants not implemented, yet")
+ return self.family.consts[value]["value"]
return limit_to_number(value)
def get_limit_str(self, limit, default=None, suffix=''):
@@ -110,6 +112,9 @@ class Type(SpecAttr):
if isinstance(value, int):
return str(value) + suffix
if value in self.family.consts:
+ const = self.family.consts[value]
+ if const.get('header'):
+ return c_upper(value)
return c_upper(f"{self.family['name']}-{value}")
return c_upper(value)
@@ -683,7 +688,10 @@ class TypeArrayNest(Type):
raise Exception(f"Sub-type {self.attr['sub-type']} not supported yet")
def _attr_typol(self):
- return f'.type = YNL_PT_NEST, .nest = &{self.nested_render_name}_nest, '
+ if self.attr['sub-type'] in scalars:
+ return f'.type = YNL_PT_U{c_upper(self.sub_type[1:])}, '
+ else:
+ return f'.type = YNL_PT_NEST, .nest = &{self.nested_render_name}_nest, '
def _attr_get(self, ri, var):
local_vars = ['const struct nlattr *attr2;']
@@ -885,7 +893,7 @@ class AttrSet(SpecAttrSet):
elif elem['type'] == 'nest':
t = TypeNest(self.family, self, elem, value)
elif elem['type'] == 'indexed-array' and 'sub-type' in elem:
- if elem["sub-type"] == 'nest':
+ if elem["sub-type"] in ['nest', 'u32']:
t = TypeArrayNest(self.family, self, elem, value)
else:
raise Exception(f'new_attr: unsupported sub-type {elem["sub-type"]}')
@@ -1437,7 +1445,7 @@ class CodeWriter:
self._ifdef_block = config_option
-scalars = {'u8', 'u16', 'u32', 'u64', 's32', 's64', 'uint', 'sint'}
+scalars = {'u8', 'u16', 'u32', 'u64', 's8', 's16', 's32', 's64', 'uint', 'sint'}
direction_to_suffix = {
'reply': '_rsp',
@@ -1669,6 +1677,9 @@ def _multi_parse(ri, struct, init_lines, local_vars):
if aspec["sub-type"] == 'nest':
local_vars.append(f'const struct nlattr *attr_{aspec.c_name};')
array_nests.add(arg)
+ elif aspec['sub-type'] in scalars:
+ local_vars.append(f'const struct nlattr *attr_{aspec.c_name};')
+ array_nests.add(arg)
else:
raise Exception(f'Not supported sub-type {aspec["sub-type"]}')
if 'multi-attr' in aspec:
@@ -1724,11 +1735,17 @@ def _multi_parse(ri, struct, init_lines, local_vars):
ri.cw.p(f"dst->{aspec.c_name} = calloc(n_{aspec.c_name}, sizeof(*dst->{aspec.c_name}));")
ri.cw.p(f"dst->n_{aspec.c_name} = n_{aspec.c_name};")
ri.cw.p('i = 0;')
- ri.cw.p(f"parg.rsp_policy = &{aspec.nested_render_name}_nest;")
+ if 'nested-attributes' in aspec:
+ ri.cw.p(f"parg.rsp_policy = &{aspec.nested_render_name}_nest;")
ri.cw.block_start(line=f"ynl_attr_for_each_nested(attr, attr_{aspec.c_name})")
- ri.cw.p(f"parg.data = &dst->{aspec.c_name}[i];")
- ri.cw.p(f"if ({aspec.nested_render_name}_parse(&parg, attr, ynl_attr_type(attr)))")
- ri.cw.p('return YNL_PARSE_CB_ERROR;')
+ if 'nested-attributes' in aspec:
+ ri.cw.p(f"parg.data = &dst->{aspec.c_name}[i];")
+ ri.cw.p(f"if ({aspec.nested_render_name}_parse(&parg, attr, ynl_attr_type(attr)))")
+ ri.cw.p('return YNL_PARSE_CB_ERROR;')
+ elif aspec.sub_type in scalars:
+ ri.cw.p(f"dst->{aspec.c_name}[i] = ynl_attr_get_{aspec.sub_type}(attr);")
+ else:
+ raise Exception(f"Nest parsing type not supported in {aspec['name']}")
ri.cw.p('i++;')
ri.cw.block_end()
ri.cw.block_end()
@@ -2549,6 +2566,9 @@ def render_uapi(family, cw):
defines = []
for const in family['definitions']:
+ if const.get('header'):
+ continue
+
if const['type'] != 'const':
cw.writes_defines(defines)
defines = []
diff --git a/tools/objtool/Documentation/objtool.txt b/tools/objtool/Documentation/objtool.txt
index 7c3ee959b63c..28ac57b9e102 100644
--- a/tools/objtool/Documentation/objtool.txt
+++ b/tools/objtool/Documentation/objtool.txt
@@ -28,6 +28,15 @@ Objtool has the following features:
sites, enabling the kernel to patch them inline, to prevent "thunk
funneling" for both security and performance reasons
+- Return thunk validation -- validates return thunks are used for
+ certain CPU mitigations including Retbleed and SRSO
+
+- Return thunk annotation -- annotates all return thunk sites so kernel
+ can patch them inline, depending on enabled mitigations
+
+- Return thunk training valiation -- validate that all entry paths
+ untrain a "safe return" before the first return (or call)
+
- Non-instrumentation validation -- validates non-instrumentable
("noinstr") code rules, preventing instrumentation in low-level C
entry code
@@ -53,6 +62,9 @@ Objtool has the following features:
- Function entry annotation -- annotates function entries, enabling
kernel function tracing
+- Function preamble (prefix) annotation and/or symbol generation -- used
+ for FineIBT and call depth tracking
+
- Other toolchain hacks which will go unmentioned at this time...
Each feature can be enabled individually or in combination using the
@@ -197,19 +209,17 @@ To achieve the validation, objtool enforces the following rules:
1. Each callable function must be annotated as such with the ELF
function type. In asm code, this is typically done using the
- ENTRY/ENDPROC macros. If objtool finds a return instruction
+ SYM_FUNC_{START,END} macros. If objtool finds a return instruction
outside of a function, it flags an error since that usually indicates
callable code which should be annotated accordingly.
This rule is needed so that objtool can properly identify each
callable function in order to analyze its stack metadata.
-2. Conversely, each section of code which is *not* callable should *not*
- be annotated as an ELF function. The ENDPROC macro shouldn't be used
- in this case.
-
- This rule is needed so that objtool can ignore non-callable code.
- Such code doesn't have to follow any of the other rules.
+2. Conversely, each section of code which is *not* callable, or is
+ otherwise doing funny things with the stack or registers, should
+ *not* be annotated as an ELF function. Rather, SYM_CODE_{START,END}
+ should be used along with unwind hints.
3. Each callable function which calls another function must have the
correct frame pointer logic, if required by CONFIG_FRAME_POINTER or
@@ -221,7 +231,7 @@ To achieve the validation, objtool enforces the following rules:
function B, the _caller_ of function A will be skipped on the stack
trace.
-4. Dynamic jumps and jumps to undefined symbols are only allowed if:
+4. Indirect jumps and jumps to undefined symbols are only allowed if:
a) the jump is part of a switch statement; or
@@ -304,29 +314,29 @@ the objtool maintainers.
001e 2823e: 80 ce 02 or $0x2,%dh
...
+
2. file.o: warning: objtool: .text+0x53: unreachable instruction
Objtool couldn't find a code path to reach the instruction.
If the error is for an asm file, and the instruction is inside (or
reachable from) a callable function, the function should be annotated
- with the ENTRY/ENDPROC macros (ENDPROC is the important one).
- Otherwise, the code should probably be annotated with the unwind hint
- macros in asm/unwind_hints.h so objtool and the unwinder can know the
- stack state associated with the code.
+ with the SYM_FUNC_START and SYM_FUNC_END macros.
- If you're 100% sure the code won't affect stack traces, or if you're
- a just a bad person, you can tell objtool to ignore it. See the
- "Adding exceptions" section below.
+ Otherwise, SYM_CODE_START can be used. In that case the code needs
+ to be annotated with unwind hint macros.
- If it's not actually in a callable function (e.g. kernel entry code),
- change ENDPROC to END.
+ If you're sure the code won't affect the reliability of runtime stack
+ traces and want objtool to ignore it, see "Adding exceptions" below.
-3. file.o: warning: objtool: foo+0x48c: bar() is missing a __noreturn annotation
- The call from foo() to bar() doesn't return, but bar() is missing the
- __noreturn annotation. NOTE: In addition to annotating the function
- with __noreturn, please also add it to tools/objtool/noreturns.h.
+3. file.o: warning: objtool: foo+0x48c: bar() missing __noreturn in .c/.h or NORETURN() in noreturns.h
+
+ The call from foo() to bar() doesn't return, but bar() is incorrectly
+ annotated. A noreturn function must be marked __noreturn in both its
+ declaration and its definition, and must have a NORETURN() annotation
+ in tools/objtool/noreturns.h.
+
4. file.o: warning: objtool: func(): can't find starting instruction
or
@@ -341,23 +351,21 @@ the objtool maintainers.
This is a kernel entry/exit instruction like sysenter or iret. Such
instructions aren't allowed in a callable function, and are most
- likely part of the kernel entry code. They should usually not have
- the callable function annotation (ENDPROC) and should always be
- annotated with the unwind hint macros in asm/unwind_hints.h.
+ likely part of the kernel entry code. Such code should probably be
+ placed in a SYM_FUNC_CODE block with unwind hints.
6. file.o: warning: objtool: func()+0x26: sibling call from callable instruction with modified stack frame
- This is a dynamic jump or a jump to an undefined symbol. Objtool
- assumed it's a sibling call and detected that the frame pointer
- wasn't first restored to its original state.
+ This is a branch to an UNDEF symbol. Objtool assumed it's a
+ sibling call and detected that the stack wasn't first restored to its
+ original state.
- If it's not really a sibling call, you may need to move the
- destination code to the local file.
+ If it's not really a sibling call, you may need to use unwind hints
+ and/or move the destination code to the local file.
If the instruction is not actually in a callable function (e.g.
- kernel entry code), change ENDPROC to END and annotate manually with
- the unwind hint macros in asm/unwind_hints.h.
+ kernel entry code), use SYM_CODE_{START,END} and unwind hints.
7. file: warning: objtool: func()+0x5c: stack state mismatch
@@ -373,8 +381,8 @@ the objtool maintainers.
Another possibility is that the code has some asm or inline asm which
does some unusual things to the stack or the frame pointer. In such
- cases it's probably appropriate to use the unwind hint macros in
- asm/unwind_hints.h.
+ cases it's probably appropriate to use SYM_FUNC_CODE with unwind
+ hints.
8. file.o: warning: objtool: funcA() falls through to next function funcB()
@@ -384,17 +392,16 @@ the objtool maintainers.
can fall through into the next function. There could be different
reasons for this:
- 1) funcA()'s last instruction is a call to a "noreturn" function like
+ a) funcA()'s last instruction is a call to a "noreturn" function like
panic(). In this case the noreturn function needs to be added to
objtool's hard-coded global_noreturns array. Feel free to bug the
objtool maintainer, or you can submit a patch.
- 2) funcA() uses the unreachable() annotation in a section of code
+ b) funcA() uses the unreachable() annotation in a section of code
that is actually reachable.
- 3) If funcA() calls an inline function, the object code for funcA()
- might be corrupt due to a gcc bug. For more details, see:
- https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70646
+ c) Some undefined behavior like divide by zero.
+
9. file.o: warning: objtool: funcA() call to funcB() with UACCESS enabled
@@ -432,24 +439,26 @@ the objtool maintainers.
This limitation can be overcome by massaging the alternatives with
NOPs to shift the stack changes around so they no longer conflict.
+
11. file.o: warning: unannotated intra-function call
- This warning means that a direct call is done to a destination which
- is not at the beginning of a function. If this is a legit call, you
- can remove this warning by putting the ANNOTATE_INTRA_FUNCTION_CALL
- directive right before the call.
+ This warning means that a direct call is done to a destination which
+ is not at the beginning of a function. If this is a legit call, you
+ can remove this warning by putting the ANNOTATE_INTRA_FUNCTION_CALL
+ directive right before the call.
+
12. file.o: warning: func(): not an indirect call target
- This means that objtool is running with --ibt and a function expected
- to be an indirect call target is not. In particular, this happens for
- init_module() or cleanup_module() if a module relies on these special
- names and does not use module_init() / module_exit() macros to create
- them.
+ This means that objtool is running with --ibt and a function
+ expected to be an indirect call target is not. In particular, this
+ happens for init_module() or cleanup_module() if a module relies on
+ these special names and does not use module_init() / module_exit()
+ macros to create them.
If the error doesn't seem to make sense, it could be a bug in objtool.
-Feel free to ask the objtool maintainer for help.
+Feel free to ask objtool maintainers for help.
Adding exceptions
diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
index f56e27727534..8c20361dd100 100644
--- a/tools/objtool/Makefile
+++ b/tools/objtool/Makefile
@@ -37,7 +37,7 @@ OBJTOOL_CFLAGS := -Werror $(WARNINGS) $(KBUILD_HOSTCFLAGS) -g $(INCLUDES) $(LIBE
OBJTOOL_LDFLAGS := $(LIBELF_LIBS) $(LIBSUBCMD) $(KBUILD_HOSTLDFLAGS)
# Allow old libelf to be used:
-elfshdr := $(shell echo '$(pound)include <libelf.h>' | $(HOSTCC) $(OBJTOOL_CFLAGS) -x c -E - | grep elf_getshdr)
+elfshdr := $(shell echo '$(pound)include <libelf.h>' | $(HOSTCC) $(OBJTOOL_CFLAGS) -x c -E - 2>/dev/null | grep elf_getshdr)
OBJTOOL_CFLAGS += $(if $(elfshdr),,-DLIBELF_USE_DEPRECATED)
# Always want host compilation.
@@ -46,12 +46,6 @@ HOST_OVERRIDES := CC="$(HOSTCC)" LD="$(HOSTLD)" AR="$(HOSTAR)"
AWK = awk
MKDIR = mkdir
-ifeq ($(V),1)
- Q =
-else
- Q = @
-endif
-
BUILD_ORC := n
ifeq ($(SRCARCH),x86)
diff --git a/tools/objtool/arch/loongarch/decode.c b/tools/objtool/arch/loongarch/decode.c
index 69b66994f2a1..02e490555966 100644
--- a/tools/objtool/arch/loongarch/decode.c
+++ b/tools/objtool/arch/loongarch/decode.c
@@ -5,10 +5,7 @@
#include <asm/inst.h>
#include <asm/orc_types.h>
#include <linux/objtool_types.h>
-
-#ifndef EM_LOONGARCH
-#define EM_LOONGARCH 258
-#endif
+#include <arch/elf.h>
int arch_ftrace_match(char *name)
{
@@ -363,3 +360,26 @@ void arch_initial_func_cfi_state(struct cfi_init_state *state)
state->cfa.base = CFI_SP;
state->cfa.offset = 0;
}
+
+unsigned int arch_reloc_size(struct reloc *reloc)
+{
+ switch (reloc_type(reloc)) {
+ case R_LARCH_32:
+ case R_LARCH_32_PCREL:
+ return 4;
+ default:
+ return 8;
+ }
+}
+
+unsigned long arch_jump_table_sym_offset(struct reloc *reloc, struct reloc *table)
+{
+ switch (reloc_type(reloc)) {
+ case R_LARCH_32_PCREL:
+ case R_LARCH_64_PCREL:
+ return reloc->sym->offset + reloc_addend(reloc) -
+ (reloc_offset(reloc) - reloc_offset(table));
+ default:
+ return reloc->sym->offset + reloc_addend(reloc);
+ }
+}
diff --git a/tools/objtool/arch/loongarch/include/arch/elf.h b/tools/objtool/arch/loongarch/include/arch/elf.h
index 9623d663220e..ec79062c9554 100644
--- a/tools/objtool/arch/loongarch/include/arch/elf.h
+++ b/tools/objtool/arch/loongarch/include/arch/elf.h
@@ -18,6 +18,13 @@
#ifndef R_LARCH_32_PCREL
#define R_LARCH_32_PCREL 99
#endif
+#ifndef R_LARCH_64_PCREL
+#define R_LARCH_64_PCREL 109
+#endif
+
+#ifndef EM_LOONGARCH
+#define EM_LOONGARCH 258
+#endif
#define R_NONE R_LARCH_NONE
#define R_ABS32 R_LARCH_32
diff --git a/tools/objtool/arch/loongarch/special.c b/tools/objtool/arch/loongarch/special.c
index 87230ed570fd..e39f86d97002 100644
--- a/tools/objtool/arch/loongarch/special.c
+++ b/tools/objtool/arch/loongarch/special.c
@@ -1,5 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
+#include <string.h>
#include <objtool/special.h>
+#include <objtool/warn.h>
bool arch_support_alt_relocation(struct special_alt *special_alt,
struct instruction *insn,
@@ -8,9 +10,164 @@ bool arch_support_alt_relocation(struct special_alt *special_alt,
return false;
}
+struct table_info {
+ struct list_head jump_info;
+ unsigned long insn_offset;
+ unsigned long rodata_offset;
+};
+
+static void get_rodata_table_size_by_table_annotate(struct objtool_file *file,
+ struct instruction *insn,
+ unsigned long *table_size)
+{
+ struct section *rsec;
+ struct reloc *reloc;
+ struct list_head table_list;
+ struct table_info *orig_table;
+ struct table_info *next_table;
+ unsigned long tmp_insn_offset;
+ unsigned long tmp_rodata_offset;
+
+ rsec = find_section_by_name(file->elf, ".rela.discard.tablejump_annotate");
+ if (!rsec)
+ return;
+
+ INIT_LIST_HEAD(&table_list);
+
+ for_each_reloc(rsec, reloc) {
+ orig_table = malloc(sizeof(struct table_info));
+ if (!orig_table) {
+ WARN("malloc failed");
+ return;
+ }
+
+ orig_table->insn_offset = reloc->sym->offset + reloc_addend(reloc);
+ reloc++;
+ orig_table->rodata_offset = reloc->sym->offset + reloc_addend(reloc);
+
+ list_add_tail(&orig_table->jump_info, &table_list);
+
+ if (reloc_idx(reloc) + 1 == sec_num_entries(rsec))
+ break;
+ }
+
+ list_for_each_entry(orig_table, &table_list, jump_info) {
+ next_table = list_next_entry(orig_table, jump_info);
+ list_for_each_entry_from(next_table, &table_list, jump_info) {
+ if (next_table->rodata_offset < orig_table->rodata_offset) {
+ tmp_insn_offset = next_table->insn_offset;
+ tmp_rodata_offset = next_table->rodata_offset;
+ next_table->insn_offset = orig_table->insn_offset;
+ next_table->rodata_offset = orig_table->rodata_offset;
+ orig_table->insn_offset = tmp_insn_offset;
+ orig_table->rodata_offset = tmp_rodata_offset;
+ }
+ }
+ }
+
+ list_for_each_entry(orig_table, &table_list, jump_info) {
+ if (insn->offset == orig_table->insn_offset) {
+ next_table = list_next_entry(orig_table, jump_info);
+ if (&next_table->jump_info == &table_list) {
+ *table_size = 0;
+ return;
+ }
+
+ while (next_table->rodata_offset == orig_table->rodata_offset) {
+ next_table = list_next_entry(next_table, jump_info);
+ if (&next_table->jump_info == &table_list) {
+ *table_size = 0;
+ return;
+ }
+ }
+
+ *table_size = next_table->rodata_offset - orig_table->rodata_offset;
+ }
+ }
+}
+
+static struct reloc *find_reloc_by_table_annotate(struct objtool_file *file,
+ struct instruction *insn,
+ unsigned long *table_size)
+{
+ struct section *rsec;
+ struct reloc *reloc;
+ unsigned long offset;
+
+ rsec = find_section_by_name(file->elf, ".rela.discard.tablejump_annotate");
+ if (!rsec)
+ return NULL;
+
+ for_each_reloc(rsec, reloc) {
+ if (reloc->sym->sec->rodata)
+ continue;
+
+ if (strcmp(insn->sec->name, reloc->sym->sec->name))
+ continue;
+
+ offset = reloc->sym->offset + reloc_addend(reloc);
+ if (insn->offset == offset) {
+ get_rodata_table_size_by_table_annotate(file, insn, table_size);
+ reloc++;
+ return reloc;
+ }
+ }
+
+ return NULL;
+}
+
+static struct reloc *find_reloc_of_rodata_c_jump_table(struct section *sec,
+ unsigned long offset,
+ unsigned long *table_size)
+{
+ struct section *rsec;
+ struct reloc *reloc;
+
+ rsec = sec->rsec;
+ if (!rsec)
+ return NULL;
+
+ for_each_reloc(rsec, reloc) {
+ if (reloc_offset(reloc) > offset)
+ break;
+
+ if (!strcmp(reloc->sym->sec->name, C_JUMP_TABLE_SECTION)) {
+ *table_size = 0;
+ return reloc;
+ }
+ }
+
+ return NULL;
+}
+
struct reloc *arch_find_switch_table(struct objtool_file *file,
struct instruction *insn,
unsigned long *table_size)
{
- return NULL;
+ struct reloc *annotate_reloc;
+ struct reloc *rodata_reloc;
+ struct section *table_sec;
+ unsigned long table_offset;
+
+ annotate_reloc = find_reloc_by_table_annotate(file, insn, table_size);
+ if (!annotate_reloc) {
+ annotate_reloc = find_reloc_of_rodata_c_jump_table(
+ insn->sec, insn->offset, table_size);
+ if (!annotate_reloc)
+ return NULL;
+ }
+
+ table_sec = annotate_reloc->sym->sec;
+ table_offset = annotate_reloc->sym->offset + reloc_addend(annotate_reloc);
+
+ /*
+ * Each table entry has a rela associated with it. The rela
+ * should reference text in the same function as the original
+ * instruction.
+ */
+ rodata_reloc = find_reloc_by_dest(file->elf, table_sec, table_offset);
+ if (!rodata_reloc)
+ return NULL;
+
+ return rodata_reloc;
}
diff --git a/tools/objtool/arch/powerpc/decode.c b/tools/objtool/arch/powerpc/decode.c
index 53b55690f320..c851c51d4bd3 100644
--- a/tools/objtool/arch/powerpc/decode.c
+++ b/tools/objtool/arch/powerpc/decode.c
@@ -55,12 +55,17 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
switch (opcode) {
case 18: /* b[l][a] */
- if ((ins & 3) == 1) /* bl */
+ if (ins == 0x48000005) /* bl .+4 */
+ typ = INSN_OTHER;
+ else if (ins & 1) /* bl[a] */
typ = INSN_CALL;
+ else /* b[a] */
+ typ = INSN_JUMP_UNCONDITIONAL;
imm = ins & 0x3fffffc;
if (imm & 0x2000000)
imm -= 0x4000000;
+ imm |= ins & 2; /* AA flag */
break;
}
@@ -77,6 +82,9 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
unsigned long arch_jump_destination(struct instruction *insn)
{
+ if (insn->immediate & 2)
+ return insn->immediate & ~2;
+
return insn->offset + insn->immediate;
}
@@ -106,3 +114,17 @@ void arch_initial_func_cfi_state(struct cfi_init_state *state)
state->regs[CFI_RA].base = CFI_CFA;
state->regs[CFI_RA].offset = 0;
}
+
+unsigned int arch_reloc_size(struct reloc *reloc)
+{
+ switch (reloc_type(reloc)) {
+ case R_PPC_REL32:
+ case R_PPC_ADDR32:
+ case R_PPC_UADDR32:
+ case R_PPC_PLT32:
+ case R_PPC_PLTREL32:
+ return 4;
+ default:
+ return 8;
+ }
+}
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index fe1362c34564..7567c893f45e 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -850,5 +850,19 @@ bool arch_is_rethunk(struct symbol *sym)
bool arch_is_embedded_insn(struct symbol *sym)
{
return !strcmp(sym->name, "retbleed_return_thunk") ||
+ !strcmp(sym->name, "srso_alias_safe_ret") ||
!strcmp(sym->name, "srso_safe_ret");
}
+
+unsigned int arch_reloc_size(struct reloc *reloc)
+{
+ switch (reloc_type(reloc)) {
+ case R_X86_64_32:
+ case R_X86_64_32S:
+ case R_X86_64_PC32:
+ case R_X86_64_PLT32:
+ return 4;
+ default:
+ return 8;
+ }
+}
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index 387d56a7f5fb..5f761f420b8c 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -6,6 +6,10 @@
#include <subcmd/parse-options.h>
#include <string.h>
#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/sendfile.h>
#include <objtool/builtin.h>
#include <objtool/objtool.h>
@@ -14,6 +18,8 @@
"error: objtool: " format "\n", \
##__VA_ARGS__)
+const char *objname;
+
struct opts opts;
static const char * const check_usage[] = {
@@ -71,7 +77,7 @@ static const struct option check_options[] = {
OPT_BOOLEAN('i', "ibt", &opts.ibt, "validate and annotate IBT"),
OPT_BOOLEAN('m', "mcount", &opts.mcount, "annotate mcount/fentry calls for ftrace"),
OPT_BOOLEAN('n', "noinstr", &opts.noinstr, "validate noinstr rules"),
- OPT_BOOLEAN('o', "orc", &opts.orc, "generate ORC metadata"),
+ OPT_BOOLEAN(0, "orc", &opts.orc, "generate ORC metadata"),
OPT_BOOLEAN('r', "retpoline", &opts.retpoline, "validate and annotate retpoline usage"),
OPT_BOOLEAN(0, "rethunk", &opts.rethunk, "validate and annotate rethunk usage"),
OPT_BOOLEAN(0, "unret", &opts.unret, "validate entry unret placement"),
@@ -84,16 +90,17 @@ static const struct option check_options[] = {
OPT_CALLBACK_OPTARG(0, "dump", NULL, NULL, "orc", "dump metadata", parse_dump),
OPT_GROUP("Options:"),
- OPT_BOOLEAN(0, "backtrace", &opts.backtrace, "unwind on error"),
- OPT_BOOLEAN(0, "backup", &opts.backup, "create .orig files before modification"),
- OPT_BOOLEAN(0, "dry-run", &opts.dryrun, "don't write modifications"),
- OPT_BOOLEAN(0, "link", &opts.link, "object is a linked object"),
- OPT_BOOLEAN(0, "module", &opts.module, "object is part of a kernel module"),
- OPT_BOOLEAN(0, "mnop", &opts.mnop, "nop out mcount call sites"),
- OPT_BOOLEAN(0, "no-unreachable", &opts.no_unreachable, "skip 'unreachable instruction' warnings"),
- OPT_BOOLEAN(0, "sec-address", &opts.sec_address, "print section addresses in warnings"),
- OPT_BOOLEAN(0, "stats", &opts.stats, "print statistics"),
+ OPT_BOOLEAN(0, "backtrace", &opts.backtrace, "unwind on error"),
+ OPT_BOOLEAN(0, "dry-run", &opts.dryrun, "don't write modifications"),
+ OPT_BOOLEAN(0, "link", &opts.link, "object is a linked object"),
+ OPT_BOOLEAN(0, "module", &opts.module, "object is part of a kernel module"),
+ OPT_BOOLEAN(0, "mnop", &opts.mnop, "nop out mcount call sites"),
+ OPT_BOOLEAN(0, "no-unreachable", &opts.no_unreachable, "skip 'unreachable instruction' warnings"),
+ OPT_STRING('o', "output", &opts.output, "file", "output file name"),
+ OPT_BOOLEAN(0, "sec-address", &opts.sec_address, "print section addresses in warnings"),
+ OPT_BOOLEAN(0, "stats", &opts.stats, "print statistics"),
OPT_BOOLEAN('v', "verbose", &opts.verbose, "verbose warnings"),
+ OPT_BOOLEAN(0, "Werror", &opts.werror, "return error on warnings"),
OPT_END(),
};
@@ -131,6 +138,26 @@ int cmd_parse_options(int argc, const char **argv, const char * const usage[])
static bool opts_valid(void)
{
+ if (opts.mnop && !opts.mcount) {
+ ERROR("--mnop requires --mcount");
+ return false;
+ }
+
+ if (opts.noinstr && !opts.link) {
+ ERROR("--noinstr requires --link");
+ return false;
+ }
+
+ if (opts.ibt && !opts.link) {
+ ERROR("--ibt requires --link");
+ return false;
+ }
+
+ if (opts.unret && !opts.link) {
+ ERROR("--unret requires --link");
+ return false;
+ }
+
if (opts.hack_jump_label ||
opts.hack_noinstr ||
opts.ibt ||
@@ -151,11 +178,6 @@ static bool opts_valid(void)
return true;
}
- if (opts.unret && !opts.rethunk) {
- ERROR("--unret requires --rethunk");
- return false;
- }
-
if (opts.dump_orc)
return true;
@@ -163,76 +185,156 @@ static bool opts_valid(void)
return false;
}
-static bool mnop_opts_valid(void)
+static int copy_file(const char *src, const char *dst)
{
- if (opts.mnop && !opts.mcount) {
- ERROR("--mnop requires --mcount");
- return false;
+ size_t to_copy, copied;
+ int dst_fd, src_fd;
+ struct stat stat;
+ off_t offset = 0;
+
+ src_fd = open(src, O_RDONLY);
+ if (src_fd == -1) {
+ ERROR("can't open '%s' for reading", src);
+ return 1;
}
- return true;
-}
-
-static bool link_opts_valid(struct objtool_file *file)
-{
- if (opts.link)
- return true;
+ dst_fd = open(dst, O_WRONLY | O_CREAT | O_TRUNC, 0400);
+ if (dst_fd == -1) {
+ ERROR("can't open '%s' for writing", dst);
+ return 1;
+ }
- if (has_multiple_files(file->elf)) {
- ERROR("Linked object detected, forcing --link");
- opts.link = true;
- return true;
+ if (fstat(src_fd, &stat) == -1) {
+ perror("fstat");
+ return 1;
}
- if (opts.noinstr) {
- ERROR("--noinstr requires --link");
- return false;
+ if (fchmod(dst_fd, stat.st_mode) == -1) {
+ perror("fchmod");
+ return 1;
}
- if (opts.ibt) {
- ERROR("--ibt requires --link");
- return false;
+ for (to_copy = stat.st_size; to_copy > 0; to_copy -= copied) {
+ copied = sendfile(dst_fd, src_fd, &offset, to_copy);
+ if (copied == -1) {
+ perror("sendfile");
+ return 1;
+ }
}
- if (opts.unret) {
- ERROR("--unret requires --link");
- return false;
+ close(dst_fd);
+ close(src_fd);
+ return 0;
+}
+
+static char **save_argv(int argc, const char **argv)
+{
+ char **orig_argv;
+
+ orig_argv = calloc(argc, sizeof(char *));
+ if (!orig_argv) {
+ perror("calloc");
+ return NULL;
}
- return true;
+ for (int i = 0; i < argc; i++) {
+ orig_argv[i] = strdup(argv[i]);
+ if (!orig_argv[i]) {
+ perror("strdup");
+ return NULL;
+ }
+ };
+
+ return orig_argv;
}
+#define ORIG_SUFFIX ".orig"
+
int objtool_run(int argc, const char **argv)
{
- const char *objname;
struct objtool_file *file;
- int ret;
+ char *backup = NULL;
+ char **orig_argv;
+ int ret = 0;
- argc = cmd_parse_options(argc, argv, check_usage);
- objname = argv[0];
+ orig_argv = save_argv(argc, argv);
+ if (!orig_argv)
+ return 1;
+
+ cmd_parse_options(argc, argv, check_usage);
if (!opts_valid())
return 1;
+ objname = argv[0];
+
if (opts.dump_orc)
return orc_dump(objname);
+ if (!opts.dryrun && opts.output) {
+ /* copy original .o file to output file */
+ if (copy_file(objname, opts.output))
+ return 1;
+
+ /* from here on, work directly on the output file */
+ objname = opts.output;
+ }
+
file = objtool_open_read(objname);
if (!file)
- return 1;
+ goto err;
- if (!mnop_opts_valid())
- return 1;
-
- if (!link_opts_valid(file))
- return 1;
+ if (!opts.link && has_multiple_files(file->elf)) {
+ ERROR("Linked object requires --link");
+ goto err;
+ }
ret = check(file);
if (ret)
- return ret;
+ goto err;
- if (file->elf->changed)
- return elf_write(file->elf);
+ if (!opts.dryrun && file->elf->changed && elf_write(file->elf))
+ goto err;
return 0;
+
+err:
+ if (opts.dryrun)
+ goto err_msg;
+
+ if (opts.output) {
+ unlink(opts.output);
+ goto err_msg;
+ }
+
+ /*
+ * Make a backup before kbuild deletes the file so the error
+ * can be recreated without recompiling or relinking.
+ */
+ backup = malloc(strlen(objname) + strlen(ORIG_SUFFIX) + 1);
+ if (!backup) {
+ perror("malloc");
+ return 1;
+ }
+
+ strcpy(backup, objname);
+ strcat(backup, ORIG_SUFFIX);
+ if (copy_file(objname, backup))
+ return 1;
+
+err_msg:
+ fprintf(stderr, "%s", orig_argv[0]);
+
+ for (int i = 1; i < argc; i++) {
+ char *arg = orig_argv[i];
+
+ if (backup && !strcmp(arg, objname))
+ fprintf(stderr, " %s -o %s", backup, objname);
+ else
+ fprintf(stderr, " %s", arg);
+ }
+
+ fprintf(stderr, "\n");
+
+ return 1;
}
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 753dbc4f8198..ca3435acc326 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -227,6 +227,7 @@ static bool is_rust_noreturn(const struct symbol *func)
str_ends_with(func->name, "_4core9panicking18panic_bounds_check") ||
str_ends_with(func->name, "_4core9panicking19assert_failed_inner") ||
str_ends_with(func->name, "_4core9panicking36panic_misaligned_pointer_dereference") ||
+ strstr(func->name, "_4core9panicking13assert_failed") ||
strstr(func->name, "_4core9panicking11panic_const24panic_const_") ||
(strstr(func->name, "_4core5slice5index24slice_") &&
str_ends_with(func->name, "_fail"));
@@ -1283,15 +1284,6 @@ static void annotate_call_site(struct objtool_file *file,
if (!sym)
sym = reloc->sym;
- /*
- * Alternative replacement code is just template code which is
- * sometimes copied to the original instruction. For now, don't
- * annotate it. (In the future we might consider annotating the
- * original instruction if/when it ever makes sense to do so.)
- */
- if (!strcmp(insn->sec->name, ".altinstr_replacement"))
- return;
-
if (sym->static_call_tramp) {
list_add_tail(&insn->call_node, &file->static_call_list);
return;
@@ -1349,7 +1341,8 @@ static void annotate_call_site(struct objtool_file *file,
return;
}
- if (insn->type == INSN_CALL && !insn->sec->init)
+ if (insn->type == INSN_CALL && !insn->sec->init &&
+ !insn->_call_dest->embedded_insn)
list_add_tail(&insn->call_node, &file->call_list);
if (!sibling && dead_end_function(file, sym))
@@ -1943,6 +1936,11 @@ out:
return ret;
}
+__weak unsigned long arch_jump_table_sym_offset(struct reloc *reloc, struct reloc *table)
+{
+ return reloc->sym->offset + reloc_addend(reloc);
+}
+
static int add_jump_table(struct objtool_file *file, struct instruction *insn,
struct reloc *next_table)
{
@@ -1953,6 +1951,7 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn,
unsigned int prev_offset = 0;
struct reloc *reloc = table;
struct alternative *alt;
+ unsigned long sym_offset;
/*
* Each @reloc is a switch table relocation which points to the target
@@ -1967,15 +1966,24 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn,
break;
/* Make sure the table entries are consecutive: */
- if (prev_offset && reloc_offset(reloc) != prev_offset + 8)
+ if (prev_offset && reloc_offset(reloc) != prev_offset + arch_reloc_size(reloc))
break;
+ sym_offset = arch_jump_table_sym_offset(reloc, table);
+
/* Detect function pointers from contiguous objects: */
- if (reloc->sym->sec == pfunc->sec &&
- reloc_addend(reloc) == pfunc->offset)
+ if (reloc->sym->sec == pfunc->sec && sym_offset == pfunc->offset)
break;
- dest_insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc));
+ /*
+ * Clang sometimes leaves dangling unused jump table entries
+ * which point to the end of the function. Ignore them.
+ */
+ if (reloc->sym->sec == pfunc->sec &&
+ sym_offset == pfunc->offset + pfunc->len)
+ goto next;
+
+ dest_insn = find_insn(file, reloc->sym->sec, sym_offset);
if (!dest_insn)
break;
@@ -1992,6 +2000,7 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn,
alt->insn = dest_insn;
alt->next = insn->alts;
insn->alts = alt;
+next:
prev_offset = reloc_offset(reloc);
}
@@ -2013,6 +2022,7 @@ static void find_jump_table(struct objtool_file *file, struct symbol *func,
struct reloc *table_reloc;
struct instruction *dest_insn, *orig_insn = insn;
unsigned long table_size;
+ unsigned long sym_offset;
/*
* Backward search using the @first_jump_src links, these help avoid
@@ -2036,7 +2046,10 @@ static void find_jump_table(struct objtool_file *file, struct symbol *func,
table_reloc = arch_find_switch_table(file, insn, &table_size);
if (!table_reloc)
continue;
- dest_insn = find_insn(file, table_reloc->sym->sec, reloc_addend(table_reloc));
+
+ sym_offset = table_reloc->sym->offset + reloc_addend(table_reloc);
+
+ dest_insn = find_insn(file, table_reloc->sym->sec, sym_offset);
if (!dest_insn || !insn_func(dest_insn) || insn_func(dest_insn)->pfunc != func)
continue;
@@ -2264,7 +2277,7 @@ static int read_annotate(struct objtool_file *file,
if (sec->sh.sh_entsize != 8) {
static bool warned = false;
- if (!warned) {
+ if (!warned && opts.verbose) {
WARN("%s: dodgy linker, sh_entsize != 8", sec->name);
warned = true;
}
@@ -2462,13 +2475,14 @@ static void mark_rodata(struct objtool_file *file)
*
* - .rodata: can contain GCC switch tables
* - .rodata.<func>: same, if -fdata-sections is being used
- * - .rodata..c_jump_table: contains C annotated jump tables
+ * - .data.rel.ro.c_jump_table: contains C annotated jump tables
*
* .rodata.str1.* sections are ignored; they don't contain jump tables.
*/
for_each_sec(file, sec) {
- if (!strncmp(sec->name, ".rodata", 7) &&
- !strstr(sec->name, ".str1.")) {
+ if ((!strncmp(sec->name, ".rodata", 7) &&
+ !strstr(sec->name, ".str1.")) ||
+ !strncmp(sec->name, ".data.rel.ro", 12)) {
sec->rodata = true;
found = true;
}
@@ -4438,35 +4452,6 @@ static int validate_sls(struct objtool_file *file)
return warnings;
}
-static bool ignore_noreturn_call(struct instruction *insn)
-{
- struct symbol *call_dest = insn_call_dest(insn);
-
- /*
- * FIXME: hack, we need a real noreturn solution
- *
- * Problem is, exc_double_fault() may or may not return, depending on
- * whether CONFIG_X86_ESPFIX64 is set. But objtool has no visibility
- * to the kernel config.
- *
- * Other potential ways to fix it:
- *
- * - have compiler communicate __noreturn functions somehow
- * - remove CONFIG_X86_ESPFIX64
- * - read the .config file
- * - add a cmdline option
- * - create a generic objtool annotation format (vs a bunch of custom
- * formats) and annotate it
- */
- if (!strcmp(call_dest->name, "exc_double_fault")) {
- /* prevent further unreachable warnings for the caller */
- insn->sym->warned = 1;
- return true;
- }
-
- return false;
-}
-
static int validate_reachable_instructions(struct objtool_file *file)
{
struct instruction *insn, *prev_insn;
@@ -4483,8 +4468,8 @@ static int validate_reachable_instructions(struct objtool_file *file)
prev_insn = prev_insn_same_sec(file, insn);
if (prev_insn && prev_insn->dead_end) {
call_dest = insn_call_dest(prev_insn);
- if (call_dest && !ignore_noreturn_call(prev_insn)) {
- WARN_INSN(insn, "%s() is missing a __noreturn annotation",
+ if (call_dest) {
+ WARN_INSN(insn, "%s() missing __noreturn in .c/.h or NORETURN() in noreturns.h",
call_dest->name);
warnings++;
continue;
@@ -4554,7 +4539,7 @@ static int disas_warned_funcs(struct objtool_file *file)
char *funcs = NULL, *tmp;
for_each_sym(file, sym) {
- if (sym->warned) {
+ if (sym->warnings) {
if (!funcs) {
funcs = malloc(strlen(sym->name) + 1);
strcpy(funcs, sym->name);
@@ -4612,8 +4597,10 @@ int check(struct objtool_file *file)
init_cfi_state(&force_undefined_cfi);
force_undefined_cfi.force_undefined = true;
- if (!cfi_hash_alloc(1UL << (file->elf->symbol_bits - 3)))
+ if (!cfi_hash_alloc(1UL << (file->elf->symbol_bits - 3))) {
+ ret = -1;
goto out;
+ }
cfi_hash_add(&init_cfi);
cfi_hash_add(&func_cfi);
@@ -4630,7 +4617,7 @@ int check(struct objtool_file *file)
if (opts.retpoline) {
ret = validate_retpoline(file);
if (ret < 0)
- return ret;
+ goto out;
warnings += ret;
}
@@ -4666,7 +4653,7 @@ int check(struct objtool_file *file)
*/
ret = validate_unrets(file);
if (ret < 0)
- return ret;
+ goto out;
warnings += ret;
}
@@ -4729,7 +4716,7 @@ int check(struct objtool_file *file)
if (opts.prefix) {
ret = add_prefix_symbols(file);
if (ret < 0)
- return ret;
+ goto out;
warnings += ret;
}
@@ -4761,9 +4748,18 @@ int check(struct objtool_file *file)
out:
/*
- * For now, don't fail the kernel build on fatal warnings. These
- * errors are still fairly common due to the growing matrix of
- * supported toolchains and their recent pace of change.
+ * CONFIG_OBJTOOL_WERROR upgrades all warnings (and errors) to actual
+ * errors.
+ *
+ * Note that even "fatal" type errors don't actually return an error
+ * without CONFIG_OBJTOOL_WERROR. That probably needs improved at some
+ * point.
*/
+ if (opts.werror && (ret || warnings)) {
+ if (warnings)
+ WARN("%d warning(s) upgraded to errors", warnings);
+ return 1;
+ }
+
return 0;
}
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 6f64d611faea..be4f4b62730c 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -1302,9 +1302,6 @@ int elf_write(struct elf *elf)
struct section *sec;
Elf_Scn *s;
- if (opts.dryrun)
- return 0;
-
/* Update changed relocation sections and section headers: */
list_for_each_entry(sec, &elf->sections, list) {
if (sec->truncate)
diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h
index d63b46a19f39..089a1acc48a8 100644
--- a/tools/objtool/include/objtool/arch.h
+++ b/tools/objtool/include/objtool/arch.h
@@ -97,4 +97,7 @@ int arch_rewrite_retpolines(struct objtool_file *file);
bool arch_pc_relative_reloc(struct reloc *reloc);
+unsigned int arch_reloc_size(struct reloc *reloc);
+unsigned long arch_jump_table_sym_offset(struct reloc *reloc, struct reloc *table);
+
#endif /* _ARCH_H */
diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h
index fcca6662c8b4..0fafd0f7a209 100644
--- a/tools/objtool/include/objtool/builtin.h
+++ b/tools/objtool/include/objtool/builtin.h
@@ -29,15 +29,16 @@ struct opts {
/* options: */
bool backtrace;
- bool backup;
bool dryrun;
bool link;
bool mnop;
bool module;
bool no_unreachable;
+ const char *output;
bool sec_address;
bool stats;
bool verbose;
+ bool werror;
};
extern struct opts opts;
diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h
index d7e815c2fd15..223ac1c24b90 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -65,10 +65,10 @@ struct symbol {
u8 return_thunk : 1;
u8 fentry : 1;
u8 profiling_func : 1;
- u8 warned : 1;
u8 embedded_insn : 1;
u8 local_label : 1;
u8 frame_pointer : 1;
+ u8 warnings : 2;
struct list_head pv_target;
struct reloc *relocs;
};
diff --git a/tools/objtool/include/objtool/special.h b/tools/objtool/include/objtool/special.h
index e7ee7ffccefd..e049679bb17b 100644
--- a/tools/objtool/include/objtool/special.h
+++ b/tools/objtool/include/objtool/special.h
@@ -10,7 +10,7 @@
#include <objtool/check.h>
#include <objtool/elf.h>
-#define C_JUMP_TABLE_SECTION ".rodata..c_jump_table"
+#define C_JUMP_TABLE_SECTION ".data.rel.ro.c_jump_table"
struct special_alt {
struct list_head list;
diff --git a/tools/objtool/include/objtool/warn.h b/tools/objtool/include/objtool/warn.h
index ac04d3fe4dd9..e72b9d630551 100644
--- a/tools/objtool/include/objtool/warn.h
+++ b/tools/objtool/include/objtool/warn.h
@@ -43,8 +43,10 @@ static inline char *offstr(struct section *sec, unsigned long offset)
#define WARN(format, ...) \
fprintf(stderr, \
- "%s: warning: objtool: " format "\n", \
- objname, ##__VA_ARGS__)
+ "%s: %s: objtool: " format "\n", \
+ objname, \
+ opts.werror ? "error" : "warning", \
+ ##__VA_ARGS__)
#define WARN_FUNC(format, sec, offset, ...) \
({ \
@@ -53,14 +55,22 @@ static inline char *offstr(struct section *sec, unsigned long offset)
free(_str); \
})
+#define WARN_LIMIT 2
+
#define WARN_INSN(insn, format, ...) \
({ \
struct instruction *_insn = (insn); \
- if (!_insn->sym || !_insn->sym->warned) \
+ BUILD_BUG_ON(WARN_LIMIT > 2); \
+ if (!_insn->sym || _insn->sym->warnings < WARN_LIMIT) { \
WARN_FUNC(format, _insn->sec, _insn->offset, \
##__VA_ARGS__); \
- if (_insn->sym) \
- _insn->sym->warned = 1; \
+ if (_insn->sym) \
+ _insn->sym->warnings++; \
+ } else if (_insn->sym && _insn->sym->warnings == WARN_LIMIT) { \
+ WARN_FUNC("skipping duplicate warning(s)", \
+ _insn->sec, _insn->offset); \
+ _insn->sym->warnings++; \
+ } \
})
#define BT_INSN(insn, format, ...) \
diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h
index b2174894f9f7..eacfe3b0a8d1 100644
--- a/tools/objtool/noreturns.h
+++ b/tools/objtool/noreturns.h
@@ -16,10 +16,11 @@ NORETURN(__tdx_hypercall_failed)
NORETURN(__ubsan_handle_builtin_unreachable)
NORETURN(__x64_sys_exit)
NORETURN(__x64_sys_exit_group)
+NORETURN(acpi_processor_ffh_play_dead)
NORETURN(arch_cpu_idle_dead)
NORETURN(bch2_trans_in_restart_error)
NORETURN(bch2_trans_restart_error)
-NORETURN(bch2_trans_unlocked_error)
+NORETURN(bch2_trans_unlocked_or_in_restart_error)
NORETURN(cpu_bringup_and_idle)
NORETURN(cpu_startup_entry)
NORETURN(do_exit)
@@ -34,6 +35,7 @@ NORETURN(kunit_try_catch_throw)
NORETURN(machine_real_restart)
NORETURN(make_task_dead)
NORETURN(mpt_halt_firmware)
+NORETURN(mwait_play_dead)
NORETURN(nmi_panic_self_stop)
NORETURN(panic)
NORETURN(panic_smp_self_stop)
diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
index f40febdd6e36..1c73fb62fd57 100644
--- a/tools/objtool/objtool.c
+++ b/tools/objtool/objtool.c
@@ -18,87 +18,19 @@
bool help;
-const char *objname;
static struct objtool_file file;
-static bool objtool_create_backup(const char *_objname)
+struct objtool_file *objtool_open_read(const char *filename)
{
- int len = strlen(_objname);
- char *buf, *base, *name = malloc(len+6);
- int s, d, l, t;
-
- if (!name) {
- perror("failed backup name malloc");
- return false;
- }
-
- strcpy(name, _objname);
- strcpy(name + len, ".orig");
-
- d = open(name, O_CREAT|O_WRONLY|O_TRUNC, 0644);
- if (d < 0) {
- perror("failed to create backup file");
- return false;
- }
-
- s = open(_objname, O_RDONLY);
- if (s < 0) {
- perror("failed to open orig file");
- return false;
- }
-
- buf = malloc(4096);
- if (!buf) {
- perror("failed backup data malloc");
- return false;
- }
-
- while ((l = read(s, buf, 4096)) > 0) {
- base = buf;
- do {
- t = write(d, base, l);
- if (t < 0) {
- perror("failed backup write");
- return false;
- }
- base += t;
- l -= t;
- } while (l);
- }
-
- if (l < 0) {
- perror("failed backup read");
- return false;
- }
-
- free(name);
- free(buf);
- close(d);
- close(s);
-
- return true;
-}
-
-struct objtool_file *objtool_open_read(const char *_objname)
-{
- if (objname) {
- if (strcmp(objname, _objname)) {
- WARN("won't handle more than one file at a time");
- return NULL;
- }
- return &file;
+ if (file.elf) {
+ WARN("won't handle more than one file at a time");
+ return NULL;
}
- objname = _objname;
- file.elf = elf_open_read(objname, O_RDWR);
+ file.elf = elf_open_read(filename, O_RDWR);
if (!file.elf)
return NULL;
- if (opts.backup && !objtool_create_backup(objname)) {
- WARN("can't create backup file");
- return NULL;
- }
-
hash_init(file.insn_hash);
INIT_LIST_HEAD(&file.retpoline_call_list);
INIT_LIST_HEAD(&file.return_thunk_list);
diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c
index a62247efb64f..05ef0e297837 100644
--- a/tools/objtool/orc_dump.c
+++ b/tools/objtool/orc_dump.c
@@ -10,7 +10,7 @@
#include <objtool/warn.h>
#include <objtool/endianness.h>
-int orc_dump(const char *_objname)
+int orc_dump(const char *filename)
{
int fd, nr_entries, i, *orc_ip = NULL, orc_size = 0;
struct orc_entry *orc = NULL;
@@ -26,12 +26,9 @@ int orc_dump(const char *_objname)
Elf_Data *data, *symtab = NULL, *rela_orc_ip = NULL;
struct elf dummy_elf = {};
-
- objname = _objname;
-
elf_version(EV_CURRENT);
- fd = open(objname, O_RDONLY);
+ fd = open(filename, O_RDONLY);
if (fd == -1) {
perror("open");
return -1;
diff --git a/tools/pci/Build b/tools/pci/Build
deleted file mode 100644
index c375aea21790..000000000000
--- a/tools/pci/Build
+++ /dev/null
@@ -1 +0,0 @@
-pcitest-y += pcitest.o
diff --git a/tools/pci/Makefile b/tools/pci/Makefile
deleted file mode 100644
index 62d41f1a1e2c..000000000000
--- a/tools/pci/Makefile
+++ /dev/null
@@ -1,58 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-include ../scripts/Makefile.include
-
-bindir ?= /usr/bin
-
-ifeq ($(srctree),)
-srctree := $(patsubst %/,%,$(dir $(CURDIR)))
-srctree := $(patsubst %/,%,$(dir $(srctree)))
-endif
-
-# Do not use make's built-in rules
-# (this improves performance and avoids hard-to-debug behaviour);
-MAKEFLAGS += -r
-
-CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include
-
-ALL_TARGETS := pcitest
-ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS))
-
-SCRIPTS := pcitest.sh
-
-all: $(ALL_PROGRAMS)
-
-export srctree OUTPUT CC LD CFLAGS
-include $(srctree)/tools/build/Makefile.include
-
-#
-# We need the following to be outside of kernel tree
-#
-$(OUTPUT)include/linux/: ../../include/uapi/linux/
- mkdir -p $(OUTPUT)include/linux/ 2>&1 || true
- ln -sf $(CURDIR)/../../include/uapi/linux/pcitest.h $@
-
-prepare: $(OUTPUT)include/linux/
-
-PCITEST_IN := $(OUTPUT)pcitest-in.o
-$(PCITEST_IN): prepare FORCE
- $(Q)$(MAKE) $(build)=pcitest
-$(OUTPUT)pcitest: $(PCITEST_IN)
- $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
-
-clean:
- rm -f $(ALL_PROGRAMS)
- rm -rf $(OUTPUT)include/
- find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
-
-install: $(ALL_PROGRAMS)
- install -d -m 755 $(DESTDIR)$(bindir); \
- for program in $(ALL_PROGRAMS); do \
- install $$program $(DESTDIR)$(bindir); \
- done; \
- for script in $(SCRIPTS); do \
- install $$script $(DESTDIR)$(bindir); \
- done
-
-FORCE:
-
-.PHONY: all install clean FORCE prepare
diff --git a/tools/pci/pcitest.c b/tools/pci/pcitest.c
deleted file mode 100644
index 7b530d838d40..000000000000
--- a/tools/pci/pcitest.c
+++ /dev/null
@@ -1,250 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/**
- * Userspace PCI Endpoint Test Module
- *
- * Copyright (C) 2017 Texas Instruments
- * Author: Kishon Vijay Abraham I <kishon@ti.com>
- */
-
-#include <errno.h>
-#include <fcntl.h>
-#include <stdbool.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/ioctl.h>
-#include <unistd.h>
-
-#include <linux/pcitest.h>
-
-static char *result[] = { "NOT OKAY", "OKAY" };
-static char *irq[] = { "LEGACY", "MSI", "MSI-X" };
-
-struct pci_test {
- char *device;
- char barnum;
- bool legacyirq;
- unsigned int msinum;
- unsigned int msixnum;
- int irqtype;
- bool set_irqtype;
- bool get_irqtype;
- bool clear_irq;
- bool read;
- bool write;
- bool copy;
- unsigned long size;
- bool use_dma;
-};
-
-static int run_test(struct pci_test *test)
-{
- struct pci_endpoint_test_xfer_param param = {};
- int ret = -EINVAL;
- int fd;
-
- fd = open(test->device, O_RDWR);
- if (fd < 0) {
- perror("can't open PCI Endpoint Test device");
- return -ENODEV;
- }
-
- if (test->barnum >= 0 && test->barnum <= 5) {
- ret = ioctl(fd, PCITEST_BAR, test->barnum);
- fprintf(stdout, "BAR%d:\t\t", test->barnum);
- if (ret < 0)
- fprintf(stdout, "TEST FAILED\n");
- else
- fprintf(stdout, "%s\n", result[ret]);
- }
-
- if (test->set_irqtype) {
- ret = ioctl(fd, PCITEST_SET_IRQTYPE, test->irqtype);
- fprintf(stdout, "SET IRQ TYPE TO %s:\t\t", irq[test->irqtype]);
- if (ret < 0)
- fprintf(stdout, "FAILED\n");
- else
- fprintf(stdout, "%s\n", result[ret]);
- }
-
- if (test->get_irqtype) {
- ret = ioctl(fd, PCITEST_GET_IRQTYPE);
- fprintf(stdout, "GET IRQ TYPE:\t\t");
- if (ret < 0)
- fprintf(stdout, "FAILED\n");
- else
- fprintf(stdout, "%s\n", irq[ret]);
- }
-
- if (test->clear_irq) {
- ret = ioctl(fd, PCITEST_CLEAR_IRQ);
- fprintf(stdout, "CLEAR IRQ:\t\t");
- if (ret < 0)
- fprintf(stdout, "FAILED\n");
- else
- fprintf(stdout, "%s\n", result[ret]);
- }
-
- if (test->legacyirq) {
- ret = ioctl(fd, PCITEST_LEGACY_IRQ, 0);
- fprintf(stdout, "LEGACY IRQ:\t");
- if (ret < 0)
- fprintf(stdout, "TEST FAILED\n");
- else
- fprintf(stdout, "%s\n", result[ret]);
- }
-
- if (test->msinum > 0 && test->msinum <= 32) {
- ret = ioctl(fd, PCITEST_MSI, test->msinum);
- fprintf(stdout, "MSI%u:\t\t", test->msinum);
- if (ret < 0)
- fprintf(stdout, "TEST FAILED\n");
- else
- fprintf(stdout, "%s\n", result[ret]);
- }
-
- if (test->msixnum > 0 && test->msixnum <= 2048) {
- ret = ioctl(fd, PCITEST_MSIX, test->msixnum);
- fprintf(stdout, "MSI-X%u:\t\t", test->msixnum);
- if (ret < 0)
- fprintf(stdout, "TEST FAILED\n");
- else
- fprintf(stdout, "%s\n", result[ret]);
- }
-
- if (test->write) {
- param.size = test->size;
- if (test->use_dma)
- param.flags = PCITEST_FLAGS_USE_DMA;
- ret = ioctl(fd, PCITEST_WRITE, &param);
- fprintf(stdout, "WRITE (%7lu bytes):\t\t", test->size);
- if (ret < 0)
- fprintf(stdout, "TEST FAILED\n");
- else
- fprintf(stdout, "%s\n", result[ret]);
- }
-
- if (test->read) {
- param.size = test->size;
- if (test->use_dma)
- param.flags = PCITEST_FLAGS_USE_DMA;
- ret = ioctl(fd, PCITEST_READ, &param);
- fprintf(stdout, "READ (%7lu bytes):\t\t", test->size);
- if (ret < 0)
- fprintf(stdout, "TEST FAILED\n");
- else
- fprintf(stdout, "%s\n", result[ret]);
- }
-
- if (test->copy) {
- param.size = test->size;
- if (test->use_dma)
- param.flags = PCITEST_FLAGS_USE_DMA;
- ret = ioctl(fd, PCITEST_COPY, &param);
- fprintf(stdout, "COPY (%7lu bytes):\t\t", test->size);
- if (ret < 0)
- fprintf(stdout, "TEST FAILED\n");
- else
- fprintf(stdout, "%s\n", result[ret]);
- }
-
- fflush(stdout);
- close(fd);
- return (ret < 0) ? ret : 1 - ret; /* return 0 if test succeeded */
-}
-
-int main(int argc, char **argv)
-{
- int c;
- struct pci_test *test;
-
- test = calloc(1, sizeof(*test));
- if (!test) {
- perror("Fail to allocate memory for pci_test\n");
- return -ENOMEM;
- }
-
- /* since '0' is a valid BAR number, initialize it to -1 */
- test->barnum = -1;
-
- /* set default size as 100KB */
- test->size = 0x19000;
-
- /* set default endpoint device */
- test->device = "/dev/pci-endpoint-test.0";
-
- while ((c = getopt(argc, argv, "D:b:m:x:i:deIlhrwcs:")) != EOF)
- switch (c) {
- case 'D':
- test->device = optarg;
- continue;
- case 'b':
- test->barnum = atoi(optarg);
- if (test->barnum < 0 || test->barnum > 5)
- goto usage;
- continue;
- case 'l':
- test->legacyirq = true;
- continue;
- case 'm':
- test->msinum = atoi(optarg);
- if (test->msinum < 1 || test->msinum > 32)
- goto usage;
- continue;
- case 'x':
- test->msixnum = atoi(optarg);
- if (test->msixnum < 1 || test->msixnum > 2048)
- goto usage;
- continue;
- case 'i':
- test->irqtype = atoi(optarg);
- if (test->irqtype < 0 || test->irqtype > 2)
- goto usage;
- test->set_irqtype = true;
- continue;
- case 'I':
- test->get_irqtype = true;
- continue;
- case 'r':
- test->read = true;
- continue;
- case 'w':
- test->write = true;
- continue;
- case 'c':
- test->copy = true;
- continue;
- case 'e':
- test->clear_irq = true;
- continue;
- case 's':
- test->size = strtoul(optarg, NULL, 0);
- continue;
- case 'd':
- test->use_dma = true;
- continue;
- case 'h':
- default:
-usage:
- fprintf(stderr,
- "usage: %s [options]\n"
- "Options:\n"
- "\t-D <dev> PCI endpoint test device {default: /dev/pci-endpoint-test.0}\n"
- "\t-b <bar num> BAR test (bar number between 0..5)\n"
- "\t-m <msi num> MSI test (msi number between 1..32)\n"
- "\t-x <msix num> \tMSI-X test (msix number between 1..2048)\n"
- "\t-i <irq type> \tSet IRQ type (0 - Legacy, 1 - MSI, 2 - MSI-X)\n"
- "\t-e Clear IRQ\n"
- "\t-I Get current IRQ type configured\n"
- "\t-d Use DMA\n"
- "\t-l Legacy IRQ test\n"
- "\t-r Read buffer test\n"
- "\t-w Write buffer test\n"
- "\t-c Copy buffer test\n"
- "\t-s <size> Size of buffer {default: 100KB}\n"
- "\t-h Print this help message\n",
- argv[0]);
- return -EINVAL;
- }
-
- return run_test(test);
-}
diff --git a/tools/pci/pcitest.sh b/tools/pci/pcitest.sh
deleted file mode 100644
index 75ed48ff2990..000000000000
--- a/tools/pci/pcitest.sh
+++ /dev/null
@@ -1,72 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-
-echo "BAR tests"
-echo
-
-bar=0
-
-while [ $bar -lt 6 ]
-do
- pcitest -b $bar
- bar=`expr $bar + 1`
-done
-echo
-
-echo "Interrupt tests"
-echo
-
-pcitest -i 0
-pcitest -l
-
-pcitest -i 1
-msi=1
-
-while [ $msi -lt 33 ]
-do
- pcitest -m $msi
- msi=`expr $msi + 1`
-done
-echo
-
-pcitest -i 2
-msix=1
-
-while [ $msix -lt 2049 ]
-do
- pcitest -x $msix
- msix=`expr $msix + 1`
-done
-echo
-
-echo "Read Tests"
-echo
-
-pcitest -i 1
-
-pcitest -r -s 1
-pcitest -r -s 1024
-pcitest -r -s 1025
-pcitest -r -s 1024000
-pcitest -r -s 1024001
-echo
-
-echo "Write Tests"
-echo
-
-pcitest -w -s 1
-pcitest -w -s 1024
-pcitest -w -s 1025
-pcitest -w -s 1024000
-pcitest -w -s 1024001
-echo
-
-echo "Copy Tests"
-echo
-
-pcitest -c -s 1
-pcitest -c -s 1024
-pcitest -c -s 1025
-pcitest -c -s 1024000
-pcitest -c -s 1024001
-echo
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 55d6ce9ea52f..05c083bb1122 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -161,47 +161,6 @@ export VPATH
SOURCE := $(shell ln -sf $(srctree)/tools/perf $(OUTPUT)/source)
endif
-# Beautify output
-# ---------------------------------------------------------------------------
-#
-# Most of build commands in Kbuild start with "cmd_". You can optionally define
-# "quiet_cmd_*". If defined, the short log is printed. Otherwise, no log from
-# that command is printed by default.
-#
-# e.g.)
-# quiet_cmd_depmod = DEPMOD $(MODLIB)
-# cmd_depmod = $(srctree)/scripts/depmod.sh $(DEPMOD) $(KERNELRELEASE)
-#
-# A simple variant is to prefix commands with $(Q) - that's useful
-# for commands that shall be hidden in non-verbose mode.
-#
-# $(Q)$(MAKE) $(build)=scripts/basic
-#
-# To put more focus on warnings, be less verbose as default
-# Use 'make V=1' to see the full commands
-
-ifeq ($(V),1)
- quiet =
- Q =
-else
- quiet=quiet_
- Q=@
-endif
-
-# If the user is running make -s (silent mode), suppress echoing of commands
-# make-4.0 (and later) keep single letter options in the 1st word of MAKEFLAGS.
-ifeq ($(filter 3.%,$(MAKE_VERSION)),)
-short-opts := $(firstword -$(MAKEFLAGS))
-else
-short-opts := $(filter-out --%,$(MAKEFLAGS))
-endif
-
-ifneq ($(findstring s,$(short-opts)),)
- quiet=silent_
-endif
-
-export quiet Q
-
# Do not use make's built-in rules
# (this improves performance and avoids hard-to-debug behaviour);
MAKEFLAGS += -r
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index d7c7d29291fb..d466447ae928 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2107,8 +2107,12 @@ static int trace__read_syscall_info(struct trace *trace, int id)
return PTR_ERR(sc->tp_format);
}
+ /*
+ * The tracepoint format contains __syscall_nr field, so it's one more
+ * than the actual number of syscall arguments.
+ */
if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ?
- RAW_SYSCALL_ARGS_NUM : sc->tp_format->format.nr_fields))
+ RAW_SYSCALL_ARGS_NUM : sc->tp_format->format.nr_fields - 1))
return -ENOMEM;
sc->args = sc->tp_format->format.fields;
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index d3c6e10dce73..a4499e5a6f9c 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -26,8 +26,6 @@ FILES=(
"include/linux/hash.h"
"include/linux/list-sort.h"
"include/uapi/linux/hw_breakpoint.h"
- "arch/x86/include/asm/disabled-features.h"
- "arch/x86/include/asm/required-features.h"
"arch/x86/include/asm/cpufeatures.h"
"arch/x86/include/asm/inat_types.h"
"arch/x86/include/asm/emulate_prefix.h"
diff --git a/tools/perf/tests/shell/trace_btf_enum.sh b/tools/perf/tests/shell/trace_btf_enum.sh
index 5a3b8a5a9b5c..8d1e6bbeac90 100755
--- a/tools/perf/tests/shell/trace_btf_enum.sh
+++ b/tools/perf/tests/shell/trace_btf_enum.sh
@@ -26,8 +26,12 @@ check_vmlinux() {
trace_landlock() {
echo "Tracing syscall ${syscall}"
- # test flight just to see if landlock_add_rule and libbpf are available
- $TESTPROG
+ # test flight just to see if landlock_add_rule is available
+ if ! perf trace $TESTPROG 2>&1 | grep -q landlock
+ then
+ echo "No landlock system call found, skipping to non-syscall tracing."
+ return
+ fi
if perf trace -e $syscall $TESTPROG 2>&1 | \
grep -q -E ".*landlock_add_rule\(ruleset_fd: 11, rule_type: (LANDLOCK_RULE_PATH_BENEATH|LANDLOCK_RULE_NET_PORT), rule_attr: 0x[a-f0-9]+, flags: 45\) = -1.*"
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 0d2ea22bd9e4..31bb326b07a6 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -2100,6 +2100,57 @@ int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel,
return 0;
}
+const char * const perf_disassembler__strs[] = {
+ [PERF_DISASM_UNKNOWN] = "unknown",
+ [PERF_DISASM_LLVM] = "llvm",
+ [PERF_DISASM_CAPSTONE] = "capstone",
+ [PERF_DISASM_OBJDUMP] = "objdump",
+};
+
+
+static void annotation_options__add_disassembler(struct annotation_options *options,
+ enum perf_disassembler dis)
+{
+ for (u8 i = 0; i < ARRAY_SIZE(options->disassemblers); i++) {
+ if (options->disassemblers[i] == dis) {
+ /* Disassembler is already present then don't add again. */
+ return;
+ }
+ if (options->disassemblers[i] == PERF_DISASM_UNKNOWN) {
+ /* Found a free slot. */
+ options->disassemblers[i] = dis;
+ return;
+ }
+ }
+ pr_err("Failed to add disassembler %d\n", dis);
+}
+
+static int annotation_options__add_disassemblers_str(struct annotation_options *options,
+ const char *str)
+{
+ while (str && *str != '\0') {
+ const char *comma = strchr(str, ',');
+ int len = comma ? comma - str : (int)strlen(str);
+ bool match = false;
+
+ for (u8 i = 0; i < ARRAY_SIZE(perf_disassembler__strs); i++) {
+ const char *dis_str = perf_disassembler__strs[i];
+
+ if (len == (int)strlen(dis_str) && !strncmp(str, dis_str, len)) {
+ annotation_options__add_disassembler(options, i);
+ match = true;
+ break;
+ }
+ }
+ if (!match) {
+ pr_err("Invalid disassembler '%.*s'\n", len, str);
+ return -1;
+ }
+ str = comma ? comma + 1 : NULL;
+ }
+ return 0;
+}
+
static int annotation__config(const char *var, const char *value, void *data)
{
struct annotation_options *opt = data;
@@ -2115,11 +2166,10 @@ static int annotation__config(const char *var, const char *value, void *data)
else if (opt->offset_level < ANNOTATION__MIN_OFFSET_LEVEL)
opt->offset_level = ANNOTATION__MIN_OFFSET_LEVEL;
} else if (!strcmp(var, "annotate.disassemblers")) {
- opt->disassemblers_str = strdup(value);
- if (!opt->disassemblers_str) {
- pr_err("Not enough memory for annotate.disassemblers\n");
- return -1;
- }
+ int err = annotation_options__add_disassemblers_str(opt, value);
+
+ if (err)
+ return err;
} else if (!strcmp(var, "annotate.hide_src_code")) {
opt->hide_src_code = perf_config_bool("hide_src_code", value);
} else if (!strcmp(var, "annotate.jump_arrows")) {
@@ -2185,9 +2235,25 @@ void annotation_options__exit(void)
zfree(&annotate_opts.objdump_path);
}
+static void annotation_options__default_init_disassemblers(struct annotation_options *options)
+{
+ if (options->disassemblers[0] != PERF_DISASM_UNKNOWN) {
+ /* Already initialized. */
+ return;
+ }
+#ifdef HAVE_LIBLLVM_SUPPORT
+ annotation_options__add_disassembler(options, PERF_DISASM_LLVM);
+#endif
+#ifdef HAVE_LIBCAPSTONE_SUPPORT
+ annotation_options__add_disassembler(options, PERF_DISASM_CAPSTONE);
+#endif
+ annotation_options__add_disassembler(options, PERF_DISASM_OBJDUMP);
+}
+
void annotation_config__init(void)
{
perf_config(annotation__config, &annotate_opts);
+ annotation_options__default_init_disassemblers(&annotate_opts);
}
static unsigned int parse_percent_type(char *str1, char *str2)
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 0ba5846dad4d..98db1b88daf4 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -34,8 +34,13 @@ struct annotated_data_type;
#define ANNOTATION__BR_CNTR_WIDTH 30
#define ANNOTATION_DUMMY_LEN 256
-// llvm, capstone, objdump
-#define MAX_DISASSEMBLERS 3
+enum perf_disassembler {
+ PERF_DISASM_UNKNOWN = 0,
+ PERF_DISASM_LLVM,
+ PERF_DISASM_CAPSTONE,
+ PERF_DISASM_OBJDUMP,
+};
+#define MAX_DISASSEMBLERS (PERF_DISASM_OBJDUMP + 1)
struct annotation_options {
bool hide_src_code,
@@ -52,14 +57,12 @@ struct annotation_options {
annotate_src,
full_addr;
u8 offset_level;
- u8 nr_disassemblers;
+ u8 disassemblers[MAX_DISASSEMBLERS];
int min_pcnt;
int max_lines;
int context;
char *objdump_path;
char *disassembler_style;
- const char *disassemblers_str;
- const char *disassemblers[MAX_DISASSEMBLERS];
const char *prefix;
const char *prefix_strip;
unsigned int percent_type;
@@ -134,6 +137,8 @@ struct disasm_line {
struct annotation_line al;
};
+extern const char * const perf_disassembler__strs[];
+
void annotation_line__add(struct annotation_line *al, struct list_head *head);
static inline double annotation_data__percent(struct annotation_data *data,
diff --git a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
index 4a62ed593e84..e4352881e3fa 100644
--- a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
+++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
@@ -431,9 +431,9 @@ static bool pid_filter__has(struct pids_filtered *pids, pid_t pid)
static int augment_sys_enter(void *ctx, struct syscall_enter_args *args)
{
bool augmented, do_output = false;
- int zero = 0, size, aug_size, index,
- value_size = sizeof(struct augmented_arg) - offsetof(struct augmented_arg, value);
+ int zero = 0, index, value_size = sizeof(struct augmented_arg) - offsetof(struct augmented_arg, value);
u64 output = 0; /* has to be u64, otherwise it won't pass the verifier */
+ s64 aug_size, size;
unsigned int nr, *beauty_map;
struct beauty_payload_enter *payload;
void *arg, *payload_offset;
@@ -484,14 +484,11 @@ static int augment_sys_enter(void *ctx, struct syscall_enter_args *args)
} else if (size > 0 && size <= value_size) { /* struct */
if (!bpf_probe_read_user(((struct augmented_arg *)payload_offset)->value, size, arg))
augmented = true;
- } else if (size < 0 && size >= -6) { /* buffer */
+ } else if ((int)size < 0 && size >= -6) { /* buffer */
index = -(size + 1);
barrier_var(index); // Prevent clang (noticed with v18) from removing the &= 7 trick.
index &= 7; // Satisfy the bounds checking with the verifier in some kernels.
- aug_size = args->args[index];
-
- if (aug_size > TRACE_AUG_MAX_BUF)
- aug_size = TRACE_AUG_MAX_BUF;
+ aug_size = args->args[index] > TRACE_AUG_MAX_BUF ? TRACE_AUG_MAX_BUF : args->args[index];
if (aug_size > 0) {
if (!bpf_probe_read_user(((struct augmented_arg *)payload_offset)->value, aug_size, arg))
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 27094211edd8..5c329ad614e9 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -293,7 +293,7 @@ struct aggr_cpu_id aggr_cpu_id__die(struct perf_cpu cpu, void *data)
die = cpu__get_die_id(cpu);
/* There is no die_id on legacy system. */
- if (die == -1)
+ if (die < 0)
die = 0;
/*
@@ -322,7 +322,7 @@ struct aggr_cpu_id aggr_cpu_id__cluster(struct perf_cpu cpu, void *data)
struct aggr_cpu_id id;
/* There is no cluster_id on legacy system. */
- if (cluster == -1)
+ if (cluster < 0)
cluster = 0;
id = aggr_cpu_id__die(cpu, data);
diff --git a/tools/perf/util/disasm.c b/tools/perf/util/disasm.c
index b7de4d9fd004..50c5c206b70e 100644
--- a/tools/perf/util/disasm.c
+++ b/tools/perf/util/disasm.c
@@ -2216,56 +2216,6 @@ out_free_command:
return err;
}
-static int annotation_options__init_disassemblers(struct annotation_options *options)
-{
- char *disassembler;
-
- if (options->disassemblers_str == NULL) {
- const char *default_disassemblers_str =
-#ifdef HAVE_LIBLLVM_SUPPORT
- "llvm,"
-#endif
-#ifdef HAVE_LIBCAPSTONE_SUPPORT
- "capstone,"
-#endif
- "objdump";
-
- options->disassemblers_str = strdup(default_disassemblers_str);
- if (!options->disassemblers_str)
- goto out_enomem;
- }
-
- disassembler = strdup(options->disassemblers_str);
- if (disassembler == NULL)
- goto out_enomem;
-
- while (1) {
- char *comma = strchr(disassembler, ',');
-
- if (comma != NULL)
- *comma = '\0';
-
- options->disassemblers[options->nr_disassemblers++] = strim(disassembler);
-
- if (comma == NULL)
- break;
-
- disassembler = comma + 1;
-
- if (options->nr_disassemblers >= MAX_DISASSEMBLERS) {
- pr_debug("annotate.disassemblers can have at most %d entries, ignoring \"%s\"\n",
- MAX_DISASSEMBLERS, disassembler);
- break;
- }
- }
-
- return 0;
-
-out_enomem:
- pr_err("Not enough memory for annotate.disassemblers\n");
- return -1;
-}
-
int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
{
struct annotation_options *options = args->options;
@@ -2274,7 +2224,6 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
char symfs_filename[PATH_MAX];
bool delete_extract = false;
struct kcore_extract kce;
- const char *disassembler;
bool decomp = false;
int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename));
@@ -2334,28 +2283,26 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
}
}
- err = annotation_options__init_disassemblers(options);
- if (err)
- goto out_remove_tmp;
-
err = -1;
+ for (u8 i = 0; i < ARRAY_SIZE(options->disassemblers) && err != 0; i++) {
+ enum perf_disassembler dis = options->disassemblers[i];
- for (int i = 0; i < options->nr_disassemblers && err != 0; ++i) {
- disassembler = options->disassemblers[i];
-
- if (!strcmp(disassembler, "llvm"))
+ switch (dis) {
+ case PERF_DISASM_LLVM:
err = symbol__disassemble_llvm(symfs_filename, sym, args);
- else if (!strcmp(disassembler, "capstone"))
+ break;
+ case PERF_DISASM_CAPSTONE:
err = symbol__disassemble_capstone(symfs_filename, sym, args);
- else if (!strcmp(disassembler, "objdump"))
+ break;
+ case PERF_DISASM_OBJDUMP:
err = symbol__disassemble_objdump(symfs_filename, sym, args);
- else
- pr_debug("Unknown disassembler %s, skipping...\n", disassembler);
- }
-
- if (err == 0) {
- pr_debug("Disassembled with %s\nannotate.disassemblers=%s\n",
- disassembler, options->disassemblers_str);
+ break;
+ case PERF_DISASM_UNKNOWN: /* End of disassemblers. */
+ default:
+ goto out_remove_tmp;
+ }
+ if (err == 0)
+ pr_debug("Disassembled with %s\n", perf_disassembler__strs[dis]);
}
out_remove_tmp:
if (decomp)
diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile
index 51a95239fe06..835123add0ed 100644
--- a/tools/power/cpupower/Makefile
+++ b/tools/power/cpupower/Makefile
@@ -52,8 +52,11 @@ DESTDIR ?=
# and _should_ modify the PACKAGE_BUGREPORT definition
VERSION:= $(shell ./utils/version-gen.sh)
-LIB_MAJ= 0.0.1
-LIB_MIN= 1
+LIB_FIX= 1
+LIB_MIN= 0
+LIB_MAJ= 1
+LIB_VER= $(LIB_MAJ).$(LIB_MIN).$(LIB_FIX)
+
PACKAGE = cpupower
PACKAGE_BUGREPORT = linux-pm@vger.kernel.org
@@ -200,14 +203,14 @@ $(OUTPUT)lib/%.o: $(LIB_SRC) $(LIB_HEADERS)
$(ECHO) " CC " $@
$(QUIET) $(CC) $(CFLAGS) -fPIC -o $@ -c lib/$*.c
-$(OUTPUT)libcpupower.so.$(LIB_MAJ): $(LIB_OBJS)
+$(OUTPUT)libcpupower.so.$(LIB_VER): $(LIB_OBJS)
$(ECHO) " LD " $@
$(QUIET) $(CC) -shared $(CFLAGS) $(LDFLAGS) -o $@ \
- -Wl,-soname,libcpupower.so.$(LIB_MIN) $(LIB_OBJS)
+ -Wl,-soname,libcpupower.so.$(LIB_MAJ) $(LIB_OBJS)
@ln -sf $(@F) $(OUTPUT)libcpupower.so
- @ln -sf $(@F) $(OUTPUT)libcpupower.so.$(LIB_MIN)
+ @ln -sf $(@F) $(OUTPUT)libcpupower.so.$(LIB_MAJ)
-libcpupower: $(OUTPUT)libcpupower.so.$(LIB_MAJ)
+libcpupower: $(OUTPUT)libcpupower.so.$(LIB_VER)
# Let all .o files depend on its .c file and all headers
# Might be worth to put this into utils/Makefile at some point of time
@@ -217,7 +220,7 @@ $(OUTPUT)%.o: %.c
$(ECHO) " CC " $@
$(QUIET) $(CC) $(CFLAGS) -I./lib -I ./utils -o $@ -c $*.c
-$(OUTPUT)cpupower: $(UTIL_OBJS) $(OUTPUT)libcpupower.so.$(LIB_MAJ)
+$(OUTPUT)cpupower: $(UTIL_OBJS) $(OUTPUT)libcpupower.so.$(LIB_VER)
$(ECHO) " CC " $@
ifeq ($(strip $(STATIC)),true)
$(QUIET) $(CC) $(CFLAGS) $(LDFLAGS) $(UTIL_OBJS) -lrt -lpci -L$(OUTPUT) -o $@
@@ -262,7 +265,7 @@ update-po: $(OUTPUT)po/$(PACKAGE).pot
done;
endif
-compile-bench: $(OUTPUT)libcpupower.so.$(LIB_MAJ)
+compile-bench: $(OUTPUT)libcpupower.so.$(LIB_VER)
@V=$(V) confdir=$(confdir) $(MAKE) -C bench O=$(OUTPUT)
# we compile into subdirectories. if the target directory is not the
diff --git a/tools/power/cpupower/bench/parse.c b/tools/power/cpupower/bench/parse.c
index 080678d9d74e..bd67c758b33a 100644
--- a/tools/power/cpupower/bench/parse.c
+++ b/tools/power/cpupower/bench/parse.c
@@ -121,6 +121,10 @@ out_dir:
struct config *prepare_default_config()
{
struct config *config = malloc(sizeof(struct config));
+ if (!config) {
+ perror("malloc");
+ return NULL;
+ }
dprintf("loading defaults\n");
diff --git a/tools/power/cpupower/lib/cpupower.c b/tools/power/cpupower/lib/cpupower.c
index 7a2ef691b20e..ce8dfb8e46ab 100644
--- a/tools/power/cpupower/lib/cpupower.c
+++ b/tools/power/cpupower/lib/cpupower.c
@@ -10,6 +10,7 @@
#include <stdio.h>
#include <errno.h>
#include <stdlib.h>
+#include <string.h>
#include "cpupower.h"
#include "cpupower_intern.h"
@@ -150,15 +151,25 @@ static int __compare(const void *t1, const void *t2)
return 0;
}
+static int __compare_core_cpu_list(const void *t1, const void *t2)
+{
+ struct cpuid_core_info *top1 = (struct cpuid_core_info *)t1;
+ struct cpuid_core_info *top2 = (struct cpuid_core_info *)t2;
+
+ return strcmp(top1->core_cpu_list, top2->core_cpu_list);
+}
+
/*
* Returns amount of cpus, negative on error, cpu_top must be
* passed to cpu_topology_release to free resources
*
- * Array is sorted after ->pkg, ->core, then ->cpu
+ * Array is sorted after ->cpu_smt_list ->pkg, ->core
*/
int get_cpu_topology(struct cpupower_topology *cpu_top)
{
int cpu, last_pkg, cpus = sysconf(_SC_NPROCESSORS_CONF);
+ char path[SYSFS_PATH_MAX];
+ char *last_cpu_list;
cpu_top->core_info = malloc(sizeof(struct cpuid_core_info) * cpus);
if (cpu_top->core_info == NULL)
@@ -183,6 +194,34 @@ int get_cpu_topology(struct cpupower_topology *cpu_top)
cpu_top->core_info[cpu].core = -1;
continue;
}
+ if (cpu_top->core_info[cpu].core == -1) {
+ strncpy(cpu_top->core_info[cpu].core_cpu_list, "-1", CPULIST_BUFFER);
+ continue;
+ }
+ snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/topology/%s",
+ cpu, "core_cpus_list");
+ if (cpupower_read_sysfs(
+ path,
+ cpu_top->core_info[cpu].core_cpu_list,
+ CPULIST_BUFFER) < 1) {
+ printf("Warning CPU%u has a 0 size core_cpus_list string", cpu);
+ }
+ }
+
+ /* Count the number of distinct cpu lists to get the physical core
+ * count.
+ */
+ qsort(cpu_top->core_info, cpus, sizeof(struct cpuid_core_info),
+ __compare_core_cpu_list);
+
+ last_cpu_list = cpu_top->core_info[0].core_cpu_list;
+ cpu_top->cores = 1;
+ for (cpu = 1; cpu < cpus; cpu++) {
+ if (strcmp(cpu_top->core_info[cpu].core_cpu_list, last_cpu_list) != 0 &&
+ cpu_top->core_info[cpu].pkg != -1) {
+ last_cpu_list = cpu_top->core_info[cpu].core_cpu_list;
+ cpu_top->cores++;
+ }
}
qsort(cpu_top->core_info, cpus, sizeof(struct cpuid_core_info),
@@ -203,13 +242,6 @@ int get_cpu_topology(struct cpupower_topology *cpu_top)
if (!(cpu_top->core_info[0].pkg == -1))
cpu_top->pkgs++;
- /* Intel's cores count is not consecutively numbered, there may
- * be a core_id of 3, but none of 2. Assume there always is 0
- * Get amount of cores by counting duplicates in a package
- for (cpu = 0; cpu_top->core_info[cpu].pkg = 0 && cpu < cpus; cpu++) {
- if (cpu_top->core_info[cpu].core == 0)
- cpu_top->cores++;
- */
return cpus;
}
diff --git a/tools/power/cpupower/lib/cpupower.h b/tools/power/cpupower/lib/cpupower.h
index e4e4292eacec..2e67a080f203 100644
--- a/tools/power/cpupower/lib/cpupower.h
+++ b/tools/power/cpupower/lib/cpupower.h
@@ -2,6 +2,8 @@
#ifndef __CPUPOWER_CPUPOWER_H__
#define __CPUPOWER_CPUPOWER_H__
+#define CPULIST_BUFFER 5
+
struct cpupower_topology {
/* Amount of CPU cores, packages and threads per core in the system */
unsigned int cores;
@@ -16,6 +18,7 @@ struct cpuid_core_info {
int pkg;
int core;
int cpu;
+ char core_cpu_list[CPULIST_BUFFER];
/* flags */
unsigned int is_online:1;
diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
index f746099b5dac..ad493157f826 100644
--- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
+++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
@@ -6,6 +6,7 @@
*/
+#include <errno.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
@@ -91,7 +92,11 @@ int fill_string_with_spaces(char *s, int n)
return 0;
}
-#define MAX_COL_WIDTH 6
+#define MAX_COL_WIDTH 6
+#define TOPOLOGY_DEPTH_PKG 3
+#define TOPOLOGY_DEPTH_CORE 2
+#define TOPOLOGY_DEPTH_CPU 1
+
void print_header(int topology_depth)
{
int unsigned mon;
@@ -113,12 +118,19 @@ void print_header(int topology_depth)
}
printf("\n");
- if (topology_depth > 2)
+ switch (topology_depth) {
+ case TOPOLOGY_DEPTH_PKG:
printf(" PKG|");
- if (topology_depth > 1)
+ break;
+ case TOPOLOGY_DEPTH_CORE:
printf("CORE|");
- if (topology_depth > 0)
+ break;
+ case TOPOLOGY_DEPTH_CPU:
printf(" CPU|");
+ break;
+ default:
+ return;
+ }
for (mon = 0; mon < avail_monitors; mon++) {
if (mon != 0)
@@ -152,12 +164,19 @@ void print_results(int topology_depth, int cpu)
cpu_top.core_info[cpu].pkg == -1)
return;
- if (topology_depth > 2)
+ switch (topology_depth) {
+ case TOPOLOGY_DEPTH_PKG:
printf("%4d|", cpu_top.core_info[cpu].pkg);
- if (topology_depth > 1)
+ break;
+ case TOPOLOGY_DEPTH_CORE:
printf("%4d|", cpu_top.core_info[cpu].core);
- if (topology_depth > 0)
+ break;
+ case TOPOLOGY_DEPTH_CPU:
printf("%4d|", cpu_top.core_info[cpu].cpu);
+ break;
+ default:
+ return;
+ }
for (mon = 0; mon < avail_monitors; mon++) {
if (mon != 0)
@@ -294,7 +313,10 @@ int fork_it(char **argv)
if (!child_pid) {
/* child */
- execvp(argv[0], argv);
+ if (execvp(argv[0], argv) == -1) {
+ printf("Invalid monitor command %s\n", argv[0]);
+ exit(errno);
+ }
} else {
/* parent */
if (child_pid == -1) {
@@ -423,11 +445,13 @@ int cmd_monitor(int argc, char **argv)
if (avail_monitors == 0) {
printf(_("No HW Cstate monitors found\n"));
+ cpu_topology_release(cpu_top);
return 1;
}
if (mode == list) {
list_monitors();
+ cpu_topology_release(cpu_top);
exit(EXIT_SUCCESS);
}
@@ -448,15 +472,15 @@ int cmd_monitor(int argc, char **argv)
/* ToDo: Topology parsing needs fixing first to do
this more generically */
if (cpu_top.pkgs > 1)
- print_header(3);
+ print_header(TOPOLOGY_DEPTH_PKG);
else
- print_header(1);
+ print_header(TOPOLOGY_DEPTH_CPU);
for (cpu = 0; cpu < cpu_count; cpu++) {
if (cpu_top.pkgs > 1)
- print_results(3, cpu);
+ print_results(TOPOLOGY_DEPTH_PKG, cpu);
else
- print_results(1, cpu);
+ print_results(TOPOLOGY_DEPTH_CPU, cpu);
}
for (num = 0; num < avail_monitors; num++) {
diff --git a/tools/power/pm-graph/config/custom-timeline-functions.cfg b/tools/power/pm-graph/config/custom-timeline-functions.cfg
index 4f80ad7d7275..0321b59518f3 100644
--- a/tools/power/pm-graph/config/custom-timeline-functions.cfg
+++ b/tools/power/pm-graph/config/custom-timeline-functions.cfg
@@ -122,13 +122,13 @@ freeze_processes:
freeze_kernel_threads:
pm_restrict_gfp_mask:
acpi_suspend_begin:
-suspend_console:
+console_suspend_all:
acpi_pm_prepare:
syscore_suspend:
arch_enable_nonboot_cpus_end:
syscore_resume:
acpi_pm_finish:
-resume_console:
+console_resume_all:
acpi_pm_end:
pm_restore_gfp_mask:
thaw_processes:
diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py
index 918eae58b0b4..e2261f33a082 100755
--- a/tools/power/pm-graph/sleepgraph.py
+++ b/tools/power/pm-graph/sleepgraph.py
@@ -210,13 +210,13 @@ class SystemValues:
'hibernate_preallocate_memory': {},
'create_basic_memory_bitmaps': {},
'swsusp_write': {},
- 'suspend_console': {},
+ 'console_suspend_all': {},
'acpi_pm_prepare': {},
'syscore_suspend': {},
'arch_enable_nonboot_cpus_end': {},
'syscore_resume': {},
'acpi_pm_finish': {},
- 'resume_console': {},
+ 'console_resume_all': {},
'acpi_pm_end': {},
'pm_restore_gfp_mask': {},
'thaw_processes': {},
@@ -3459,7 +3459,7 @@ def parseTraceLog(live=False):
tracewatch = ['irq_wakeup']
if sysvals.usekprobes:
tracewatch += ['sync_filesystems', 'freeze_processes', 'syscore_suspend',
- 'syscore_resume', 'resume_console', 'thaw_processes', 'CPU_ON',
+ 'syscore_resume', 'console_resume_all', 'thaw_processes', 'CPU_ON',
'CPU_OFF', 'acpi_suspend']
# extract the callgraph and traceevent data
diff --git a/tools/power/x86/intel-speed-select/Makefile b/tools/power/x86/intel-speed-select/Makefile
index 7221f2f55e8b..8d3a02a20f3d 100644
--- a/tools/power/x86/intel-speed-select/Makefile
+++ b/tools/power/x86/intel-speed-select/Makefile
@@ -13,7 +13,7 @@ endif
# Do not use make's built-in rules
# (this improves performance and avoids hard-to-debug behaviour);
MAKEFLAGS += -r
-override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include -I/usr/include/libnl3
+override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include -I$(shell $(CC) -print-sysroot)/usr/include/libnl3
override LDFLAGS += -lnl-genl-3 -lnl-3
ALL_TARGETS := intel-speed-select
diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c
index fadfb02b8611..eaa420ac848d 100644
--- a/tools/power/x86/intel-speed-select/isst-config.c
+++ b/tools/power/x86/intel-speed-select/isst-config.c
@@ -16,7 +16,7 @@ struct process_cmd_struct {
int arg;
};
-static const char *version_str = "v1.21";
+static const char *version_str = "v1.22";
static const int supported_api_ver = 3;
static struct isst_if_platform_info isst_platform_info;
@@ -46,8 +46,9 @@ static int force_online_offline;
static int auto_mode;
static int fact_enable_fail;
static int cgroupv2;
+static int max_pkg_id;
static int max_die_id;
-static int max_punit_id;
+static int max_die_id_package_0;
/* clos related */
static int current_clos = -1;
@@ -557,6 +558,8 @@ void for_each_online_power_domain_in_set(void (*callback)(struct isst_id *, void
if (id.pkg < 0 || id.die < 0 || id.punit < 0)
continue;
+ id.die = id.die % (max_die_id_package_0 + 1);
+
valid_mask[id.pkg][id.die] = 1;
if (cpus[id.pkg][id.die][id.punit] == -1)
@@ -564,11 +567,11 @@ void for_each_online_power_domain_in_set(void (*callback)(struct isst_id *, void
}
for (i = 0; i < MAX_PACKAGE_COUNT; i++) {
- if (max_die_id == max_punit_id) {
+ if (max_die_id > max_pkg_id) {
for (k = 0; k < MAX_PUNIT_PER_DIE && k < MAX_DIE_PER_PACKAGE; k++) {
id.cpu = cpus[i][k][k];
id.pkg = i;
- id.die = k;
+ id.die = get_physical_die_id(id.cpu);
id.punit = k;
if (isst_is_punit_valid(&id))
callback(&id, arg1, arg2, arg3, arg4);
@@ -586,7 +589,10 @@ void for_each_online_power_domain_in_set(void (*callback)(struct isst_id *, void
for (k = 0; k < MAX_PUNIT_PER_DIE; k++) {
id.cpu = cpus[i][j][k];
id.pkg = i;
- id.die = j;
+ if (id.cpu >= 0)
+ id.die = get_physical_die_id(id.cpu);
+ else
+ id.die = id.pkg;
id.punit = k;
if (isst_is_punit_valid(&id))
callback(&id, arg1, arg2, arg3, arg4);
@@ -788,6 +794,8 @@ static void create_cpu_map(void)
cpu_map[i].die_id = die_id;
cpu_map[i].core_id = core_id;
+ if (max_pkg_id < pkg_id)
+ max_pkg_id = pkg_id;
punit_id = 0;
@@ -812,8 +820,8 @@ static void create_cpu_map(void)
if (max_die_id < die_id)
max_die_id = die_id;
- if (max_punit_id < cpu_map[i].punit_id)
- max_punit_id = cpu_map[i].punit_id;
+ if (!pkg_id && max_die_id_package_0 < die_id)
+ max_die_id_package_0 = die_id;
debug_printf(
"map logical_cpu:%d core: %d die:%d pkg:%d punit:%d punit_cpu:%d punit_core:%d\n",
diff --git a/tools/power/x86/intel-speed-select/isst-display.c b/tools/power/x86/intel-speed-select/isst-display.c
index 07ebd08f3202..da5a59a4c545 100644
--- a/tools/power/x86/intel-speed-select/isst-display.c
+++ b/tools/power/x86/intel-speed-select/isst-display.c
@@ -173,7 +173,11 @@ static int print_package_info(struct isst_id *id, FILE *outf)
if (out_format_is_json()) {
if (api_version() > 1) {
- if (id->cpu < 0)
+ if (id->die < 0 && id->cpu < 0)
+ snprintf(header, sizeof(header),
+ "package-%d:die-IO:powerdomain-%d:cpu-None",
+ id->pkg, id->punit);
+ else if (id->cpu < 0)
snprintf(header, sizeof(header),
"package-%d:die-%d:powerdomain-%d:cpu-None",
id->pkg, id->die, id->punit);
@@ -190,7 +194,10 @@ static int print_package_info(struct isst_id *id, FILE *outf)
}
snprintf(header, sizeof(header), "package-%d", id->pkg);
format_and_print(outf, level++, header, NULL);
- snprintf(header, sizeof(header), "die-%d", id->die);
+ if (id->die < 0)
+ snprintf(header, sizeof(header), "die-IO");
+ else
+ snprintf(header, sizeof(header), "die-%d", id->die);
format_and_print(outf, level++, header, NULL);
if (api_version() > 1) {
snprintf(header, sizeof(header), "powerdomain-%d", id->punit);
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index a7f7ed01421c..99bf905ade81 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -136,7 +136,7 @@ displays the statistics gathered since it was forked.
The system configuration dump (if --quiet is not used) is followed by statistics. The first row of the statistics labels the content of each column (below). The second row of statistics is the system summary line. The system summary line has a '-' in the columns for the Package, Core, and CPU. The contents of the system summary line depends on the type of column. Columns that count items (eg. IRQ) show the sum across all CPUs in the system. Columns that show a percentage show the average across all CPUs in the system. Columns that dump raw MSR values simply show 0 in the summary. After the system summary row, each row describes a specific Package/Core/CPU. Note that if the --cpu parameter is used to limit which specific CPUs are displayed, turbostat will still collect statistics for all CPUs in the system and will still show the system summary for all CPUs in the system.
.SH COLUMN DESCRIPTIONS
.PP
-\fBusec\fP For each CPU, the number of microseconds elapsed during counter collection, including thread migration -- if any. This counter is disabled by default, and is enabled with "--enable usec", or --debug. On the summary row, usec refers to the total elapsed time to collect the counters on all cpus.
+\fBusec\fP For each CPU, the number of microseconds elapsed during counter collection, including thread migration -- if any. This counter is disabled by default, and is enabled with "--enable usec", or --debug. On the summary row, usec refers to the total elapsed time to snapshot the procfs/sysfs and collect the counters on all cpus.
.PP
\fBTime_Of_Day_Seconds\fP For each CPU, the gettimeofday(2) value (seconds.subsec since Epoch) when the counters ending the measurement interval were collected. This column is disabled by default, and can be enabled with "--enable Time_Of_Day_Seconds" or "--debug". On the summary row, Time_Of_Day_Seconds refers to the timestamp following collection of counters on the last CPU.
.PP
@@ -190,7 +190,7 @@ The system configuration dump (if --quiet is not used) is followed by statistics
.PP
\fBRAMWatt\fP Watts consumed by the DRAM DIMMS -- available only on server processors.
.PP
-\fBSysWatt\fP Watts consumed by the whole platform (RAPL PSYS). Disabled by default. Enable with --enable SysWatt.
+\fBSysWatt\fP Watts consumed by the whole platform (RAPL PSYS).
.PP
\fBPKG_%\fP percent of the interval that RAPL throttling was active on the Package. Note that the system summary is the sum of the package throttling time, and thus may be higher than 100% on a multi-package system. Note that the meaning of this field is model specific. For example, some hardware increments this counter when RAPL responds to thermal limits, but does not increment this counter when RAPL responds to power limits. Comparing PkgWatt and PkgTmp to system limits is necessary.
.PP
@@ -516,14 +516,40 @@ that they count at TSC rate, which is true on all processors tested to date.
Volume 3B: System Programming Guide"
https://www.intel.com/products/processor/manuals/
+.SH RUN THE LATEST VERSION
+If turbostat complains that it doesn't recognize your processor,
+please try the latest version.
+
+The latest version of turbostat does not require the latest version of the Linux kernel.
+However, some features, such as perf(1) counters, do require kernel support.
+
+The latest turbostat release is available in the upstream Linux Kernel source tree.
+eg. "git pull https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git"
+and run make in tools/power/x86/turbostat/.
+
+n.b. "make install" will update your system manually, but a distro update may subsequently downgrade your turbostat to an older version.
+For this reason, manually installing to /usr/local/bin may be what you want.
+
+Note that turbostat/Makefile has a "make snapshot" target, which will create a tar file
+that can build without a local kernel source tree.
+
+If the upstream version isn't new enough, the development tree can be found here:
+"git pull https://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux.git turbostat"
+
+If the development tree doesn't work, please contact the author via chat,
+or via email with the word "turbostat" on the Subject line.
+
.SH FILES
.ta
.nf
+/sys/bus/event_source/devices/
/dev/cpu/*/msr
+/sys/class/intel_pmt/
+/sys/devices/system/cpu/
.fi
.SH "SEE ALSO"
-msr(4), vmstat(8)
+perf(1), msr(4), vmstat(8)
.PP
.SH AUTHOR
.nf
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 58a487c225a7..26057af6b5a1 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -3,7 +3,7 @@
* turbostat -- show CPU frequency and C-state residency
* on modern Intel and AMD processors.
*
- * Copyright (c) 2024 Intel Corporation.
+ * Copyright (c) 2025 Intel Corporation.
* Len Brown <len.brown@intel.com>
*/
@@ -95,6 +95,8 @@
#define INTEL_ECORE_TYPE 0x20
#define INTEL_PCORE_TYPE 0x40
+#define ROUND_UP_TO_PAGE_SIZE(n) (((n) + 0x1000UL-1UL) & ~(0x1000UL-1UL))
+
enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE };
enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC, COUNTER_K2M };
enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT, FORMAT_AVERAGE };
@@ -202,6 +204,8 @@ struct msr_counter bic[] = {
{ 0x0, "Die%c6", NULL, 0, 0, 0, NULL, 0 },
{ 0x0, "SysWatt", NULL, 0, 0, 0, NULL, 0 },
{ 0x0, "Sys_J", NULL, 0, 0, 0, NULL, 0 },
+ { 0x0, "NMI", NULL, 0, 0, 0, NULL, 0 },
+ { 0x0, "CPU%c1e", NULL, 0, 0, 0, NULL, 0 },
};
#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
@@ -266,14 +270,16 @@ struct msr_counter bic[] = {
#define BIC_Diec6 (1ULL << 58)
#define BIC_SysWatt (1ULL << 59)
#define BIC_Sys_J (1ULL << 60)
+#define BIC_NMI (1ULL << 61)
+#define BIC_CPU_c1e (1ULL << 62)
-#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die )
-#define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__)
+#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die)
+#define BIC_THERMAL_PWR (BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__ | BIC_SysWatt)
#define BIC_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ)
-#define BIC_IDLE (BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6)
-#define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC)
+#define BIC_IDLE (BIC_Busy | BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6)
+#define BIC_OTHER (BIC_IRQ | BIC_NMI | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC)
-#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC | BIC_SysWatt | BIC_Sys_J)
+#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)
unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC;
@@ -326,6 +332,7 @@ unsigned int rapl_joules;
unsigned int summary_only;
unsigned int list_header_only;
unsigned int dump_only;
+unsigned int force_load;
unsigned int has_aperf;
unsigned int has_aperf_access;
unsigned int has_epb;
@@ -353,7 +360,7 @@ unsigned long long cpuidle_cur_sys_lpi_us;
unsigned int tj_max;
unsigned int tj_max_override;
double rapl_power_units, rapl_time_units;
-double rapl_dram_energy_units, rapl_energy_units;
+double rapl_dram_energy_units, rapl_energy_units, rapl_psys_energy_units;
double rapl_joule_counter_range;
unsigned int crystal_hz;
unsigned long long tsc_hz;
@@ -365,6 +372,9 @@ unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */
unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */
unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */
unsigned int first_counter_read = 1;
+
+static struct timeval procsysfs_tv_begin;
+
int ignore_stdin;
bool no_msr;
bool no_perf;
@@ -419,6 +429,7 @@ struct platform_features {
bool has_per_core_rapl; /* Indicates cores energy collection is per-core, not per-package. AMD specific for now */
bool has_rapl_divisor; /* Divisor for Energy unit raw value from MSR_RAPL_POWER_UNIT */
bool has_fixed_rapl_unit; /* Fixed Energy Unit used for DRAM RAPL Domain */
+ bool has_fixed_rapl_psys_unit; /* Fixed Energy Unit used for PSYS RAPL Domain */
int rapl_quirk_tdp; /* Hardcoded TDP value when cannot be retrieved from hardware */
int tcc_offset_bits; /* TCC Offset bits in MSR_IA32_TEMPERATURE_TARGET */
bool enable_tsc_tweak; /* Use CPU Base freq instead of TSC freq for aperf/mperf counter */
@@ -819,6 +830,7 @@ static const struct platform_features spr_features = {
.has_msr_core_c1_res = 1,
.has_irtl_msrs = 1,
.has_cst_prewake_bit = 1,
+ .has_fixed_rapl_psys_unit = 1,
.trl_msrs = TRL_BASE | TRL_CORECOUNT,
.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS,
};
@@ -1024,6 +1036,7 @@ static const struct platform_data turbostat_pdata[] = {
{ INTEL_ARROWLAKE_U, &adl_features },
{ INTEL_ARROWLAKE, &adl_features },
{ INTEL_LUNARLAKE_M, &lnl_features },
+ { INTEL_PANTHERLAKE_L, &lnl_features },
{ INTEL_ATOM_SILVERMONT, &slv_features },
{ INTEL_ATOM_SILVERMONT_D, &slvd_features },
{ INTEL_ATOM_AIRMONT, &amt_features },
@@ -1036,13 +1049,14 @@ static const struct platform_data turbostat_pdata[] = {
{ INTEL_ATOM_GRACEMONT, &adl_features },
{ INTEL_ATOM_CRESTMONT_X, &srf_features },
{ INTEL_ATOM_CRESTMONT, &grr_features },
+ { INTEL_ATOM_DARKMONT_X, &srf_features },
{ INTEL_XEON_PHI_KNL, &knl_features },
{ INTEL_XEON_PHI_KNM, &knl_features },
/*
* Missing support for
* INTEL_ICELAKE
* INTEL_ATOM_SILVERMONT_MID
- * INTEL_ATOM_AIRMONT_MID
+ * INTEL_ATOM_SILVERMONT_MID2
* INTEL_ATOM_AIRMONT_NP
*/
{ 0, NULL },
@@ -1054,9 +1068,10 @@ void probe_platform_features(unsigned int family, unsigned int model)
{
int i;
- platform = &default_features;
if (authentic_amd || hygon_genuine) {
+ /* fallback to default features on unsupported models */
+ force_load++;
if (max_extended_level >= 0x80000007) {
unsigned int eax, ebx, ecx, edx;
@@ -1065,11 +1080,11 @@ void probe_platform_features(unsigned int family, unsigned int model)
if ((edx & (1 << 14)) && family >= 0x17)
platform = &amd_features_with_rapl;
}
- return;
+ goto end;
}
if (!genuine_intel)
- return;
+ goto end;
for (i = 0; turbostat_pdata[i].features; i++) {
if (VFM_FAMILY(turbostat_pdata[i].vfm) == family && VFM_MODEL(turbostat_pdata[i].vfm) == model) {
@@ -1077,6 +1092,19 @@ void probe_platform_features(unsigned int family, unsigned int model)
return;
}
}
+
+end:
+ if (force_load && !platform) {
+ fprintf(outf, "Forced to run on unsupported platform!\n");
+ platform = &default_features;
+ }
+
+ if (platform)
+ return;
+
+ fprintf(stderr, "Unsupported platform detected.\n"
+ "\tSee RUN THE LATEST VERSION on turbostat(8)\n");
+ exit(1);
}
/* Model specific support End */
@@ -1094,8 +1122,8 @@ int backwards_count;
char *progname;
#define CPU_SUBSET_MAXCPUS 1024 /* need to use before probe... */
-cpu_set_t *cpu_present_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset;
-size_t cpu_present_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, cpu_subset_size;
+cpu_set_t *cpu_present_set, *cpu_possible_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset;
+size_t cpu_present_setsize, cpu_possible_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, cpu_subset_size;
#define MAX_ADDED_THREAD_COUNTERS 24
#define MAX_ADDED_CORE_COUNTERS 8
#define MAX_ADDED_PACKAGE_COUNTERS 16
@@ -1271,7 +1299,7 @@ static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = {
.msr = MSR_PLATFORM_ENERGY_STATUS,
.msr_mask = 0x00000000FFFFFFFF,
.msr_shift = 0,
- .platform_rapl_msr_scale = &rapl_energy_units,
+ .platform_rapl_msr_scale = &rapl_psys_energy_units,
.rci_index = RAPL_RCI_INDEX_ENERGY_PLATFORM,
.bic = BIC_SysWatt | BIC_Sys_J,
.compat_scale = 1.0,
@@ -1510,6 +1538,17 @@ static struct msr_counter_arch_info msr_counter_arch_infos[] = {
#define PMT_COUNTER_MTL_DC6_LSB 0
#define PMT_COUNTER_MTL_DC6_MSB 63
#define PMT_MTL_DC6_GUID 0x1a067102
+#define PMT_MTL_DC6_SEQ 0
+
+#define PMT_COUNTER_CWF_MC1E_OFFSET_BASE 20936
+#define PMT_COUNTER_CWF_MC1E_OFFSET_INCREMENT 24
+#define PMT_COUNTER_CWF_MC1E_NUM_MODULES_PER_FILE 12
+#define PMT_COUNTER_CWF_CPUS_PER_MODULE 4
+#define PMT_COUNTER_CWF_MC1E_LSB 0
+#define PMT_COUNTER_CWF_MC1E_MSB 63
+#define PMT_CWF_MC1E_GUID 0x14421519
+
+unsigned long long tcore_clock_freq_hz = 800000000;
#define PMT_COUNTER_NAME_SIZE_BYTES 16
#define PMT_COUNTER_TYPE_NAME_SIZE_BYTES 32
@@ -1533,6 +1572,7 @@ struct pmt_mmio {
enum pmt_datatype {
PMT_TYPE_RAW,
PMT_TYPE_XTAL_TIME,
+ PMT_TYPE_TCORE_CLOCK,
};
struct pmt_domain_info {
@@ -1562,6 +1602,93 @@ struct pmt_counter {
struct pmt_domain_info *domains;
};
+/*
+ * PMT telemetry directory iterator.
+ * Used to iterate telemetry files in sysfs in correct order.
+ */
+struct pmt_diriter_t {
+ DIR *dir;
+ struct dirent **namelist;
+ unsigned int num_names;
+ unsigned int current_name_idx;
+};
+
+int pmt_telemdir_filter(const struct dirent *e)
+{
+ unsigned int dummy;
+
+ return sscanf(e->d_name, "telem%u", &dummy);
+}
+
+int pmt_telemdir_sort(const struct dirent **a, const struct dirent **b)
+{
+ unsigned int aidx = 0, bidx = 0;
+
+ sscanf((*a)->d_name, "telem%u", &aidx);
+ sscanf((*b)->d_name, "telem%u", &bidx);
+
+ return aidx >= bidx;
+}
+
+const struct dirent *pmt_diriter_next(struct pmt_diriter_t *iter)
+{
+ const struct dirent *ret = NULL;
+
+ if (!iter->dir)
+ return NULL;
+
+ if (iter->current_name_idx >= iter->num_names)
+ return NULL;
+
+ ret = iter->namelist[iter->current_name_idx];
+ ++iter->current_name_idx;
+
+ return ret;
+}
+
+const struct dirent *pmt_diriter_begin(struct pmt_diriter_t *iter, const char *pmt_root_path)
+{
+ int num_names = iter->num_names;
+
+ if (!iter->dir) {
+ iter->dir = opendir(pmt_root_path);
+ if (iter->dir == NULL)
+ return NULL;
+
+ num_names = scandir(pmt_root_path, &iter->namelist, pmt_telemdir_filter, pmt_telemdir_sort);
+ if (num_names == -1)
+ return NULL;
+ }
+
+ iter->current_name_idx = 0;
+ iter->num_names = num_names;
+
+ return pmt_diriter_next(iter);
+}
+
+void pmt_diriter_init(struct pmt_diriter_t *iter)
+{
+ memset(iter, 0, sizeof(*iter));
+}
+
+void pmt_diriter_remove(struct pmt_diriter_t *iter)
+{
+ if (iter->namelist) {
+ for (unsigned int i = 0; i < iter->num_names; i++) {
+ free(iter->namelist[i]);
+ iter->namelist[i] = NULL;
+ }
+ }
+
+ free(iter->namelist);
+ iter->namelist = NULL;
+ iter->num_names = 0;
+ iter->current_name_idx = 0;
+
+ closedir(iter->dir);
+ iter->dir = NULL;
+}
+
unsigned int pmt_counter_get_width(const struct pmt_counter *p)
{
return (p->msb - p->lsb) + 1;
@@ -1611,6 +1738,7 @@ struct thread_data {
unsigned long long c1;
unsigned long long instr_count;
unsigned long long irq_count;
+ unsigned long long nmi_count;
unsigned int smi_count;
unsigned int cpu_id;
unsigned int apic_id;
@@ -1917,6 +2045,7 @@ struct timeval tv_even, tv_odd, tv_delta;
int *irq_column_2_cpu; /* /proc/interrupts column numbers */
int *irqs_per_cpu; /* indexed by cpu_num */
+int *nmi_per_cpu; /* indexed by cpu_num */
void setup_all_buffers(bool startup);
@@ -1944,6 +2073,8 @@ int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pk
{
int retval, pkg_no, core_no, thread_no, node_no;
+ retval = 0;
+
for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
for (node_no = 0; node_no < topo.nodes_per_pkg; node_no++) {
for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
@@ -1959,14 +2090,12 @@ int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pk
c = GET_CORE(core_base, core_no, node_no, pkg_no);
p = GET_PKG(pkg_base, pkg_no);
- retval = func(t, c, p);
- if (retval)
- return retval;
+ retval |= func(t, c, p);
}
}
}
}
- return 0;
+ return retval;
}
int is_cpu_first_thread_in_core(struct thread_data *t, struct core_data *c, struct pkg_data *p)
@@ -2085,13 +2214,17 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
int probe_msr(int cpu, off_t offset)
{
ssize_t retval;
- unsigned long long dummy;
+ unsigned long long value;
assert(!no_msr);
- retval = pread(get_msr_fd(cpu), &dummy, sizeof(dummy), offset);
+ retval = pread(get_msr_fd(cpu), &value, sizeof(value), offset);
- if (retval != sizeof(dummy))
+ /*
+ * Expect MSRs to accumulate some non-zero value since the system was powered on.
+ * Treat zero as a read failure.
+ */
+ if (retval != sizeof(value) || value == 0)
return 1;
return 0;
@@ -2135,41 +2268,54 @@ void help(void)
"when COMMAND completes.\n"
"If no COMMAND is specified, turbostat wakes every 5-seconds\n"
"to print statistics, until interrupted.\n"
- " -a, --add add a counter\n"
+ " -a, --add counter\n"
+ " add a counter\n"
" eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
" eg. --add perf/cstate_pkg/c2-residency,package,delta,percent,perfPC2\n"
" eg. --add pmt,name=XTAL,type=raw,domain=package0,offset=0,lsb=0,msb=63,guid=0x1a067102\n"
- " -c, --cpu cpu-set limit output to summary plus cpu-set:\n"
+ " -c, --cpu cpu-set\n"
+ " limit output to summary plus cpu-set:\n"
" {core | package | j,k,l..m,n-p }\n"
- " -d, --debug displays usec, Time_Of_Day_Seconds and more debugging\n"
+ " -d, --debug\n"
+ " displays usec, Time_Of_Day_Seconds and more debugging\n"
" debug messages are printed to stderr\n"
- " -D, --Dump displays the raw counter values\n"
- " -e, --enable [all | column]\n"
+ " -D, --Dump\n"
+ " displays the raw counter values\n"
+ " -e, --enable [all | column]\n"
" shows all or the specified disabled column\n"
- " -H, --hide [column|column,column,...]\n"
+ " -f, --force\n"
+ " force load turbostat with minimum default features on unsupported platforms.\n"
+ " -H, --hide [column | column,column,...]\n"
" hide the specified column(s)\n"
" -i, --interval sec.subsec\n"
- " Override default 5-second measurement interval\n"
- " -J, --Joules displays energy in Joules instead of Watts\n"
- " -l, --list list column headers only\n"
- " -M, --no-msr Disable all uses of the MSR driver\n"
- " -P, --no-perf Disable all uses of the perf API\n"
+ " override default 5-second measurement interval\n"
+ " -J, --Joules\n"
+ " displays energy in Joules instead of Watts\n"
+ " -l, --list\n"
+ " list column headers only\n"
+ " -M, --no-msr\n"
+ " disable all uses of the MSR driver\n"
+ " -P, --no-perf\n"
+ " disable all uses of the perf API\n"
" -n, --num_iterations num\n"
" number of the measurement iterations\n"
" -N, --header_iterations num\n"
" print header every num iterations\n"
" -o, --out file\n"
" create or truncate \"file\" for all output\n"
- " -q, --quiet skip decoding system configuration header\n"
- " -s, --show [column|column,column,...]\n"
+ " -q, --quiet\n"
+ " skip decoding system configuration header\n"
+ " -s, --show [column | column,column,...]\n"
" show only the specified column(s)\n"
" -S, --Summary\n"
" limits output to 1-line system summary per interval\n"
" -T, --TCC temperature\n"
" sets the Thermal Control Circuit temperature in\n"
" degrees Celsius\n"
- " -h, --help print this help message\n"
- " -v, --version print version information\n" "\n" "For more help, run \"man turbostat\"\n");
+ " -h, --help\n"
+ " print this help message\n"
+ " -v, --version\n"
+ " print version information\n\nFor more help, run \"man turbostat\"\n");
}
/*
@@ -2289,6 +2435,12 @@ void print_header(char *delim)
else
outp += sprintf(outp, "%sIRQ", (printed++ ? delim : ""));
}
+ if (DO_BIC(BIC_NMI)) {
+ if (sums_need_wide_columns)
+ outp += sprintf(outp, "%s NMI", (printed++ ? delim : ""));
+ else
+ outp += sprintf(outp, "%sNMI", (printed++ ? delim : ""));
+ }
if (DO_BIC(BIC_SMI))
outp += sprintf(outp, "%sSMI", (printed++ ? delim : ""));
@@ -2335,6 +2487,7 @@ void print_header(char *delim)
break;
case PMT_TYPE_XTAL_TIME:
+ case PMT_TYPE_TCORE_CLOCK:
outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name);
break;
}
@@ -2409,6 +2562,7 @@ void print_header(char *delim)
break;
case PMT_TYPE_XTAL_TIME:
+ case PMT_TYPE_TCORE_CLOCK:
outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name);
break;
}
@@ -2540,6 +2694,7 @@ void print_header(char *delim)
break;
case PMT_TYPE_XTAL_TIME:
+ case PMT_TYPE_TCORE_CLOCK:
outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name);
break;
}
@@ -2575,6 +2730,8 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p
if (DO_BIC(BIC_IRQ))
outp += sprintf(outp, "IRQ: %lld\n", t->irq_count);
+ if (DO_BIC(BIC_NMI))
+ outp += sprintf(outp, "IRQ: %lld\n", t->nmi_count);
if (DO_BIC(BIC_SMI))
outp += sprintf(outp, "SMI: %d\n", t->smi_count);
@@ -2794,6 +2951,14 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->irq_count);
}
+ /* NMI */
+ if (DO_BIC(BIC_NMI)) {
+ if (sums_need_wide_columns)
+ outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->nmi_count);
+ else
+ outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->nmi_count);
+ }
+
/* SMI */
if (DO_BIC(BIC_SMI))
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count);
@@ -2848,7 +3013,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) {
const unsigned long value_raw = t->pmt_counter[i];
- const double value_converted = 100.0 * value_raw / crystal_hz / interval_float;
+ double value_converted;
switch (ppmt->type) {
case PMT_TYPE_RAW:
if (pmt_counter_get_width(ppmt) <= 32)
@@ -2860,8 +3025,13 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
break;
case PMT_TYPE_XTAL_TIME:
+ value_converted = 100.0 * value_raw / crystal_hz / interval_float;
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
break;
+
+ case PMT_TYPE_TCORE_CLOCK:
+ value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float;
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
}
}
@@ -2928,7 +3098,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) {
const unsigned long value_raw = c->pmt_counter[i];
- const double value_converted = 100.0 * value_raw / crystal_hz / interval_float;
+ double value_converted;
switch (ppmt->type) {
case PMT_TYPE_RAW:
if (pmt_counter_get_width(ppmt) <= 32)
@@ -2940,8 +3110,13 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
break;
case PMT_TYPE_XTAL_TIME:
+ value_converted = 100.0 * value_raw / crystal_hz / interval_float;
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
break;
+
+ case PMT_TYPE_TCORE_CLOCK:
+ value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float;
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
}
}
@@ -3126,7 +3301,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) {
const unsigned long value_raw = p->pmt_counter[i];
- const double value_converted = 100.0 * value_raw / crystal_hz / interval_float;
+ double value_converted;
switch (ppmt->type) {
case PMT_TYPE_RAW:
if (pmt_counter_get_width(ppmt) <= 32)
@@ -3138,8 +3313,13 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
break;
case PMT_TYPE_XTAL_TIME:
+ value_converted = 100.0 * value_raw / crystal_hz / interval_float;
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
break;
+
+ case PMT_TYPE_TCORE_CLOCK:
+ value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float;
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
}
}
@@ -3409,6 +3589,9 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d
if (DO_BIC(BIC_IRQ))
old->irq_count = new->irq_count - old->irq_count;
+ if (DO_BIC(BIC_NMI))
+ old->nmi_count = new->nmi_count - old->nmi_count;
+
if (DO_BIC(BIC_SMI))
old->smi_count = new->smi_count - old->smi_count;
@@ -3447,12 +3630,10 @@ int delta_cpu(struct thread_data *t, struct core_data *c,
/* always calculate thread delta */
retval = delta_thread(t, t2, c2); /* c2 is core delta */
- if (retval)
- return retval;
/* calculate package delta only for 1st core in package */
if (is_cpu_first_core_in_package(t, c, p))
- retval = delta_package(p, p2);
+ retval |= delta_package(p, p2);
return retval;
}
@@ -3489,6 +3670,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
t->instr_count = 0;
t->irq_count = 0;
+ t->nmi_count = 0;
t->smi_count = 0;
c->c3 = 0;
@@ -3580,7 +3762,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
/* remember first tv_begin */
if (average.threads.tv_begin.tv_sec == 0)
- average.threads.tv_begin = t->tv_begin;
+ average.threads.tv_begin = procsysfs_tv_begin;
/* remember last tv_end */
average.threads.tv_end = t->tv_end;
@@ -3593,6 +3775,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
average.threads.instr_count += t->instr_count;
average.threads.irq_count += t->irq_count;
+ average.threads.nmi_count += t->nmi_count;
average.threads.smi_count += t->smi_count;
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
@@ -3734,6 +3917,9 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data
if (average.threads.irq_count > 9999999)
sums_need_wide_columns = 1;
+ if (average.threads.nmi_count > 9999999)
+ sums_need_wide_columns = 1;
+
average.cores.c3 /= topo.allowed_cores;
average.cores.c6 /= topo.allowed_cores;
@@ -4546,7 +4732,8 @@ unsigned long pmt_gen_value_mask(unsigned int lsb, unsigned int msb)
unsigned long pmt_read_counter(struct pmt_counter *ppmt, unsigned int domain_id)
{
- assert(domain_id < ppmt->num_domains);
+ if (domain_id >= ppmt->num_domains)
+ return 0;
const unsigned long *pmmio = ppmt->domains[domain_id].pcounter;
const unsigned long value = pmmio ? *pmmio : 0;
@@ -4590,6 +4777,8 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
if (DO_BIC(BIC_IRQ))
t->irq_count = irqs_per_cpu[cpu];
+ if (DO_BIC(BIC_NMI))
+ t->nmi_count = nmi_per_cpu[cpu];
get_cstate_counters(cpu, t, c, p);
@@ -5335,6 +5524,7 @@ void free_all_buffers(void)
free(irq_column_2_cpu);
free(irqs_per_cpu);
+ free(nmi_per_cpu);
for (i = 0; i <= topo.max_cpu_num; ++i) {
if (cpus[i].put_ids)
@@ -5566,6 +5756,8 @@ int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *,
{
int retval, pkg_no, node_no, core_no, thread_no;
+ retval = 0;
+
for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) {
for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
@@ -5587,14 +5779,12 @@ int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *,
p = GET_PKG(pkg_base, pkg_no);
p2 = GET_PKG(pkg_base2, pkg_no);
- retval = func(t, c, p, t2, c2, p2);
- if (retval)
- return retval;
+ retval |= func(t, c, p, t2, c2, p2);
}
}
}
}
- return 0;
+ return retval;
}
/*
@@ -5791,31 +5981,37 @@ int snapshot_proc_interrupts(void)
irq_column_2_cpu[column] = cpu_number;
irqs_per_cpu[cpu_number] = 0;
+ nmi_per_cpu[cpu_number] = 0;
}
/* read /proc/interrupt count lines and sum up irqs per cpu */
while (1) {
int column;
char buf[64];
+ int this_row_is_nmi = 0;
- retval = fscanf(fp, " %s:", buf); /* flush irq# "N:" */
+ retval = fscanf(fp, " %s:", buf); /* irq# "N:" */
if (retval != 1)
break;
+ if (strncmp(buf, "NMI", strlen("NMI")) == 0)
+ this_row_is_nmi = 1;
+
/* read the count per cpu */
for (column = 0; column < topo.num_cpus; ++column) {
int cpu_number, irq_count;
retval = fscanf(fp, " %d", &irq_count);
+
if (retval != 1)
break;
cpu_number = irq_column_2_cpu[column];
irqs_per_cpu[cpu_number] += irq_count;
-
+ if (this_row_is_nmi)
+ nmi_per_cpu[cpu_number] += irq_count;
}
-
while (getc(fp) != '\n') ; /* flush interrupt description */
}
@@ -5912,7 +6108,9 @@ int snapshot_sys_lpi_us(void)
*/
int snapshot_proc_sysfs_files(void)
{
- if (DO_BIC(BIC_IRQ))
+ gettimeofday(&procsysfs_tv_begin, (struct timezone *)NULL);
+
+ if (DO_BIC(BIC_IRQ) || DO_BIC(BIC_NMI))
if (snapshot_proc_interrupts())
return 1;
@@ -7043,6 +7241,11 @@ void rapl_probe_intel(void)
else
rapl_dram_energy_units = rapl_energy_units;
+ if (platform->has_fixed_rapl_psys_unit)
+ rapl_psys_energy_units = 1.0;
+ else
+ rapl_psys_energy_units = rapl_energy_units;
+
time_unit = msr >> 16 & 0xF;
if (time_unit == 0)
time_unit = 0xA;
@@ -8233,6 +8436,7 @@ void process_cpuid()
aperf_mperf_multiplier = platform->need_perf_multiplier ? 1024 : 1;
BIC_PRESENT(BIC_IRQ);
+ BIC_PRESENT(BIC_NMI);
BIC_PRESENT(BIC_TSC_MHz);
}
@@ -8292,6 +8496,33 @@ int dir_filter(const struct dirent *dirp)
return 0;
}
+char *possible_file = "/sys/devices/system/cpu/possible";
+char possible_buf[1024];
+
+int initialize_cpu_possible_set(void)
+{
+ FILE *fp;
+
+ fp = fopen(possible_file, "r");
+ if (!fp) {
+ warn("open %s", possible_file);
+ return -1;
+ }
+ if (fread(possible_buf, sizeof(char), 1024, fp) == 0) {
+ warn("read %s", possible_file);
+ goto err;
+ }
+ if (parse_cpu_str(possible_buf, cpu_possible_set, cpu_possible_setsize)) {
+ warnx("%s: cpu str malformat %s\n", possible_file, cpu_effective_str);
+ goto err;
+ }
+ return 0;
+
+err:
+ fclose(fp);
+ return -1;
+}
+
void topology_probe(bool startup)
{
int i;
@@ -8324,6 +8555,16 @@ void topology_probe(bool startup)
for_all_proc_cpus(mark_cpu_present);
/*
+ * Allocate and initialize cpu_possible_set
+ */
+ cpu_possible_set = CPU_ALLOC((topo.max_cpu_num + 1));
+ if (cpu_possible_set == NULL)
+ err(3, "CPU_ALLOC");
+ cpu_possible_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
+ CPU_ZERO_S(cpu_possible_setsize, cpu_possible_set);
+ initialize_cpu_possible_set();
+
+ /*
* Allocate and initialize cpu_effective_set
*/
cpu_effective_set = CPU_ALLOC((topo.max_cpu_num + 1));
@@ -8583,7 +8824,11 @@ void allocate_irq_buffers(void)
irqs_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int));
if (irqs_per_cpu == NULL)
- err(-1, "calloc %d", topo.max_cpu_num + 1);
+ err(-1, "calloc %d IRQ", topo.max_cpu_num + 1);
+
+ nmi_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int));
+ if (nmi_per_cpu == NULL)
+ err(-1, "calloc %d NMI", topo.max_cpu_num + 1);
}
int update_topo(struct thread_data *t, struct core_data *c, struct pkg_data *p)
@@ -8854,49 +9099,36 @@ int parse_telem_info_file(int fd_dir, const char *info_filename, const char *for
struct pmt_mmio *pmt_mmio_open(unsigned int target_guid)
{
- DIR *dirp;
- struct dirent *entry;
+ struct pmt_diriter_t pmt_iter;
+ const struct dirent *entry;
struct stat st;
- unsigned int telem_idx;
int fd_telem_dir, fd_pmt;
unsigned long guid, size, offset;
size_t mmap_size;
void *mmio;
- struct pmt_mmio *ret = NULL;
+ struct pmt_mmio *head = NULL, *last = NULL;
+ struct pmt_mmio *new_pmt = NULL;
if (stat(SYSFS_TELEM_PATH, &st) == -1)
return NULL;
- dirp = opendir(SYSFS_TELEM_PATH);
- if (dirp == NULL)
+ pmt_diriter_init(&pmt_iter);
+ entry = pmt_diriter_begin(&pmt_iter, SYSFS_TELEM_PATH);
+ if (!entry) {
+ pmt_diriter_remove(&pmt_iter);
return NULL;
+ }
- for (;;) {
- entry = readdir(dirp);
-
- if (entry == NULL)
- break;
-
- if (strcmp(entry->d_name, ".") == 0)
- continue;
-
- if (strcmp(entry->d_name, "..") == 0)
- continue;
-
- if (sscanf(entry->d_name, "telem%u", &telem_idx) != 1)
- continue;
-
- if (fstatat(dirfd(dirp), entry->d_name, &st, 0) == -1) {
+ for ( ; entry != NULL; entry = pmt_diriter_next(&pmt_iter)) {
+ if (fstatat(dirfd(pmt_iter.dir), entry->d_name, &st, 0) == -1)
break;
- }
if (!S_ISDIR(st.st_mode))
continue;
- fd_telem_dir = openat(dirfd(dirp), entry->d_name, O_RDONLY);
- if (fd_telem_dir == -1) {
+ fd_telem_dir = openat(dirfd(pmt_iter.dir), entry->d_name, O_RDONLY);
+ if (fd_telem_dir == -1)
break;
- }
if (parse_telem_info_file(fd_telem_dir, "guid", "%lx", &guid)) {
close(fd_telem_dir);
@@ -8924,38 +9156,54 @@ struct pmt_mmio *pmt_mmio_open(unsigned int target_guid)
if (fd_pmt == -1)
goto loop_cleanup_and_break;
- mmap_size = (size + 0x1000UL) & (~0x1000UL);
+ mmap_size = ROUND_UP_TO_PAGE_SIZE(size);
mmio = mmap(0, mmap_size, PROT_READ, MAP_SHARED, fd_pmt, 0);
if (mmio != MAP_FAILED) {
-
if (debug)
fprintf(stderr, "%s: 0x%lx mmaped at: %p\n", __func__, guid, mmio);
- ret = calloc(1, sizeof(*ret));
+ new_pmt = calloc(1, sizeof(*new_pmt));
- if (!ret) {
+ if (!new_pmt) {
fprintf(stderr, "%s: Failed to allocate pmt_mmio\n", __func__);
exit(1);
}
- ret->guid = guid;
- ret->mmio_base = mmio;
- ret->pmt_offset = offset;
- ret->size = size;
+ /*
+ * Create linked list of mmaped regions,
+ * but preserve the ordering from sysfs.
+ * Ordering is important for the user to
+ * use the seq=%u parameter when adding a counter.
+ */
+ new_pmt->guid = guid;
+ new_pmt->mmio_base = mmio;
+ new_pmt->pmt_offset = offset;
+ new_pmt->size = size;
+ new_pmt->next = pmt_mmios;
+
+ if (last)
+ last->next = new_pmt;
+ else
+ head = new_pmt;
- ret->next = pmt_mmios;
- pmt_mmios = ret;
+ last = new_pmt;
}
loop_cleanup_and_break:
close(fd_pmt);
close(fd_telem_dir);
- break;
}
- closedir(dirp);
+ pmt_diriter_remove(&pmt_iter);
- return ret;
+ /*
+ * If we found something, stick just
+ * created linked list to the front.
+ */
+ if (head)
+ pmt_mmios = head;
+
+ return head;
}
struct pmt_mmio *pmt_mmio_find(unsigned int guid)
@@ -8992,7 +9240,7 @@ void *pmt_get_counter_pointer(struct pmt_mmio *pmmio, unsigned long counter_offs
return ret;
}
-struct pmt_mmio *pmt_add_guid(unsigned int guid)
+struct pmt_mmio *pmt_add_guid(unsigned int guid, unsigned int seq)
{
struct pmt_mmio *ret;
@@ -9000,6 +9248,11 @@ struct pmt_mmio *pmt_add_guid(unsigned int guid)
if (!ret)
ret = pmt_mmio_open(guid);
+ while (ret && seq) {
+ ret = ret->next;
+ --seq;
+ }
+
return ret;
}
@@ -9046,7 +9299,7 @@ void pmt_counter_add_domain(struct pmt_counter *pcounter, unsigned long *pmmio,
pcounter->domains[domain_id].pcounter = pmmio;
}
-int pmt_add_counter(unsigned int guid, const char *name, enum pmt_datatype type,
+int pmt_add_counter(unsigned int guid, unsigned int seq, const char *name, enum pmt_datatype type,
unsigned int lsb, unsigned int msb, unsigned int offset, enum counter_scope scope,
enum counter_format format, unsigned int domain_id, enum pmt_open_mode mode)
{
@@ -9066,10 +9319,10 @@ int pmt_add_counter(unsigned int guid, const char *name, enum pmt_datatype type,
exit(1);
}
- mmio = pmt_add_guid(guid);
+ mmio = pmt_add_guid(guid, seq);
if (!mmio) {
if (mode != PMT_OPEN_TRY) {
- fprintf(stderr, "%s: failed to map PMT MMIO for guid %x\n", __func__, guid);
+ fprintf(stderr, "%s: failed to map PMT MMIO for guid %x, seq %u\n", __func__, guid, seq);
exit(1);
}
@@ -9124,10 +9377,68 @@ int pmt_add_counter(unsigned int guid, const char *name, enum pmt_datatype type,
void pmt_init(void)
{
+ int cpu_num;
+ unsigned long seq, offset, mod_num;
+
if (BIC_IS_ENABLED(BIC_Diec6)) {
- pmt_add_counter(PMT_MTL_DC6_GUID, "Die%c6", PMT_TYPE_XTAL_TIME, PMT_COUNTER_MTL_DC6_LSB,
- PMT_COUNTER_MTL_DC6_MSB, PMT_COUNTER_MTL_DC6_OFFSET, SCOPE_PACKAGE, FORMAT_DELTA,
- 0, PMT_OPEN_TRY);
+ pmt_add_counter(PMT_MTL_DC6_GUID, PMT_MTL_DC6_SEQ, "Die%c6", PMT_TYPE_XTAL_TIME,
+ PMT_COUNTER_MTL_DC6_LSB, PMT_COUNTER_MTL_DC6_MSB, PMT_COUNTER_MTL_DC6_OFFSET,
+ SCOPE_PACKAGE, FORMAT_DELTA, 0, PMT_OPEN_TRY);
+ }
+
+ if (BIC_IS_ENABLED(BIC_CPU_c1e)) {
+ seq = 0;
+ offset = PMT_COUNTER_CWF_MC1E_OFFSET_BASE;
+ mod_num = 0; /* Relative module number for current PMT file. */
+
+ /* Open the counter for each CPU. */
+ for (cpu_num = 0; cpu_num < topo.max_cpu_num;) {
+
+ if (cpu_is_not_allowed(cpu_num))
+ goto next_loop_iter;
+
+ /*
+ * Set the scope to CPU, even though CWF report the counter per module.
+ * CPUs inside the same module will read from the same location, instead of reporting zeros.
+ *
+ * CWF with newer firmware might require a PMT_TYPE_XTAL_TIME intead of PMT_TYPE_TCORE_CLOCK.
+ */
+ pmt_add_counter(PMT_CWF_MC1E_GUID, seq, "CPU%c1e", PMT_TYPE_TCORE_CLOCK,
+ PMT_COUNTER_CWF_MC1E_LSB, PMT_COUNTER_CWF_MC1E_MSB, offset, SCOPE_CPU,
+ FORMAT_DELTA, cpu_num, PMT_OPEN_TRY);
+
+ /*
+ * Rather complex logic for each time we go to the next loop iteration,
+ * so keep it as a label.
+ */
+next_loop_iter:
+ /*
+ * Advance the cpu number and check if we should also advance offset to
+ * the next counter inside the PMT file.
+ *
+ * On Clearwater Forest platform, the counter is reported per module,
+ * so open the same counter for all of the CPUs inside the module.
+ * That way, reported table show the correct value for all of the CPUs inside the module,
+ * instead of zeros.
+ */
+ ++cpu_num;
+ if (cpu_num % PMT_COUNTER_CWF_CPUS_PER_MODULE == 0) {
+ offset += PMT_COUNTER_CWF_MC1E_OFFSET_INCREMENT;
+ ++mod_num;
+ }
+
+ /*
+ * There are PMT_COUNTER_CWF_MC1E_NUM_MODULES_PER_FILE in each PMT file.
+ *
+ * If that number is reached, seq must be incremented to advance to the next file in a sequence.
+ * Offset inside that file and a module counter has to be reset.
+ */
+ if (mod_num == PMT_COUNTER_CWF_MC1E_NUM_MODULES_PER_FILE) {
+ ++seq;
+ offset = PMT_COUNTER_CWF_MC1E_OFFSET_BASE;
+ mod_num = 0;
+ }
+ }
}
}
@@ -9163,6 +9474,18 @@ void turbostat_init()
}
}
+void affinitize_child(void)
+{
+ /* Prefer cpu_possible_set, if available */
+ if (sched_setaffinity(0, cpu_possible_setsize, cpu_possible_set)) {
+ warn("sched_setaffinity cpu_possible_set");
+
+ /* Otherwise, allow child to run on same cpu set as turbostat */
+ if (sched_setaffinity(0, cpu_allowed_setsize, cpu_allowed_set))
+ warn("sched_setaffinity cpu_allowed_set");
+ }
+}
+
int fork_it(char **argv)
{
pid_t child_pid;
@@ -9178,6 +9501,7 @@ int fork_it(char **argv)
child_pid = fork();
if (!child_pid) {
/* child */
+ affinitize_child();
execvp(argv[0], argv);
err(errno, "exec %s", argv[0]);
} else {
@@ -9204,10 +9528,9 @@ int fork_it(char **argv)
timersub(&tv_odd, &tv_even, &tv_delta);
if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS))
fprintf(outf, "%s: Counter reset detected\n", progname);
- else {
- compute_average(EVEN_COUNTERS);
- format_all_counters(EVEN_COUNTERS);
- }
+
+ compute_average(EVEN_COUNTERS);
+ format_all_counters(EVEN_COUNTERS);
fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec / 1000000.0);
@@ -9236,7 +9559,7 @@ int get_and_dump_counters(void)
void print_version()
{
- fprintf(outf, "turbostat version 2024.11.30 - Len Brown <lenb@kernel.org>\n");
+ fprintf(outf, "turbostat version 2025.02.02 - Len Brown <lenb@kernel.org>\n");
}
#define COMMAND_LINE_SIZE 2048
@@ -9561,7 +9884,7 @@ next:
}
if ((msr_num == 0) && (path == NULL) && (perf_device[0] == '\0' || perf_event[0] == '\0')) {
- fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter | perf/device/event ) required\n");
+ fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter | perf/device/event) required\n");
fail++;
}
@@ -9599,15 +9922,100 @@ bool starts_with(const char *str, const char *prefix)
return strncmp(prefix, str, strlen(prefix)) == 0;
}
+int pmt_parse_from_path(const char *target_path, unsigned int *out_guid, unsigned int *out_seq)
+{
+ struct pmt_diriter_t pmt_iter;
+ const struct dirent *dirname;
+ struct stat stat, target_stat;
+ int fd_telem_dir = -1;
+ int fd_target_dir;
+ unsigned int seq = 0;
+ unsigned long guid, target_guid;
+ int ret = -1;
+
+ fd_target_dir = open(target_path, O_RDONLY | O_DIRECTORY);
+ if (fd_target_dir == -1) {
+ return -1;
+ }
+
+ if (fstat(fd_target_dir, &target_stat) == -1) {
+ fprintf(stderr, "%s: Failed to stat the target: %s", __func__, strerror(errno));
+ exit(1);
+ }
+
+ if (parse_telem_info_file(fd_target_dir, "guid", "%lx", &target_guid)) {
+ fprintf(stderr, "%s: Failed to parse the target guid file: %s", __func__, strerror(errno));
+ exit(1);
+ }
+
+ close(fd_target_dir);
+
+ pmt_diriter_init(&pmt_iter);
+
+ for (dirname = pmt_diriter_begin(&pmt_iter, SYSFS_TELEM_PATH); dirname != NULL;
+ dirname = pmt_diriter_next(&pmt_iter)) {
+
+ fd_telem_dir = openat(dirfd(pmt_iter.dir), dirname->d_name, O_RDONLY | O_DIRECTORY);
+ if (fd_telem_dir == -1)
+ continue;
+
+ if (parse_telem_info_file(fd_telem_dir, "guid", "%lx", &guid)) {
+ fprintf(stderr, "%s: Failed to parse the guid file: %s", __func__, strerror(errno));
+ continue;
+ }
+
+ if (fstat(fd_telem_dir, &stat) == -1) {
+ fprintf(stderr, "%s: Failed to stat %s directory: %s", __func__,
+ dirname->d_name, strerror(errno));
+ continue;
+ }
+
+ /*
+ * If reached the same directory as target, exit the loop.
+ * Seq has the correct value now.
+ */
+ if (stat.st_dev == target_stat.st_dev && stat.st_ino == target_stat.st_ino) {
+ ret = 0;
+ break;
+ }
+
+ /*
+ * If reached directory with the same guid,
+ * but it's not the target directory yet,
+ * increment seq and continue the search.
+ */
+ if (guid == target_guid)
+ ++seq;
+
+ close(fd_telem_dir);
+ fd_telem_dir = -1;
+ }
+
+ pmt_diriter_remove(&pmt_iter);
+
+ if (fd_telem_dir != -1)
+ close(fd_telem_dir);
+
+ if (!ret) {
+ *out_guid = target_guid;
+ *out_seq = seq;
+ }
+
+ return ret;
+}
+
void parse_add_command_pmt(char *add_command)
{
char *name = NULL;
char *type_name = NULL;
char *format_name = NULL;
+ char *direct_path = NULL;
+ static const char direct_path_prefix[] = "path=";
unsigned int offset;
unsigned int lsb;
unsigned int msb;
unsigned int guid;
+ unsigned int seq = 0; /* By default, pick first file in a sequence with a given GUID. */
unsigned int domain_id;
enum counter_scope scope = 0;
enum pmt_datatype type = PMT_TYPE_RAW;
@@ -9687,6 +10095,13 @@ void parse_add_command_pmt(char *add_command)
goto next;
}
+ if (sscanf(add_command, "seq=%x", &seq) == 1)
+ goto next;
+
+ if (strncmp(add_command, direct_path_prefix, strlen(direct_path_prefix)) == 0) {
+ direct_path = add_command + strlen(direct_path_prefix);
+ goto next;
+ }
next:
add_command = strchr(add_command, ',');
if (add_command) {
@@ -9737,6 +10152,11 @@ next:
has_type = true;
}
+ if (strcmp("tcore_clock", type_name) == 0) {
+ type = PMT_TYPE_TCORE_CLOCK;
+ has_type = true;
+ }
+
if (!has_type) {
printf("%s: invalid %s: %s\n", __func__, "type", type_name);
exit(1);
@@ -9758,8 +10178,24 @@ next:
exit(1);
}
+ if (direct_path && has_guid) {
+ printf("%s: path and guid+seq parameters are mutually exclusive\n"
+ "notice: passed guid=0x%x and path=%s\n", __func__, guid, direct_path);
+ exit(1);
+ }
+
+ if (direct_path) {
+ if (pmt_parse_from_path(direct_path, &guid, &seq)) {
+ printf("%s: failed to parse PMT file from %s\n", __func__, direct_path);
+ exit(1);
+ }
+
+ /* GUID was just infered from the direct path. */
+ has_guid = true;
+ }
+
if (!has_guid) {
- printf("%s: missing %s\n", __func__, "guid");
+ printf("%s: missing %s\n", __func__, "guid or path");
exit(1);
}
@@ -9773,7 +10209,7 @@ next:
exit(1);
}
- pmt_add_counter(guid, name, type, lsb, msb, offset, scope, format, domain_id, PMT_OPEN_REQUIRED);
+ pmt_add_counter(guid, seq, name, type, lsb, msb, offset, scope, format, domain_id, PMT_OPEN_REQUIRED);
}
void parse_add_command(char *add_command)
@@ -9921,6 +10357,7 @@ void cmdline(int argc, char **argv)
{ "Dump", no_argument, 0, 'D' },
{ "debug", no_argument, 0, 'd' }, /* internal, not documented */
{ "enable", required_argument, 0, 'e' },
+ { "force", no_argument, 0, 'f' },
{ "interval", required_argument, 0, 'i' },
{ "IPC", no_argument, 0, 'I' },
{ "num_iterations", required_argument, 0, 'n' },
@@ -9981,6 +10418,9 @@ void cmdline(int argc, char **argv)
/* --enable specified counter */
bic_enabled = bic_enabled | bic_lookup(optarg, SHOW_LIST);
break;
+ case 'f':
+ force_load++;
+ break;
case 'd':
debug++;
ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include
index 0aa4005017c7..5158250988ce 100644
--- a/tools/scripts/Makefile.include
+++ b/tools/scripts/Makefile.include
@@ -91,6 +91,9 @@ LLVM_CONFIG ?= llvm-config
LLVM_OBJCOPY ?= llvm-objcopy
LLVM_STRIP ?= llvm-strip
+# Some tools require bpftool
+SYSTEM_BPFTOOL ?= bpftool
+
ifeq ($(CC_NO_CLANG), 1)
EXTRA_WARNINGS += -Wstrict-aliasing=3
@@ -136,6 +139,33 @@ else
NO_SUBDIR = :
endif
+# Beautify output
+# ---------------------------------------------------------------------------
+#
+# Most of build commands in Kbuild start with "cmd_". You can optionally define
+# "quiet_cmd_*". If defined, the short log is printed. Otherwise, no log from
+# that command is printed by default.
+#
+# e.g.)
+# quiet_cmd_depmod = DEPMOD $(MODLIB)
+# cmd_depmod = $(srctree)/scripts/depmod.sh $(DEPMOD) $(KERNELRELEASE)
+#
+# A simple variant is to prefix commands with $(Q) - that's useful
+# for commands that shall be hidden in non-verbose mode.
+#
+# $(Q)$(MAKE) $(build)=scripts/basic
+#
+# To put more focus on warnings, be less verbose as default
+# Use 'make V=1' to see the full commands
+
+ifeq ($(V),1)
+ quiet =
+ Q =
+else
+ quiet = quiet_
+ Q = @
+endif
+
# If the user is running make -s (silent mode), suppress echoing of commands
# make-4.0 (and later) keep single letter options in the 1st word of MAKEFLAGS.
ifeq ($(filter 3.%,$(MAKE_VERSION)),)
@@ -146,8 +176,11 @@ endif
ifneq ($(findstring s,$(short-opts)),)
silent=1
+ quiet=silent_
endif
+export quiet Q
+
#
# Define a callable command for descending to a new directory
#
diff --git a/tools/sound/dapm-graph b/tools/sound/dapm-graph
index f14bdfedee8f..b6196ee5065a 100755
--- a/tools/sound/dapm-graph
+++ b/tools/sound/dapm-graph
@@ -10,7 +10,7 @@ set -eu
STYLE_COMPONENT_ON="color=dodgerblue;style=bold"
STYLE_COMPONENT_OFF="color=gray40;style=filled;fillcolor=gray90"
-STYLE_NODE_ON="shape=box,style=bold,color=green4"
+STYLE_NODE_ON="shape=box,style=bold,color=green4,fillcolor=white"
STYLE_NODE_OFF="shape=box,style=filled,color=gray30,fillcolor=gray95"
# Print usage and exit
diff --git a/tools/testing/crypto/chacha20-s390/test-cipher.c b/tools/testing/crypto/chacha20-s390/test-cipher.c
index 8141d45df51a..35ea65c54ffa 100644
--- a/tools/testing/crypto/chacha20-s390/test-cipher.c
+++ b/tools/testing/crypto/chacha20-s390/test-cipher.c
@@ -66,7 +66,7 @@ static int test_lib_chacha(u8 *revert, u8 *cipher, u8 *plain)
}
/* Encrypt */
- chacha_init_arch(chacha_state, (u32*)key, iv);
+ chacha_init(chacha_state, (u32 *)key, iv);
start = ktime_get_ns();
chacha_crypt_arch(chacha_state, cipher, plain, data_size, 20);
@@ -81,7 +81,7 @@ static int test_lib_chacha(u8 *revert, u8 *cipher, u8 *plain)
pr_info("lib encryption took: %lld nsec", end - start);
/* Decrypt */
- chacha_init_arch(chacha_state, (u32 *)key, iv);
+ chacha_init(chacha_state, (u32 *)key, iv);
start = ktime_get_ns();
chacha_crypt_arch(chacha_state, revert, cipher, data_size, 20);
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
index b1256fee3567..0a6572ab6f37 100644
--- a/tools/testing/cxl/Kbuild
+++ b/tools/testing/cxl/Kbuild
@@ -63,6 +63,7 @@ cxl_core-y += $(CXL_CORE_SRC)/pmu.o
cxl_core-y += $(CXL_CORE_SRC)/cdat.o
cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o
cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o
+cxl_core-$(CONFIG_CXL_FEATURES) += $(CXL_CORE_SRC)/features.o
cxl_core-y += config_check.o
cxl_core-y += cxl_core_test.o
cxl_core-y += cxl_core_exports.o
diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
index d0337c11f9ee..cc8948f49117 100644
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -725,7 +725,7 @@ static void default_mock_decoder(struct cxl_decoder *cxld)
cxld->reset = mock_decoder_reset;
}
-static int first_decoder(struct device *dev, void *data)
+static int first_decoder(struct device *dev, const void *data)
{
struct cxl_decoder *cxld;
diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c
index 347c1e7b37bd..9495dbcc03a7 100644
--- a/tools/testing/cxl/test/mem.c
+++ b/tools/testing/cxl/test/mem.c
@@ -45,6 +45,18 @@ static struct cxl_cel_entry mock_cel[] = {
.effect = CXL_CMD_EFFECT_NONE,
},
{
+ .opcode = cpu_to_le16(CXL_MBOX_OP_GET_SUPPORTED_FEATURES),
+ .effect = CXL_CMD_EFFECT_NONE,
+ },
+ {
+ .opcode = cpu_to_le16(CXL_MBOX_OP_GET_FEATURE),
+ .effect = CXL_CMD_EFFECT_NONE,
+ },
+ {
+ .opcode = cpu_to_le16(CXL_MBOX_OP_SET_FEATURE),
+ .effect = cpu_to_le16(EFFECT(CONF_CHANGE_IMMEDIATE)),
+ },
+ {
.opcode = cpu_to_le16(CXL_MBOX_OP_IDENTIFY),
.effect = CXL_CMD_EFFECT_NONE,
},
@@ -145,6 +157,10 @@ struct mock_event_store {
u32 ev_status;
};
+struct vendor_test_feat {
+ __le32 data;
+} __packed;
+
struct cxl_mockmem_data {
void *lsa;
void *fw;
@@ -161,6 +177,7 @@ struct cxl_mockmem_data {
u8 event_buf[SZ_4K];
u64 timestamp;
unsigned long sanitize_timeout;
+ struct vendor_test_feat test_feat;
};
static struct mock_event_log *event_find_log(struct device *dev, int log_type)
@@ -401,6 +418,10 @@ struct cxl_test_gen_media gen_media = {
.channel = 1,
.rank = 30,
},
+ .component_id = { 0x3, 0x74, 0xc5, 0x8, 0x9a, 0x1a, 0xb, 0xfc, 0xd2, 0x7e, 0x2f, 0x31, 0x9b, 0x3c, 0x81, 0x4d },
+ .cme_threshold_ev_flags = 3,
+ .cme_count = { 33, 0, 0 },
+ .sub_type = 0x2,
},
};
@@ -429,6 +450,11 @@ struct cxl_test_dram dram = {
.bank_group = 5,
.bank = 2,
.column = {0xDE, 0xAD},
+ .component_id = { 0x1, 0x74, 0xc5, 0x8, 0x9a, 0x1a, 0xb, 0xfc, 0xd2, 0x7e, 0x2f, 0x31, 0x9b, 0x3c, 0x81, 0x4d },
+ .sub_channel = 8,
+ .cme_threshold_ev_flags = 2,
+ .cvme_count = { 14, 0, 0 },
+ .sub_type = 0x5,
},
};
@@ -456,7 +482,10 @@ struct cxl_test_mem_module mem_module = {
.dirty_shutdown_cnt = { 0xde, 0xad, 0xbe, 0xef },
.cor_vol_err_cnt = { 0xde, 0xad, 0xbe, 0xef },
.cor_per_err_cnt = { 0xde, 0xad, 0xbe, 0xef },
- }
+ },
+ /* .validity_flags = <set below> */
+ .component_id = { 0x2, 0x74, 0xc5, 0x8, 0x9a, 0x1a, 0xb, 0xfc, 0xd2, 0x7e, 0x2f, 0x31, 0x9b, 0x3c, 0x81, 0x4d },
+ .event_sub_type = 0x3,
},
};
@@ -478,13 +507,18 @@ static int mock_set_timestamp(struct cxl_dev_state *cxlds,
static void cxl_mock_add_event_logs(struct mock_event_store *mes)
{
- put_unaligned_le16(CXL_GMER_VALID_CHANNEL | CXL_GMER_VALID_RANK,
+ put_unaligned_le16(CXL_GMER_VALID_CHANNEL | CXL_GMER_VALID_RANK |
+ CXL_GMER_VALID_COMPONENT | CXL_GMER_VALID_COMPONENT_ID_FORMAT,
&gen_media.rec.media_hdr.validity_flags);
put_unaligned_le16(CXL_DER_VALID_CHANNEL | CXL_DER_VALID_BANK_GROUP |
- CXL_DER_VALID_BANK | CXL_DER_VALID_COLUMN,
+ CXL_DER_VALID_BANK | CXL_DER_VALID_COLUMN | CXL_DER_VALID_SUB_CHANNEL |
+ CXL_DER_VALID_COMPONENT | CXL_DER_VALID_COMPONENT_ID_FORMAT,
&dram.rec.media_hdr.validity_flags);
+ put_unaligned_le16(CXL_MMER_VALID_COMPONENT | CXL_MMER_VALID_COMPONENT_ID_FORMAT,
+ &mem_module.rec.validity_flags);
+
mes_add_event(mes, CXL_EVENT_TYPE_INFO, &maint_needed);
mes_add_event(mes, CXL_EVENT_TYPE_INFO,
(struct cxl_event_record_raw *)&gen_media);
@@ -1337,6 +1371,151 @@ static int mock_activate_fw(struct cxl_mockmem_data *mdata,
return -EINVAL;
}
+#define CXL_VENDOR_FEATURE_TEST \
+ UUID_INIT(0xffffffff, 0xffff, 0xffff, 0xff, 0xff, 0xff, 0xff, 0xff, \
+ 0xff, 0xff, 0xff)
+
+static void fill_feature_vendor_test(struct cxl_feat_entry *feat)
+{
+ feat->uuid = CXL_VENDOR_FEATURE_TEST;
+ feat->id = 0;
+ feat->get_feat_size = cpu_to_le16(0x4);
+ feat->set_feat_size = cpu_to_le16(0x4);
+ feat->flags = cpu_to_le32(CXL_FEATURE_F_CHANGEABLE |
+ CXL_FEATURE_F_DEFAULT_SEL |
+ CXL_FEATURE_F_SAVED_SEL);
+ feat->get_feat_ver = 1;
+ feat->set_feat_ver = 1;
+ feat->effects = cpu_to_le16(CXL_CMD_CONFIG_CHANGE_COLD_RESET |
+ CXL_CMD_EFFECTS_VALID);
+}
+
+#define MAX_CXL_TEST_FEATS 1
+
+static int mock_get_test_feature(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
+{
+ struct vendor_test_feat *output = cmd->payload_out;
+ struct cxl_mbox_get_feat_in *input = cmd->payload_in;
+ u16 offset = le16_to_cpu(input->offset);
+ u16 count = le16_to_cpu(input->count);
+ u8 *ptr;
+
+ if (offset > sizeof(*output)) {
+ cmd->return_code = CXL_MBOX_CMD_RC_INPUT;
+ return -EINVAL;
+ }
+
+ if (offset + count > sizeof(*output)) {
+ cmd->return_code = CXL_MBOX_CMD_RC_INPUT;
+ return -EINVAL;
+ }
+
+ ptr = (u8 *)&mdata->test_feat + offset;
+ memcpy((u8 *)output + offset, ptr, count);
+
+ return 0;
+}
+
+static int mock_get_feature(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
+{
+ struct cxl_mbox_get_feat_in *input = cmd->payload_in;
+
+ if (uuid_equal(&input->uuid, &CXL_VENDOR_FEATURE_TEST))
+ return mock_get_test_feature(mdata, cmd);
+
+ cmd->return_code = CXL_MBOX_CMD_RC_UNSUPPORTED;
+
+ return -EOPNOTSUPP;
+}
+
+static int mock_set_test_feature(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
+{
+ struct cxl_mbox_set_feat_in *input = cmd->payload_in;
+ struct vendor_test_feat *test =
+ (struct vendor_test_feat *)input->feat_data;
+ u32 action;
+
+ action = FIELD_GET(CXL_SET_FEAT_FLAG_DATA_TRANSFER_MASK,
+ le32_to_cpu(input->hdr.flags));
+ /*
+ * While it is spec compliant to support other set actions, it is not
+ * necessary to add the complication in the emulation currently. Reject
+ * anything besides full xfer.
+ */
+ if (action != CXL_SET_FEAT_FLAG_FULL_DATA_TRANSFER) {
+ cmd->return_code = CXL_MBOX_CMD_RC_INPUT;
+ return -EINVAL;
+ }
+
+ /* Offset should be reserved when doing full transfer */
+ if (input->hdr.offset) {
+ cmd->return_code = CXL_MBOX_CMD_RC_INPUT;
+ return -EINVAL;
+ }
+
+ memcpy(&mdata->test_feat.data, &test->data, sizeof(u32));
+
+ return 0;
+}
+
+static int mock_set_feature(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
+{
+ struct cxl_mbox_set_feat_in *input = cmd->payload_in;
+
+ if (uuid_equal(&input->hdr.uuid, &CXL_VENDOR_FEATURE_TEST))
+ return mock_set_test_feature(mdata, cmd);
+
+ cmd->return_code = CXL_MBOX_CMD_RC_UNSUPPORTED;
+
+ return -EOPNOTSUPP;
+}
+
+static int mock_get_supported_features(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
+{
+ struct cxl_mbox_get_sup_feats_in *in = cmd->payload_in;
+ struct cxl_mbox_get_sup_feats_out *out = cmd->payload_out;
+ struct cxl_feat_entry *feat;
+ u16 start_idx, count;
+
+ if (cmd->size_out < sizeof(*out)) {
+ cmd->return_code = CXL_MBOX_CMD_RC_PAYLOADLEN;
+ return -EINVAL;
+ }
+
+ /*
+ * Current emulation only supports 1 feature
+ */
+ start_idx = le16_to_cpu(in->start_idx);
+ if (start_idx != 0) {
+ cmd->return_code = CXL_MBOX_CMD_RC_INPUT;
+ return -EINVAL;
+ }
+
+ count = le16_to_cpu(in->count);
+ if (count < struct_size(out, ents, 0)) {
+ cmd->return_code = CXL_MBOX_CMD_RC_PAYLOADLEN;
+ return -EINVAL;
+ }
+
+ out->supported_feats = cpu_to_le16(MAX_CXL_TEST_FEATS);
+ cmd->return_code = 0;
+ if (count < struct_size(out, ents, MAX_CXL_TEST_FEATS)) {
+ out->num_entries = 0;
+ return 0;
+ }
+
+ out->num_entries = cpu_to_le16(MAX_CXL_TEST_FEATS);
+ feat = out->ents;
+ fill_feature_vendor_test(feat);
+
+ return 0;
+}
+
static int cxl_mock_mbox_send(struct cxl_mailbox *cxl_mbox,
struct cxl_mbox_cmd *cmd)
{
@@ -1422,6 +1601,15 @@ static int cxl_mock_mbox_send(struct cxl_mailbox *cxl_mbox,
case CXL_MBOX_OP_ACTIVATE_FW:
rc = mock_activate_fw(mdata, cmd);
break;
+ case CXL_MBOX_OP_GET_SUPPORTED_FEATURES:
+ rc = mock_get_supported_features(mdata, cmd);
+ break;
+ case CXL_MBOX_OP_GET_FEATURE:
+ rc = mock_get_feature(mdata, cmd);
+ break;
+ case CXL_MBOX_OP_SET_FEATURE:
+ rc = mock_set_feature(mdata, cmd);
+ break;
default:
break;
}
@@ -1469,6 +1657,11 @@ static int cxl_mock_mailbox_create(struct cxl_dev_state *cxlds)
return 0;
}
+static void cxl_mock_test_feat_init(struct cxl_mockmem_data *mdata)
+{
+ mdata->test_feat.data = cpu_to_le32(0xdeadbeef);
+}
+
static int cxl_mock_mem_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
@@ -1541,6 +1734,10 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
if (rc)
return rc;
+ rc = devm_cxl_setup_features(cxlds);
+ if (rc)
+ dev_dbg(dev, "No CXL Features discovered\n");
+
cxl_mock_add_event_logs(&mdata->mes);
cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlds);
@@ -1555,7 +1752,12 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
if (rc)
return rc;
+ rc = devm_cxl_setup_fwctl(cxlmd);
+ if (rc)
+ dev_dbg(dev, "No CXL FWCTL setup\n");
+
cxl_mem_get_event_records(mds, CXLDEV_EVENT_STATUS_ALL);
+ cxl_mock_test_feat_init(mdata);
return 0;
}
diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c
index 450c7566c33f..af2594e4f35d 100644
--- a/tools/testing/cxl/test/mock.c
+++ b/tools/testing/cxl/test/mock.c
@@ -228,16 +228,16 @@ int __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds,
}
EXPORT_SYMBOL_NS_GPL(__wrap_cxl_hdm_decode_init, "CXL");
-int __wrap_cxl_dvsec_rr_decode(struct device *dev, struct cxl_port *port,
+int __wrap_cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds,
struct cxl_endpoint_dvsec_info *info)
{
int rc = 0, index;
struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
- if (ops && ops->is_mock_dev(dev))
+ if (ops && ops->is_mock_dev(cxlds->dev))
rc = 0;
else
- rc = cxl_dvsec_rr_decode(dev, port, info);
+ rc = cxl_dvsec_rr_decode(cxlds, info);
put_cxl_mock_ops(index);
return rc;
diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index 8c8da966c641..a5f7fdd0c1fb 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -4303,6 +4303,14 @@ if (defined($opt{"LOG_FILE"})) {
if ($opt{"CLEAR_LOG"}) {
unlink $opt{"LOG_FILE"};
}
+
+ if (! -e $opt{"LOG_FILE"} && $opt{"LOG_FILE"} =~ m,^(.*/),) {
+ my $dir = $1;
+ if (! -d $dir) {
+ mkpath($dir) or die "Failed to create directories '$dir': $!";
+ print "\nThe log directory $dir did not exist, so it was created.\n";
+ }
+ }
open(LOG, ">> $opt{LOG_FILE}") or die "Can't write to $opt{LOG_FILE}";
LOG->autoflush(1);
}
diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config
index b3b00269a52a..cdd9782f9646 100644
--- a/tools/testing/kunit/configs/all_tests.config
+++ b/tools/testing/kunit/configs/all_tests.config
@@ -38,12 +38,11 @@ CONFIG_IWLWIFI=y
CONFIG_DAMON=y
CONFIG_DAMON_VADDR=y
CONFIG_DAMON_PADDR=y
-CONFIG_DEBUG_FS=y
-CONFIG_DAMON_DBGFS=y
-CONFIG_DAMON_DBGFS_DEPRECATED=y
CONFIG_REGMAP_BUILD=y
+CONFIG_AUDIT=y
+
CONFIG_SECURITY=y
CONFIG_SECURITY_APPARMOR=y
CONFIG_SECURITY_LANDLOCK=y
diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
index d30f90eae9a4..d3f39bc1ceec 100644
--- a/tools/testing/kunit/kunit_kernel.py
+++ b/tools/testing/kunit/kunit_kernel.py
@@ -72,8 +72,8 @@ class LinuxSourceTreeOperations:
raise ConfigError(e.output.decode())
def make(self, jobs: int, build_dir: str, make_options: Optional[List[str]]) -> None:
- command = ['make', 'all', 'compile_commands.json', 'ARCH=' + self._linux_arch,
- 'O=' + build_dir, '--jobs=' + str(jobs)]
+ command = ['make', 'all', 'compile_commands.json', 'scripts_gdb',
+ 'ARCH=' + self._linux_arch, 'O=' + build_dir, '--jobs=' + str(jobs)]
if make_options:
command.extend(make_options)
if self._cross_compile:
diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py
index 29fc27e8949b..da53a709773a 100644
--- a/tools/testing/kunit/kunit_parser.py
+++ b/tools/testing/kunit/kunit_parser.py
@@ -759,7 +759,7 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest:
# If parsing the main/top-level test, parse KTAP version line and
# test plan
test.name = "main"
- ktap_line = parse_ktap_header(lines, test, printer)
+ parse_ktap_header(lines, test, printer)
test.log.extend(parse_diagnostic(lines))
parse_test_plan(lines, test)
parent_test = True
@@ -768,13 +768,12 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest:
# the KTAP version line and/or subtest header line
ktap_line = parse_ktap_header(lines, test, printer)
subtest_line = parse_test_header(lines, test)
+ test.log.extend(parse_diagnostic(lines))
+ parse_test_plan(lines, test)
parent_test = (ktap_line or subtest_line)
if parent_test:
- # If KTAP version line and/or subtest header is found, attempt
- # to parse test plan and print test header
- test.log.extend(parse_diagnostic(lines))
- parse_test_plan(lines, test)
print_test_header(test, printer)
+
expected_count = test.expected_count
subtests = []
test_num = 1
diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py
index 0bcb0cc002f8..5ff4f6ffd873 100755
--- a/tools/testing/kunit/kunit_tool_test.py
+++ b/tools/testing/kunit/kunit_tool_test.py
@@ -363,6 +363,17 @@ class KUnitParserTest(unittest.TestCase):
self.print_mock.assert_any_call(StrContains(' Indented more.'))
self.noPrintCallContains('not ok 1 test1')
+ def test_parse_late_test_plan(self):
+ output = """
+ TAP version 13
+ ok 4 test4
+ 1..4
+ """
+ result = kunit_parser.parse_run_tests(output.splitlines(), stdout)
+ # Missing test results after test plan should alert a suspected test crash.
+ self.assertEqual(kunit_parser.TestStatus.TEST_CRASHED, result.status)
+ self.assertEqual(result.counts, kunit_parser.TestCounts(passed=1, crashed=1, errors=1))
+
def line_stream_from_strs(strs: Iterable[str]) -> kunit_parser.LineStream:
return kunit_parser.LineStream(enumerate(strs, start=1))
diff --git a/tools/testing/kunit/qemu_configs/sparc.py b/tools/testing/kunit/qemu_configs/sparc.py
index e975c4331a7c..256d9573b446 100644
--- a/tools/testing/kunit/qemu_configs/sparc.py
+++ b/tools/testing/kunit/qemu_configs/sparc.py
@@ -2,8 +2,9 @@ from ..qemu_config import QemuArchParams
QEMU_ARCH = QemuArchParams(linux_arch='sparc',
kconfig='''
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y''',
+CONFIG_SERIAL_SUNZILOG=y
+CONFIG_SERIAL_SUNZILOG_CONSOLE=y
+''',
qemu_arch='sparc',
kernel_path='arch/sparc/boot/zImage',
kernel_command_line='console=ttyS0 mem=256M',
diff --git a/tools/testing/kunit/qemu_configs/x86_64.py b/tools/testing/kunit/qemu_configs/x86_64.py
index dc7949076863..4a6bf4e048f5 100644
--- a/tools/testing/kunit/qemu_configs/x86_64.py
+++ b/tools/testing/kunit/qemu_configs/x86_64.py
@@ -7,4 +7,6 @@ CONFIG_SERIAL_8250_CONSOLE=y''',
qemu_arch='x86_64',
kernel_path='arch/x86/boot/bzImage',
kernel_command_line='console=ttyS0',
- extra_qemu_params=[])
+ # qboot is faster than SeaBIOS and doesn't mess up
+ # the terminal.
+ extra_qemu_params=['-bios', 'qboot.rom'])
diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c
index cffaf2245d4f..eaff1b036989 100644
--- a/tools/testing/radix-tree/multiorder.c
+++ b/tools/testing/radix-tree/multiorder.c
@@ -227,6 +227,7 @@ static void *load_creator(void *ptr)
unsigned long index = (3 << RADIX_TREE_MAP_SHIFT) -
(1 << order);
item_insert_order(tree, index, order);
+ xa_set_mark(tree, index, XA_MARK_1);
item_delete_rcu(tree, index);
}
}
@@ -242,8 +243,11 @@ static void *load_worker(void *ptr)
rcu_register_thread();
while (!stop_iteration) {
+ unsigned long find_index = (2 << RADIX_TREE_MAP_SHIFT) + 1;
struct item *item = xa_load(ptr, index);
assert(!xa_is_internal(item));
+ item = xa_find(ptr, &find_index, index, XA_MARK_1);
+ assert(!xa_is_internal(item));
}
rcu_unregister_thread();
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 2401e973c359..2694344274bf 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -18,6 +18,7 @@ TARGETS += devices/error_logs
TARGETS += devices/probe
TARGETS += dmabuf-heaps
TARGETS += drivers/dma-buf
+TARGETS += drivers/ntsync
TARGETS += drivers/s390x/uvdevice
TARGETS += drivers/net
TARGETS += drivers/net/bonding
@@ -34,6 +35,7 @@ TARGETS += filesystems/epoll
TARGETS += filesystems/fat
TARGETS += filesystems/overlayfs
TARGETS += filesystems/statmount
+TARGETS += filesystems/mount-notify
TARGETS += firmware
TARGETS += fpu
TARGETS += ftrace
@@ -72,6 +74,7 @@ TARGETS += net/packetdrill
TARGETS += net/rds
TARGETS += net/tcp_ao
TARGETS += nsfs
+TARGETS += pci_endpoint
TARGETS += pcie_bwctrl
TARGETS += perf_events
TARGETS += pidfd
@@ -111,6 +114,7 @@ endif
TARGETS += tmpfs
TARGETS += tpm2
TARGETS += tty
+TARGETS += ublk
TARGETS += uevent
TARGETS += user_events
TARGETS += vDSO
diff --git a/tools/testing/selftests/arm64/fp/kernel-test.c b/tools/testing/selftests/arm64/fp/kernel-test.c
index 348e8bef62c7..e3cec3723ffa 100644
--- a/tools/testing/selftests/arm64/fp/kernel-test.c
+++ b/tools/testing/selftests/arm64/fp/kernel-test.c
@@ -46,7 +46,6 @@ static void handle_kick_signal(int sig, siginfo_t *info, void *context)
}
static char *drivers[] = {
- "crct10dif-arm64",
"sha1-ce",
"sha224-arm64",
"sha224-arm64-neon",
diff --git a/tools/testing/selftests/arm64/mte/check_hugetlb_options.c b/tools/testing/selftests/arm64/mte/check_hugetlb_options.c
index 303260a6dc65..3bfcd3848432 100644
--- a/tools/testing/selftests/arm64/mte/check_hugetlb_options.c
+++ b/tools/testing/selftests/arm64/mte/check_hugetlb_options.c
@@ -227,6 +227,8 @@ static int check_child_hugetlb_memory_mapping(int mem_type, int mode, int mappin
int main(int argc, char *argv[])
{
int err;
+ void *map_ptr;
+ unsigned long map_size;
err = mte_default_setup();
if (err)
@@ -243,6 +245,15 @@ int main(int argc, char *argv[])
return KSFT_FAIL;
}
+ /* Check if MTE supports hugetlb mappings */
+ map_size = default_huge_page_size();
+ map_ptr = mmap(NULL, map_size, PROT_READ | PROT_MTE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
+ if (map_ptr == MAP_FAILED)
+ ksft_exit_skip("PROT_MTE not supported with MAP_HUGETLB mappings\n");
+ else
+ munmap(map_ptr, map_size);
+
/* Set test plan */
ksft_set_plan(12);
@@ -270,13 +281,13 @@ int main(int argc, char *argv[])
"Check clear PROT_MTE flags with private mapping and sync error mode and mmap/mprotect memory\n");
evaluate_test(check_child_hugetlb_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE | MAP_HUGETLB),
- "Check child hugetlb memory with private mapping, precise mode and mmap memory\n");
+ "Check child hugetlb memory with private mapping, sync error mode and mmap memory\n");
evaluate_test(check_child_hugetlb_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE | MAP_HUGETLB),
- "Check child hugetlb memory with private mapping, precise mode and mmap memory\n");
+ "Check child hugetlb memory with private mapping, async error mode and mmap memory\n");
evaluate_test(check_child_hugetlb_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE | MAP_HUGETLB),
- "Check child hugetlb memory with private mapping, precise mode and mmap/mprotect memory\n");
+ "Check child hugetlb memory with private mapping, sync error mode and mmap/mprotect memory\n");
evaluate_test(check_child_hugetlb_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE | MAP_HUGETLB),
- "Check child hugetlb memory with private mapping, precise mode and mmap/mprotect memory\n");
+ "Check child hugetlb memory with private mapping, async error mode and mmap/mprotect memory\n");
mte_restore_setup();
free_hugetlb();
diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64
index 901349da680f..6d8feda27ce9 100644
--- a/tools/testing/selftests/bpf/DENYLIST.aarch64
+++ b/tools/testing/selftests/bpf/DENYLIST.aarch64
@@ -1,12 +1,3 @@
-bpf_cookie/multi_kprobe_attach_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3
-bpf_cookie/multi_kprobe_link_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3
-kprobe_multi_bench_attach # needs CONFIG_FPROBE
-kprobe_multi_test # needs CONFIG_FPROBE
-module_attach # prog 'kprobe_multi': failed to auto-attach: -95
fentry_test/fentry_many_args # fentry_many_args:FAIL:fentry_many_args_attach unexpected error: -524
fexit_test/fexit_many_args # fexit_many_args:FAIL:fexit_many_args_attach unexpected error: -524
tracing_struct/struct_many_args # struct_many_args:FAIL:tracing_struct_many_args__attach unexpected error: -524
-fill_link_info/kprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95
-fill_link_info/kretprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95
-fill_link_info/kprobe_multi_invalid_ubuff # bpf_program__attach_kprobe_multi_opts unexpected error: -95
-missed/kprobe_recursion # missed_kprobe_recursion__attach unexpected error: -95 (errno 95)
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 87551628e112..66bb50356be0 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -95,18 +95,12 @@ TEST_GEN_PROGS += test_progs-cpuv4
TEST_INST_SUBDIRS += cpuv4
endif
-TEST_GEN_FILES = test_lwt_ip_encap.bpf.o test_tc_edt.bpf.o
+TEST_GEN_FILES = test_tc_edt.bpf.o
TEST_FILES = xsk_prereqs.sh $(wildcard progs/btf_dump_test_case_*.c)
# Order correspond to 'make run_tests' order
TEST_PROGS := test_kmod.sh \
- test_xdp_redirect_multi.sh \
- test_tunnel.sh \
- test_lwt_seg6local.sh \
test_lirc_mode2.sh \
- test_xdp_vlan_mode_generic.sh \
- test_xdp_vlan_mode_native.sh \
- test_lwt_ip_encap.sh \
test_tc_tunnel.sh \
test_tc_edt.sh \
test_xdping.sh \
@@ -117,9 +111,9 @@ TEST_PROGS := test_kmod.sh \
test_xsk.sh \
test_xdp_features.sh
-TEST_PROGS_EXTENDED := with_addr.sh \
- with_tunnels.sh ima_setup.sh verify_sig_setup.sh \
- test_xdp_vlan.sh test_bpftool.py
+TEST_PROGS_EXTENDED := \
+ ima_setup.sh verify_sig_setup.sh \
+ test_bpftool.py
TEST_KMODS := bpf_testmod.ko bpf_test_no_cfi.ko bpf_test_modorder_x.ko \
bpf_test_modorder_y.ko
@@ -135,7 +129,6 @@ TEST_GEN_PROGS_EXTENDED = \
veristat \
xdp_features \
xdp_hw_metadata \
- xdp_redirect_multi \
xdp_synproxy \
xdping \
xskxceiver
@@ -184,9 +177,14 @@ ifeq ($(feature-llvm),1)
LLVM_CONFIG_LIB_COMPONENTS := mcdisassembler all-targets
# both llvm-config and lib.mk add -D_GNU_SOURCE, which ends up as conflict
LLVM_CFLAGS += $(filter-out -D_GNU_SOURCE,$(shell $(LLVM_CONFIG) --cflags))
- LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-static --libs $(LLVM_CONFIG_LIB_COMPONENTS))
- LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-static --system-libs $(LLVM_CONFIG_LIB_COMPONENTS))
- LLVM_LDLIBS += -lstdc++
+ # Prefer linking statically if it's available, otherwise fallback to shared
+ ifeq ($(shell $(LLVM_CONFIG) --link-static --libs >/dev/null 2>&1 && echo static),static)
+ LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-static --libs $(LLVM_CONFIG_LIB_COMPONENTS))
+ LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-static --system-libs $(LLVM_CONFIG_LIB_COMPONENTS))
+ LLVM_LDLIBS += -lstdc++
+ else
+ LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-shared --libs $(LLVM_CONFIG_LIB_COMPONENTS))
+ endif
LLVM_LDFLAGS += $(shell $(LLVM_CONFIG) --ldflags)
endif
@@ -306,6 +304,7 @@ $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(RUNQSLOWER_OUTPUT)
BPFTOOL_OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \
BPFOBJ_OUTPUT=$(BUILD_DIR)/libbpf/ \
BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) \
+ BPF_TARGET_ENDIAN=$(BPF_TARGET_ENDIAN) \
EXTRA_CFLAGS='-g $(OPT_FLAGS) $(SAN_CFLAGS) $(EXTRA_CFLAGS)' \
EXTRA_LDFLAGS='$(SAN_LDFLAGS) $(EXTRA_LDFLAGS)' && \
cp $(RUNQSLOWER_OUTPUT)runqslower $@
@@ -684,6 +683,7 @@ $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \
$(TRUNNER_EXTRA_OBJS) $$(BPFOBJ) \
$(RESOLVE_BTFIDS) \
$(TRUNNER_BPFTOOL) \
+ $(OUTPUT)/veristat \
| $(TRUNNER_BINARY)-extras
$$(call msg,BINARY,,$$@)
$(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) $$(LDFLAGS) -o $$@
diff --git a/tools/testing/selftests/bpf/Makefile.docs b/tools/testing/selftests/bpf/Makefile.docs
index eb6a4fea8c79..f7f9e7088bb3 100644
--- a/tools/testing/selftests/bpf/Makefile.docs
+++ b/tools/testing/selftests/bpf/Makefile.docs
@@ -7,12 +7,6 @@ INSTALL ?= install
RM ?= rm -f
RMDIR ?= rmdir --ignore-fail-on-non-empty
-ifeq ($(V),1)
- Q =
-else
- Q = @
-endif
-
prefix ?= /usr/local
mandir ?= $(prefix)/man
man2dir = $(mandir)/man2
diff --git a/tools/testing/selftests/bpf/bpf_arena_spin_lock.h b/tools/testing/selftests/bpf/bpf_arena_spin_lock.h
new file mode 100644
index 000000000000..fb8dc0768999
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_arena_spin_lock.h
@@ -0,0 +1,533 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#ifndef BPF_ARENA_SPIN_LOCK_H
+#define BPF_ARENA_SPIN_LOCK_H
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_atomic.h"
+
+#define arch_mcs_spin_lock_contended_label(l, label) smp_cond_load_acquire_label(l, VAL, label)
+#define arch_mcs_spin_unlock_contended(l) smp_store_release((l), 1)
+
+#if defined(ENABLE_ATOMICS_TESTS) && defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+
+#define EBUSY 16
+#define EOPNOTSUPP 95
+#define ETIMEDOUT 110
+
+#ifndef __arena
+#define __arena __attribute__((address_space(1)))
+#endif
+
+extern unsigned long CONFIG_NR_CPUS __kconfig;
+
+/*
+ * Typically, we'd just rely on the definition in vmlinux.h for qspinlock, but
+ * PowerPC overrides the definition to define lock->val as u32 instead of
+ * atomic_t, leading to compilation errors. Import a local definition below so
+ * that we don't depend on the vmlinux.h version.
+ */
+
+struct __qspinlock {
+ union {
+ atomic_t val;
+ struct {
+ u8 locked;
+ u8 pending;
+ };
+ struct {
+ u16 locked_pending;
+ u16 tail;
+ };
+ };
+};
+
+#define arena_spinlock_t struct __qspinlock
+/* FIXME: Using typedef causes CO-RE relocation error */
+/* typedef struct qspinlock arena_spinlock_t; */
+
+struct arena_mcs_spinlock {
+ struct arena_mcs_spinlock __arena *next;
+ int locked;
+ int count;
+};
+
+struct arena_qnode {
+ struct arena_mcs_spinlock mcs;
+};
+
+#define _Q_MAX_NODES 4
+#define _Q_PENDING_LOOPS 1
+
+/*
+ * Bitfields in the atomic value:
+ *
+ * 0- 7: locked byte
+ * 8: pending
+ * 9-15: not used
+ * 16-17: tail index
+ * 18-31: tail cpu (+1)
+ */
+#define _Q_MAX_CPUS 1024
+
+#define _Q_SET_MASK(type) (((1U << _Q_ ## type ## _BITS) - 1)\
+ << _Q_ ## type ## _OFFSET)
+#define _Q_LOCKED_OFFSET 0
+#define _Q_LOCKED_BITS 8
+#define _Q_LOCKED_MASK _Q_SET_MASK(LOCKED)
+
+#define _Q_PENDING_OFFSET (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS)
+#define _Q_PENDING_BITS 8
+#define _Q_PENDING_MASK _Q_SET_MASK(PENDING)
+
+#define _Q_TAIL_IDX_OFFSET (_Q_PENDING_OFFSET + _Q_PENDING_BITS)
+#define _Q_TAIL_IDX_BITS 2
+#define _Q_TAIL_IDX_MASK _Q_SET_MASK(TAIL_IDX)
+
+#define _Q_TAIL_CPU_OFFSET (_Q_TAIL_IDX_OFFSET + _Q_TAIL_IDX_BITS)
+#define _Q_TAIL_CPU_BITS (32 - _Q_TAIL_CPU_OFFSET)
+#define _Q_TAIL_CPU_MASK _Q_SET_MASK(TAIL_CPU)
+
+#define _Q_TAIL_OFFSET _Q_TAIL_IDX_OFFSET
+#define _Q_TAIL_MASK (_Q_TAIL_IDX_MASK | _Q_TAIL_CPU_MASK)
+
+#define _Q_LOCKED_VAL (1U << _Q_LOCKED_OFFSET)
+#define _Q_PENDING_VAL (1U << _Q_PENDING_OFFSET)
+
+#define likely(x) __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+
+struct arena_qnode __arena qnodes[_Q_MAX_CPUS][_Q_MAX_NODES];
+
+static inline u32 encode_tail(int cpu, int idx)
+{
+ u32 tail;
+
+ tail = (cpu + 1) << _Q_TAIL_CPU_OFFSET;
+ tail |= idx << _Q_TAIL_IDX_OFFSET; /* assume < 4 */
+
+ return tail;
+}
+
+static inline struct arena_mcs_spinlock __arena *decode_tail(u32 tail)
+{
+ u32 cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1;
+ u32 idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;
+
+ return &qnodes[cpu][idx].mcs;
+}
+
+static inline
+struct arena_mcs_spinlock __arena *grab_mcs_node(struct arena_mcs_spinlock __arena *base, int idx)
+{
+ return &((struct arena_qnode __arena *)base + idx)->mcs;
+}
+
+#define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)
+
+/**
+ * xchg_tail - Put in the new queue tail code word & retrieve previous one
+ * @lock : Pointer to queued spinlock structure
+ * @tail : The new queue tail code word
+ * Return: The previous queue tail code word
+ *
+ * xchg(lock, tail)
+ *
+ * p,*,* -> n,*,* ; prev = xchg(lock, node)
+ */
+static __always_inline u32 xchg_tail(arena_spinlock_t __arena *lock, u32 tail)
+{
+ u32 old, new;
+
+ old = atomic_read(&lock->val);
+ do {
+ new = (old & _Q_LOCKED_PENDING_MASK) | tail;
+ /*
+ * We can use relaxed semantics since the caller ensures that
+ * the MCS node is properly initialized before updating the
+ * tail.
+ */
+ /* These loops are not expected to stall, but we still need to
+ * prove to the verifier they will terminate eventually.
+ */
+ cond_break_label(out);
+ } while (!atomic_try_cmpxchg_relaxed(&lock->val, &old, new));
+
+ return old;
+out:
+ bpf_printk("RUNTIME ERROR: %s unexpected cond_break exit!!!", __func__);
+ return old;
+}
+
+/**
+ * clear_pending - clear the pending bit.
+ * @lock: Pointer to queued spinlock structure
+ *
+ * *,1,* -> *,0,*
+ */
+static __always_inline void clear_pending(arena_spinlock_t __arena *lock)
+{
+ WRITE_ONCE(lock->pending, 0);
+}
+
+/**
+ * clear_pending_set_locked - take ownership and clear the pending bit.
+ * @lock: Pointer to queued spinlock structure
+ *
+ * *,1,0 -> *,0,1
+ *
+ * Lock stealing is not allowed if this function is used.
+ */
+static __always_inline void clear_pending_set_locked(arena_spinlock_t __arena *lock)
+{
+ WRITE_ONCE(lock->locked_pending, _Q_LOCKED_VAL);
+}
+
+/**
+ * set_locked - Set the lock bit and own the lock
+ * @lock: Pointer to queued spinlock structure
+ *
+ * *,*,0 -> *,0,1
+ */
+static __always_inline void set_locked(arena_spinlock_t __arena *lock)
+{
+ WRITE_ONCE(lock->locked, _Q_LOCKED_VAL);
+}
+
+static __always_inline
+u32 arena_fetch_set_pending_acquire(arena_spinlock_t __arena *lock)
+{
+ u32 old, new;
+
+ old = atomic_read(&lock->val);
+ do {
+ new = old | _Q_PENDING_VAL;
+ /*
+ * These loops are not expected to stall, but we still need to
+ * prove to the verifier they will terminate eventually.
+ */
+ cond_break_label(out);
+ } while (!atomic_try_cmpxchg_acquire(&lock->val, &old, new));
+
+ return old;
+out:
+ bpf_printk("RUNTIME ERROR: %s unexpected cond_break exit!!!", __func__);
+ return old;
+}
+
+/**
+ * arena_spin_trylock - try to acquire the queued spinlock
+ * @lock : Pointer to queued spinlock structure
+ * Return: 1 if lock acquired, 0 if failed
+ */
+static __always_inline int arena_spin_trylock(arena_spinlock_t __arena *lock)
+{
+ int val = atomic_read(&lock->val);
+
+ if (unlikely(val))
+ return 0;
+
+ return likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL));
+}
+
+__noinline
+int arena_spin_lock_slowpath(arena_spinlock_t __arena __arg_arena *lock, u32 val)
+{
+ struct arena_mcs_spinlock __arena *prev, *next, *node0, *node;
+ int ret = -ETIMEDOUT;
+ u32 old, tail;
+ int idx;
+
+ /*
+ * Wait for in-progress pending->locked hand-overs with a bounded
+ * number of spins so that we guarantee forward progress.
+ *
+ * 0,1,0 -> 0,0,1
+ */
+ if (val == _Q_PENDING_VAL) {
+ int cnt = _Q_PENDING_LOOPS;
+ val = atomic_cond_read_relaxed_label(&lock->val,
+ (VAL != _Q_PENDING_VAL) || !cnt--,
+ release_err);
+ }
+
+ /*
+ * If we observe any contention; queue.
+ */
+ if (val & ~_Q_LOCKED_MASK)
+ goto queue;
+
+ /*
+ * trylock || pending
+ *
+ * 0,0,* -> 0,1,* -> 0,0,1 pending, trylock
+ */
+ val = arena_fetch_set_pending_acquire(lock);
+
+ /*
+ * If we observe contention, there is a concurrent locker.
+ *
+ * Undo and queue; our setting of PENDING might have made the
+ * n,0,0 -> 0,0,0 transition fail and it will now be waiting
+ * on @next to become !NULL.
+ */
+ if (unlikely(val & ~_Q_LOCKED_MASK)) {
+
+ /* Undo PENDING if we set it. */
+ if (!(val & _Q_PENDING_MASK))
+ clear_pending(lock);
+
+ goto queue;
+ }
+
+ /*
+ * We're pending, wait for the owner to go away.
+ *
+ * 0,1,1 -> *,1,0
+ *
+ * this wait loop must be a load-acquire such that we match the
+ * store-release that clears the locked bit and create lock
+ * sequentiality; this is because not all
+ * clear_pending_set_locked() implementations imply full
+ * barriers.
+ */
+ if (val & _Q_LOCKED_MASK)
+ smp_cond_load_acquire_label(&lock->locked, !VAL, release_err);
+
+ /*
+ * take ownership and clear the pending bit.
+ *
+ * 0,1,0 -> 0,0,1
+ */
+ clear_pending_set_locked(lock);
+ return 0;
+
+ /*
+ * End of pending bit optimistic spinning and beginning of MCS
+ * queuing.
+ */
+queue:
+ node0 = &(qnodes[bpf_get_smp_processor_id()])[0].mcs;
+ idx = node0->count++;
+ tail = encode_tail(bpf_get_smp_processor_id(), idx);
+
+ /*
+ * 4 nodes are allocated based on the assumption that there will not be
+ * nested NMIs taking spinlocks. That may not be true in some
+ * architectures even though the chance of needing more than 4 nodes
+ * will still be extremely unlikely. When that happens, we simply return
+ * an error. Original qspinlock has a trylock fallback in this case.
+ */
+ if (unlikely(idx >= _Q_MAX_NODES)) {
+ ret = -EBUSY;
+ goto release_node_err;
+ }
+
+ node = grab_mcs_node(node0, idx);
+
+ /*
+ * Ensure that we increment the head node->count before initialising
+ * the actual node. If the compiler is kind enough to reorder these
+ * stores, then an IRQ could overwrite our assignments.
+ */
+ barrier();
+
+ node->locked = 0;
+ node->next = NULL;
+
+ /*
+ * We touched a (possibly) cold cacheline in the per-cpu queue node;
+ * attempt the trylock once more in the hope someone let go while we
+ * weren't watching.
+ */
+ if (arena_spin_trylock(lock))
+ goto release;
+
+ /*
+ * Ensure that the initialisation of @node is complete before we
+ * publish the updated tail via xchg_tail() and potentially link
+ * @node into the waitqueue via WRITE_ONCE(prev->next, node) below.
+ */
+ smp_wmb();
+
+ /*
+ * Publish the updated tail.
+ * We have already touched the queueing cacheline; don't bother with
+ * pending stuff.
+ *
+ * p,*,* -> n,*,*
+ */
+ old = xchg_tail(lock, tail);
+ next = NULL;
+
+ /*
+ * if there was a previous node; link it and wait until reaching the
+ * head of the waitqueue.
+ */
+ if (old & _Q_TAIL_MASK) {
+ prev = decode_tail(old);
+
+ /* Link @node into the waitqueue. */
+ WRITE_ONCE(prev->next, node);
+
+ arch_mcs_spin_lock_contended_label(&node->locked, release_node_err);
+
+ /*
+ * While waiting for the MCS lock, the next pointer may have
+ * been set by another lock waiter. We cannot prefetch here
+ * due to lack of equivalent instruction in BPF ISA.
+ */
+ next = READ_ONCE(node->next);
+ }
+
+ /*
+ * we're at the head of the waitqueue, wait for the owner & pending to
+ * go away.
+ *
+ * *,x,y -> *,0,0
+ *
+ * this wait loop must use a load-acquire such that we match the
+ * store-release that clears the locked bit and create lock
+ * sequentiality; this is because the set_locked() function below
+ * does not imply a full barrier.
+ */
+ val = atomic_cond_read_acquire_label(&lock->val, !(VAL & _Q_LOCKED_PENDING_MASK),
+ release_node_err);
+
+ /*
+ * claim the lock:
+ *
+ * n,0,0 -> 0,0,1 : lock, uncontended
+ * *,*,0 -> *,*,1 : lock, contended
+ *
+ * If the queue head is the only one in the queue (lock value == tail)
+ * and nobody is pending, clear the tail code and grab the lock.
+ * Otherwise, we only need to grab the lock.
+ */
+
+ /*
+ * In the PV case we might already have _Q_LOCKED_VAL set, because
+ * of lock stealing; therefore we must also allow:
+ *
+ * n,0,1 -> 0,0,1
+ *
+ * Note: at this point: (val & _Q_PENDING_MASK) == 0, because of the
+ * above wait condition, therefore any concurrent setting of
+ * PENDING will make the uncontended transition fail.
+ */
+ if ((val & _Q_TAIL_MASK) == tail) {
+ if (atomic_try_cmpxchg_relaxed(&lock->val, &val, _Q_LOCKED_VAL))
+ goto release; /* No contention */
+ }
+
+ /*
+ * Either somebody is queued behind us or _Q_PENDING_VAL got set
+ * which will then detect the remaining tail and queue behind us
+ * ensuring we'll see a @next.
+ */
+ set_locked(lock);
+
+ /*
+ * contended path; wait for next if not observed yet, release.
+ */
+ if (!next)
+ next = smp_cond_load_relaxed_label(&node->next, (VAL), release_node_err);
+
+ arch_mcs_spin_unlock_contended(&next->locked);
+
+release:;
+ /*
+ * release the node
+ *
+ * Doing a normal dec vs this_cpu_dec is fine. An upper context always
+ * decrements count it incremented before returning, thus we're fine.
+ * For contexts interrupting us, they either observe our dec or not.
+ * Just ensure the compiler doesn't reorder this statement, as a
+ * this_cpu_dec implicitly implied that.
+ */
+ barrier();
+ node0->count--;
+ return 0;
+release_node_err:
+ barrier();
+ node0->count--;
+ goto release_err;
+release_err:
+ return ret;
+}
+
+/**
+ * arena_spin_lock - acquire a queued spinlock
+ * @lock: Pointer to queued spinlock structure
+ *
+ * On error, returned value will be negative.
+ * On success, zero is returned.
+ *
+ * The return value _must_ be tested against zero for success,
+ * instead of checking it against negative, for passing the
+ * BPF verifier.
+ *
+ * The user should do:
+ * if (arena_spin_lock(...) != 0) // failure
+ * or
+ * if (arena_spin_lock(...) == 0) // success
+ * or
+ * if (arena_spin_lock(...)) // failure
+ * or
+ * if (!arena_spin_lock(...)) // success
+ * instead of:
+ * if (arena_spin_lock(...) < 0) // failure
+ *
+ * The return value can still be inspected later.
+ */
+static __always_inline int arena_spin_lock(arena_spinlock_t __arena *lock)
+{
+ int val = 0;
+
+ if (CONFIG_NR_CPUS > 1024)
+ return -EOPNOTSUPP;
+
+ bpf_preempt_disable();
+ if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL)))
+ return 0;
+
+ val = arena_spin_lock_slowpath(lock, val);
+ /* FIXME: bpf_assert_range(-MAX_ERRNO, 0) once we have it working for all cases. */
+ if (val)
+ bpf_preempt_enable();
+ return val;
+}
+
+/**
+ * arena_spin_unlock - release a queued spinlock
+ * @lock : Pointer to queued spinlock structure
+ */
+static __always_inline void arena_spin_unlock(arena_spinlock_t __arena *lock)
+{
+ /*
+ * unlock() needs release semantics:
+ */
+ smp_store_release(&lock->locked, 0);
+ bpf_preempt_enable();
+}
+
+#define arena_spin_lock_irqsave(lock, flags) \
+ ({ \
+ int __ret; \
+ bpf_local_irq_save(&(flags)); \
+ __ret = arena_spin_lock((lock)); \
+ if (__ret) \
+ bpf_local_irq_restore(&(flags)); \
+ (__ret); \
+ })
+
+#define arena_spin_unlock_irqrestore(lock, flags) \
+ ({ \
+ arena_spin_unlock((lock)); \
+ bpf_local_irq_restore(&(flags)); \
+ })
+
+#endif
+
+#endif /* BPF_ARENA_SPIN_LOCK_H */
diff --git a/tools/testing/selftests/bpf/bpf_atomic.h b/tools/testing/selftests/bpf/bpf_atomic.h
new file mode 100644
index 000000000000..a9674e544322
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_atomic.h
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#ifndef BPF_ATOMIC_H
+#define BPF_ATOMIC_H
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_experimental.h"
+
+extern bool CONFIG_X86_64 __kconfig __weak;
+
+/*
+ * __unqual_typeof(x) - Declare an unqualified scalar type, leaving
+ * non-scalar types unchanged,
+ *
+ * Prefer C11 _Generic for better compile-times and simpler code. Note: 'char'
+ * is not type-compatible with 'signed char', and we define a separate case.
+ *
+ * This is copied verbatim from kernel's include/linux/compiler_types.h, but
+ * with default expression (for pointers) changed from (x) to (typeof(x)0).
+ *
+ * This is because LLVM has a bug where for lvalue (x), it does not get rid of
+ * an extra address_space qualifier, but does in case of rvalue (typeof(x)0).
+ * Hence, for pointers, we need to create an rvalue expression to get the
+ * desired type. See https://github.com/llvm/llvm-project/issues/53400.
+ */
+#define __scalar_type_to_expr_cases(type) \
+ unsigned type : (unsigned type)0, signed type : (signed type)0
+
+#define __unqual_typeof(x) \
+ typeof(_Generic((x), \
+ char: (char)0, \
+ __scalar_type_to_expr_cases(char), \
+ __scalar_type_to_expr_cases(short), \
+ __scalar_type_to_expr_cases(int), \
+ __scalar_type_to_expr_cases(long), \
+ __scalar_type_to_expr_cases(long long), \
+ default: (typeof(x))0))
+
+/* No-op for BPF */
+#define cpu_relax() ({})
+
+#define READ_ONCE(x) (*(volatile typeof(x) *)&(x))
+
+#define WRITE_ONCE(x, val) ((*(volatile typeof(x) *)&(x)) = (val))
+
+#define cmpxchg(p, old, new) __sync_val_compare_and_swap((p), old, new)
+
+#define try_cmpxchg(p, pold, new) \
+ ({ \
+ __unqual_typeof(*(pold)) __o = *(pold); \
+ __unqual_typeof(*(p)) __r = cmpxchg(p, __o, new); \
+ if (__r != __o) \
+ *(pold) = __r; \
+ __r == __o; \
+ })
+
+#define try_cmpxchg_relaxed(p, pold, new) try_cmpxchg(p, pold, new)
+
+#define try_cmpxchg_acquire(p, pold, new) try_cmpxchg(p, pold, new)
+
+#define smp_mb() \
+ ({ \
+ unsigned long __val; \
+ __sync_fetch_and_add(&__val, 0); \
+ })
+
+#define smp_rmb() \
+ ({ \
+ if (!CONFIG_X86_64) \
+ smp_mb(); \
+ else \
+ barrier(); \
+ })
+
+#define smp_wmb() \
+ ({ \
+ if (!CONFIG_X86_64) \
+ smp_mb(); \
+ else \
+ barrier(); \
+ })
+
+/* Control dependency provides LOAD->STORE, provide LOAD->LOAD */
+#define smp_acquire__after_ctrl_dep() ({ smp_rmb(); })
+
+#define smp_load_acquire(p) \
+ ({ \
+ __unqual_typeof(*(p)) __v = READ_ONCE(*(p)); \
+ if (!CONFIG_X86_64) \
+ smp_mb(); \
+ barrier(); \
+ __v; \
+ })
+
+#define smp_store_release(p, val) \
+ ({ \
+ if (!CONFIG_X86_64) \
+ smp_mb(); \
+ barrier(); \
+ WRITE_ONCE(*(p), val); \
+ })
+
+#define smp_cond_load_relaxed_label(p, cond_expr, label) \
+ ({ \
+ typeof(p) __ptr = (p); \
+ __unqual_typeof(*(p)) VAL; \
+ for (;;) { \
+ VAL = (__unqual_typeof(*(p)))READ_ONCE(*__ptr); \
+ if (cond_expr) \
+ break; \
+ cond_break_label(label); \
+ cpu_relax(); \
+ } \
+ (typeof(*(p)))VAL; \
+ })
+
+#define smp_cond_load_acquire_label(p, cond_expr, label) \
+ ({ \
+ __unqual_typeof(*p) __val = \
+ smp_cond_load_relaxed_label(p, cond_expr, label); \
+ smp_acquire__after_ctrl_dep(); \
+ (typeof(*(p)))__val; \
+ })
+
+#define atomic_read(p) READ_ONCE((p)->counter)
+
+#define atomic_cond_read_relaxed_label(p, cond_expr, label) \
+ smp_cond_load_relaxed_label(&(p)->counter, cond_expr, label)
+
+#define atomic_cond_read_acquire_label(p, cond_expr, label) \
+ smp_cond_load_acquire_label(&(p)->counter, cond_expr, label)
+
+#define atomic_try_cmpxchg_relaxed(p, pold, new) \
+ try_cmpxchg_relaxed(&(p)->counter, pold, new)
+
+#define atomic_try_cmpxchg_acquire(p, pold, new) \
+ try_cmpxchg_acquire(&(p)->counter, pold, new)
+
+#endif /* BPF_ATOMIC_H */
diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h
index cd8ecd39c3f3..6535c8ae3c46 100644
--- a/tools/testing/selftests/bpf/bpf_experimental.h
+++ b/tools/testing/selftests/bpf/bpf_experimental.h
@@ -368,12 +368,12 @@ l_true: \
ret; \
})
-#define cond_break \
+#define __cond_break(expr) \
({ __label__ l_break, l_continue; \
asm volatile goto("may_goto %l[l_break]" \
:::: l_break); \
goto l_continue; \
- l_break: break; \
+ l_break: expr; \
l_continue:; \
})
#else
@@ -392,7 +392,7 @@ l_true: \
ret; \
})
-#define cond_break \
+#define __cond_break(expr) \
({ __label__ l_break, l_continue; \
asm volatile goto("1:.byte 0xe5; \
.byte 0; \
@@ -400,7 +400,7 @@ l_true: \
.short 0" \
:::: l_break); \
goto l_continue; \
- l_break: break; \
+ l_break: expr; \
l_continue:; \
})
#else
@@ -418,7 +418,7 @@ l_true: \
ret; \
})
-#define cond_break \
+#define __cond_break(expr) \
({ __label__ l_break, l_continue; \
asm volatile goto("1:.byte 0xe5; \
.byte 0; \
@@ -426,12 +426,15 @@ l_true: \
.short 0" \
:::: l_break); \
goto l_continue; \
- l_break: break; \
+ l_break: expr; \
l_continue:; \
})
#endif
#endif
+#define cond_break __cond_break(break)
+#define cond_break_label(label) __cond_break(goto label)
+
#ifndef bpf_nop_mov
#define bpf_nop_mov(var) \
asm volatile("%[reg]=%[reg]"::[reg]"r"((short)var))
diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h
index 2eb3483f2fb0..8215c9b3115e 100644
--- a/tools/testing/selftests/bpf/bpf_kfuncs.h
+++ b/tools/testing/selftests/bpf/bpf_kfuncs.h
@@ -87,4 +87,9 @@ struct dentry;
*/
extern int bpf_get_dentry_xattr(struct dentry *dentry, const char *name,
struct bpf_dynptr *value_ptr) __ksym __weak;
+
+extern int bpf_set_dentry_xattr(struct dentry *dentry, const char *name__str,
+ const struct bpf_dynptr *value_p, int flags) __ksym __weak;
+extern int bpf_remove_dentry_xattr(struct dentry *dentry, const char *name__str) __ksym __weak;
+
#endif
diff --git a/tools/testing/selftests/bpf/cap_helpers.c b/tools/testing/selftests/bpf/cap_helpers.c
index d5ac507401d7..98f840c3a38f 100644
--- a/tools/testing/selftests/bpf/cap_helpers.c
+++ b/tools/testing/selftests/bpf/cap_helpers.c
@@ -19,7 +19,7 @@ int cap_enable_effective(__u64 caps, __u64 *old_caps)
err = capget(&hdr, data);
if (err)
- return err;
+ return -errno;
if (old_caps)
*old_caps = (__u64)(data[1].effective) << 32 | data[0].effective;
@@ -32,7 +32,7 @@ int cap_enable_effective(__u64 caps, __u64 *old_caps)
data[1].effective |= cap1;
err = capset(&hdr, data);
if (err)
- return err;
+ return -errno;
return 0;
}
@@ -49,7 +49,7 @@ int cap_disable_effective(__u64 caps, __u64 *old_caps)
err = capget(&hdr, data);
if (err)
- return err;
+ return -errno;
if (old_caps)
*old_caps = (__u64)(data[1].effective) << 32 | data[0].effective;
@@ -61,7 +61,7 @@ int cap_disable_effective(__u64 caps, __u64 *old_caps)
data[1].effective &= ~cap1;
err = capset(&hdr, data);
if (err)
- return err;
+ return -errno;
return 0;
}
diff --git a/tools/testing/selftests/bpf/cap_helpers.h b/tools/testing/selftests/bpf/cap_helpers.h
index 6d163530cb0f..8dcb28557f76 100644
--- a/tools/testing/selftests/bpf/cap_helpers.h
+++ b/tools/testing/selftests/bpf/cap_helpers.h
@@ -4,6 +4,7 @@
#include <linux/types.h>
#include <linux/capability.h>
+#include <errno.h>
#ifndef CAP_PERFMON
#define CAP_PERFMON 38
diff --git a/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c
index 66191ae9863c..79c3ccadb962 100644
--- a/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c
+++ b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c
@@ -120,11 +120,12 @@ static void validate_fetch_results(int outer_map_fd,
static void fetch_and_validate(int outer_map_fd,
struct bpf_map_batch_opts *opts,
- __u32 batch_size, bool delete_entries)
+ __u32 batch_size, bool delete_entries,
+ bool has_holes)
{
- __u32 *fetched_keys, *fetched_values, total_fetched = 0;
- __u32 batch_key = 0, fetch_count, step_size;
- int err, max_entries = OUTER_MAP_ENTRIES;
+ int err, max_entries = OUTER_MAP_ENTRIES - !!has_holes;
+ __u32 *fetched_keys, *fetched_values, total_fetched = 0, i;
+ __u32 batch_key = 0, fetch_count, step_size = batch_size;
__u32 value_size = sizeof(__u32);
/* Total entries needs to be fetched */
@@ -134,9 +135,8 @@ static void fetch_and_validate(int outer_map_fd,
"Memory allocation failed for fetched_keys or fetched_values",
"error=%s\n", strerror(errno));
- for (step_size = batch_size;
- step_size <= max_entries;
- step_size += batch_size) {
+ /* hash map may not always return full batch */
+ for (i = 0; i < OUTER_MAP_ENTRIES; i++) {
fetch_count = step_size;
err = delete_entries
? bpf_map_lookup_and_delete_batch(outer_map_fd,
@@ -155,6 +155,7 @@ static void fetch_and_validate(int outer_map_fd,
if (err && errno == ENOSPC) {
/* Fetch again with higher batch size */
total_fetched = 0;
+ step_size += batch_size;
continue;
}
@@ -184,18 +185,19 @@ static void fetch_and_validate(int outer_map_fd,
}
static void _map_in_map_batch_ops(enum bpf_map_type outer_map_type,
- enum bpf_map_type inner_map_type)
+ enum bpf_map_type inner_map_type,
+ bool has_holes)
{
+ __u32 max_entries = OUTER_MAP_ENTRIES - !!has_holes;
__u32 *outer_map_keys, *inner_map_fds;
- __u32 max_entries = OUTER_MAP_ENTRIES;
LIBBPF_OPTS(bpf_map_batch_opts, opts);
__u32 value_size = sizeof(__u32);
int batch_size[2] = {5, 10};
__u32 map_index, op_index;
int outer_map_fd, ret;
- outer_map_keys = calloc(max_entries, value_size);
- inner_map_fds = calloc(max_entries, value_size);
+ outer_map_keys = calloc(OUTER_MAP_ENTRIES, value_size);
+ inner_map_fds = calloc(OUTER_MAP_ENTRIES, value_size);
CHECK((!outer_map_keys || !inner_map_fds),
"Memory allocation failed for outer_map_keys or inner_map_fds",
"error=%s\n", strerror(errno));
@@ -209,6 +211,24 @@ static void _map_in_map_batch_ops(enum bpf_map_type outer_map_type,
((outer_map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
? 9 : 1000) - map_index;
+ /* This condition is only meaningful for array of maps.
+ *
+ * max_entries == OUTER_MAP_ENTRIES - 1 if it is true. Say
+ * max_entries is short for n, then outer_map_keys looks like:
+ *
+ * [n, n-1, ... 2, 1]
+ *
+ * We change it to
+ *
+ * [n, n-1, ... 2, 0]
+ *
+ * So it will leave key 1 as a hole. It will serve to test the
+ * correctness when batch on an array: a "non-exist" key might be
+ * actually allocated and returned from key iteration.
+ */
+ if (has_holes)
+ outer_map_keys[max_entries - 1]--;
+
/* batch operation - map_update */
ret = bpf_map_update_batch(outer_map_fd, outer_map_keys,
inner_map_fds, &max_entries, &opts);
@@ -219,15 +239,17 @@ static void _map_in_map_batch_ops(enum bpf_map_type outer_map_type,
/* batch operation - map_lookup */
for (op_index = 0; op_index < 2; ++op_index)
fetch_and_validate(outer_map_fd, &opts,
- batch_size[op_index], false);
+ batch_size[op_index], false,
+ has_holes);
/* batch operation - map_lookup_delete */
if (outer_map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
fetch_and_validate(outer_map_fd, &opts,
- max_entries, true /*delete*/);
+ max_entries, true /*delete*/,
+ has_holes);
/* close all map fds */
- for (map_index = 0; map_index < max_entries; map_index++)
+ for (map_index = 0; map_index < OUTER_MAP_ENTRIES; map_index++)
close(inner_map_fds[map_index]);
close(outer_map_fd);
@@ -237,16 +259,20 @@ static void _map_in_map_batch_ops(enum bpf_map_type outer_map_type,
void test_map_in_map_batch_ops_array(void)
{
- _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_ARRAY);
+ _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_ARRAY, false);
printf("%s:PASS with inner ARRAY map\n", __func__);
- _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH);
+ _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH, false);
printf("%s:PASS with inner HASH map\n", __func__);
+ _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_ARRAY, true);
+ printf("%s:PASS with inner ARRAY map with holes\n", __func__);
+ _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH, true);
+ printf("%s:PASS with inner HASH map with holes\n", __func__);
}
void test_map_in_map_batch_ops_hash(void)
{
- _map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_ARRAY);
+ _map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_ARRAY, false);
printf("%s:PASS with inner ARRAY map\n", __func__);
- _map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_HASH);
+ _map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_HASH, false);
printf("%s:PASS with inner HASH map\n", __func__);
}
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index 80844a5fb1fe..72b5c174ab3b 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -446,6 +446,23 @@ char *ping_command(int family)
return "ping";
}
+int append_tid(char *str, size_t sz)
+{
+ size_t end;
+
+ if (!str)
+ return -1;
+
+ end = strlen(str);
+ if (end + 8 > sz)
+ return -1;
+
+ sprintf(&str[end], "%07d", gettid());
+ str[end + 7] = '\0';
+
+ return 0;
+}
+
int remove_netns(const char *name)
{
char *cmd;
@@ -548,6 +565,34 @@ void close_netns(struct nstoken *token)
free(token);
}
+int open_tuntap(const char *dev_name, bool need_mac)
+{
+ int err = 0;
+ struct ifreq ifr;
+ int fd = open("/dev/net/tun", O_RDWR);
+
+ if (!ASSERT_GE(fd, 0, "open(/dev/net/tun)"))
+ return -1;
+
+ ifr.ifr_flags = IFF_NO_PI | (need_mac ? IFF_TAP : IFF_TUN);
+ strncpy(ifr.ifr_name, dev_name, IFNAMSIZ - 1);
+ ifr.ifr_name[IFNAMSIZ - 1] = '\0';
+
+ err = ioctl(fd, TUNSETIFF, &ifr);
+ if (!ASSERT_OK(err, "ioctl(TUNSETIFF)")) {
+ close(fd);
+ return -1;
+ }
+
+ err = fcntl(fd, F_SETFL, O_NONBLOCK);
+ if (!ASSERT_OK(err, "fcntl(O_NONBLOCK)")) {
+ close(fd);
+ return -1;
+ }
+
+ return fd;
+}
+
int get_socket_local_port(int sock_fd)
{
struct sockaddr_storage addr;
@@ -733,6 +778,36 @@ struct tmonitor_ctx {
int pcap_fd;
};
+static int __base_pr(const char *format, va_list args)
+{
+ return vfprintf(stdout, format, args);
+}
+
+static tm_print_fn_t __tm_pr = __base_pr;
+
+tm_print_fn_t traffic_monitor_set_print(tm_print_fn_t fn)
+{
+ tm_print_fn_t old_print_fn;
+
+ old_print_fn = __atomic_exchange_n(&__tm_pr, fn, __ATOMIC_RELAXED);
+
+ return old_print_fn;
+}
+
+void tm_print(const char *format, ...)
+{
+ tm_print_fn_t print_fn;
+ va_list args;
+
+ print_fn = __atomic_load_n(&__tm_pr, __ATOMIC_RELAXED);
+ if (!print_fn)
+ return;
+
+ va_start(args, format);
+ print_fn(format, args);
+ va_end(args);
+}
+
/* Is this packet captured with a Ethernet protocol type? */
static bool is_ethernet(const u_char *packet)
{
@@ -750,7 +825,7 @@ static bool is_ethernet(const u_char *packet)
case 770: /* ARPHRD_FRAD */
case 778: /* ARPHDR_IPGRE */
case 803: /* ARPHRD_IEEE80211_RADIOTAP */
- printf("Packet captured: arphdr_type=%d\n", arphdr_type);
+ tm_print("Packet captured: arphdr_type=%d\n", arphdr_type);
return false;
}
return true;
@@ -771,12 +846,13 @@ static const char *pkt_type_str(u16 pkt_type)
return "Unknown";
}
+#define MAX_FLAGS_STRLEN 21
/* Show the information of the transport layer in the packet */
static void show_transport(const u_char *packet, u16 len, u32 ifindex,
const char *src_addr, const char *dst_addr,
u16 proto, bool ipv6, u8 pkt_type)
{
- char *ifname, _ifname[IF_NAMESIZE];
+ char *ifname, _ifname[IF_NAMESIZE], flags[MAX_FLAGS_STRLEN] = "";
const char *transport_str;
u16 src_port, dst_port;
struct udphdr *udp;
@@ -799,47 +875,39 @@ static void show_transport(const u_char *packet, u16 len, u32 ifindex,
dst_port = ntohs(tcp->dest);
transport_str = "TCP";
} else if (proto == IPPROTO_ICMP) {
- printf("%-7s %-3s IPv4 %s > %s: ICMP, length %d, type %d, code %d\n",
- ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len,
- packet[0], packet[1]);
+ tm_print("%-7s %-3s IPv4 %s > %s: ICMP, length %d, type %d, code %d\n",
+ ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len,
+ packet[0], packet[1]);
return;
} else if (proto == IPPROTO_ICMPV6) {
- printf("%-7s %-3s IPv6 %s > %s: ICMPv6, length %d, type %d, code %d\n",
- ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len,
- packet[0], packet[1]);
+ tm_print("%-7s %-3s IPv6 %s > %s: ICMPv6, length %d, type %d, code %d\n",
+ ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len,
+ packet[0], packet[1]);
return;
} else {
- printf("%-7s %-3s %s %s > %s: protocol %d\n",
- ifname, pkt_type_str(pkt_type), ipv6 ? "IPv6" : "IPv4",
- src_addr, dst_addr, proto);
+ tm_print("%-7s %-3s %s %s > %s: protocol %d\n",
+ ifname, pkt_type_str(pkt_type), ipv6 ? "IPv6" : "IPv4",
+ src_addr, dst_addr, proto);
return;
}
/* TCP or UDP*/
- flockfile(stdout);
+ if (proto == IPPROTO_TCP)
+ snprintf(flags, MAX_FLAGS_STRLEN, "%s%s%s%s",
+ tcp->fin ? ", FIN" : "",
+ tcp->syn ? ", SYN" : "",
+ tcp->rst ? ", RST" : "",
+ tcp->ack ? ", ACK" : "");
+
if (ipv6)
- printf("%-7s %-3s IPv6 %s.%d > %s.%d: %s, length %d",
- ifname, pkt_type_str(pkt_type), src_addr, src_port,
- dst_addr, dst_port, transport_str, len);
+ tm_print("%-7s %-3s IPv6 %s.%d > %s.%d: %s, length %d%s\n",
+ ifname, pkt_type_str(pkt_type), src_addr, src_port,
+ dst_addr, dst_port, transport_str, len, flags);
else
- printf("%-7s %-3s IPv4 %s:%d > %s:%d: %s, length %d",
- ifname, pkt_type_str(pkt_type), src_addr, src_port,
- dst_addr, dst_port, transport_str, len);
-
- if (proto == IPPROTO_TCP) {
- if (tcp->fin)
- printf(", FIN");
- if (tcp->syn)
- printf(", SYN");
- if (tcp->rst)
- printf(", RST");
- if (tcp->ack)
- printf(", ACK");
- }
-
- printf("\n");
- funlockfile(stdout);
+ tm_print("%-7s %-3s IPv4 %s:%d > %s:%d: %s, length %d%s\n",
+ ifname, pkt_type_str(pkt_type), src_addr, src_port,
+ dst_addr, dst_port, transport_str, len, flags);
}
static void show_ipv6_packet(const u_char *packet, u32 ifindex, u8 pkt_type)
@@ -954,8 +1022,8 @@ static void *traffic_monitor_thread(void *arg)
ifname = _ifname;
}
- printf("%-7s %-3s Unknown network protocol type 0x%x\n",
- ifname, pkt_type_str(ptype), proto);
+ tm_print("%-7s %-3s Unknown network protocol type 0x%x\n",
+ ifname, pkt_type_str(ptype), proto);
}
}
@@ -1155,8 +1223,9 @@ void traffic_monitor_stop(struct tmonitor_ctx *ctx)
write(ctx->wake_fd, &w, sizeof(w));
pthread_join(ctx->thread, NULL);
- printf("Packet file: %s\n", strrchr(ctx->pkt_fname, '/') + 1);
+ tm_print("Packet file: %s\n", strrchr(ctx->pkt_fname, '/') + 1);
traffic_monitor_release(ctx);
}
+
#endif /* TRAFFIC_MONITOR */
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index ebec8a8d6f81..ef208eefd571 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -8,6 +8,7 @@
typedef __u16 __sum16;
#include <linux/if_ether.h>
#include <linux/if_packet.h>
+#include <linux/if_tun.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/ethtool.h>
@@ -17,6 +18,7 @@ typedef __u16 __sum16;
#include <netinet/udp.h>
#include <bpf/bpf_endian.h>
#include <net/if.h>
+#include <stdio.h>
#define MAGIC_VAL 0x1234
#define NUM_ITER 100000
@@ -85,6 +87,8 @@ int get_socket_local_port(int sock_fd);
int get_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param);
int set_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param);
+int open_tuntap(const char *dev_name, bool need_mac);
+
struct nstoken;
/**
* open_netns() - Switch to specified network namespace by name.
@@ -98,6 +102,18 @@ int send_recv_data(int lfd, int fd, uint32_t total_bytes);
int make_netns(const char *name);
int remove_netns(const char *name);
+/**
+ * append_tid() - Append thread ID to the given string.
+ *
+ * @str: string to extend
+ * @sz: string's size
+ *
+ * 8 characters are used to append the thread ID (7 digits + '\0')
+ *
+ * Returns -1 on errors, 0 otherwise
+ */
+int append_tid(char *str, size_t sz);
+
static __u16 csum_fold(__u32 csum)
{
csum = (csum & 0xffff) + (csum >> 16);
@@ -237,10 +253,13 @@ static inline __sum16 build_udp_v6_csum(const struct ipv6hdr *ip6h,
struct tmonitor_ctx;
+typedef int (*tm_print_fn_t)(const char *format, va_list args);
+
#ifdef TRAFFIC_MONITOR
struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name,
const char *subtest_name);
void traffic_monitor_stop(struct tmonitor_ctx *ctx);
+tm_print_fn_t traffic_monitor_set_print(tm_print_fn_t fn);
#else
static inline struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name,
const char *subtest_name)
@@ -251,6 +270,11 @@ static inline struct tmonitor_ctx *traffic_monitor_start(const char *netns, cons
static inline void traffic_monitor_stop(struct tmonitor_ctx *ctx)
{
}
+
+static inline tm_print_fn_t traffic_monitor_set_print(tm_print_fn_t fn)
+{
+ return NULL;
+}
#endif
#endif
diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c
index 4ebd0da898f5..1d53a8561ee2 100644
--- a/tools/testing/selftests/bpf/prog_tests/align.c
+++ b/tools/testing/selftests/bpf/prog_tests/align.c
@@ -610,9 +610,11 @@ static int do_test_single(struct bpf_align_test *test)
.log_size = sizeof(bpf_vlog),
.log_level = 2,
);
+ const char *main_pass_start = "0: R1=ctx() R10=fp0";
const char *line_ptr;
int cur_line = -1;
int prog_len, i;
+ char *start;
int fd_prog;
int ret;
@@ -632,7 +634,13 @@ static int do_test_single(struct bpf_align_test *test)
ret = 0;
/* We make a local copy so that we can strtok() it */
strncpy(bpf_vlog_copy, bpf_vlog, sizeof(bpf_vlog_copy));
- line_ptr = strtok(bpf_vlog_copy, "\n");
+ start = strstr(bpf_vlog_copy, main_pass_start);
+ if (!start) {
+ ret = 1;
+ printf("Can't find initial line '%s'\n", main_pass_start);
+ goto out;
+ }
+ line_ptr = strtok(start, "\n");
for (i = 0; i < MAX_MATCHES; i++) {
struct bpf_reg_match m = test->matches[i];
const char *p;
@@ -682,6 +690,7 @@ static int do_test_single(struct bpf_align_test *test)
break;
}
}
+out:
if (fd_prog >= 0)
close(fd_prog);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/arena_atomics.c b/tools/testing/selftests/bpf/prog_tests/arena_atomics.c
index 26e7c06c6cb4..d98577a6babc 100644
--- a/tools/testing/selftests/bpf/prog_tests/arena_atomics.c
+++ b/tools/testing/selftests/bpf/prog_tests/arena_atomics.c
@@ -162,6 +162,66 @@ static void test_uaf(struct arena_atomics *skel)
ASSERT_EQ(skel->arena->uaf_recovery_fails, 0, "uaf_recovery_fails");
}
+static void test_load_acquire(struct arena_atomics *skel)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err, prog_fd;
+
+ if (skel->data->skip_lacq_srel_tests) {
+ printf("%s:SKIP: ENABLE_ATOMICS_TESTS not defined, Clang doesn't support addr_space_cast, and/or JIT doesn't support load-acquire\n",
+ __func__);
+ test__skip();
+ return;
+ }
+
+ /* No need to attach it, just run it directly */
+ prog_fd = bpf_program__fd(skel->progs.load_acquire);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ ASSERT_EQ(skel->arena->load_acquire8_result, 0x12,
+ "load_acquire8_result");
+ ASSERT_EQ(skel->arena->load_acquire16_result, 0x1234,
+ "load_acquire16_result");
+ ASSERT_EQ(skel->arena->load_acquire32_result, 0x12345678,
+ "load_acquire32_result");
+ ASSERT_EQ(skel->arena->load_acquire64_result, 0x1234567890abcdef,
+ "load_acquire64_result");
+}
+
+static void test_store_release(struct arena_atomics *skel)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err, prog_fd;
+
+ if (skel->data->skip_lacq_srel_tests) {
+ printf("%s:SKIP: ENABLE_ATOMICS_TESTS not defined, Clang doesn't support addr_space_cast, and/or JIT doesn't support store-release\n",
+ __func__);
+ test__skip();
+ return;
+ }
+
+ /* No need to attach it, just run it directly */
+ prog_fd = bpf_program__fd(skel->progs.store_release);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ ASSERT_EQ(skel->arena->store_release8_result, 0x12,
+ "store_release8_result");
+ ASSERT_EQ(skel->arena->store_release16_result, 0x1234,
+ "store_release16_result");
+ ASSERT_EQ(skel->arena->store_release32_result, 0x12345678,
+ "store_release32_result");
+ ASSERT_EQ(skel->arena->store_release64_result, 0x1234567890abcdef,
+ "store_release64_result");
+}
+
void test_arena_atomics(void)
{
struct arena_atomics *skel;
@@ -171,7 +231,7 @@ void test_arena_atomics(void)
if (!ASSERT_OK_PTR(skel, "arena atomics skeleton open"))
return;
- if (skel->data->skip_tests) {
+ if (skel->data->skip_all_tests) {
printf("%s:SKIP:no ENABLE_ATOMICS_TESTS or no addr_space_cast support in clang",
__func__);
test__skip();
@@ -198,6 +258,10 @@ void test_arena_atomics(void)
test_xchg(skel);
if (test__start_subtest("uaf"))
test_uaf(skel);
+ if (test__start_subtest("load_acquire"))
+ test_load_acquire(skel);
+ if (test__start_subtest("store_release"))
+ test_store_release(skel);
cleanup:
arena_atomics__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c b/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c
new file mode 100644
index 000000000000..7565fc7690c2
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <network_helpers.h>
+#include <sys/sysinfo.h>
+
+struct __qspinlock { int val; };
+typedef struct __qspinlock arena_spinlock_t;
+
+struct arena_qnode {
+ unsigned long next;
+ int count;
+ int locked;
+};
+
+#include "arena_spin_lock.skel.h"
+
+static long cpu;
+static int repeat;
+
+pthread_barrier_t barrier;
+
+static void *spin_lock_thread(void *arg)
+{
+ int err, prog_fd = *(u32 *)arg;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = repeat,
+ );
+ cpu_set_t cpuset;
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(__sync_fetch_and_add(&cpu, 1), &cpuset);
+ ASSERT_OK(pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset), "cpu affinity");
+
+ err = pthread_barrier_wait(&barrier);
+ if (err != PTHREAD_BARRIER_SERIAL_THREAD && err != 0)
+ ASSERT_FALSE(true, "pthread_barrier");
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run err");
+ ASSERT_EQ((int)topts.retval, 0, "test_run retval");
+
+ pthread_exit(arg);
+}
+
+static void test_arena_spin_lock_size(int size)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct arena_spin_lock *skel;
+ pthread_t thread_id[16];
+ int prog_fd, i, err;
+ void *ret;
+
+ if (get_nprocs() < 2) {
+ test__skip();
+ return;
+ }
+
+ skel = arena_spin_lock__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "arena_spin_lock__open_and_load"))
+ return;
+ if (skel->data->test_skip == 2) {
+ test__skip();
+ goto end;
+ }
+ skel->bss->cs_count = size;
+ skel->bss->limit = repeat * 16;
+
+ ASSERT_OK(pthread_barrier_init(&barrier, NULL, 16), "barrier init");
+
+ prog_fd = bpf_program__fd(skel->progs.prog);
+ for (i = 0; i < 16; i++) {
+ err = pthread_create(&thread_id[i], NULL, &spin_lock_thread, &prog_fd);
+ if (!ASSERT_OK(err, "pthread_create"))
+ goto end_barrier;
+ }
+
+ for (i = 0; i < 16; i++) {
+ if (!ASSERT_OK(pthread_join(thread_id[i], &ret), "pthread_join"))
+ goto end_barrier;
+ if (!ASSERT_EQ(ret, &prog_fd, "ret == prog_fd"))
+ goto end_barrier;
+ }
+
+ ASSERT_EQ(skel->bss->counter, repeat * 16, "check counter value");
+
+end_barrier:
+ pthread_barrier_destroy(&barrier);
+end:
+ arena_spin_lock__destroy(skel);
+ return;
+}
+
+void test_arena_spin_lock(void)
+{
+ repeat = 1000;
+ if (test__start_subtest("arena_spin_lock_1"))
+ test_arena_spin_lock_size(1);
+ cpu = 0;
+ if (test__start_subtest("arena_spin_lock_1000"))
+ test_arena_spin_lock_size(1000);
+ cpu = 0;
+ repeat = 100;
+ if (test__start_subtest("arena_spin_lock_50000"))
+ test_arena_spin_lock_size(50000);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c
index cc184e4420f6..67557cda2208 100644
--- a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c
@@ -6,6 +6,10 @@
#include <test_progs.h>
#include "bloom_filter_map.skel.h"
+#ifndef NUMA_NO_NODE
+#define NUMA_NO_NODE (-1)
+#endif
+
static void test_fail_cases(void)
{
LIBBPF_OPTS(bpf_map_create_opts, opts);
@@ -69,6 +73,7 @@ static void test_success_cases(void)
/* Create a map */
opts.map_flags = BPF_F_ZERO_SEED | BPF_F_NUMA_NODE;
+ opts.numa_node = NUMA_NO_NODE;
fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, sizeof(value), 100, &opts);
if (!ASSERT_GE(fd, 0, "bpf_map_create bloom filter success case"))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 6f1bfacd7375..add4a18c33bd 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -323,19 +323,87 @@ static void test_task_pidfd(void)
static void test_task_sleepable(void)
{
struct bpf_iter_tasks *skel;
+ int pid, status, err, data_pipe[2], finish_pipe[2], c;
+ char *test_data = NULL;
+ char *test_data_long = NULL;
+ char *data[2];
+
+ if (!ASSERT_OK(pipe(data_pipe), "data_pipe") ||
+ !ASSERT_OK(pipe(finish_pipe), "finish_pipe"))
+ return;
skel = bpf_iter_tasks__open_and_load();
if (!ASSERT_OK_PTR(skel, "bpf_iter_tasks__open_and_load"))
return;
+ pid = fork();
+ if (!ASSERT_GE(pid, 0, "fork"))
+ return;
+
+ if (pid == 0) {
+ /* child */
+ close(data_pipe[0]);
+ close(finish_pipe[1]);
+
+ test_data = malloc(sizeof(char) * 10);
+ strncpy(test_data, "test_data", 10);
+ test_data[9] = '\0';
+
+ test_data_long = malloc(sizeof(char) * 5000);
+ for (int i = 0; i < 5000; ++i) {
+ if (i % 2 == 0)
+ test_data_long[i] = 'b';
+ else
+ test_data_long[i] = 'a';
+ }
+ test_data_long[4999] = '\0';
+
+ data[0] = test_data;
+ data[1] = test_data_long;
+
+ write(data_pipe[1], &data, sizeof(data));
+
+ /* keep child alive until after the test */
+ err = read(finish_pipe[0], &c, 1);
+ if (err != 1)
+ exit(-1);
+
+ close(data_pipe[1]);
+ close(finish_pipe[0]);
+ _exit(0);
+ }
+
+ /* parent */
+ close(data_pipe[1]);
+ close(finish_pipe[0]);
+
+ err = read(data_pipe[0], &data, sizeof(data));
+ ASSERT_EQ(err, sizeof(data), "read_check");
+
+ skel->bss->user_ptr = data[0];
+ skel->bss->user_ptr_long = data[1];
+ skel->bss->pid = pid;
+
do_dummy_read(skel->progs.dump_task_sleepable);
ASSERT_GT(skel->bss->num_expected_failure_copy_from_user_task, 0,
"num_expected_failure_copy_from_user_task");
ASSERT_GT(skel->bss->num_success_copy_from_user_task, 0,
"num_success_copy_from_user_task");
+ ASSERT_GT(skel->bss->num_expected_failure_copy_from_user_task_str, 0,
+ "num_expected_failure_copy_from_user_task_str");
+ ASSERT_GT(skel->bss->num_success_copy_from_user_task_str, 0,
+ "num_success_copy_from_user_task_str");
bpf_iter_tasks__destroy(skel);
+
+ write(finish_pipe[1], &c, 1);
+ err = waitpid(pid, &status, 0);
+ ASSERT_EQ(err, pid, "waitpid");
+ ASSERT_EQ(status, 0, "zero_child_exit");
+
+ close(data_pipe[0]);
+ close(finish_pipe[1]);
}
static void test_task_stack(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
index a4a1f93878d4..dbd13f8e42a7 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
@@ -72,11 +72,14 @@ static void test_bpf_nf_ct(int mode)
if (!ASSERT_OK(system(cmd), cmd))
goto end;
- srv_port = (mode == TEST_XDP) ? 5005 : 5006;
- srv_fd = start_server(AF_INET, SOCK_STREAM, "127.0.0.1", srv_port, TIMEOUT_MS);
+ srv_fd = start_server(AF_INET, SOCK_STREAM, "127.0.0.1", 0, TIMEOUT_MS);
if (!ASSERT_GE(srv_fd, 0, "start_server"))
goto end;
+ srv_port = get_socket_local_port(srv_fd);
+ if (!ASSERT_GE(srv_port, 0, "get_sock_local_port"))
+ goto end;
+
client_fd = connect_to_server(srv_fd);
if (!ASSERT_GE(client_fd, 0, "connect_to_server"))
goto end;
@@ -91,7 +94,7 @@ static void test_bpf_nf_ct(int mode)
skel->bss->saddr = peer_addr.sin_addr.s_addr;
skel->bss->sport = peer_addr.sin_port;
skel->bss->daddr = peer_addr.sin_addr.s_addr;
- skel->bss->dport = htons(srv_port);
+ skel->bss->dport = srv_port;
if (mode == TEST_XDP)
prog_fd = bpf_program__fd(skel->progs.nf_xdp_ct_test);
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index e63d74ce046f..8a9ba4292109 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -3866,11 +3866,11 @@ static struct btf_raw_test raw_tests[] = {
.err_str = "vlen != 0",
},
{
- .descr = "decl_tag test #8, invalid kflag",
+ .descr = "decl_tag test #8, tag with kflag",
.raw_types = {
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */
- BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 1, 0), 2), (-1),
+ BTF_DECL_ATTR_ENC(NAME_TBD, 2, -1),
BTF_END_RAW,
},
BTF_STR_SEC("\0local\0tag1"),
@@ -3881,8 +3881,6 @@ static struct btf_raw_test raw_tests[] = {
.key_type_id = 1,
.value_type_id = 1,
.max_entries = 1,
- .btf_load_err = true,
- .err_str = "Invalid btf_info kind_flag",
},
{
.descr = "decl_tag test #9, var, invalid component_idx",
@@ -4207,6 +4205,23 @@ static struct btf_raw_test raw_tests[] = {
.err_str = "Type tags don't precede modifiers",
},
{
+ .descr = "type_tag test #7, tag with kflag",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ATTR_ENC(NAME_TBD, 1), /* [2] */
+ BTF_PTR_ENC(2), /* [3] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+},
+{
.descr = "enum64 test #1, unsigned, size 8",
.raw_types = {
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index b293b8501fd6..c0a776feec23 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -126,26 +126,69 @@ done:
return err;
}
-static char *dump_buf;
-static size_t dump_buf_sz;
-static FILE *dump_buf_file;
+struct test_ctx {
+ struct btf *btf;
+ struct btf_dump *d;
+ char *dump_buf;
+ size_t dump_buf_sz;
+ FILE *dump_buf_file;
+};
-static void test_btf_dump_incremental(void)
+static void test_ctx__free(struct test_ctx *t)
{
- struct btf *btf = NULL;
- struct btf_dump *d = NULL;
- int id, err, i;
+ fclose(t->dump_buf_file);
+ free(t->dump_buf);
+ btf_dump__free(t->d);
+ btf__free(t->btf);
+}
- dump_buf_file = open_memstream(&dump_buf, &dump_buf_sz);
- if (!ASSERT_OK_PTR(dump_buf_file, "dump_memstream"))
- return;
- btf = btf__new_empty();
- if (!ASSERT_OK_PTR(btf, "new_empty"))
+static int test_ctx__init(struct test_ctx *t)
+{
+ t->dump_buf_file = open_memstream(&t->dump_buf, &t->dump_buf_sz);
+ if (!ASSERT_OK_PTR(t->dump_buf_file, "dump_memstream"))
+ return -1;
+ t->btf = btf__new_empty();
+ if (!ASSERT_OK_PTR(t->btf, "new_empty"))
goto err_out;
- d = btf_dump__new(btf, btf_dump_printf, dump_buf_file, NULL);
- if (!ASSERT_OK(libbpf_get_error(d), "btf_dump__new"))
+ t->d = btf_dump__new(t->btf, btf_dump_printf, t->dump_buf_file, NULL);
+ if (!ASSERT_OK(libbpf_get_error(t->d), "btf_dump__new"))
goto err_out;
+ return 0;
+
+err_out:
+ test_ctx__free(t);
+ return -1;
+}
+
+static void test_ctx__dump_and_compare(struct test_ctx *t,
+ const char *expected_output,
+ const char *message)
+{
+ int i, err;
+
+ for (i = 1; i < btf__type_cnt(t->btf); i++) {
+ err = btf_dump__dump_type(t->d, i);
+ ASSERT_OK(err, "dump_type_ok");
+ }
+
+ fflush(t->dump_buf_file);
+ t->dump_buf[t->dump_buf_sz] = 0; /* some libc implementations don't do this */
+
+ ASSERT_STREQ(t->dump_buf, expected_output, message);
+}
+
+static void test_btf_dump_incremental(void)
+{
+ struct test_ctx t = {};
+ struct btf *btf;
+ int id, err;
+
+ if (test_ctx__init(&t))
+ return;
+
+ btf = t.btf;
+
/* First, generate BTF corresponding to the following C code:
*
* enum x;
@@ -182,15 +225,7 @@ static void test_btf_dump_incremental(void)
err = btf__add_field(btf, "x", 4, 0, 0);
ASSERT_OK(err, "field_ok");
- for (i = 1; i < btf__type_cnt(btf); i++) {
- err = btf_dump__dump_type(d, i);
- ASSERT_OK(err, "dump_type_ok");
- }
-
- fflush(dump_buf_file);
- dump_buf[dump_buf_sz] = 0; /* some libc implementations don't do this */
-
- ASSERT_STREQ(dump_buf,
+ test_ctx__dump_and_compare(&t,
"enum x;\n"
"\n"
"enum x {\n"
@@ -221,7 +256,7 @@ static void test_btf_dump_incremental(void)
* enum values don't conflict;
*
*/
- fseek(dump_buf_file, 0, SEEK_SET);
+ fseek(t.dump_buf_file, 0, SEEK_SET);
id = btf__add_struct(btf, "s", 4);
ASSERT_EQ(id, 7, "struct_id");
@@ -232,14 +267,7 @@ static void test_btf_dump_incremental(void)
err = btf__add_field(btf, "s", 6, 64, 0);
ASSERT_OK(err, "field_ok");
- for (i = 1; i < btf__type_cnt(btf); i++) {
- err = btf_dump__dump_type(d, i);
- ASSERT_OK(err, "dump_type_ok");
- }
-
- fflush(dump_buf_file);
- dump_buf[dump_buf_sz] = 0; /* some libc implementations don't do this */
- ASSERT_STREQ(dump_buf,
+ test_ctx__dump_and_compare(&t,
"struct s___2 {\n"
" enum x x;\n"
" enum {\n"
@@ -248,11 +276,53 @@ static void test_btf_dump_incremental(void)
" struct s s;\n"
"};\n\n" , "c_dump1");
-err_out:
- fclose(dump_buf_file);
- free(dump_buf);
- btf_dump__free(d);
- btf__free(btf);
+ test_ctx__free(&t);
+}
+
+static void test_btf_dump_type_tags(void)
+{
+ struct test_ctx t = {};
+ struct btf *btf;
+ int id, err;
+
+ if (test_ctx__init(&t))
+ return;
+
+ btf = t.btf;
+
+ /* Generate BTF corresponding to the following C code:
+ *
+ * struct s {
+ * void __attribute__((btf_type_tag(\"void_tag\"))) *p1;
+ * void __attribute__((void_attr)) *p2;
+ * };
+ *
+ */
+
+ id = btf__add_type_tag(btf, "void_tag", 0);
+ ASSERT_EQ(id, 1, "type_tag_id");
+ id = btf__add_ptr(btf, id);
+ ASSERT_EQ(id, 2, "void_ptr_id1");
+
+ id = btf__add_type_attr(btf, "void_attr", 0);
+ ASSERT_EQ(id, 3, "type_attr_id");
+ id = btf__add_ptr(btf, id);
+ ASSERT_EQ(id, 4, "void_ptr_id2");
+
+ id = btf__add_struct(btf, "s", 8);
+ ASSERT_EQ(id, 5, "struct_id");
+ err = btf__add_field(btf, "p1", 2, 0, 0);
+ ASSERT_OK(err, "field_ok1");
+ err = btf__add_field(btf, "p2", 4, 0, 0);
+ ASSERT_OK(err, "field_ok2");
+
+ test_ctx__dump_and_compare(&t,
+"struct s {\n"
+" void __attribute__((btf_type_tag(\"void_tag\"))) *p1;\n"
+" void __attribute__((void_attr)) *p2;\n"
+"};\n\n", "dump_and_compare");
+
+ test_ctx__free(&t);
}
#define STRSIZE 4096
@@ -874,6 +944,9 @@ void test_btf_dump() {
if (test__start_subtest("btf_dump: incremental"))
test_btf_dump_incremental();
+ if (test__start_subtest("btf_dump: type_tags"))
+ test_btf_dump_type_tags();
+
btf = libbpf_find_kernel_btf();
if (!ASSERT_OK_PTR(btf, "no kernel BTF found"))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c b/tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c
new file mode 100644
index 000000000000..d4d583872fa2
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+#include "cgroup_preorder.skel.h"
+
+static int run_getsockopt_test(int cg_parent, int cg_child, int sock_fd, bool all_preorder)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opts);
+ enum bpf_attach_type prog_c_atype, prog_c2_atype, prog_p_atype, prog_p2_atype;
+ int prog_c_fd, prog_c2_fd, prog_p_fd, prog_p2_fd;
+ struct cgroup_preorder *skel = NULL;
+ struct bpf_program *prog;
+ __u8 *result, buf;
+ socklen_t optlen;
+ int err = 0;
+
+ skel = cgroup_preorder__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "cgroup_preorder__open_and_load"))
+ return 0;
+
+ buf = 0x00;
+ err = setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1);
+ if (!ASSERT_OK(err, "setsockopt"))
+ goto close_skel;
+
+ opts.flags = BPF_F_ALLOW_MULTI;
+ if (all_preorder)
+ opts.flags |= BPF_F_PREORDER;
+ prog = skel->progs.child;
+ prog_c_fd = bpf_program__fd(prog);
+ prog_c_atype = bpf_program__expected_attach_type(prog);
+ err = bpf_prog_attach_opts(prog_c_fd, cg_child, prog_c_atype, &opts);
+ if (!ASSERT_OK(err, "bpf_prog_attach_opts-child"))
+ goto close_skel;
+
+ opts.flags = BPF_F_ALLOW_MULTI | BPF_F_PREORDER;
+ prog = skel->progs.child_2;
+ prog_c2_fd = bpf_program__fd(prog);
+ prog_c2_atype = bpf_program__expected_attach_type(prog);
+ err = bpf_prog_attach_opts(prog_c2_fd, cg_child, prog_c2_atype, &opts);
+ if (!ASSERT_OK(err, "bpf_prog_attach_opts-child_2"))
+ goto detach_child;
+
+ optlen = 1;
+ err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+ if (!ASSERT_OK(err, "getsockopt"))
+ goto detach_child_2;
+
+ result = skel->bss->result;
+ if (all_preorder)
+ ASSERT_TRUE(result[0] == 1 && result[1] == 2, "child only");
+ else
+ ASSERT_TRUE(result[0] == 2 && result[1] == 1, "child only");
+
+ skel->bss->idx = 0;
+ memset(result, 0, 4);
+
+ opts.flags = BPF_F_ALLOW_MULTI;
+ if (all_preorder)
+ opts.flags |= BPF_F_PREORDER;
+ prog = skel->progs.parent;
+ prog_p_fd = bpf_program__fd(prog);
+ prog_p_atype = bpf_program__expected_attach_type(prog);
+ err = bpf_prog_attach_opts(prog_p_fd, cg_parent, prog_p_atype, &opts);
+ if (!ASSERT_OK(err, "bpf_prog_attach_opts-parent"))
+ goto detach_child_2;
+
+ opts.flags = BPF_F_ALLOW_MULTI | BPF_F_PREORDER;
+ prog = skel->progs.parent_2;
+ prog_p2_fd = bpf_program__fd(prog);
+ prog_p2_atype = bpf_program__expected_attach_type(prog);
+ err = bpf_prog_attach_opts(prog_p2_fd, cg_parent, prog_p2_atype, &opts);
+ if (!ASSERT_OK(err, "bpf_prog_attach_opts-parent_2"))
+ goto detach_parent;
+
+ err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+ if (!ASSERT_OK(err, "getsockopt"))
+ goto detach_parent_2;
+
+ if (all_preorder)
+ ASSERT_TRUE(result[0] == 3 && result[1] == 4 && result[2] == 1 && result[3] == 2,
+ "parent and child");
+ else
+ ASSERT_TRUE(result[0] == 4 && result[1] == 2 && result[2] == 1 && result[3] == 3,
+ "parent and child");
+
+detach_parent_2:
+ ASSERT_OK(bpf_prog_detach2(prog_p2_fd, cg_parent, prog_p2_atype),
+ "bpf_prog_detach2-parent_2");
+detach_parent:
+ ASSERT_OK(bpf_prog_detach2(prog_p_fd, cg_parent, prog_p_atype),
+ "bpf_prog_detach2-parent");
+detach_child_2:
+ ASSERT_OK(bpf_prog_detach2(prog_c2_fd, cg_child, prog_c2_atype),
+ "bpf_prog_detach2-child_2");
+detach_child:
+ ASSERT_OK(bpf_prog_detach2(prog_c_fd, cg_child, prog_c_atype),
+ "bpf_prog_detach2-child");
+close_skel:
+ cgroup_preorder__destroy(skel);
+ return err;
+}
+
+void test_cgroup_preorder(void)
+{
+ int cg_parent = -1, cg_child = -1, sock_fd = -1;
+
+ cg_parent = test__join_cgroup("/parent");
+ if (!ASSERT_GE(cg_parent, 0, "join_cgroup /parent"))
+ goto out;
+
+ cg_child = test__join_cgroup("/parent/child");
+ if (!ASSERT_GE(cg_child, 0, "join_cgroup /parent/child"))
+ goto out;
+
+ sock_fd = socket(AF_INET, SOCK_STREAM, 0);
+ if (!ASSERT_GE(sock_fd, 0, "socket"))
+ goto out;
+
+ ASSERT_OK(run_getsockopt_test(cg_parent, cg_child, sock_fd, false), "getsockopt_test_1");
+ ASSERT_OK(run_getsockopt_test(cg_parent, cg_child, sock_fd, true), "getsockopt_test_2");
+
+out:
+ close(sock_fd);
+ close(cg_child);
+ close(cg_parent);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c
index 64abba72ac10..37c1cc52ed98 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c
@@ -10,12 +10,18 @@
static int run_test(int cgroup_fd, int server_fd, bool classid)
{
struct connect4_dropper *skel;
- int fd, err = 0;
+ int fd, err = 0, port;
skel = connect4_dropper__open_and_load();
if (!ASSERT_OK_PTR(skel, "skel_open"))
return -1;
+ port = get_socket_local_port(server_fd);
+ if (!ASSERT_GE(port, 0, "get_socket_local_port"))
+ return -1;
+
+ skel->bss->port = ntohs(port);
+
skel->links.connect_v4_dropper =
bpf_program__attach_cgroup(skel->progs.connect_v4_dropper,
cgroup_fd);
@@ -48,10 +54,9 @@ void test_cgroup_v1v2(void)
{
struct network_helper_opts opts = {};
int server_fd, client_fd, cgroup_fd;
- static const int port = 60120;
/* Step 1: Check base connectivity works without any BPF. */
- server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0);
+ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0);
if (!ASSERT_GE(server_fd, 0, "server_fd"))
return;
client_fd = connect_to_fd_opts(server_fd, &opts);
@@ -66,7 +71,7 @@ void test_cgroup_v1v2(void)
cgroup_fd = test__join_cgroup("/connect_dropper");
if (!ASSERT_GE(cgroup_fd, 0, "cgroup_fd"))
return;
- server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0);
+ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0);
if (!ASSERT_GE(server_fd, 0, "server_fd")) {
close(cgroup_fd);
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c b/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c
deleted file mode 100644
index 7526de379081..000000000000
--- a/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c
+++ /dev/null
@@ -1,107 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include "bpf/libbpf.h"
-#include "changes_pkt_data_freplace.skel.h"
-#include "changes_pkt_data.skel.h"
-#include <test_progs.h>
-
-static void print_verifier_log(const char *log)
-{
- if (env.verbosity >= VERBOSE_VERY)
- fprintf(stdout, "VERIFIER LOG:\n=============\n%s=============\n", log);
-}
-
-static void test_aux(const char *main_prog_name,
- const char *to_be_replaced,
- const char *replacement,
- bool expect_load)
-{
- struct changes_pkt_data_freplace *freplace = NULL;
- struct bpf_program *freplace_prog = NULL;
- struct bpf_program *main_prog = NULL;
- LIBBPF_OPTS(bpf_object_open_opts, opts);
- struct changes_pkt_data *main = NULL;
- char log[16*1024];
- int err;
-
- opts.kernel_log_buf = log;
- opts.kernel_log_size = sizeof(log);
- if (env.verbosity >= VERBOSE_SUPER)
- opts.kernel_log_level = 1 | 2 | 4;
- main = changes_pkt_data__open_opts(&opts);
- if (!ASSERT_OK_PTR(main, "changes_pkt_data__open"))
- goto out;
- main_prog = bpf_object__find_program_by_name(main->obj, main_prog_name);
- if (!ASSERT_OK_PTR(main_prog, "main_prog"))
- goto out;
- bpf_program__set_autoload(main_prog, true);
- err = changes_pkt_data__load(main);
- print_verifier_log(log);
- if (!ASSERT_OK(err, "changes_pkt_data__load"))
- goto out;
- freplace = changes_pkt_data_freplace__open_opts(&opts);
- if (!ASSERT_OK_PTR(freplace, "changes_pkt_data_freplace__open"))
- goto out;
- freplace_prog = bpf_object__find_program_by_name(freplace->obj, replacement);
- if (!ASSERT_OK_PTR(freplace_prog, "freplace_prog"))
- goto out;
- bpf_program__set_autoload(freplace_prog, true);
- bpf_program__set_autoattach(freplace_prog, true);
- bpf_program__set_attach_target(freplace_prog,
- bpf_program__fd(main_prog),
- to_be_replaced);
- err = changes_pkt_data_freplace__load(freplace);
- print_verifier_log(log);
- if (expect_load) {
- ASSERT_OK(err, "changes_pkt_data_freplace__load");
- } else {
- ASSERT_ERR(err, "changes_pkt_data_freplace__load");
- ASSERT_HAS_SUBSTR(log, "Extension program changes packet data", "error log");
- }
-
-out:
- changes_pkt_data_freplace__destroy(freplace);
- changes_pkt_data__destroy(main);
-}
-
-/* There are two global subprograms in both changes_pkt_data.skel.h:
- * - one changes packet data;
- * - another does not.
- * It is ok to freplace subprograms that change packet data with those
- * that either do or do not. It is only ok to freplace subprograms
- * that do not change packet data with those that do not as well.
- * The below tests check outcomes for each combination of such freplace.
- * Also test a case when main subprogram itself is replaced and is a single
- * subprogram in a program.
- */
-void test_changes_pkt_data_freplace(void)
-{
- struct {
- const char *main;
- const char *to_be_replaced;
- bool changes;
- } mains[] = {
- { "main_with_subprogs", "changes_pkt_data", true },
- { "main_with_subprogs", "does_not_change_pkt_data", false },
- { "main_changes", "main_changes", true },
- { "main_does_not_change", "main_does_not_change", false },
- };
- struct {
- const char *func;
- bool changes;
- } replacements[] = {
- { "changes_pkt_data", true },
- { "does_not_change_pkt_data", false }
- };
- char buf[64];
-
- for (int i = 0; i < ARRAY_SIZE(mains); ++i) {
- for (int j = 0; j < ARRAY_SIZE(replacements); ++j) {
- snprintf(buf, sizeof(buf), "%s_with_%s",
- mains[i].to_be_replaced, replacements[j].func);
- if (!test__start_subtest(buf))
- continue;
- test_aux(mains[i].main, mains[i].to_be_replaced, replacements[j].func,
- mains[i].changes || !replacements[j].changes);
- }
- }
-}
diff --git a/tools/testing/selftests/bpf/prog_tests/compute_live_registers.c b/tools/testing/selftests/bpf/prog_tests/compute_live_registers.c
new file mode 100644
index 000000000000..285f20241fe1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/compute_live_registers.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "compute_live_registers.skel.h"
+#include "test_progs.h"
+
+void test_compute_live_registers(void)
+{
+ RUN_TESTS(compute_live_registers);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index e10ea92c3fe2..08963c82f30b 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -85,11 +85,11 @@ static int duration = 0;
#define NESTING_ERR_CASE(name) { \
NESTING_CASE_COMMON(name), \
.fails = true, \
- .run_btfgen_fails = true, \
+ .run_btfgen_fails = true, \
}
#define ARRAYS_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \
- .a = { [2] = 1 }, \
+ .a = { [2] = 1, [3] = 11 }, \
.b = { [1] = { [2] = { [3] = 2 } } }, \
.c = { [1] = { .c = 3 } }, \
.d = { [0] = { [0] = { .d = 4 } } }, \
@@ -108,6 +108,7 @@ static int duration = 0;
.input_len = sizeof(struct core_reloc_##name), \
.output = STRUCT_TO_CHAR_PTR(core_reloc_arrays_output) { \
.a2 = 1, \
+ .a3 = 12, \
.b123 = 2, \
.c1c = 3, \
.d00d = 4, \
@@ -602,6 +603,7 @@ static const struct core_reloc_test_case test_cases[] = {
ARRAYS_ERR_CASE(arrays___err_non_array),
ARRAYS_ERR_CASE(arrays___err_wrong_val_type),
ARRAYS_ERR_CASE(arrays___err_bad_zero_sz_arr),
+ ARRAYS_ERR_CASE(arrays___err_bad_signed_arr_elem_sz),
/* enum/ptr/int handling scenarios */
PRIMITIVES_CASE(primitives),
diff --git a/tools/testing/selftests/bpf/prog_tests/cpumask.c b/tools/testing/selftests/bpf/prog_tests/cpumask.c
index e58a04654238..6c45330a5ca3 100644
--- a/tools/testing/selftests/bpf/prog_tests/cpumask.c
+++ b/tools/testing/selftests/bpf/prog_tests/cpumask.c
@@ -25,6 +25,10 @@ static const char * const cpumask_success_testcases[] = {
"test_global_mask_nested_deep_rcu",
"test_global_mask_nested_deep_array_rcu",
"test_cpumask_weight",
+ "test_refcount_null_tracking",
+ "test_populate_reject_small_mask",
+ "test_populate_reject_unaligned",
+ "test_populate",
};
static void verify_success(const char *prog_name)
@@ -78,6 +82,5 @@ void test_cpumask(void)
verify_success(cpumask_success_testcases[i]);
}
- RUN_TESTS(cpumask_success);
RUN_TESTS(cpumask_failure);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c
index b614a5272dfd..e29cc16124c2 100644
--- a/tools/testing/selftests/bpf/prog_tests/dynptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c
@@ -10,6 +10,7 @@ enum test_setup_type {
SETUP_SYSCALL_SLEEP,
SETUP_SKB_PROG,
SETUP_SKB_PROG_TP,
+ SETUP_XDP_PROG,
};
static struct {
@@ -18,6 +19,8 @@ static struct {
} success_tests[] = {
{"test_read_write", SETUP_SYSCALL_SLEEP},
{"test_dynptr_data", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_copy", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_copy_xdp", SETUP_XDP_PROG},
{"test_ringbuf", SETUP_SYSCALL_SLEEP},
{"test_skb_readonly", SETUP_SKB_PROG},
{"test_dynptr_skb_data", SETUP_SKB_PROG},
@@ -120,6 +123,24 @@ static void verify_success(const char *prog_name, enum test_setup_type setup_typ
break;
}
+ case SETUP_XDP_PROG:
+ {
+ char data[5000];
+ int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &data,
+ .data_size_in = sizeof(data),
+ .repeat = 1,
+ );
+
+ prog_fd = bpf_program__fd(prog);
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+
+ if (!ASSERT_OK(err, "test_run"))
+ goto cleanup;
+
+ break;
+ }
}
ASSERT_EQ(skel->bss->err, 0, "err");
diff --git a/tools/testing/selftests/bpf/prog_tests/fd_array.c b/tools/testing/selftests/bpf/prog_tests/fd_array.c
index a1d52e73fb16..9add890c2d37 100644
--- a/tools/testing/selftests/bpf/prog_tests/fd_array.c
+++ b/tools/testing/selftests/bpf/prog_tests/fd_array.c
@@ -83,8 +83,8 @@ static inline int bpf_prog_get_map_ids(int prog_fd, __u32 *nr_map_ids, __u32 *ma
int err;
memset(&info, 0, len);
- info.nr_map_ids = *nr_map_ids,
- info.map_ids = ptr_to_u64(map_ids),
+ info.nr_map_ids = *nr_map_ids;
+ info.map_ids = ptr_to_u64(map_ids);
err = bpf_prog_get_info_by_fd(prog_fd, &info, &len);
if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd"))
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_noreturns.c b/tools/testing/selftests/bpf/prog_tests/fexit_noreturns.c
new file mode 100644
index 000000000000..568d3aa48a78
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_noreturns.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "fexit_noreturns.skel.h"
+
+void test_fexit_noreturns(void)
+{
+ RUN_TESTS(fexit_noreturns);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c
index 3729fbfd3084..80b153d3ddec 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c
@@ -542,8 +542,12 @@ static void detach_program(struct bpf_flow *skel, int prog_fd)
static int set_port_drop(int pf, bool multi_port)
{
+ char dst_port[16];
+
+ snprintf(dst_port, sizeof(dst_port), "%d", CFG_PORT_INNER);
+
SYS(fail, "tc qdisc add dev lo ingress");
- SYS(fail_delete_qdisc, "tc filter add %s %s %s %s %s %s %s %s %s %s",
+ SYS(fail_delete_qdisc, "tc filter add %s %s %s %s %s %s %s %s %s %s %s %s",
"dev lo",
"parent FFFF:",
"protocol", pf == PF_INET6 ? "ipv6" : "ip",
@@ -551,6 +555,7 @@ static int set_port_drop(int pf, bool multi_port)
"flower",
"ip_proto udp",
"src_port", multi_port ? "8-10" : "9",
+ "dst_port", dst_port,
"action drop");
return 0;
diff --git a/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c b/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c
index 5a0b51157451..43a26ec69a8e 100644
--- a/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c
+++ b/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c
@@ -8,11 +8,12 @@
#include <unistd.h>
#include <test_progs.h>
#include "test_get_xattr.skel.h"
+#include "test_set_remove_xattr.skel.h"
#include "test_fsverity.skel.h"
static const char testfile[] = "/tmp/test_progs_fs_kfuncs";
-static void test_xattr(void)
+static void test_get_xattr(const char *name, const char *value, bool allow_access)
{
struct test_get_xattr *skel = NULL;
int fd = -1, err;
@@ -25,7 +26,7 @@ static void test_xattr(void)
close(fd);
fd = -1;
- err = setxattr(testfile, "user.kfuncs", "hello", sizeof("hello"), 0);
+ err = setxattr(testfile, name, value, strlen(value) + 1, 0);
if (err && errno == EOPNOTSUPP) {
printf("%s:SKIP:local fs doesn't support xattr (%d)\n"
"To run this test, make sure /tmp filesystem supports xattr.\n",
@@ -48,16 +49,23 @@ static void test_xattr(void)
goto out;
fd = open(testfile, O_RDONLY, 0644);
+
if (!ASSERT_GE(fd, 0, "open_file"))
goto out;
- ASSERT_EQ(skel->bss->found_xattr_from_file, 1, "found_xattr_from_file");
-
/* Trigger security_inode_getxattr */
- err = getxattr(testfile, "user.kfuncs", v, sizeof(v));
- ASSERT_EQ(err, -1, "getxattr_return");
- ASSERT_EQ(errno, EINVAL, "getxattr_errno");
- ASSERT_EQ(skel->bss->found_xattr_from_dentry, 1, "found_xattr_from_dentry");
+ err = getxattr(testfile, name, v, sizeof(v));
+
+ if (allow_access) {
+ ASSERT_EQ(err, -1, "getxattr_return");
+ ASSERT_EQ(errno, EINVAL, "getxattr_errno");
+ ASSERT_EQ(skel->bss->found_xattr_from_file, 1, "found_xattr_from_file");
+ ASSERT_EQ(skel->bss->found_xattr_from_dentry, 1, "found_xattr_from_dentry");
+ } else {
+ ASSERT_EQ(err, strlen(value) + 1, "getxattr_return");
+ ASSERT_EQ(skel->bss->found_xattr_from_file, 0, "found_xattr_from_file");
+ ASSERT_EQ(skel->bss->found_xattr_from_dentry, 0, "found_xattr_from_dentry");
+ }
out:
close(fd);
@@ -65,6 +73,127 @@ out:
remove(testfile);
}
+/* xattr value we will set to security.bpf.foo */
+static const char value_foo[] = "hello";
+
+static void read_and_validate_foo(struct test_set_remove_xattr *skel)
+{
+ char value_out[32];
+ int err;
+
+ err = getxattr(testfile, skel->rodata->xattr_foo, value_out, sizeof(value_out));
+ ASSERT_EQ(err, sizeof(value_foo), "getxattr size foo");
+ ASSERT_EQ(strncmp(value_out, value_foo, sizeof(value_foo)), 0, "strncmp value_foo");
+}
+
+static void set_foo(struct test_set_remove_xattr *skel)
+{
+ ASSERT_OK(setxattr(testfile, skel->rodata->xattr_foo, value_foo, strlen(value_foo) + 1, 0),
+ "setxattr foo");
+}
+
+static void validate_bar_match(struct test_set_remove_xattr *skel)
+{
+ char value_out[32];
+ int err;
+
+ err = getxattr(testfile, skel->rodata->xattr_bar, value_out, sizeof(value_out));
+ ASSERT_EQ(err, sizeof(skel->data->value_bar), "getxattr size bar");
+ ASSERT_EQ(strncmp(value_out, skel->data->value_bar, sizeof(skel->data->value_bar)), 0,
+ "strncmp value_bar");
+}
+
+static void validate_bar_removed(struct test_set_remove_xattr *skel)
+{
+ char value_out[32];
+ int err;
+
+ err = getxattr(testfile, skel->rodata->xattr_bar, value_out, sizeof(value_out));
+ ASSERT_LT(err, 0, "getxattr size bar should fail");
+}
+
+static void test_set_remove_xattr(void)
+{
+ struct test_set_remove_xattr *skel = NULL;
+ int fd = -1, err;
+
+ fd = open(testfile, O_CREAT | O_RDONLY, 0644);
+ if (!ASSERT_GE(fd, 0, "create_file"))
+ return;
+
+ close(fd);
+ fd = -1;
+
+ skel = test_set_remove_xattr__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_set_remove_xattr__open_and_load"))
+ return;
+
+ /* Set security.bpf.foo to "hello" */
+ err = setxattr(testfile, skel->rodata->xattr_foo, value_foo, strlen(value_foo) + 1, 0);
+ if (err && errno == EOPNOTSUPP) {
+ printf("%s:SKIP:local fs doesn't support xattr (%d)\n"
+ "To run this test, make sure /tmp filesystem supports xattr.\n",
+ __func__, errno);
+ test__skip();
+ goto out;
+ }
+
+ if (!ASSERT_OK(err, "setxattr"))
+ goto out;
+
+ skel->bss->monitored_pid = getpid();
+ err = test_set_remove_xattr__attach(skel);
+ if (!ASSERT_OK(err, "test_set_remove_xattr__attach"))
+ goto out;
+
+ /* First, test not _locked version of the kfuncs with getxattr. */
+
+ /* Read security.bpf.foo and trigger test_inode_getxattr. This
+ * bpf program will set security.bpf.bar to "world".
+ */
+ read_and_validate_foo(skel);
+ validate_bar_match(skel);
+
+ /* Read security.bpf.foo and trigger test_inode_getxattr again.
+ * This will remove xattr security.bpf.bar.
+ */
+ read_and_validate_foo(skel);
+ validate_bar_removed(skel);
+
+ ASSERT_TRUE(skel->bss->set_security_bpf_bar_success, "set_security_bpf_bar_success");
+ ASSERT_TRUE(skel->bss->remove_security_bpf_bar_success, "remove_security_bpf_bar_success");
+ ASSERT_TRUE(skel->bss->set_security_selinux_fail, "set_security_selinux_fail");
+ ASSERT_TRUE(skel->bss->remove_security_selinux_fail, "remove_security_selinux_fail");
+
+ /* Second, test _locked version of the kfuncs, with setxattr */
+
+ /* Set security.bpf.foo and trigger test_inode_setxattr. This
+ * bpf program will set security.bpf.bar to "world".
+ */
+ set_foo(skel);
+ validate_bar_match(skel);
+
+ /* Set security.bpf.foo and trigger test_inode_setxattr again.
+ * This will remove xattr security.bpf.bar.
+ */
+ set_foo(skel);
+ validate_bar_removed(skel);
+
+ ASSERT_TRUE(skel->bss->locked_set_security_bpf_bar_success,
+ "locked_set_security_bpf_bar_success");
+ ASSERT_TRUE(skel->bss->locked_remove_security_bpf_bar_success,
+ "locked_remove_security_bpf_bar_success");
+ ASSERT_TRUE(skel->bss->locked_set_security_selinux_fail,
+ "locked_set_security_selinux_fail");
+ ASSERT_TRUE(skel->bss->locked_remove_security_selinux_fail,
+ "locked_remove_security_selinux_fail");
+
+out:
+ close(fd);
+ test_set_remove_xattr__destroy(skel);
+ remove(testfile);
+}
+
#ifndef SHA256_DIGEST_SIZE
#define SHA256_DIGEST_SIZE 32
#endif
@@ -141,8 +270,21 @@ out:
void test_fs_kfuncs(void)
{
- if (test__start_subtest("xattr"))
- test_xattr();
+ /* Matches xattr_names in progs/test_get_xattr.c */
+ if (test__start_subtest("user_xattr"))
+ test_get_xattr("user.kfuncs", "hello", true);
+
+ if (test__start_subtest("security_bpf_xattr"))
+ test_get_xattr("security.bpf.xxx", "hello", true);
+
+ if (test__start_subtest("security_bpf_xattr_error"))
+ test_get_xattr("security.bpf", "hello", false);
+
+ if (test__start_subtest("security_selinux_xattr_error"))
+ test_get_xattr("security.selinux", "hello", false);
+
+ if (test__start_subtest("set_remove_xattr"))
+ test_set_remove_xattr();
if (test__start_subtest("fsverity"))
test_fsverity();
diff --git a/tools/testing/selftests/bpf/prog_tests/kernel_flag.c b/tools/testing/selftests/bpf/prog_tests/kernel_flag.c
new file mode 100644
index 000000000000..a133354ac9bc
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/kernel_flag.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Microsoft */
+#include <test_progs.h>
+#include "kfunc_call_test.skel.h"
+#include "kfunc_call_test.lskel.h"
+#include "test_kernel_flag.skel.h"
+
+void test_kernel_flag(void)
+{
+ struct test_kernel_flag *lsm_skel;
+ struct kfunc_call_test *skel = NULL;
+ struct kfunc_call_test_lskel *lskel = NULL;
+ int ret;
+
+ lsm_skel = test_kernel_flag__open_and_load();
+ if (!ASSERT_OK_PTR(lsm_skel, "lsm_skel"))
+ return;
+
+ lsm_skel->bss->monitored_tid = gettid();
+
+ ret = test_kernel_flag__attach(lsm_skel);
+ if (!ASSERT_OK(ret, "test_kernel_flag__attach"))
+ goto close_prog;
+
+ /* Test with skel. This should pass the gatekeeper */
+ skel = kfunc_call_test__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel"))
+ goto close_prog;
+
+ /* Test with lskel. This should fail due to blocking kernel-based bpf() invocations */
+ lskel = kfunc_call_test_lskel__open_and_load();
+ if (!ASSERT_ERR_PTR(lskel, "lskel"))
+ goto close_prog;
+
+close_prog:
+ if (skel)
+ kfunc_call_test__destroy(skel);
+ if (lskel)
+ kfunc_call_test_lskel__destroy(lskel);
+
+ lsm_skel->bss->monitored_tid = 0;
+ test_kernel_flag__destroy(lsm_skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h b/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h
index fb1eb8c67361..ccec0fcdabc1 100644
--- a/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h
+++ b/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h
@@ -5,7 +5,6 @@
#include <time.h>
#include <net/if.h>
-#include <linux/if_tun.h>
#include <linux/icmp.h>
#include "test_progs.h"
@@ -37,34 +36,6 @@ static inline int netns_delete(void)
return system("ip netns del " NETNS ">/dev/null 2>&1");
}
-static int open_tuntap(const char *dev_name, bool need_mac)
-{
- int err = 0;
- struct ifreq ifr;
- int fd = open("/dev/net/tun", O_RDWR);
-
- if (!ASSERT_GT(fd, 0, "open(/dev/net/tun)"))
- return -1;
-
- ifr.ifr_flags = IFF_NO_PI | (need_mac ? IFF_TAP : IFF_TUN);
- strncpy(ifr.ifr_name, dev_name, IFNAMSIZ - 1);
- ifr.ifr_name[IFNAMSIZ - 1] = '\0';
-
- err = ioctl(fd, TUNSETIFF, &ifr);
- if (!ASSERT_OK(err, "ioctl(TUNSETIFF)")) {
- close(fd);
- return -1;
- }
-
- err = fcntl(fd, F_SETFL, O_NONBLOCK);
- if (!ASSERT_OK(err, "fcntl(O_NONBLOCK)")) {
- close(fd);
- return -1;
- }
-
- return fd;
-}
-
#define ICMP_PAYLOAD_SIZE 100
/* Match an ICMP packet with payload len ICMP_PAYLOAD_SIZE */
diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_ip_encap.c b/tools/testing/selftests/bpf/prog_tests/lwt_ip_encap.c
new file mode 100644
index 000000000000..b6391af5f6f9
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/lwt_ip_encap.c
@@ -0,0 +1,540 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <netinet/in.h>
+
+#include "network_helpers.h"
+#include "test_progs.h"
+
+#define BPF_FILE "test_lwt_ip_encap.bpf.o"
+
+#define NETNS_NAME_SIZE 32
+#define NETNS_BASE "ns-lwt-ip-encap"
+
+#define IP4_ADDR_1 "172.16.1.100"
+#define IP4_ADDR_2 "172.16.2.100"
+#define IP4_ADDR_3 "172.16.3.100"
+#define IP4_ADDR_4 "172.16.4.100"
+#define IP4_ADDR_5 "172.16.5.100"
+#define IP4_ADDR_6 "172.16.6.100"
+#define IP4_ADDR_7 "172.16.7.100"
+#define IP4_ADDR_8 "172.16.8.100"
+#define IP4_ADDR_GRE "172.16.16.100"
+
+#define IP4_ADDR_SRC IP4_ADDR_1
+#define IP4_ADDR_DST IP4_ADDR_4
+
+#define IP6_ADDR_1 "fb01::1"
+#define IP6_ADDR_2 "fb02::1"
+#define IP6_ADDR_3 "fb03::1"
+#define IP6_ADDR_4 "fb04::1"
+#define IP6_ADDR_5 "fb05::1"
+#define IP6_ADDR_6 "fb06::1"
+#define IP6_ADDR_7 "fb07::1"
+#define IP6_ADDR_8 "fb08::1"
+#define IP6_ADDR_GRE "fb10::1"
+
+#define IP6_ADDR_SRC IP6_ADDR_1
+#define IP6_ADDR_DST IP6_ADDR_4
+
+/* Setup/topology:
+ *
+ * NS1 NS2 NS3
+ * veth1 <---> veth2 veth3 <---> veth4 (the top route)
+ * veth5 <---> veth6 veth7 <---> veth8 (the bottom route)
+ *
+ * Each vethN gets IP[4|6]_ADDR_N address.
+ *
+ * IP*_ADDR_SRC = IP*_ADDR_1
+ * IP*_ADDR_DST = IP*_ADDR_4
+ *
+ * All tests test pings from IP*_ADDR__SRC to IP*_ADDR_DST.
+ *
+ * By default, routes are configured to allow packets to go
+ * IP*_ADDR_1 <=> IP*_ADDR_2 <=> IP*_ADDR_3 <=> IP*_ADDR_4 (the top route).
+ *
+ * A GRE device is installed in NS3 with IP*_ADDR_GRE, and
+ * NS1/NS2 are configured to route packets to IP*_ADDR_GRE via IP*_ADDR_8
+ * (the bottom route).
+ *
+ * Tests:
+ *
+ * 1. Routes NS2->IP*_ADDR_DST are brought down, so the only way a ping
+ * from IP*_ADDR_SRC to IP*_ADDR_DST can work is via IP*_ADDR_GRE.
+ *
+ * 2a. In an egress test, a bpf LWT_XMIT program is installed on veth1
+ * that encaps the packets with an IP/GRE header to route to IP*_ADDR_GRE.
+ *
+ * ping: SRC->[encap at veth1:egress]->GRE:decap->DST
+ * ping replies go DST->SRC directly
+ *
+ * 2b. In an ingress test, a bpf LWT_IN program is installed on veth2
+ * that encaps the packets with an IP/GRE header to route to IP*_ADDR_GRE.
+ *
+ * ping: SRC->[encap at veth2:ingress]->GRE:decap->DST
+ * ping replies go DST->SRC directly
+ */
+
+static int create_ns(char *name, size_t name_sz)
+{
+ if (!name)
+ goto fail;
+
+ if (!ASSERT_OK(append_tid(name, name_sz), "append TID"))
+ goto fail;
+
+ SYS(fail, "ip netns add %s", name);
+
+ /* rp_filter gets confused by what these tests are doing, so disable it */
+ SYS(fail, "ip netns exec %s sysctl -wq net.ipv4.conf.all.rp_filter=0", name);
+ SYS(fail, "ip netns exec %s sysctl -wq net.ipv4.conf.default.rp_filter=0", name);
+ /* Disable IPv6 DAD because it sometimes takes too long and fails tests */
+ SYS(fail, "ip netns exec %s sysctl -wq net.ipv6.conf.all.accept_dad=0", name);
+ SYS(fail, "ip netns exec %s sysctl -wq net.ipv6.conf.default.accept_dad=0", name);
+
+ return 0;
+fail:
+ return -1;
+}
+
+static int set_top_addr(const char *ns1, const char *ns2, const char *ns3)
+{
+ SYS(fail, "ip -n %s a add %s/24 dev veth1", ns1, IP4_ADDR_1);
+ SYS(fail, "ip -n %s a add %s/24 dev veth2", ns2, IP4_ADDR_2);
+ SYS(fail, "ip -n %s a add %s/24 dev veth3", ns2, IP4_ADDR_3);
+ SYS(fail, "ip -n %s a add %s/24 dev veth4", ns3, IP4_ADDR_4);
+ SYS(fail, "ip -n %s -6 a add %s/128 dev veth1", ns1, IP6_ADDR_1);
+ SYS(fail, "ip -n %s -6 a add %s/128 dev veth2", ns2, IP6_ADDR_2);
+ SYS(fail, "ip -n %s -6 a add %s/128 dev veth3", ns2, IP6_ADDR_3);
+ SYS(fail, "ip -n %s -6 a add %s/128 dev veth4", ns3, IP6_ADDR_4);
+
+ SYS(fail, "ip -n %s link set dev veth1 up", ns1);
+ SYS(fail, "ip -n %s link set dev veth2 up", ns2);
+ SYS(fail, "ip -n %s link set dev veth3 up", ns2);
+ SYS(fail, "ip -n %s link set dev veth4 up", ns3);
+
+ return 0;
+fail:
+ return 1;
+}
+
+static int set_bottom_addr(const char *ns1, const char *ns2, const char *ns3)
+{
+ SYS(fail, "ip -n %s a add %s/24 dev veth5", ns1, IP4_ADDR_5);
+ SYS(fail, "ip -n %s a add %s/24 dev veth6", ns2, IP4_ADDR_6);
+ SYS(fail, "ip -n %s a add %s/24 dev veth7", ns2, IP4_ADDR_7);
+ SYS(fail, "ip -n %s a add %s/24 dev veth8", ns3, IP4_ADDR_8);
+ SYS(fail, "ip -n %s -6 a add %s/128 dev veth5", ns1, IP6_ADDR_5);
+ SYS(fail, "ip -n %s -6 a add %s/128 dev veth6", ns2, IP6_ADDR_6);
+ SYS(fail, "ip -n %s -6 a add %s/128 dev veth7", ns2, IP6_ADDR_7);
+ SYS(fail, "ip -n %s -6 a add %s/128 dev veth8", ns3, IP6_ADDR_8);
+
+ SYS(fail, "ip -n %s link set dev veth5 up", ns1);
+ SYS(fail, "ip -n %s link set dev veth6 up", ns2);
+ SYS(fail, "ip -n %s link set dev veth7 up", ns2);
+ SYS(fail, "ip -n %s link set dev veth8 up", ns3);
+
+ return 0;
+fail:
+ return 1;
+}
+
+static int configure_vrf(const char *ns1, const char *ns2)
+{
+ if (!ns1 || !ns2)
+ goto fail;
+
+ SYS(fail, "ip -n %s link add red type vrf table 1001", ns1);
+ SYS(fail, "ip -n %s link set red up", ns1);
+ SYS(fail, "ip -n %s route add table 1001 unreachable default metric 8192", ns1);
+ SYS(fail, "ip -n %s -6 route add table 1001 unreachable default metric 8192", ns1);
+ SYS(fail, "ip -n %s link set veth1 vrf red", ns1);
+ SYS(fail, "ip -n %s link set veth5 vrf red", ns1);
+
+ SYS(fail, "ip -n %s link add red type vrf table 1001", ns2);
+ SYS(fail, "ip -n %s link set red up", ns2);
+ SYS(fail, "ip -n %s route add table 1001 unreachable default metric 8192", ns2);
+ SYS(fail, "ip -n %s -6 route add table 1001 unreachable default metric 8192", ns2);
+ SYS(fail, "ip -n %s link set veth2 vrf red", ns2);
+ SYS(fail, "ip -n %s link set veth3 vrf red", ns2);
+ SYS(fail, "ip -n %s link set veth6 vrf red", ns2);
+ SYS(fail, "ip -n %s link set veth7 vrf red", ns2);
+
+ return 0;
+fail:
+ return -1;
+}
+
+static int configure_ns1(const char *ns1, const char *vrf)
+{
+ struct nstoken *nstoken = NULL;
+
+ if (!ns1 || !vrf)
+ goto fail;
+
+ nstoken = open_netns(ns1);
+ if (!ASSERT_OK_PTR(nstoken, "open ns1"))
+ goto fail;
+
+ /* Top route */
+ SYS(fail, "ip route add %s/32 dev veth1 %s", IP4_ADDR_2, vrf);
+ SYS(fail, "ip route add default dev veth1 via %s %s", IP4_ADDR_2, vrf);
+ SYS(fail, "ip -6 route add %s/128 dev veth1 %s", IP6_ADDR_2, vrf);
+ SYS(fail, "ip -6 route add default dev veth1 via %s %s", IP6_ADDR_2, vrf);
+ /* Bottom route */
+ SYS(fail, "ip route add %s/32 dev veth5 %s", IP4_ADDR_6, vrf);
+ SYS(fail, "ip route add %s/32 dev veth5 via %s %s", IP4_ADDR_7, IP4_ADDR_6, vrf);
+ SYS(fail, "ip route add %s/32 dev veth5 via %s %s", IP4_ADDR_8, IP4_ADDR_6, vrf);
+ SYS(fail, "ip -6 route add %s/128 dev veth5 %s", IP6_ADDR_6, vrf);
+ SYS(fail, "ip -6 route add %s/128 dev veth5 via %s %s", IP6_ADDR_7, IP6_ADDR_6, vrf);
+ SYS(fail, "ip -6 route add %s/128 dev veth5 via %s %s", IP6_ADDR_8, IP6_ADDR_6, vrf);
+
+ close_netns(nstoken);
+ return 0;
+fail:
+ close_netns(nstoken);
+ return -1;
+}
+
+static int configure_ns2(const char *ns2, const char *vrf)
+{
+ struct nstoken *nstoken = NULL;
+
+ if (!ns2 || !vrf)
+ goto fail;
+
+ nstoken = open_netns(ns2);
+ if (!ASSERT_OK_PTR(nstoken, "open ns2"))
+ goto fail;
+
+ SYS(fail, "ip netns exec %s sysctl -wq net.ipv4.ip_forward=1", ns2);
+ SYS(fail, "ip netns exec %s sysctl -wq net.ipv6.conf.all.forwarding=1", ns2);
+
+ /* Top route */
+ SYS(fail, "ip route add %s/32 dev veth2 %s", IP4_ADDR_1, vrf);
+ SYS(fail, "ip route add %s/32 dev veth3 %s", IP4_ADDR_4, vrf);
+ SYS(fail, "ip -6 route add %s/128 dev veth2 %s", IP6_ADDR_1, vrf);
+ SYS(fail, "ip -6 route add %s/128 dev veth3 %s", IP6_ADDR_4, vrf);
+ /* Bottom route */
+ SYS(fail, "ip route add %s/32 dev veth6 %s", IP4_ADDR_5, vrf);
+ SYS(fail, "ip route add %s/32 dev veth7 %s", IP4_ADDR_8, vrf);
+ SYS(fail, "ip -6 route add %s/128 dev veth6 %s", IP6_ADDR_5, vrf);
+ SYS(fail, "ip -6 route add %s/128 dev veth7 %s", IP6_ADDR_8, vrf);
+
+ close_netns(nstoken);
+ return 0;
+fail:
+ close_netns(nstoken);
+ return -1;
+}
+
+static int configure_ns3(const char *ns3)
+{
+ struct nstoken *nstoken = NULL;
+
+ if (!ns3)
+ goto fail;
+
+ nstoken = open_netns(ns3);
+ if (!ASSERT_OK_PTR(nstoken, "open ns3"))
+ goto fail;
+
+ /* Top route */
+ SYS(fail, "ip route add %s/32 dev veth4", IP4_ADDR_3);
+ SYS(fail, "ip route add %s/32 dev veth4 via %s", IP4_ADDR_1, IP4_ADDR_3);
+ SYS(fail, "ip route add %s/32 dev veth4 via %s", IP4_ADDR_2, IP4_ADDR_3);
+ SYS(fail, "ip -6 route add %s/128 dev veth4", IP6_ADDR_3);
+ SYS(fail, "ip -6 route add %s/128 dev veth4 via %s", IP6_ADDR_1, IP6_ADDR_3);
+ SYS(fail, "ip -6 route add %s/128 dev veth4 via %s", IP6_ADDR_2, IP6_ADDR_3);
+ /* Bottom route */
+ SYS(fail, "ip route add %s/32 dev veth8", IP4_ADDR_7);
+ SYS(fail, "ip route add %s/32 dev veth8 via %s", IP4_ADDR_5, IP4_ADDR_7);
+ SYS(fail, "ip route add %s/32 dev veth8 via %s", IP4_ADDR_6, IP4_ADDR_7);
+ SYS(fail, "ip -6 route add %s/128 dev veth8", IP6_ADDR_7);
+ SYS(fail, "ip -6 route add %s/128 dev veth8 via %s", IP6_ADDR_5, IP6_ADDR_7);
+ SYS(fail, "ip -6 route add %s/128 dev veth8 via %s", IP6_ADDR_6, IP6_ADDR_7);
+
+ /* Configure IPv4 GRE device */
+ SYS(fail, "ip tunnel add gre_dev mode gre remote %s local %s ttl 255",
+ IP4_ADDR_1, IP4_ADDR_GRE);
+ SYS(fail, "ip link set gre_dev up");
+ SYS(fail, "ip a add %s dev gre_dev", IP4_ADDR_GRE);
+
+ /* Configure IPv6 GRE device */
+ SYS(fail, "ip tunnel add gre6_dev mode ip6gre remote %s local %s ttl 255",
+ IP6_ADDR_1, IP6_ADDR_GRE);
+ SYS(fail, "ip link set gre6_dev up");
+ SYS(fail, "ip a add %s dev gre6_dev", IP6_ADDR_GRE);
+
+ close_netns(nstoken);
+ return 0;
+fail:
+ close_netns(nstoken);
+ return -1;
+}
+
+static int setup_network(char *ns1, char *ns2, char *ns3, const char *vrf)
+{
+ if (!ns1 || !ns2 || !ns3 || !vrf)
+ goto fail;
+
+ SYS(fail, "ip -n %s link add veth1 type veth peer name veth2 netns %s", ns1, ns2);
+ SYS(fail, "ip -n %s link add veth3 type veth peer name veth4 netns %s", ns2, ns3);
+ SYS(fail, "ip -n %s link add veth5 type veth peer name veth6 netns %s", ns1, ns2);
+ SYS(fail, "ip -n %s link add veth7 type veth peer name veth8 netns %s", ns2, ns3);
+
+ if (vrf[0]) {
+ if (!ASSERT_OK(configure_vrf(ns1, ns2), "configure vrf"))
+ goto fail;
+ }
+ if (!ASSERT_OK(set_top_addr(ns1, ns2, ns3), "set top addresses"))
+ goto fail;
+
+ if (!ASSERT_OK(set_bottom_addr(ns1, ns2, ns3), "set bottom addresses"))
+ goto fail;
+
+ if (!ASSERT_OK(configure_ns1(ns1, vrf), "configure ns1 routes"))
+ goto fail;
+
+ if (!ASSERT_OK(configure_ns2(ns2, vrf), "configure ns2 routes"))
+ goto fail;
+
+ if (!ASSERT_OK(configure_ns3(ns3), "configure ns3 routes"))
+ goto fail;
+
+ /* Link bottom route to the GRE tunnels */
+ SYS(fail, "ip -n %s route add %s/32 dev veth5 via %s %s",
+ ns1, IP4_ADDR_GRE, IP4_ADDR_6, vrf);
+ SYS(fail, "ip -n %s route add %s/32 dev veth7 via %s %s",
+ ns2, IP4_ADDR_GRE, IP4_ADDR_8, vrf);
+ SYS(fail, "ip -n %s -6 route add %s/128 dev veth5 via %s %s",
+ ns1, IP6_ADDR_GRE, IP6_ADDR_6, vrf);
+ SYS(fail, "ip -n %s -6 route add %s/128 dev veth7 via %s %s",
+ ns2, IP6_ADDR_GRE, IP6_ADDR_8, vrf);
+
+ return 0;
+fail:
+ return -1;
+}
+
+static int remove_routes_to_gredev(const char *ns1, const char *ns2, const char *vrf)
+{
+ SYS(fail, "ip -n %s route del %s dev veth5 %s", ns1, IP4_ADDR_GRE, vrf);
+ SYS(fail, "ip -n %s route del %s dev veth7 %s", ns2, IP4_ADDR_GRE, vrf);
+ SYS(fail, "ip -n %s -6 route del %s/128 dev veth5 %s", ns1, IP6_ADDR_GRE, vrf);
+ SYS(fail, "ip -n %s -6 route del %s/128 dev veth7 %s", ns2, IP6_ADDR_GRE, vrf);
+
+ return 0;
+fail:
+ return -1;
+}
+
+static int add_unreachable_routes_to_gredev(const char *ns1, const char *ns2, const char *vrf)
+{
+ SYS(fail, "ip -n %s route add unreachable %s/32 %s", ns1, IP4_ADDR_GRE, vrf);
+ SYS(fail, "ip -n %s route add unreachable %s/32 %s", ns2, IP4_ADDR_GRE, vrf);
+ SYS(fail, "ip -n %s -6 route add unreachable %s/128 %s", ns1, IP6_ADDR_GRE, vrf);
+ SYS(fail, "ip -n %s -6 route add unreachable %s/128 %s", ns2, IP6_ADDR_GRE, vrf);
+
+ return 0;
+fail:
+ return -1;
+}
+
+#define GSO_SIZE 5000
+#define GSO_TCP_PORT 9000
+/* This tests the fix from commit ea0371f78799 ("net: fix GSO in bpf_lwt_push_ip_encap") */
+static int test_gso_fix(const char *ns1, const char *ns3, int family)
+{
+ const char *ip_addr = family == AF_INET ? IP4_ADDR_DST : IP6_ADDR_DST;
+ char gso_packet[GSO_SIZE] = {};
+ struct nstoken *nstoken = NULL;
+ int sfd, cfd, afd;
+ ssize_t bytes;
+ int ret = -1;
+
+ if (!ns1 || !ns3)
+ return ret;
+
+ nstoken = open_netns(ns3);
+ if (!ASSERT_OK_PTR(nstoken, "open ns3"))
+ return ret;
+
+ sfd = start_server_str(family, SOCK_STREAM, ip_addr, GSO_TCP_PORT, NULL);
+ if (!ASSERT_OK_FD(sfd, "start server"))
+ goto close_netns;
+
+ close_netns(nstoken);
+
+ nstoken = open_netns(ns1);
+ if (!ASSERT_OK_PTR(nstoken, "open ns1"))
+ goto close_server;
+
+ cfd = connect_to_addr_str(family, SOCK_STREAM, ip_addr, GSO_TCP_PORT, NULL);
+ if (!ASSERT_OK_FD(cfd, "connect to server"))
+ goto close_server;
+
+ close_netns(nstoken);
+ nstoken = NULL;
+
+ afd = accept(sfd, NULL, NULL);
+ if (!ASSERT_OK_FD(afd, "accept"))
+ goto close_client;
+
+ /* Send a packet larger than MTU */
+ bytes = send(cfd, gso_packet, GSO_SIZE, 0);
+ if (!ASSERT_EQ(bytes, GSO_SIZE, "send packet"))
+ goto close_accept;
+
+ /* Verify we received all expected bytes */
+ bytes = read(afd, gso_packet, GSO_SIZE);
+ if (!ASSERT_EQ(bytes, GSO_SIZE, "receive packet"))
+ goto close_accept;
+
+ ret = 0;
+
+close_accept:
+ close(afd);
+close_client:
+ close(cfd);
+close_server:
+ close(sfd);
+close_netns:
+ close_netns(nstoken);
+
+ return ret;
+}
+
+static int check_ping_ok(const char *ns1)
+{
+ SYS(fail, "ip netns exec %s ping -c 1 -W1 -I veth1 %s > /dev/null", ns1, IP4_ADDR_DST);
+ SYS(fail, "ip netns exec %s ping6 -c 1 -W1 -I veth1 %s > /dev/null", ns1, IP6_ADDR_DST);
+ return 0;
+fail:
+ return -1;
+}
+
+static int check_ping_fails(const char *ns1)
+{
+ int ret;
+
+ ret = SYS_NOFAIL("ip netns exec %s ping -c 1 -W1 -I veth1 %s", ns1, IP4_ADDR_DST);
+ if (!ret)
+ return -1;
+
+ ret = SYS_NOFAIL("ip netns exec %s ping6 -c 1 -W1 -I veth1 %s", ns1, IP6_ADDR_DST);
+ if (!ret)
+ return -1;
+
+ return 0;
+}
+
+#define EGRESS true
+#define INGRESS false
+#define IPV4_ENCAP true
+#define IPV6_ENCAP false
+static void lwt_ip_encap(bool ipv4_encap, bool egress, const char *vrf)
+{
+ char ns1[NETNS_NAME_SIZE] = NETNS_BASE "-1-";
+ char ns2[NETNS_NAME_SIZE] = NETNS_BASE "-2-";
+ char ns3[NETNS_NAME_SIZE] = NETNS_BASE "-3-";
+ char *sec = ipv4_encap ? "encap_gre" : "encap_gre6";
+
+ if (!vrf)
+ return;
+
+ if (!ASSERT_OK(create_ns(ns1, NETNS_NAME_SIZE), "create ns1"))
+ goto out;
+ if (!ASSERT_OK(create_ns(ns2, NETNS_NAME_SIZE), "create ns2"))
+ goto out;
+ if (!ASSERT_OK(create_ns(ns3, NETNS_NAME_SIZE), "create ns3"))
+ goto out;
+
+ if (!ASSERT_OK(setup_network(ns1, ns2, ns3, vrf), "setup network"))
+ goto out;
+
+ /* By default, pings work */
+ if (!ASSERT_OK(check_ping_ok(ns1), "ping OK"))
+ goto out;
+
+ /* Remove NS2->DST routes, ping fails */
+ SYS(out, "ip -n %s route del %s/32 dev veth3 %s", ns2, IP4_ADDR_DST, vrf);
+ SYS(out, "ip -n %s -6 route del %s/128 dev veth3 %s", ns2, IP6_ADDR_DST, vrf);
+ if (!ASSERT_OK(check_ping_fails(ns1), "ping expected fail"))
+ goto out;
+
+ /* Install replacement routes (LWT/eBPF), pings succeed */
+ if (egress) {
+ SYS(out, "ip -n %s route add %s encap bpf xmit obj %s sec %s dev veth1 %s",
+ ns1, IP4_ADDR_DST, BPF_FILE, sec, vrf);
+ SYS(out, "ip -n %s -6 route add %s encap bpf xmit obj %s sec %s dev veth1 %s",
+ ns1, IP6_ADDR_DST, BPF_FILE, sec, vrf);
+ } else {
+ SYS(out, "ip -n %s route add %s encap bpf in obj %s sec %s dev veth2 %s",
+ ns2, IP4_ADDR_DST, BPF_FILE, sec, vrf);
+ SYS(out, "ip -n %s -6 route add %s encap bpf in obj %s sec %s dev veth2 %s",
+ ns2, IP6_ADDR_DST, BPF_FILE, sec, vrf);
+ }
+
+ if (!ASSERT_OK(check_ping_ok(ns1), "ping OK"))
+ goto out;
+
+ /* Skip GSO tests with VRF: VRF routing needs properly assigned
+ * source IP/device, which is easy to do with ping but hard with TCP.
+ */
+ if (egress && !vrf[0]) {
+ if (!ASSERT_OK(test_gso_fix(ns1, ns3, AF_INET), "test GSO"))
+ goto out;
+ }
+
+ /* Negative test: remove routes to GRE devices: ping fails */
+ if (!ASSERT_OK(remove_routes_to_gredev(ns1, ns2, vrf), "remove routes to gredev"))
+ goto out;
+ if (!ASSERT_OK(check_ping_fails(ns1), "ping expected fail"))
+ goto out;
+
+ /* Another negative test */
+ if (!ASSERT_OK(add_unreachable_routes_to_gredev(ns1, ns2, vrf),
+ "add unreachable routes"))
+ goto out;
+ ASSERT_OK(check_ping_fails(ns1), "ping expected fail");
+
+out:
+ SYS_NOFAIL("ip netns del %s", ns1);
+ SYS_NOFAIL("ip netns del %s", ns2);
+ SYS_NOFAIL("ip netns del %s", ns3);
+}
+
+void test_lwt_ip_encap_vrf_ipv6(void)
+{
+ if (test__start_subtest("egress"))
+ lwt_ip_encap(IPV6_ENCAP, EGRESS, "vrf red");
+
+ if (test__start_subtest("ingress"))
+ lwt_ip_encap(IPV6_ENCAP, INGRESS, "vrf red");
+}
+
+void test_lwt_ip_encap_vrf_ipv4(void)
+{
+ if (test__start_subtest("egress"))
+ lwt_ip_encap(IPV4_ENCAP, EGRESS, "vrf red");
+
+ if (test__start_subtest("ingress"))
+ lwt_ip_encap(IPV4_ENCAP, INGRESS, "vrf red");
+}
+
+void test_lwt_ip_encap_ipv6(void)
+{
+ if (test__start_subtest("egress"))
+ lwt_ip_encap(IPV6_ENCAP, EGRESS, "");
+
+ if (test__start_subtest("ingress"))
+ lwt_ip_encap(IPV6_ENCAP, INGRESS, "");
+}
+
+void test_lwt_ip_encap_ipv4(void)
+{
+ if (test__start_subtest("egress"))
+ lwt_ip_encap(IPV4_ENCAP, EGRESS, "");
+
+ if (test__start_subtest("ingress"))
+ lwt_ip_encap(IPV4_ENCAP, INGRESS, "");
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_seg6local.c b/tools/testing/selftests/bpf/prog_tests/lwt_seg6local.c
new file mode 100644
index 000000000000..3bc730b7c7fa
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/lwt_seg6local.c
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/* Connects 6 network namespaces through veths.
+ * Each NS may have different IPv6 global scope addresses :
+ *
+ * NS1 NS2 NS3 NS4 NS5 NS6
+ * lo veth1 <-> veth2 veth3 <-> veth4 veth5 <-> veth6 lo veth7 <-> veth8 veth9 <-> veth10 lo
+ * fb00 ::1 ::12 ::21 ::34 ::43 ::56 ::65 ::78 ::87 ::910 ::109 ::6
+ * fd00 ::4
+ * fc42 ::1
+ *
+ * All IPv6 packets going to fb00::/16 through NS2 will be encapsulated in a
+ * IPv6 header with a Segment Routing Header, with segments :
+ * fd00::1 -> fd00::2 -> fd00::3 -> fd00::4
+ *
+ * 3 fd00::/16 IPv6 addresses are binded to seg6local End.BPF actions :
+ * - fd00::1 : add a TLV, change the flags and apply a End.X action to fc42::1
+ * - fd00::2 : remove the TLV, change the flags, add a tag
+ * - fd00::3 : apply an End.T action to fd00::4, through routing table 117
+ *
+ * fd00::4 is a simple Segment Routing node decapsulating the inner IPv6 packet.
+ * Each End.BPF action will validate the operations applied on the SRH by the
+ * previous BPF program in the chain, otherwise the packet is dropped.
+ *
+ * An UDP datagram is sent from fb00::1 to fb00::6. The test succeeds if this
+ * datagram can be read on NS6 when binding to fb00::6.
+ */
+
+#include "network_helpers.h"
+#include "test_progs.h"
+
+#define NETNS_BASE "lwt-seg6local-"
+#define BPF_FILE "test_lwt_seg6local.bpf.o"
+
+static void cleanup(void)
+{
+ int ns;
+
+ for (ns = 1; ns < 7; ns++)
+ SYS_NOFAIL("ip netns del %s%d", NETNS_BASE, ns);
+}
+
+static int setup(void)
+{
+ int ns;
+
+ for (ns = 1; ns < 7; ns++)
+ SYS(fail, "ip netns add %s%d", NETNS_BASE, ns);
+
+ SYS(fail, "ip -n %s6 link set dev lo up", NETNS_BASE);
+
+ for (ns = 1; ns < 6; ns++) {
+ int local_id = ns * 2 - 1;
+ int peer_id = ns * 2;
+ int next_ns = ns + 1;
+
+ SYS(fail, "ip -n %s%d link add veth%d type veth peer name veth%d netns %s%d",
+ NETNS_BASE, ns, local_id, peer_id, NETNS_BASE, next_ns);
+
+ SYS(fail, "ip -n %s%d link set dev veth%d up", NETNS_BASE, ns, local_id);
+ SYS(fail, "ip -n %s%d link set dev veth%d up", NETNS_BASE, next_ns, peer_id);
+
+ /* All link scope addresses to veths */
+ SYS(fail, "ip -n %s%d -6 addr add fb00::%d%d/16 dev veth%d scope link",
+ NETNS_BASE, ns, local_id, peer_id, local_id);
+ SYS(fail, "ip -n %s%d -6 addr add fb00::%d%d/16 dev veth%d scope link",
+ NETNS_BASE, next_ns, peer_id, local_id, peer_id);
+ }
+
+
+ SYS(fail, "ip -n %s5 -6 route add fb00::109 table 117 dev veth9 scope link", NETNS_BASE);
+
+ SYS(fail, "ip -n %s1 -6 addr add fb00::1/16 dev lo", NETNS_BASE);
+ SYS(fail, "ip -n %s1 -6 route add fb00::6 dev veth1 via fb00::21", NETNS_BASE);
+
+ SYS(fail, "ip -n %s2 -6 route add fb00::6 encap bpf in obj %s sec encap_srh dev veth2",
+ NETNS_BASE, BPF_FILE);
+ SYS(fail, "ip -n %s2 -6 route add fd00::1 dev veth3 via fb00::43 scope link", NETNS_BASE);
+
+ SYS(fail, "ip -n %s3 -6 route add fc42::1 dev veth5 via fb00::65", NETNS_BASE);
+ SYS(fail,
+ "ip -n %s3 -6 route add fd00::1 encap seg6local action End.BPF endpoint obj %s sec add_egr_x dev veth4",
+ NETNS_BASE, BPF_FILE);
+
+ SYS(fail,
+ "ip -n %s4 -6 route add fd00::2 encap seg6local action End.BPF endpoint obj %s sec pop_egr dev veth6",
+ NETNS_BASE, BPF_FILE);
+ SYS(fail, "ip -n %s4 -6 addr add fc42::1 dev lo", NETNS_BASE);
+ SYS(fail, "ip -n %s4 -6 route add fd00::3 dev veth7 via fb00::87", NETNS_BASE);
+
+ SYS(fail, "ip -n %s5 -6 route add fd00::4 table 117 dev veth9 via fb00::109", NETNS_BASE);
+ SYS(fail,
+ "ip -n %s5 -6 route add fd00::3 encap seg6local action End.BPF endpoint obj %s sec inspect_t dev veth8",
+ NETNS_BASE, BPF_FILE);
+
+ SYS(fail, "ip -n %s6 -6 addr add fb00::6/16 dev lo", NETNS_BASE);
+ SYS(fail, "ip -n %s6 -6 addr add fd00::4/16 dev lo", NETNS_BASE);
+
+ for (ns = 1; ns < 6; ns++)
+ SYS(fail, "ip netns exec %s%d sysctl -wq net.ipv6.conf.all.forwarding=1",
+ NETNS_BASE, ns);
+
+ SYS(fail, "ip netns exec %s6 sysctl -wq net.ipv6.conf.all.seg6_enabled=1", NETNS_BASE);
+ SYS(fail, "ip netns exec %s6 sysctl -wq net.ipv6.conf.lo.seg6_enabled=1", NETNS_BASE);
+ SYS(fail, "ip netns exec %s6 sysctl -wq net.ipv6.conf.veth10.seg6_enabled=1", NETNS_BASE);
+
+ return 0;
+fail:
+ return -1;
+}
+
+#define SERVER_PORT 7330
+#define CLIENT_PORT 2121
+void test_lwt_seg6local(void)
+{
+ struct sockaddr_in6 server_addr = {};
+ const char *ns1 = NETNS_BASE "1";
+ const char *ns6 = NETNS_BASE "6";
+ struct nstoken *nstoken = NULL;
+ const char *foobar = "foobar";
+ ssize_t bytes;
+ int sfd, cfd;
+ char buf[7];
+
+ if (!ASSERT_OK(setup(), "setup"))
+ goto out;
+
+ nstoken = open_netns(ns6);
+ if (!ASSERT_OK_PTR(nstoken, "open ns6"))
+ goto out;
+
+ sfd = start_server_str(AF_INET6, SOCK_DGRAM, "fb00::6", SERVER_PORT, NULL);
+ if (!ASSERT_OK_FD(sfd, "start server"))
+ goto close_netns;
+
+ close_netns(nstoken);
+
+ nstoken = open_netns(ns1);
+ if (!ASSERT_OK_PTR(nstoken, "open ns1"))
+ goto close_server;
+
+ cfd = start_server_str(AF_INET6, SOCK_DGRAM, "fb00::1", CLIENT_PORT, NULL);
+ if (!ASSERT_OK_FD(cfd, "start client"))
+ goto close_server;
+
+ close_netns(nstoken);
+ nstoken = NULL;
+
+ /* Send a packet larger than MTU */
+ server_addr.sin6_family = AF_INET6;
+ server_addr.sin6_port = htons(SERVER_PORT);
+ if (!ASSERT_EQ(inet_pton(AF_INET6, "fb00::6", &server_addr.sin6_addr), 1,
+ "build target addr"))
+ goto close_client;
+
+ bytes = sendto(cfd, foobar, sizeof(foobar), 0,
+ (struct sockaddr *)&server_addr, sizeof(server_addr));
+ if (!ASSERT_EQ(bytes, sizeof(foobar), "send packet"))
+ goto close_client;
+
+ /* Verify we received all expected bytes */
+ bytes = read(sfd, buf, sizeof(buf));
+ if (!ASSERT_EQ(bytes, sizeof(buf), "receive packet"))
+ goto close_client;
+ ASSERT_STREQ(buf, foobar, "check udp packet");
+
+close_client:
+ close(cfd);
+close_server:
+ close(sfd);
+close_netns:
+ close_netns(nstoken);
+
+out:
+ cleanup();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/net_timestamping.c b/tools/testing/selftests/bpf/prog_tests/net_timestamping.c
new file mode 100644
index 000000000000..dbfd87499b6b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/net_timestamping.c
@@ -0,0 +1,239 @@
+#include <linux/net_tstamp.h>
+#include <sys/time.h>
+#include <linux/errqueue.h>
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "net_timestamping.skel.h"
+
+#define CG_NAME "/net-timestamping-test"
+#define NSEC_PER_SEC 1000000000LL
+
+static const char addr4_str[] = "127.0.0.1";
+static const char addr6_str[] = "::1";
+static struct net_timestamping *skel;
+static const int cfg_payload_len = 30;
+static struct timespec usr_ts;
+static u64 delay_tolerance_nsec = 10000000000; /* 10 seconds */
+int SK_TS_SCHED;
+int SK_TS_TXSW;
+int SK_TS_ACK;
+
+static int64_t timespec_to_ns64(struct timespec *ts)
+{
+ return ts->tv_sec * NSEC_PER_SEC + ts->tv_nsec;
+}
+
+static void validate_key(int tskey, int tstype)
+{
+ static int expected_tskey = -1;
+
+ if (tstype == SCM_TSTAMP_SCHED)
+ expected_tskey = cfg_payload_len - 1;
+
+ ASSERT_EQ(expected_tskey, tskey, "tskey mismatch");
+
+ expected_tskey = tskey;
+}
+
+static void validate_timestamp(struct timespec *cur, struct timespec *prev)
+{
+ int64_t cur_ns, prev_ns;
+
+ cur_ns = timespec_to_ns64(cur);
+ prev_ns = timespec_to_ns64(prev);
+
+ ASSERT_LT(cur_ns - prev_ns, delay_tolerance_nsec, "latency");
+}
+
+static void test_socket_timestamp(struct scm_timestamping *tss, int tstype,
+ int tskey)
+{
+ static struct timespec prev_ts;
+
+ validate_key(tskey, tstype);
+
+ switch (tstype) {
+ case SCM_TSTAMP_SCHED:
+ validate_timestamp(&tss->ts[0], &usr_ts);
+ SK_TS_SCHED += 1;
+ break;
+ case SCM_TSTAMP_SND:
+ validate_timestamp(&tss->ts[0], &prev_ts);
+ SK_TS_TXSW += 1;
+ break;
+ case SCM_TSTAMP_ACK:
+ validate_timestamp(&tss->ts[0], &prev_ts);
+ SK_TS_ACK += 1;
+ break;
+ }
+
+ prev_ts = tss->ts[0];
+}
+
+static void test_recv_errmsg_cmsg(struct msghdr *msg)
+{
+ struct sock_extended_err *serr = NULL;
+ struct scm_timestamping *tss = NULL;
+ struct cmsghdr *cm;
+
+ for (cm = CMSG_FIRSTHDR(msg);
+ cm && cm->cmsg_len;
+ cm = CMSG_NXTHDR(msg, cm)) {
+ if (cm->cmsg_level == SOL_SOCKET &&
+ cm->cmsg_type == SCM_TIMESTAMPING) {
+ tss = (void *)CMSG_DATA(cm);
+ } else if ((cm->cmsg_level == SOL_IP &&
+ cm->cmsg_type == IP_RECVERR) ||
+ (cm->cmsg_level == SOL_IPV6 &&
+ cm->cmsg_type == IPV6_RECVERR) ||
+ (cm->cmsg_level == SOL_PACKET &&
+ cm->cmsg_type == PACKET_TX_TIMESTAMP)) {
+ serr = (void *)CMSG_DATA(cm);
+ ASSERT_EQ(serr->ee_origin, SO_EE_ORIGIN_TIMESTAMPING,
+ "cmsg type");
+ }
+
+ if (serr && tss)
+ test_socket_timestamp(tss, serr->ee_info,
+ serr->ee_data);
+ }
+}
+
+static bool socket_recv_errmsg(int fd)
+{
+ static char ctrl[1024 /* overprovision*/];
+ char data[cfg_payload_len];
+ static struct msghdr msg;
+ struct iovec entry;
+ int n = 0;
+
+ memset(&msg, 0, sizeof(msg));
+ memset(&entry, 0, sizeof(entry));
+ memset(ctrl, 0, sizeof(ctrl));
+
+ entry.iov_base = data;
+ entry.iov_len = cfg_payload_len;
+ msg.msg_iov = &entry;
+ msg.msg_iovlen = 1;
+ msg.msg_name = NULL;
+ msg.msg_namelen = 0;
+ msg.msg_control = ctrl;
+ msg.msg_controllen = sizeof(ctrl);
+
+ n = recvmsg(fd, &msg, MSG_ERRQUEUE);
+ if (n == -1)
+ ASSERT_EQ(errno, EAGAIN, "recvmsg MSG_ERRQUEUE");
+
+ if (n >= 0)
+ test_recv_errmsg_cmsg(&msg);
+
+ return n == -1;
+}
+
+static void test_socket_timestamping(int fd)
+{
+ while (!socket_recv_errmsg(fd));
+
+ ASSERT_EQ(SK_TS_SCHED, 1, "SCM_TSTAMP_SCHED");
+ ASSERT_EQ(SK_TS_TXSW, 1, "SCM_TSTAMP_SND");
+ ASSERT_EQ(SK_TS_ACK, 1, "SCM_TSTAMP_ACK");
+
+ SK_TS_SCHED = 0;
+ SK_TS_TXSW = 0;
+ SK_TS_ACK = 0;
+}
+
+static void test_tcp(int family, bool enable_socket_timestamping)
+{
+ struct net_timestamping__bss *bss;
+ char buf[cfg_payload_len];
+ int sfd = -1, cfd = -1;
+ unsigned int sock_opt;
+ struct netns_obj *ns;
+ int cg_fd;
+ int ret;
+
+ cg_fd = test__join_cgroup(CG_NAME);
+ if (!ASSERT_OK_FD(cg_fd, "join cgroup"))
+ return;
+
+ ns = netns_new("net_timestamping_ns", true);
+ if (!ASSERT_OK_PTR(ns, "create ns"))
+ goto out;
+
+ skel = net_timestamping__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open and load skel"))
+ goto out;
+
+ if (!ASSERT_OK(net_timestamping__attach(skel), "attach skel"))
+ goto out;
+
+ skel->links.skops_sockopt =
+ bpf_program__attach_cgroup(skel->progs.skops_sockopt, cg_fd);
+ if (!ASSERT_OK_PTR(skel->links.skops_sockopt, "attach cgroup"))
+ goto out;
+
+ bss = skel->bss;
+ memset(bss, 0, sizeof(*bss));
+
+ skel->bss->monitored_pid = getpid();
+
+ sfd = start_server(family, SOCK_STREAM,
+ family == AF_INET6 ? addr6_str : addr4_str, 0, 0);
+ if (!ASSERT_OK_FD(sfd, "start_server"))
+ goto out;
+
+ cfd = connect_to_fd(sfd, 0);
+ if (!ASSERT_OK_FD(cfd, "connect_to_fd_server"))
+ goto out;
+
+ if (enable_socket_timestamping) {
+ sock_opt = SOF_TIMESTAMPING_SOFTWARE |
+ SOF_TIMESTAMPING_OPT_ID |
+ SOF_TIMESTAMPING_TX_SCHED |
+ SOF_TIMESTAMPING_TX_SOFTWARE |
+ SOF_TIMESTAMPING_TX_ACK;
+ ret = setsockopt(cfd, SOL_SOCKET, SO_TIMESTAMPING,
+ (char *) &sock_opt, sizeof(sock_opt));
+ if (!ASSERT_OK(ret, "setsockopt SO_TIMESTAMPING"))
+ goto out;
+
+ ret = clock_gettime(CLOCK_REALTIME, &usr_ts);
+ if (!ASSERT_OK(ret, "get user time"))
+ goto out;
+ }
+
+ ret = write(cfd, buf, sizeof(buf));
+ if (!ASSERT_EQ(ret, sizeof(buf), "send to server"))
+ goto out;
+
+ if (enable_socket_timestamping)
+ test_socket_timestamping(cfd);
+
+ ASSERT_EQ(bss->nr_active, 1, "nr_active");
+ ASSERT_EQ(bss->nr_snd, 2, "nr_snd");
+ ASSERT_EQ(bss->nr_sched, 1, "nr_sched");
+ ASSERT_EQ(bss->nr_txsw, 1, "nr_txsw");
+ ASSERT_EQ(bss->nr_ack, 1, "nr_ack");
+
+out:
+ if (sfd >= 0)
+ close(sfd);
+ if (cfd >= 0)
+ close(cfd);
+ net_timestamping__destroy(skel);
+ netns_free(ns);
+ close(cg_fd);
+}
+
+void test_net_timestamping(void)
+{
+ if (test__start_subtest("INET4: bpf timestamping"))
+ test_tcp(AF_INET, false);
+ if (test__start_subtest("INET4: bpf and socket timestamping"))
+ test_tcp(AF_INET, true);
+ if (test__start_subtest("INET6: bpf timestamping"))
+ test_tcp(AF_INET6, false);
+ if (test__start_subtest("INET6: bpf and socket timestamping"))
+ test_tcp(AF_INET6, true);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/netns_cookie.c b/tools/testing/selftests/bpf/prog_tests/netns_cookie.c
index ac3c3c097c0e..e00cd34586dd 100644
--- a/tools/testing/selftests/bpf/prog_tests/netns_cookie.c
+++ b/tools/testing/selftests/bpf/prog_tests/netns_cookie.c
@@ -33,20 +33,25 @@ void test_netns_cookie(void)
skel->links.get_netns_cookie_sockops = bpf_program__attach_cgroup(
skel->progs.get_netns_cookie_sockops, cgroup_fd);
- if (!ASSERT_OK_PTR(skel->links.get_netns_cookie_sockops, "prog_attach"))
+ if (!ASSERT_OK_PTR(skel->links.get_netns_cookie_sockops, "prog_attach_sockops"))
goto done;
verdict = bpf_program__fd(skel->progs.get_netns_cookie_sk_msg);
map = bpf_map__fd(skel->maps.sock_map);
err = bpf_prog_attach(verdict, map, BPF_SK_MSG_VERDICT, 0);
- if (!ASSERT_OK(err, "prog_attach"))
+ if (!ASSERT_OK(err, "prog_attach_sk_msg"))
goto done;
tc_fd = bpf_program__fd(skel->progs.get_netns_cookie_tcx);
err = bpf_prog_attach_opts(tc_fd, loopback, BPF_TCX_INGRESS, &opta);
- if (!ASSERT_OK(err, "prog_attach"))
+ if (!ASSERT_OK(err, "prog_attach_tcx"))
goto done;
+ skel->links.get_netns_cookie_cgroup_skb = bpf_program__attach_cgroup(
+ skel->progs.get_netns_cookie_cgroup_skb, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.get_netns_cookie_cgroup_skb, "prog_attach_cgroup_skb"))
+ goto cleanup_tc;
+
server_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
if (CHECK(server_fd < 0, "start_server", "errno %d\n", errno))
goto cleanup_tc;
@@ -69,16 +74,18 @@ void test_netns_cookie(void)
if (!ASSERT_OK(err, "getsockopt"))
goto cleanup_tc;
- ASSERT_EQ(val, cookie_expected_value, "cookie_value");
+ ASSERT_EQ(val, cookie_expected_value, "cookie_value_sockops");
err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.sk_msg_netns_cookies),
&client_fd, &val);
if (!ASSERT_OK(err, "map_lookup(sk_msg_netns_cookies)"))
goto cleanup_tc;
- ASSERT_EQ(val, cookie_expected_value, "cookie_value");
- ASSERT_EQ(skel->bss->tcx_init_netns_cookie, cookie_expected_value, "cookie_value");
- ASSERT_EQ(skel->bss->tcx_netns_cookie, cookie_expected_value, "cookie_value");
+ ASSERT_EQ(val, cookie_expected_value, "cookie_value_sk_msg");
+ ASSERT_EQ(skel->bss->tcx_init_netns_cookie, cookie_expected_value, "cookie_value_init_tcx");
+ ASSERT_EQ(skel->bss->tcx_netns_cookie, cookie_expected_value, "cookie_value_tcx");
+ ASSERT_EQ(skel->bss->cgroup_skb_init_netns_cookie, cookie_expected_value, "cookie_value_init_cgroup_skb");
+ ASSERT_EQ(skel->bss->cgroup_skb_netns_cookie, cookie_expected_value, "cookie_value_cgroup_skb");
cleanup_tc:
err = bpf_prog_detach_opts(tc_fd, loopback, BPF_TCX_INGRESS, &optd);
diff --git a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
index 761ce24bce38..99c953f2be21 100644
--- a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
+++ b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
@@ -200,41 +200,28 @@ static void test_ns_current_pid_tgid_new_ns(int (*fn)(void *), void *arg)
return;
}
-static void test_in_netns(int (*fn)(void *), void *arg)
-{
- struct nstoken *nstoken = NULL;
-
- SYS(cleanup, "ip netns add ns_current_pid_tgid");
- SYS(cleanup, "ip -net ns_current_pid_tgid link set dev lo up");
-
- nstoken = open_netns("ns_current_pid_tgid");
- if (!ASSERT_OK_PTR(nstoken, "open_netns"))
- goto cleanup;
-
- test_ns_current_pid_tgid_new_ns(fn, arg);
-
-cleanup:
- if (nstoken)
- close_netns(nstoken);
- SYS_NOFAIL("ip netns del ns_current_pid_tgid");
-}
-
/* TODO: use a different tracepoint */
-void serial_test_ns_current_pid_tgid(void)
+void serial_test_current_pid_tgid(void)
{
if (test__start_subtest("root_ns_tp"))
test_current_pid_tgid_tp(NULL);
if (test__start_subtest("new_ns_tp"))
test_ns_current_pid_tgid_new_ns(test_current_pid_tgid_tp, NULL);
- if (test__start_subtest("new_ns_cgrp")) {
- int cgroup_fd = -1;
-
- cgroup_fd = test__join_cgroup("/sock_addr");
- if (ASSERT_GE(cgroup_fd, 0, "join_cgroup")) {
- test_in_netns(test_current_pid_tgid_cgrp, &cgroup_fd);
- close(cgroup_fd);
- }
+}
+
+void test_ns_current_pid_tgid_cgrp(void)
+{
+ int cgroup_fd = test__join_cgroup("/sock_addr");
+
+ if (ASSERT_OK_FD(cgroup_fd, "join_cgroup")) {
+ test_ns_current_pid_tgid_new_ns(test_current_pid_tgid_cgrp, &cgroup_fd);
+ close(cgroup_fd);
}
- if (test__start_subtest("new_ns_sk_msg"))
- test_in_netns(test_current_pid_tgid_sk_msg, NULL);
}
+
+void test_ns_current_pid_tgid_sk_msg(void)
+{
+ test_ns_current_pid_tgid_new_ns(test_current_pid_tgid_sk_msg, NULL);
+}
+
+
diff --git a/tools/testing/selftests/bpf/prog_tests/prepare.c b/tools/testing/selftests/bpf/prog_tests/prepare.c
new file mode 100644
index 000000000000..fb5cdad97116
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/prepare.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta */
+
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "prepare.skel.h"
+
+static bool check_prepared(struct bpf_object *obj)
+{
+ bool is_prepared = true;
+ const struct bpf_map *map;
+
+ bpf_object__for_each_map(map, obj) {
+ if (bpf_map__fd(map) < 0)
+ is_prepared = false;
+ }
+
+ return is_prepared;
+}
+
+static void test_prepare_no_load(void)
+{
+ struct prepare *skel;
+ int err;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ );
+
+ skel = prepare__open();
+ if (!ASSERT_OK_PTR(skel, "prepare__open"))
+ return;
+
+ if (!ASSERT_FALSE(check_prepared(skel->obj), "not check_prepared"))
+ goto cleanup;
+
+ err = bpf_object__prepare(skel->obj);
+
+ if (!ASSERT_TRUE(check_prepared(skel->obj), "check_prepared"))
+ goto cleanup;
+
+ if (!ASSERT_OK(err, "bpf_object__prepare"))
+ goto cleanup;
+
+cleanup:
+ prepare__destroy(skel);
+}
+
+static void test_prepare_load(void)
+{
+ struct prepare *skel;
+ int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ );
+
+ skel = prepare__open();
+ if (!ASSERT_OK_PTR(skel, "prepare__open"))
+ return;
+
+ if (!ASSERT_FALSE(check_prepared(skel->obj), "not check_prepared"))
+ goto cleanup;
+
+ err = bpf_object__prepare(skel->obj);
+ if (!ASSERT_OK(err, "bpf_object__prepare"))
+ goto cleanup;
+
+ err = prepare__load(skel);
+ if (!ASSERT_OK(err, "prepare__load"))
+ goto cleanup;
+
+ if (!ASSERT_TRUE(check_prepared(skel->obj), "check_prepared"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(skel->progs.program);
+ if (!ASSERT_GE(prog_fd, 0, "prog_fd"))
+ goto cleanup;
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ goto cleanup;
+
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ goto cleanup;
+
+ ASSERT_EQ(skel->bss->err, 0, "err");
+
+cleanup:
+ prepare__destroy(skel);
+}
+
+void test_prepare(void)
+{
+ if (test__start_subtest("prepare_load"))
+ test_prepare_load();
+ if (test__start_subtest("prepare_no_load"))
+ test_prepare_no_load();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/pro_epilogue.c b/tools/testing/selftests/bpf/prog_tests/pro_epilogue.c
index 509883e6823a..5d3c00a08a88 100644
--- a/tools/testing/selftests/bpf/prog_tests/pro_epilogue.c
+++ b/tools/testing/selftests/bpf/prog_tests/pro_epilogue.c
@@ -6,6 +6,7 @@
#include "epilogue_tailcall.skel.h"
#include "pro_epilogue_goto_start.skel.h"
#include "epilogue_exit.skel.h"
+#include "pro_epilogue_with_kfunc.skel.h"
struct st_ops_args {
__u64 a;
@@ -55,6 +56,7 @@ void test_pro_epilogue(void)
RUN_TESTS(pro_epilogue);
RUN_TESTS(pro_epilogue_goto_start);
RUN_TESTS(epilogue_exit);
+ RUN_TESTS(pro_epilogue_with_kfunc);
if (test__start_subtest("tailcall"))
test_tailcall();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
index ebe0c12b5536..c9f855e5da24 100644
--- a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
@@ -81,6 +81,9 @@ static const char * const inproper_region_tests[] = {
"nested_rcu_region",
"rcu_read_lock_global_subprog_lock",
"rcu_read_lock_global_subprog_unlock",
+ "rcu_read_lock_sleepable_helper_global_subprog",
+ "rcu_read_lock_sleepable_kfunc_global_subprog",
+ "rcu_read_lock_sleepable_global_subprog_indirect",
};
static void test_inproper_region(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
index c7b9ba8b1d06..a8d1eaa67020 100644
--- a/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
+++ b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
@@ -24,6 +24,7 @@ struct read_ret_desc {
{ .name = "copy_from_user", .ret = -EFAULT },
{ .name = "copy_from_user_task", .ret = -EFAULT },
{ .name = "copy_from_user_str", .ret = -EFAULT },
+ { .name = "copy_from_user_task_str", .ret = -EFAULT },
};
void test_read_vsyscall(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c b/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c
new file mode 100644
index 000000000000..115287ba441b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024-2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <network_helpers.h>
+#include <sys/sysinfo.h>
+
+#include "res_spin_lock.skel.h"
+#include "res_spin_lock_fail.skel.h"
+
+void test_res_spin_lock_failure(void)
+{
+ RUN_TESTS(res_spin_lock_fail);
+}
+
+static volatile int skip;
+
+static void *spin_lock_thread(void *arg)
+{
+ int err, prog_fd = *(u32 *) arg;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 10000,
+ );
+
+ while (!READ_ONCE(skip)) {
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_OK(topts.retval, "test_run retval");
+ }
+ pthread_exit(arg);
+}
+
+void test_res_spin_lock_success(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct res_spin_lock *skel;
+ pthread_t thread_id[16];
+ int prog_fd, i, err;
+ void *ret;
+
+ if (get_nprocs() < 2) {
+ test__skip();
+ return;
+ }
+
+ skel = res_spin_lock__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "res_spin_lock__open_and_load"))
+ return;
+ /* AA deadlock */
+ prog_fd = bpf_program__fd(skel->progs.res_spin_lock_test);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "error");
+ ASSERT_OK(topts.retval, "retval");
+
+ prog_fd = bpf_program__fd(skel->progs.res_spin_lock_test_held_lock_max);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "error");
+ ASSERT_OK(topts.retval, "retval");
+
+ /* Multi-threaded ABBA deadlock. */
+
+ prog_fd = bpf_program__fd(skel->progs.res_spin_lock_test_AB);
+ for (i = 0; i < 16; i++) {
+ int err;
+
+ err = pthread_create(&thread_id[i], NULL, &spin_lock_thread, &prog_fd);
+ if (!ASSERT_OK(err, "pthread_create"))
+ goto end;
+ }
+
+ topts.retval = 0;
+ topts.repeat = 1000;
+ int fd = bpf_program__fd(skel->progs.res_spin_lock_test_BA);
+ while (!topts.retval && !err && !READ_ONCE(skel->bss->err)) {
+ err = bpf_prog_test_run_opts(fd, &topts);
+ }
+
+ WRITE_ONCE(skip, true);
+
+ for (i = 0; i < 16; i++) {
+ if (!ASSERT_OK(pthread_join(thread_id[i], &ret), "pthread_join"))
+ goto end;
+ if (!ASSERT_EQ(ret, &prog_fd, "ret == prog_fd"))
+ goto end;
+ }
+
+ ASSERT_EQ(READ_ONCE(skel->bss->err), -EDEADLK, "timeout err");
+ ASSERT_OK(err, "err");
+ ASSERT_EQ(topts.retval, -EDEADLK, "timeout");
+end:
+ res_spin_lock__destroy(skel);
+ return;
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c b/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c
index e12255121c15..e4dac529d424 100644
--- a/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c
+++ b/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c
@@ -202,7 +202,7 @@ err_out:
void test_setget_sockopt(void)
{
cg_fd = test__join_cgroup(CG_NAME);
- if (cg_fd < 0)
+ if (!ASSERT_OK_FD(cg_fd, "join cgroup"))
return;
if (create_netns())
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index 884ad87783d5..1e3e4392dcca 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -111,31 +111,35 @@ out:
static void test_sockmap_vsock_delete_on_close(void)
{
- int err, c, p, map;
- const int zero = 0;
-
- err = create_pair(AF_VSOCK, SOCK_STREAM, &c, &p);
- if (!ASSERT_OK(err, "create_pair(AF_VSOCK)"))
- return;
+ int map, c, p, err, zero = 0;
map = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(int),
sizeof(int), 1, NULL);
- if (!ASSERT_GE(map, 0, "bpf_map_create")) {
- close(c);
- goto out;
- }
+ if (!ASSERT_OK_FD(map, "bpf_map_create"))
+ return;
- err = bpf_map_update_elem(map, &zero, &c, BPF_NOEXIST);
- close(c);
- if (!ASSERT_OK(err, "bpf_map_update"))
- goto out;
+ err = create_pair(AF_VSOCK, SOCK_STREAM, &c, &p);
+ if (!ASSERT_OK(err, "create_pair"))
+ goto close_map;
+
+ if (xbpf_map_update_elem(map, &zero, &c, BPF_NOEXIST))
+ goto close_socks;
+
+ xclose(c);
+ xclose(p);
+
+ err = create_pair(AF_VSOCK, SOCK_STREAM, &c, &p);
+ if (!ASSERT_OK(err, "create_pair"))
+ goto close_map;
- err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST);
+ err = bpf_map_update_elem(map, &zero, &c, BPF_NOEXIST);
ASSERT_OK(err, "after close(), bpf_map_update");
-out:
- close(p);
- close(map);
+close_socks:
+ xclose(c);
+ xclose(p);
+close_map:
+ xclose(map);
}
static void test_skmsg_helpers(enum bpf_map_type map_type)
@@ -522,8 +526,8 @@ static void test_sockmap_skb_verdict_shutdown(void)
if (!ASSERT_EQ(err, 1, "epoll_wait(fd)"))
goto out_close;
- n = recv(c1, &b, 1, SOCK_NONBLOCK);
- ASSERT_EQ(n, 0, "recv_timeout(fin)");
+ n = recv(c1, &b, 1, MSG_DONTWAIT);
+ ASSERT_EQ(n, 0, "recv(fin)");
out_close:
close(c1);
close(p1);
@@ -531,57 +535,6 @@ out:
test_sockmap_pass_prog__destroy(skel);
}
-static void test_sockmap_stream_pass(void)
-{
- int zero = 0, sent, recvd;
- int verdict, parser;
- int err, map;
- int c = -1, p = -1;
- struct test_sockmap_pass_prog *pass = NULL;
- char snd[256] = "0123456789";
- char rcv[256] = "0";
-
- pass = test_sockmap_pass_prog__open_and_load();
- verdict = bpf_program__fd(pass->progs.prog_skb_verdict);
- parser = bpf_program__fd(pass->progs.prog_skb_parser);
- map = bpf_map__fd(pass->maps.sock_map_rx);
-
- err = bpf_prog_attach(parser, map, BPF_SK_SKB_STREAM_PARSER, 0);
- if (!ASSERT_OK(err, "bpf_prog_attach stream parser"))
- goto out;
-
- err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
- if (!ASSERT_OK(err, "bpf_prog_attach stream verdict"))
- goto out;
-
- err = create_pair(AF_INET, SOCK_STREAM, &c, &p);
- if (err)
- goto out;
-
- /* sk_data_ready of 'p' will be replaced by strparser handler */
- err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST);
- if (!ASSERT_OK(err, "bpf_map_update_elem(p)"))
- goto out_close;
-
- /*
- * as 'prog_skb_parser' return the original skb len and
- * 'prog_skb_verdict' return SK_PASS, the kernel will just
- * pass it through to original socket 'p'
- */
- sent = xsend(c, snd, sizeof(snd), 0);
- ASSERT_EQ(sent, sizeof(snd), "xsend(c)");
-
- recvd = recv_timeout(p, rcv, sizeof(rcv), SOCK_NONBLOCK,
- IO_TIMEOUT_SEC);
- ASSERT_EQ(recvd, sizeof(rcv), "recv_timeout(p)");
-
-out_close:
- close(c);
- close(p);
-
-out:
- test_sockmap_pass_prog__destroy(pass);
-}
static void test_sockmap_skb_verdict_fionread(bool pass_prog)
{
@@ -628,7 +581,7 @@ static void test_sockmap_skb_verdict_fionread(bool pass_prog)
ASSERT_EQ(avail, expected, "ioctl(FIONREAD)");
/* On DROP test there will be no data to read */
if (pass_prog) {
- recvd = recv_timeout(c1, &buf, sizeof(buf), SOCK_NONBLOCK, IO_TIMEOUT_SEC);
+ recvd = recv_timeout(c1, &buf, sizeof(buf), MSG_DONTWAIT, IO_TIMEOUT_SEC);
ASSERT_EQ(recvd, sizeof(buf), "recv_timeout(c0)");
}
@@ -1061,6 +1014,34 @@ destroy:
test_sockmap_pass_prog__destroy(skel);
}
+static void test_sockmap_vsock_unconnected(void)
+{
+ struct sockaddr_storage addr;
+ int map, s, zero = 0;
+ socklen_t alen;
+
+ map = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(int),
+ sizeof(int), 1, NULL);
+ if (!ASSERT_OK_FD(map, "bpf_map_create"))
+ return;
+
+ s = xsocket(AF_VSOCK, SOCK_STREAM, 0);
+ if (s < 0)
+ goto close_map;
+
+ /* Fail connect(), but trigger transport assignment. */
+ init_addr_loopback(AF_VSOCK, &addr, &alen);
+ if (!ASSERT_ERR(connect(s, sockaddr(&addr), alen), "connect"))
+ goto close_sock;
+
+ ASSERT_ERR(bpf_map_update_elem(map, &zero, &s, BPF_ANY), "map_update");
+
+close_sock:
+ xclose(s);
+close_map:
+ xclose(map);
+}
+
void test_sockmap_basic(void)
{
if (test__start_subtest("sockmap create_update_free"))
@@ -1101,8 +1082,6 @@ void test_sockmap_basic(void)
test_sockmap_progs_query(BPF_SK_SKB_VERDICT);
if (test__start_subtest("sockmap skb_verdict shutdown"))
test_sockmap_skb_verdict_shutdown();
- if (test__start_subtest("sockmap stream parser and verdict pass"))
- test_sockmap_stream_pass();
if (test__start_subtest("sockmap skb_verdict fionread"))
test_sockmap_skb_verdict_fionread(true);
if (test__start_subtest("sockmap skb_verdict fionread on drop"))
@@ -1127,4 +1106,6 @@ void test_sockmap_basic(void)
test_skmsg_helpers_with_link(BPF_MAP_TYPE_SOCKHASH);
if (test__start_subtest("sockmap skb_verdict vsock poll"))
test_sockmap_skb_verdict_vsock_poll();
+ if (test__start_subtest("sockmap vsock unconnected"))
+ test_sockmap_vsock_unconnected();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_strp.c b/tools/testing/selftests/bpf/prog_tests/sockmap_strp.c
new file mode 100644
index 000000000000..621b3b71888e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_strp.c
@@ -0,0 +1,454 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <error.h>
+#include <netinet/tcp.h>
+#include <test_progs.h>
+#include "sockmap_helpers.h"
+#include "test_skmsg_load_helpers.skel.h"
+#include "test_sockmap_strp.skel.h"
+
+#define STRP_PKT_HEAD_LEN 4
+#define STRP_PKT_BODY_LEN 6
+#define STRP_PKT_FULL_LEN (STRP_PKT_HEAD_LEN + STRP_PKT_BODY_LEN)
+
+static const char packet[STRP_PKT_FULL_LEN] = "head+body\0";
+static const int test_packet_num = 100;
+
+/* Current implementation of tcp_bpf_recvmsg_parser() invokes data_ready
+ * with sk held if an skb exists in sk_receive_queue. Then for the
+ * data_ready implementation of strparser, it will delay the read
+ * operation if sk is held and EAGAIN is returned.
+ */
+static int sockmap_strp_consume_pre_data(int p)
+{
+ int recvd;
+ bool retried = false;
+ char rcv[10];
+
+retry:
+ errno = 0;
+ recvd = recv_timeout(p, rcv, sizeof(rcv), 0, 1);
+ if (recvd < 0 && errno == EAGAIN && retried == false) {
+ /* On the first call, EAGAIN will certainly be returned.
+ * A 1-second wait is enough for the workqueue to finish.
+ */
+ sleep(1);
+ retried = true;
+ goto retry;
+ }
+
+ if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "recv error or truncated data") ||
+ !ASSERT_OK(memcmp(packet, rcv, STRP_PKT_FULL_LEN),
+ "data mismatch"))
+ return -1;
+ return 0;
+}
+
+static struct test_sockmap_strp *sockmap_strp_init(int *out_map, bool pass,
+ bool need_parser)
+{
+ struct test_sockmap_strp *strp = NULL;
+ int verdict, parser;
+ int err;
+
+ strp = test_sockmap_strp__open_and_load();
+ *out_map = bpf_map__fd(strp->maps.sock_map);
+
+ if (need_parser)
+ parser = bpf_program__fd(strp->progs.prog_skb_parser_partial);
+ else
+ parser = bpf_program__fd(strp->progs.prog_skb_parser);
+
+ if (pass)
+ verdict = bpf_program__fd(strp->progs.prog_skb_verdict_pass);
+ else
+ verdict = bpf_program__fd(strp->progs.prog_skb_verdict);
+
+ err = bpf_prog_attach(parser, *out_map, BPF_SK_SKB_STREAM_PARSER, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach stream parser"))
+ goto err;
+
+ err = bpf_prog_attach(verdict, *out_map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach stream verdict"))
+ goto err;
+
+ return strp;
+err:
+ test_sockmap_strp__destroy(strp);
+ return NULL;
+}
+
+/* Dispatch packets to different socket by packet size:
+ *
+ * ------ ------
+ * | pkt4 || pkt1 |... > remote socket
+ * ------ ------ / ------ ------
+ * | pkt8 | pkt7 |...
+ * ------ ------ \ ------ ------
+ * | pkt3 || pkt2 |... > local socket
+ * ------ ------
+ */
+static void test_sockmap_strp_dispatch_pkt(int family, int sotype)
+{
+ int i, j, zero = 0, one = 1, recvd;
+ int err, map;
+ int c0 = -1, p0 = -1, c1 = -1, p1 = -1;
+ struct test_sockmap_strp *strp = NULL;
+ int test_cnt = 6;
+ char rcv[10];
+ struct {
+ char data[7];
+ int data_len;
+ int send_cnt;
+ int *receiver;
+ } send_dir[2] = {
+ /* data expected to deliver to local */
+ {"llllll", 6, 0, &p0},
+ /* data expected to deliver to remote */
+ {"rrrrr", 5, 0, &c1}
+ };
+
+ strp = sockmap_strp_init(&map, false, false);
+ if (!ASSERT_TRUE(strp, "sockmap_strp_init"))
+ return;
+
+ err = create_socket_pairs(family, sotype, &c0, &c1, &p0, &p1);
+ if (!ASSERT_OK(err, "create_socket_pairs()"))
+ goto out;
+
+ err = bpf_map_update_elem(map, &zero, &p0, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(p0)"))
+ goto out_close;
+
+ err = bpf_map_update_elem(map, &one, &p1, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(p1)"))
+ goto out_close;
+
+ err = setsockopt(c1, IPPROTO_TCP, TCP_NODELAY, &zero, sizeof(zero));
+ if (!ASSERT_OK(err, "setsockopt(TCP_NODELAY)"))
+ goto out_close;
+
+ /* deliver data with data size greater than 5 to local */
+ strp->data->verdict_max_size = 5;
+
+ for (i = 0; i < test_cnt; i++) {
+ int d = i % 2;
+
+ xsend(c0, send_dir[d].data, send_dir[d].data_len, 0);
+ send_dir[d].send_cnt++;
+ }
+
+ for (i = 0; i < 2; i++) {
+ for (j = 0; j < send_dir[i].send_cnt; j++) {
+ int expected = send_dir[i].data_len;
+
+ recvd = recv_timeout(*send_dir[i].receiver, rcv,
+ expected, MSG_DONTWAIT,
+ IO_TIMEOUT_SEC);
+ if (!ASSERT_EQ(recvd, expected, "recv_timeout()"))
+ goto out_close;
+ if (!ASSERT_OK(memcmp(send_dir[i].data, rcv, recvd),
+ "data mismatch"))
+ goto out_close;
+ }
+ }
+out_close:
+ close(c0);
+ close(c1);
+ close(p0);
+ close(p1);
+out:
+ test_sockmap_strp__destroy(strp);
+}
+
+/* We have multiple packets in one skb
+ * ------------ ------------ ------------
+ * | packet1 | packet2 | ...
+ * ------------ ------------ ------------
+ */
+static void test_sockmap_strp_multiple_pkt(int family, int sotype)
+{
+ int i, zero = 0;
+ int sent, recvd, total;
+ int err, map;
+ int c = -1, p = -1;
+ struct test_sockmap_strp *strp = NULL;
+ char *snd = NULL, *rcv = NULL;
+
+ strp = sockmap_strp_init(&map, true, true);
+ if (!ASSERT_TRUE(strp, "sockmap_strp_init"))
+ return;
+
+ err = create_pair(family, sotype, &c, &p);
+ if (err)
+ goto out;
+
+ err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(zero, p)"))
+ goto out_close;
+
+ /* construct multiple packets in one buffer */
+ total = test_packet_num * STRP_PKT_FULL_LEN;
+ snd = malloc(total);
+ rcv = malloc(total + 1);
+ if (!ASSERT_TRUE(snd, "malloc(snd)") ||
+ !ASSERT_TRUE(rcv, "malloc(rcv)"))
+ goto out_close;
+
+ for (i = 0; i < test_packet_num; i++) {
+ memcpy(snd + i * STRP_PKT_FULL_LEN,
+ packet, STRP_PKT_FULL_LEN);
+ }
+
+ sent = xsend(c, snd, total, 0);
+ if (!ASSERT_EQ(sent, total, "xsend(c)"))
+ goto out_close;
+
+ /* try to recv one more byte to avoid truncation check */
+ recvd = recv_timeout(p, rcv, total + 1, MSG_DONTWAIT, IO_TIMEOUT_SEC);
+ if (!ASSERT_EQ(recvd, total, "recv(rcv)"))
+ goto out_close;
+
+ /* we sent TCP segment with multiple encapsulation
+ * then check whether packets are handled correctly
+ */
+ if (!ASSERT_OK(memcmp(snd, rcv, total), "data mismatch"))
+ goto out_close;
+
+out_close:
+ close(c);
+ close(p);
+ if (snd)
+ free(snd);
+ if (rcv)
+ free(rcv);
+out:
+ test_sockmap_strp__destroy(strp);
+}
+
+/* Test strparser with partial read */
+static void test_sockmap_strp_partial_read(int family, int sotype)
+{
+ int zero = 0, recvd, off;
+ int err, map;
+ int c = -1, p = -1;
+ struct test_sockmap_strp *strp = NULL;
+ char rcv[STRP_PKT_FULL_LEN + 1] = "0";
+
+ strp = sockmap_strp_init(&map, true, true);
+ if (!ASSERT_TRUE(strp, "sockmap_strp_init"))
+ return;
+
+ err = create_pair(family, sotype, &c, &p);
+ if (err)
+ goto out;
+
+ /* sk_data_ready of 'p' will be replaced by strparser handler */
+ err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(zero, p)"))
+ goto out_close;
+
+ /* 1.1 send partial head, 1 byte header left */
+ off = STRP_PKT_HEAD_LEN - 1;
+ xsend(c, packet, off, 0);
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1);
+ if (!ASSERT_EQ(-1, recvd, "partial head sent, expected no data"))
+ goto out_close;
+
+ /* 1.2 send remaining head and body */
+ xsend(c, packet + off, STRP_PKT_FULL_LEN - off, 0);
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, IO_TIMEOUT_SEC);
+ if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "expected full data"))
+ goto out_close;
+
+ /* 2.1 send partial head, 1 byte header left */
+ off = STRP_PKT_HEAD_LEN - 1;
+ xsend(c, packet, off, 0);
+
+ /* 2.2 send remaining head and partial body, 1 byte body left */
+ xsend(c, packet + off, STRP_PKT_FULL_LEN - off - 1, 0);
+ off = STRP_PKT_FULL_LEN - 1;
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1);
+ if (!ASSERT_EQ(-1, recvd, "partial body sent, expected no data"))
+ goto out_close;
+
+ /* 2.3 send remaining body */
+ xsend(c, packet + off, STRP_PKT_FULL_LEN - off, 0);
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, IO_TIMEOUT_SEC);
+ if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "expected full data"))
+ goto out_close;
+
+out_close:
+ close(c);
+ close(p);
+
+out:
+ test_sockmap_strp__destroy(strp);
+}
+
+/* Test simple socket read/write with strparser + FIONREAD */
+static void test_sockmap_strp_pass(int family, int sotype, bool fionread)
+{
+ int zero = 0, pkt_size = STRP_PKT_FULL_LEN, sent, recvd, avail;
+ int err, map;
+ int c = -1, p = -1;
+ int test_cnt = 10, i;
+ struct test_sockmap_strp *strp = NULL;
+ char rcv[STRP_PKT_FULL_LEN + 1] = "0";
+
+ strp = sockmap_strp_init(&map, true, true);
+ if (!ASSERT_TRUE(strp, "sockmap_strp_init"))
+ return;
+
+ err = create_pair(family, sotype, &c, &p);
+ if (err)
+ goto out;
+
+ /* inject some data before bpf process, it should be read
+ * correctly because we check sk_receive_queue in
+ * tcp_bpf_recvmsg_parser().
+ */
+ sent = xsend(c, packet, pkt_size, 0);
+ if (!ASSERT_EQ(sent, pkt_size, "xsend(pre-data)"))
+ goto out_close;
+
+ /* sk_data_ready of 'p' will be replaced by strparser handler */
+ err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(p)"))
+ goto out_close;
+
+ /* consume previous data we injected */
+ if (sockmap_strp_consume_pre_data(p))
+ goto out_close;
+
+ /* Previously, we encountered issues such as deadlocks and
+ * sequence errors that resulted in the inability to read
+ * continuously. Therefore, we perform multiple iterations
+ * of testing here.
+ */
+ for (i = 0; i < test_cnt; i++) {
+ sent = xsend(c, packet, pkt_size, 0);
+ if (!ASSERT_EQ(sent, pkt_size, "xsend(c)"))
+ goto out_close;
+
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT,
+ IO_TIMEOUT_SEC);
+ if (!ASSERT_EQ(recvd, pkt_size, "recv_timeout(p)") ||
+ !ASSERT_OK(memcmp(packet, rcv, pkt_size),
+ "memcmp, data mismatch"))
+ goto out_close;
+ }
+
+ if (fionread) {
+ sent = xsend(c, packet, pkt_size, 0);
+ if (!ASSERT_EQ(sent, pkt_size, "second xsend(c)"))
+ goto out_close;
+
+ err = ioctl(p, FIONREAD, &avail);
+ if (!ASSERT_OK(err, "ioctl(FIONREAD) error") ||
+ !ASSERT_EQ(avail, pkt_size, "ioctl(FIONREAD)"))
+ goto out_close;
+
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT,
+ IO_TIMEOUT_SEC);
+ if (!ASSERT_EQ(recvd, pkt_size, "second recv_timeout(p)") ||
+ !ASSERT_OK(memcmp(packet, rcv, pkt_size),
+ "second memcmp, data mismatch"))
+ goto out_close;
+ }
+
+out_close:
+ close(c);
+ close(p);
+
+out:
+ test_sockmap_strp__destroy(strp);
+}
+
+/* Test strparser with verdict mode */
+static void test_sockmap_strp_verdict(int family, int sotype)
+{
+ int zero = 0, one = 1, sent, recvd, off;
+ int err, map;
+ int c0 = -1, p0 = -1, c1 = -1, p1 = -1;
+ struct test_sockmap_strp *strp = NULL;
+ char rcv[STRP_PKT_FULL_LEN + 1] = "0";
+
+ strp = sockmap_strp_init(&map, false, true);
+ if (!ASSERT_TRUE(strp, "sockmap_strp_init"))
+ return;
+
+ /* We simulate a reverse proxy server.
+ * When p0 receives data from c0, we forward it to c1.
+ * From c1's perspective, it will consider this data
+ * as being sent by p1.
+ */
+ err = create_socket_pairs(family, sotype, &c0, &c1, &p0, &p1);
+ if (!ASSERT_OK(err, "create_socket_pairs()"))
+ goto out;
+
+ err = bpf_map_update_elem(map, &zero, &p0, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(p0)"))
+ goto out_close;
+
+ err = bpf_map_update_elem(map, &one, &p1, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(p1)"))
+ goto out_close;
+
+ sent = xsend(c0, packet, STRP_PKT_FULL_LEN, 0);
+ if (!ASSERT_EQ(sent, STRP_PKT_FULL_LEN, "xsend(c0)"))
+ goto out_close;
+
+ recvd = recv_timeout(c1, rcv, sizeof(rcv), MSG_DONTWAIT,
+ IO_TIMEOUT_SEC);
+ if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "recv_timeout(c1)") ||
+ !ASSERT_OK(memcmp(packet, rcv, STRP_PKT_FULL_LEN),
+ "received data does not match the sent data"))
+ goto out_close;
+
+ /* send again to ensure the stream is functioning correctly. */
+ sent = xsend(c0, packet, STRP_PKT_FULL_LEN, 0);
+ if (!ASSERT_EQ(sent, STRP_PKT_FULL_LEN, "second xsend(c0)"))
+ goto out_close;
+
+ /* partial read */
+ off = STRP_PKT_FULL_LEN / 2;
+ recvd = recv_timeout(c1, rcv, off, MSG_DONTWAIT,
+ IO_TIMEOUT_SEC);
+ recvd += recv_timeout(c1, rcv + off, sizeof(rcv) - off, MSG_DONTWAIT,
+ IO_TIMEOUT_SEC);
+
+ if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "partial recv_timeout(c1)") ||
+ !ASSERT_OK(memcmp(packet, rcv, STRP_PKT_FULL_LEN),
+ "partial received data does not match the sent data"))
+ goto out_close;
+
+out_close:
+ close(c0);
+ close(c1);
+ close(p0);
+ close(p1);
+out:
+ test_sockmap_strp__destroy(strp);
+}
+
+void test_sockmap_strp(void)
+{
+ if (test__start_subtest("sockmap strp tcp pass"))
+ test_sockmap_strp_pass(AF_INET, SOCK_STREAM, false);
+ if (test__start_subtest("sockmap strp tcp v6 pass"))
+ test_sockmap_strp_pass(AF_INET6, SOCK_STREAM, false);
+ if (test__start_subtest("sockmap strp tcp pass fionread"))
+ test_sockmap_strp_pass(AF_INET, SOCK_STREAM, true);
+ if (test__start_subtest("sockmap strp tcp v6 pass fionread"))
+ test_sockmap_strp_pass(AF_INET6, SOCK_STREAM, true);
+ if (test__start_subtest("sockmap strp tcp verdict"))
+ test_sockmap_strp_verdict(AF_INET, SOCK_STREAM);
+ if (test__start_subtest("sockmap strp tcp v6 verdict"))
+ test_sockmap_strp_verdict(AF_INET6, SOCK_STREAM);
+ if (test__start_subtest("sockmap strp tcp partial read"))
+ test_sockmap_strp_partial_read(AF_INET, SOCK_STREAM);
+ if (test__start_subtest("sockmap strp tcp multiple packets"))
+ test_sockmap_strp_multiple_pkt(AF_INET, SOCK_STREAM);
+ if (test__start_subtest("sockmap strp tcp dispatch"))
+ test_sockmap_strp_dispatch_pkt(AF_INET, SOCK_STREAM);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/spin_lock.c b/tools/testing/selftests/bpf/prog_tests/spin_lock.c
index 2b0068742ef9..e3ea5dc2f697 100644
--- a/tools/testing/selftests/bpf/prog_tests/spin_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/spin_lock.c
@@ -50,6 +50,9 @@ static struct {
{ "lock_id_mismatch_innermapval_mapval", "bpf_spin_unlock of different lock" },
{ "lock_global_subprog_call1", "global function calls are not allowed while holding a lock" },
{ "lock_global_subprog_call2", "global function calls are not allowed while holding a lock" },
+ { "lock_global_sleepable_helper_subprog", "global function calls are not allowed while holding a lock" },
+ { "lock_global_sleepable_kfunc_subprog", "global function calls are not allowed while holding a lock" },
+ { "lock_global_sleepable_subprog_indirect", "global function calls are not allowed while holding a lock" },
};
static int match_regex(const char *pattern, const char *string)
diff --git a/tools/testing/selftests/bpf/prog_tests/summarization.c b/tools/testing/selftests/bpf/prog_tests/summarization.c
new file mode 100644
index 000000000000..5dd6c120a838
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/summarization.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bpf/libbpf.h"
+#include "summarization_freplace.skel.h"
+#include "summarization.skel.h"
+#include <test_progs.h>
+
+static void print_verifier_log(const char *log)
+{
+ if (env.verbosity >= VERBOSE_VERY)
+ fprintf(stdout, "VERIFIER LOG:\n=============\n%s=============\n", log);
+}
+
+static void test_aux(const char *main_prog_name,
+ const char *to_be_replaced,
+ const char *replacement,
+ bool expect_load,
+ const char *err_msg)
+{
+ struct summarization_freplace *freplace = NULL;
+ struct bpf_program *freplace_prog = NULL;
+ struct bpf_program *main_prog = NULL;
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ struct summarization *main = NULL;
+ char log[16*1024];
+ int err;
+
+ opts.kernel_log_buf = log;
+ opts.kernel_log_size = sizeof(log);
+ if (env.verbosity >= VERBOSE_SUPER)
+ opts.kernel_log_level = 1 | 2 | 4;
+ main = summarization__open_opts(&opts);
+ if (!ASSERT_OK_PTR(main, "summarization__open"))
+ goto out;
+ main_prog = bpf_object__find_program_by_name(main->obj, main_prog_name);
+ if (!ASSERT_OK_PTR(main_prog, "main_prog"))
+ goto out;
+ bpf_program__set_autoload(main_prog, true);
+ err = summarization__load(main);
+ print_verifier_log(log);
+ if (!ASSERT_OK(err, "summarization__load"))
+ goto out;
+ freplace = summarization_freplace__open_opts(&opts);
+ if (!ASSERT_OK_PTR(freplace, "summarization_freplace__open"))
+ goto out;
+ freplace_prog = bpf_object__find_program_by_name(freplace->obj, replacement);
+ if (!ASSERT_OK_PTR(freplace_prog, "freplace_prog"))
+ goto out;
+ bpf_program__set_autoload(freplace_prog, true);
+ bpf_program__set_autoattach(freplace_prog, true);
+ bpf_program__set_attach_target(freplace_prog,
+ bpf_program__fd(main_prog),
+ to_be_replaced);
+ err = summarization_freplace__load(freplace);
+ print_verifier_log(log);
+
+ /* The might_sleep extension doesn't work yet as sleepable calls are not
+ * allowed, but preserve the check in case it's supported later and then
+ * this particular combination can be enabled.
+ */
+ if (!strcmp("might_sleep", replacement) && err) {
+ ASSERT_HAS_SUBSTR(log, "helper call might sleep in a non-sleepable prog", "error log");
+ ASSERT_EQ(err, -EINVAL, "err");
+ test__skip();
+ goto out;
+ }
+
+ if (expect_load) {
+ ASSERT_OK(err, "summarization_freplace__load");
+ } else {
+ ASSERT_ERR(err, "summarization_freplace__load");
+ ASSERT_HAS_SUBSTR(log, err_msg, "error log");
+ }
+
+out:
+ summarization_freplace__destroy(freplace);
+ summarization__destroy(main);
+}
+
+/* There are two global subprograms in both summarization.skel.h:
+ * - one changes packet data;
+ * - another does not.
+ * It is ok to freplace subprograms that change packet data with those
+ * that either do or do not. It is only ok to freplace subprograms
+ * that do not change packet data with those that do not as well.
+ * The below tests check outcomes for each combination of such freplace.
+ * Also test a case when main subprogram itself is replaced and is a single
+ * subprogram in a program.
+ *
+ * This holds for might_sleep programs. It is ok to replace might_sleep with
+ * might_sleep and with does_not_sleep, but does_not_sleep cannot be replaced
+ * with might_sleep.
+ */
+void test_summarization_freplace(void)
+{
+ struct {
+ const char *main;
+ const char *to_be_replaced;
+ bool has_side_effect;
+ } mains[2][4] = {
+ {
+ { "main_changes_with_subprogs", "changes_pkt_data", true },
+ { "main_changes_with_subprogs", "does_not_change_pkt_data", false },
+ { "main_changes", "main_changes", true },
+ { "main_does_not_change", "main_does_not_change", false },
+ },
+ {
+ { "main_might_sleep_with_subprogs", "might_sleep", true },
+ { "main_might_sleep_with_subprogs", "does_not_sleep", false },
+ { "main_might_sleep", "main_might_sleep", true },
+ { "main_does_not_sleep", "main_does_not_sleep", false },
+ },
+ };
+ const char *pkt_err = "Extension program changes packet data";
+ const char *slp_err = "Extension program may sleep";
+ struct {
+ const char *func;
+ bool has_side_effect;
+ const char *err_msg;
+ } replacements[2][2] = {
+ {
+ { "changes_pkt_data", true, pkt_err },
+ { "does_not_change_pkt_data", false, pkt_err },
+ },
+ {
+ { "might_sleep", true, slp_err },
+ { "does_not_sleep", false, slp_err },
+ },
+ };
+ char buf[64];
+
+ for (int t = 0; t < 2; t++) {
+ for (int i = 0; i < ARRAY_SIZE(mains); ++i) {
+ for (int j = 0; j < ARRAY_SIZE(replacements); ++j) {
+ snprintf(buf, sizeof(buf), "%s_with_%s",
+ mains[t][i].to_be_replaced, replacements[t][j].func);
+ if (!test__start_subtest(buf))
+ continue;
+ test_aux(mains[t][i].main, mains[t][i].to_be_replaced, replacements[t][j].func,
+ mains[t][i].has_side_effect || !replacements[t][j].has_side_effect,
+ replacements[t][j].err_msg);
+ }
+ }
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
index 544144620ca6..66a900327f91 100644
--- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c
+++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
@@ -1600,6 +1600,7 @@ static void test_tailcall_bpf2bpf_freplace(void)
goto out;
err = bpf_link__destroy(freplace_link);
+ freplace_link = NULL;
if (!ASSERT_OK(err, "destroy link"))
goto out;
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_links.c b/tools/testing/selftests/bpf/prog_tests/tc_links.c
index 1af9ec1149aa..2186a24e7d8a 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_links.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_links.c
@@ -13,7 +13,7 @@
#include "netlink_helpers.h"
#include "tc_helpers.h"
-void serial_test_tc_links_basic(void)
+void test_ns_tc_links_basic(void)
{
LIBBPF_OPTS(bpf_prog_query_opts, optq);
LIBBPF_OPTS(bpf_tcx_opts, optl);
@@ -260,7 +260,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_links_before(void)
+void test_ns_tc_links_before(void)
{
test_tc_links_before_target(BPF_TCX_INGRESS);
test_tc_links_before_target(BPF_TCX_EGRESS);
@@ -414,7 +414,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_links_after(void)
+void test_ns_tc_links_after(void)
{
test_tc_links_after_target(BPF_TCX_INGRESS);
test_tc_links_after_target(BPF_TCX_EGRESS);
@@ -514,7 +514,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_links_revision(void)
+void test_ns_tc_links_revision(void)
{
test_tc_links_revision_target(BPF_TCX_INGRESS);
test_tc_links_revision_target(BPF_TCX_EGRESS);
@@ -618,7 +618,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_links_chain_classic(void)
+void test_ns_tc_links_chain_classic(void)
{
test_tc_chain_classic(BPF_TCX_INGRESS, false);
test_tc_chain_classic(BPF_TCX_EGRESS, false);
@@ -846,7 +846,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_links_replace(void)
+void test_ns_tc_links_replace(void)
{
test_tc_links_replace_target(BPF_TCX_INGRESS);
test_tc_links_replace_target(BPF_TCX_EGRESS);
@@ -1158,7 +1158,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_links_invalid(void)
+void test_ns_tc_links_invalid(void)
{
test_tc_links_invalid_target(BPF_TCX_INGRESS);
test_tc_links_invalid_target(BPF_TCX_EGRESS);
@@ -1314,7 +1314,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_links_prepend(void)
+void test_ns_tc_links_prepend(void)
{
test_tc_links_prepend_target(BPF_TCX_INGRESS);
test_tc_links_prepend_target(BPF_TCX_EGRESS);
@@ -1470,7 +1470,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_links_append(void)
+void test_ns_tc_links_append(void)
{
test_tc_links_append_target(BPF_TCX_INGRESS);
test_tc_links_append_target(BPF_TCX_EGRESS);
@@ -1568,7 +1568,7 @@ cleanup:
ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed");
}
-void serial_test_tc_links_dev_cleanup(void)
+void test_ns_tc_links_dev_cleanup(void)
{
test_tc_links_dev_cleanup_target(BPF_TCX_INGRESS);
test_tc_links_dev_cleanup_target(BPF_TCX_EGRESS);
@@ -1672,7 +1672,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_links_chain_mixed(void)
+void test_ns_tc_links_chain_mixed(void)
{
test_tc_chain_mixed(BPF_TCX_INGRESS);
test_tc_chain_mixed(BPF_TCX_EGRESS);
@@ -1782,7 +1782,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_links_ingress(void)
+void test_ns_tc_links_ingress(void)
{
test_tc_links_ingress(BPF_TCX_INGRESS, true, true);
test_tc_links_ingress(BPF_TCX_INGRESS, true, false);
@@ -1823,7 +1823,7 @@ static int qdisc_replace(int ifindex, const char *kind, bool block)
return err;
}
-void serial_test_tc_links_dev_chain0(void)
+void test_ns_tc_links_dev_chain0(void)
{
int err, ifindex;
@@ -1955,7 +1955,7 @@ cleanup:
ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed");
}
-void serial_test_tc_links_dev_mixed(void)
+void test_ns_tc_links_dev_mixed(void)
{
test_tc_links_dev_mixed(BPF_TCX_INGRESS);
test_tc_links_dev_mixed(BPF_TCX_EGRESS);
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_opts.c b/tools/testing/selftests/bpf/prog_tests/tc_opts.c
index f77f604389aa..dd7a138d8c3d 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_opts.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_opts.c
@@ -10,7 +10,7 @@
#include "test_tc_link.skel.h"
#include "tc_helpers.h"
-void serial_test_tc_opts_basic(void)
+void test_ns_tc_opts_basic(void)
{
LIBBPF_OPTS(bpf_prog_attach_opts, opta);
LIBBPF_OPTS(bpf_prog_detach_opts, optd);
@@ -254,7 +254,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_before(void)
+void test_ns_tc_opts_before(void)
{
test_tc_opts_before_target(BPF_TCX_INGRESS);
test_tc_opts_before_target(BPF_TCX_EGRESS);
@@ -445,7 +445,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_after(void)
+void test_ns_tc_opts_after(void)
{
test_tc_opts_after_target(BPF_TCX_INGRESS);
test_tc_opts_after_target(BPF_TCX_EGRESS);
@@ -554,7 +554,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_revision(void)
+void test_ns_tc_opts_revision(void)
{
test_tc_opts_revision_target(BPF_TCX_INGRESS);
test_tc_opts_revision_target(BPF_TCX_EGRESS);
@@ -655,7 +655,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_opts_chain_classic(void)
+void test_ns_tc_opts_chain_classic(void)
{
test_tc_chain_classic(BPF_TCX_INGRESS, false);
test_tc_chain_classic(BPF_TCX_EGRESS, false);
@@ -864,7 +864,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_replace(void)
+void test_ns_tc_opts_replace(void)
{
test_tc_opts_replace_target(BPF_TCX_INGRESS);
test_tc_opts_replace_target(BPF_TCX_EGRESS);
@@ -1017,7 +1017,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_invalid(void)
+void test_ns_tc_opts_invalid(void)
{
test_tc_opts_invalid_target(BPF_TCX_INGRESS);
test_tc_opts_invalid_target(BPF_TCX_EGRESS);
@@ -1157,7 +1157,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_prepend(void)
+void test_ns_tc_opts_prepend(void)
{
test_tc_opts_prepend_target(BPF_TCX_INGRESS);
test_tc_opts_prepend_target(BPF_TCX_EGRESS);
@@ -1297,7 +1297,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_append(void)
+void test_ns_tc_opts_append(void)
{
test_tc_opts_append_target(BPF_TCX_INGRESS);
test_tc_opts_append_target(BPF_TCX_EGRESS);
@@ -1387,7 +1387,7 @@ cleanup:
ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed");
}
-void serial_test_tc_opts_dev_cleanup(void)
+void test_ns_tc_opts_dev_cleanup(void)
{
test_tc_opts_dev_cleanup_target(BPF_TCX_INGRESS);
test_tc_opts_dev_cleanup_target(BPF_TCX_EGRESS);
@@ -1563,7 +1563,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_opts_mixed(void)
+void test_ns_tc_opts_mixed(void)
{
test_tc_opts_mixed_target(BPF_TCX_INGRESS);
test_tc_opts_mixed_target(BPF_TCX_EGRESS);
@@ -1642,7 +1642,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_opts_demixed(void)
+void test_ns_tc_opts_demixed(void)
{
test_tc_opts_demixed_target(BPF_TCX_INGRESS);
test_tc_opts_demixed_target(BPF_TCX_EGRESS);
@@ -1813,7 +1813,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_detach(void)
+void test_ns_tc_opts_detach(void)
{
test_tc_opts_detach_target(BPF_TCX_INGRESS);
test_tc_opts_detach_target(BPF_TCX_EGRESS);
@@ -2020,7 +2020,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_detach_before(void)
+void test_ns_tc_opts_detach_before(void)
{
test_tc_opts_detach_before_target(BPF_TCX_INGRESS);
test_tc_opts_detach_before_target(BPF_TCX_EGRESS);
@@ -2236,7 +2236,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_detach_after(void)
+void test_ns_tc_opts_detach_after(void)
{
test_tc_opts_detach_after_target(BPF_TCX_INGRESS);
test_tc_opts_detach_after_target(BPF_TCX_EGRESS);
@@ -2265,7 +2265,7 @@ static void test_tc_opts_delete_empty(int target, bool chain_tc_old)
assert_mprog_count(target, 0);
}
-void serial_test_tc_opts_delete_empty(void)
+void test_ns_tc_opts_delete_empty(void)
{
test_tc_opts_delete_empty(BPF_TCX_INGRESS, false);
test_tc_opts_delete_empty(BPF_TCX_EGRESS, false);
@@ -2372,7 +2372,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_chain_mixed(void)
+void test_ns_tc_opts_chain_mixed(void)
{
test_tc_chain_mixed(BPF_TCX_INGRESS);
test_tc_chain_mixed(BPF_TCX_EGRESS);
@@ -2446,7 +2446,7 @@ cleanup:
ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed");
}
-void serial_test_tc_opts_max(void)
+void test_ns_tc_opts_max(void)
{
test_tc_opts_max_target(BPF_TCX_INGRESS, 0, false);
test_tc_opts_max_target(BPF_TCX_EGRESS, 0, false);
@@ -2748,7 +2748,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_query(void)
+void test_ns_tc_opts_query(void)
{
test_tc_opts_query_target(BPF_TCX_INGRESS);
test_tc_opts_query_target(BPF_TCX_EGRESS);
@@ -2807,7 +2807,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_query_attach(void)
+void test_ns_tc_opts_query_attach(void)
{
test_tc_opts_query_attach_target(BPF_TCX_INGRESS);
test_tc_opts_query_attach_target(BPF_TCX_EGRESS);
diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_kptr_return.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_kptr_return.c
new file mode 100644
index 000000000000..467cc72a3588
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_kptr_return.c
@@ -0,0 +1,16 @@
+#include <test_progs.h>
+
+#include "struct_ops_kptr_return.skel.h"
+#include "struct_ops_kptr_return_fail__wrong_type.skel.h"
+#include "struct_ops_kptr_return_fail__invalid_scalar.skel.h"
+#include "struct_ops_kptr_return_fail__nonzero_offset.skel.h"
+#include "struct_ops_kptr_return_fail__local_kptr.skel.h"
+
+void test_struct_ops_kptr_return(void)
+{
+ RUN_TESTS(struct_ops_kptr_return);
+ RUN_TESTS(struct_ops_kptr_return_fail__wrong_type);
+ RUN_TESTS(struct_ops_kptr_return_fail__invalid_scalar);
+ RUN_TESTS(struct_ops_kptr_return_fail__nonzero_offset);
+ RUN_TESTS(struct_ops_kptr_return_fail__local_kptr);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_refcounted.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_refcounted.c
new file mode 100644
index 000000000000..da60c715fc59
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_refcounted.c
@@ -0,0 +1,14 @@
+#include <test_progs.h>
+
+#include "struct_ops_refcounted.skel.h"
+#include "struct_ops_refcounted_fail__ref_leak.skel.h"
+#include "struct_ops_refcounted_fail__global_subprog.skel.h"
+#include "struct_ops_refcounted_fail__tail_call.skel.h"
+
+void test_struct_ops_refcounted(void)
+{
+ RUN_TESTS(struct_ops_refcounted);
+ RUN_TESTS(struct_ops_refcounted_fail__ref_leak);
+ RUN_TESTS(struct_ops_refcounted_fail__global_subprog);
+ RUN_TESTS(struct_ops_refcounted_fail__tail_call);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
index cec746e77cd3..bae0e9de277d 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
@@ -71,6 +71,8 @@
#define IP4_ADDR2_VETH1 "172.16.1.20"
#define IP4_ADDR_TUNL_DEV0 "10.1.1.100"
#define IP4_ADDR_TUNL_DEV1 "10.1.1.200"
+#define IP6_ADDR_TUNL_DEV0 "fc80::100"
+#define IP6_ADDR_TUNL_DEV1 "fc80::200"
#define IP6_ADDR_VETH0 "::11"
#define IP6_ADDR1_VETH1 "::22"
@@ -98,6 +100,27 @@
#define XFRM_SPI_IN_TO_OUT 0x1
#define XFRM_SPI_OUT_TO_IN 0x2
+#define GRE_TUNL_DEV0 "gre00"
+#define GRE_TUNL_DEV1 "gre11"
+
+#define IP6GRE_TUNL_DEV0 "ip6gre00"
+#define IP6GRE_TUNL_DEV1 "ip6gre11"
+
+#define ERSPAN_TUNL_DEV0 "erspan00"
+#define ERSPAN_TUNL_DEV1 "erspan11"
+
+#define IP6ERSPAN_TUNL_DEV0 "ip6erspan00"
+#define IP6ERSPAN_TUNL_DEV1 "ip6erspan11"
+
+#define GENEVE_TUNL_DEV0 "geneve00"
+#define GENEVE_TUNL_DEV1 "geneve11"
+
+#define IP6GENEVE_TUNL_DEV0 "ip6geneve00"
+#define IP6GENEVE_TUNL_DEV1 "ip6geneve11"
+
+#define IP6TNL_TUNL_DEV0 "ip6tnl00"
+#define IP6TNL_TUNL_DEV1 "ip6tnl11"
+
#define PING_ARGS "-i 0.01 -c 3 -w 10 -q"
static int config_device(void)
@@ -216,6 +239,18 @@ fail:
return -1;
}
+static int set_ipv4_addr(const char *dev0, const char *dev1)
+{
+ SYS(fail, "ip -n at_ns0 link set dev %s up", dev0);
+ SYS(fail, "ip -n at_ns0 addr add dev %s %s/24", dev0, IP4_ADDR_TUNL_DEV0);
+ SYS(fail, "ip link set dev %s up", dev1);
+ SYS(fail, "ip addr add dev %s %s/24", dev1, IP4_ADDR_TUNL_DEV1);
+
+ return 0;
+fail:
+ return 1;
+}
+
static int add_ipip_tunnel(enum ipip_encap encap)
{
int err;
@@ -356,6 +391,99 @@ static void delete_xfrm_tunnel(void)
IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN);
}
+static int add_ipv4_tunnel(const char *dev0, const char *dev1,
+ const char *type, const char *opt)
+{
+ if (!type || !opt || !dev0 || !dev1)
+ return -1;
+
+ SYS(fail, "ip -n at_ns0 link add dev %s type %s %s local %s remote %s",
+ dev0, type, opt, IP4_ADDR_VETH0, IP4_ADDR1_VETH1);
+
+ SYS(fail, "ip link add dev %s type %s external", dev1, type);
+
+ return set_ipv4_addr(dev0, dev1);
+fail:
+ return -1;
+}
+
+static void delete_tunnel(const char *dev0, const char *dev1)
+{
+ if (!dev0 || !dev1)
+ return;
+
+ SYS_NOFAIL("ip netns exec at_ns0 ip link delete dev %s", dev0);
+ SYS_NOFAIL("ip link delete dev %s", dev1);
+}
+
+static int set_ipv6_addr(const char *dev0, const char *dev1)
+{
+ /* disable IPv6 DAD because it might take too long and fail tests */
+ SYS(fail, "ip -n at_ns0 addr add %s/96 dev veth0 nodad", IP6_ADDR_VETH0);
+ SYS(fail, "ip -n at_ns0 link set dev veth0 up");
+ SYS(fail, "ip addr add %s/96 dev veth1 nodad", IP6_ADDR1_VETH1);
+ SYS(fail, "ip link set dev veth1 up");
+
+ SYS(fail, "ip -n at_ns0 addr add dev %s %s/24", dev0, IP4_ADDR_TUNL_DEV0);
+ SYS(fail, "ip -n at_ns0 addr add dev %s %s/96 nodad", dev0, IP6_ADDR_TUNL_DEV0);
+ SYS(fail, "ip -n at_ns0 link set dev %s up", dev0);
+
+ SYS(fail, "ip addr add dev %s %s/24", dev1, IP4_ADDR_TUNL_DEV1);
+ SYS(fail, "ip addr add dev %s %s/96 nodad", dev1, IP6_ADDR_TUNL_DEV1);
+ SYS(fail, "ip link set dev %s up", dev1);
+ return 0;
+fail:
+ return 1;
+}
+
+static int add_ipv6_tunnel(const char *dev0, const char *dev1,
+ const char *type, const char *opt)
+{
+ if (!type || !opt || !dev0 || !dev1)
+ return -1;
+
+ SYS(fail, "ip -n at_ns0 link add dev %s type %s %s local %s remote %s",
+ dev0, type, opt, IP6_ADDR_VETH0, IP6_ADDR1_VETH1);
+
+ SYS(fail, "ip link add dev %s type %s external", dev1, type);
+
+ return set_ipv6_addr(dev0, dev1);
+fail:
+ return -1;
+}
+
+static int add_geneve_tunnel(const char *dev0, const char *dev1,
+ const char *type, const char *opt)
+{
+ if (!type || !opt || !dev0 || !dev1)
+ return -1;
+
+ SYS(fail, "ip -n at_ns0 link add dev %s type %s id 2 %s remote %s",
+ dev0, type, opt, IP4_ADDR1_VETH1);
+
+ SYS(fail, "ip link add dev %s type %s %s external", dev1, type, opt);
+
+ return set_ipv4_addr(dev0, dev1);
+fail:
+ return -1;
+}
+
+static int add_ip6geneve_tunnel(const char *dev0, const char *dev1,
+ const char *type, const char *opt)
+{
+ if (!type || !opt || !dev0 || !dev1)
+ return -1;
+
+ SYS(fail, "ip -n at_ns0 link add dev %s type %s id 22 %s remote %s",
+ dev0, type, opt, IP6_ADDR1_VETH1);
+
+ SYS(fail, "ip link add dev %s type %s %s external", dev1, type, opt);
+
+ return set_ipv6_addr(dev0, dev1);
+fail:
+ return -1;
+}
+
static int test_ping(int family, const char *addr)
{
SYS(fail, "%s %s %s > /dev/null", ping_command(family), PING_ARGS, addr);
@@ -364,32 +492,76 @@ fail:
return -1;
}
-static int attach_tc_prog(struct bpf_tc_hook *hook, int igr_fd, int egr_fd)
+static void ping_dev0(void)
{
+ /* ping from root namespace test */
+ test_ping(AF_INET, IP4_ADDR_TUNL_DEV0);
+}
+
+static void ping_dev1(void)
+{
+ struct nstoken *nstoken;
+
+ /* ping from at_ns0 namespace test */
+ nstoken = open_netns("at_ns0");
+ if (!ASSERT_OK_PTR(nstoken, "setns"))
+ return;
+
+ test_ping(AF_INET, IP4_ADDR_TUNL_DEV1);
+ close_netns(nstoken);
+}
+
+static void ping6_veth0(void)
+{
+ test_ping(AF_INET6, IP6_ADDR_VETH0);
+}
+
+static void ping6_dev0(void)
+{
+ test_ping(AF_INET6, IP6_ADDR_TUNL_DEV0);
+}
+
+static void ping6_dev1(void)
+{
+ struct nstoken *nstoken;
+
+ /* ping from at_ns0 namespace test */
+ nstoken = open_netns("at_ns0");
+ if (!ASSERT_OK_PTR(nstoken, "setns"))
+ return;
+
+ test_ping(AF_INET, IP6_ADDR_TUNL_DEV1);
+ close_netns(nstoken);
+}
+
+static int attach_tc_prog(int ifindex, int igr_fd, int egr_fd)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = ifindex,
+ .attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS);
DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1,
.priority = 1, .prog_fd = igr_fd);
DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts2, .handle = 1,
.priority = 1, .prog_fd = egr_fd);
int ret;
- ret = bpf_tc_hook_create(hook);
+ ret = bpf_tc_hook_create(&hook);
if (!ASSERT_OK(ret, "create tc hook"))
return ret;
if (igr_fd >= 0) {
- hook->attach_point = BPF_TC_INGRESS;
- ret = bpf_tc_attach(hook, &opts1);
+ hook.attach_point = BPF_TC_INGRESS;
+ ret = bpf_tc_attach(&hook, &opts1);
if (!ASSERT_OK(ret, "bpf_tc_attach")) {
- bpf_tc_hook_destroy(hook);
+ bpf_tc_hook_destroy(&hook);
return ret;
}
}
if (egr_fd >= 0) {
- hook->attach_point = BPF_TC_EGRESS;
- ret = bpf_tc_attach(hook, &opts2);
+ hook.attach_point = BPF_TC_EGRESS;
+ ret = bpf_tc_attach(&hook, &opts2);
if (!ASSERT_OK(ret, "bpf_tc_attach")) {
- bpf_tc_hook_destroy(hook);
+ bpf_tc_hook_destroy(&hook);
return ret;
}
}
@@ -397,6 +569,50 @@ static int attach_tc_prog(struct bpf_tc_hook *hook, int igr_fd, int egr_fd)
return 0;
}
+static int generic_attach(const char *dev, int igr_fd, int egr_fd)
+{
+ int ifindex;
+
+ if (!ASSERT_OK_FD(igr_fd, "check ingress fd"))
+ return -1;
+ if (!ASSERT_OK_FD(egr_fd, "check egress fd"))
+ return -1;
+
+ ifindex = if_nametoindex(dev);
+ if (!ASSERT_NEQ(ifindex, 0, "get ifindex"))
+ return -1;
+
+ return attach_tc_prog(ifindex, igr_fd, egr_fd);
+}
+
+static int generic_attach_igr(const char *dev, int igr_fd)
+{
+ int ifindex;
+
+ if (!ASSERT_OK_FD(igr_fd, "check ingress fd"))
+ return -1;
+
+ ifindex = if_nametoindex(dev);
+ if (!ASSERT_NEQ(ifindex, 0, "get ifindex"))
+ return -1;
+
+ return attach_tc_prog(ifindex, igr_fd, -1);
+}
+
+static int generic_attach_egr(const char *dev, int egr_fd)
+{
+ int ifindex;
+
+ if (!ASSERT_OK_FD(egr_fd, "check egress fd"))
+ return -1;
+
+ ifindex = if_nametoindex(dev);
+ if (!ASSERT_NEQ(ifindex, 0, "get ifindex"))
+ return -1;
+
+ return attach_tc_prog(ifindex, -1, egr_fd);
+}
+
static void test_vxlan_tunnel(void)
{
struct test_tunnel_kern *skel = NULL;
@@ -404,11 +620,9 @@ static void test_vxlan_tunnel(void)
int local_ip_map_fd = -1;
int set_src_prog_fd, get_src_prog_fd;
int set_dst_prog_fd;
- int key = 0, ifindex = -1;
+ int key = 0;
uint local_ip;
int err;
- DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
- .attach_point = BPF_TC_INGRESS);
/* add vxlan tunnel */
err = add_vxlan_tunnel();
@@ -419,42 +633,22 @@ static void test_vxlan_tunnel(void)
skel = test_tunnel_kern__open_and_load();
if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
goto done;
- ifindex = if_nametoindex(VXLAN_TUNL_DEV1);
- if (!ASSERT_NEQ(ifindex, 0, "vxlan11 ifindex"))
- goto done;
- tc_hook.ifindex = ifindex;
get_src_prog_fd = bpf_program__fd(skel->progs.vxlan_get_tunnel_src);
set_src_prog_fd = bpf_program__fd(skel->progs.vxlan_set_tunnel_src);
- if (!ASSERT_GE(get_src_prog_fd, 0, "bpf_program__fd"))
- goto done;
- if (!ASSERT_GE(set_src_prog_fd, 0, "bpf_program__fd"))
- goto done;
- if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd))
+ if (generic_attach(VXLAN_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd))
goto done;
/* load and attach bpf prog to veth dev tc hook point */
- ifindex = if_nametoindex("veth1");
- if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex"))
- goto done;
- tc_hook.ifindex = ifindex;
set_dst_prog_fd = bpf_program__fd(skel->progs.veth_set_outer_dst);
- if (!ASSERT_GE(set_dst_prog_fd, 0, "bpf_program__fd"))
- goto done;
- if (attach_tc_prog(&tc_hook, set_dst_prog_fd, -1))
+ if (generic_attach_igr("veth1", set_dst_prog_fd))
goto done;
/* load and attach prog set_md to tunnel dev tc hook point at_ns0 */
nstoken = open_netns("at_ns0");
if (!ASSERT_OK_PTR(nstoken, "setns src"))
goto done;
- ifindex = if_nametoindex(VXLAN_TUNL_DEV0);
- if (!ASSERT_NEQ(ifindex, 0, "vxlan00 ifindex"))
- goto done;
- tc_hook.ifindex = ifindex;
set_dst_prog_fd = bpf_program__fd(skel->progs.vxlan_set_tunnel_dst);
- if (!ASSERT_GE(set_dst_prog_fd, 0, "bpf_program__fd"))
- goto done;
- if (attach_tc_prog(&tc_hook, -1, set_dst_prog_fd))
+ if (generic_attach_egr(VXLAN_TUNL_DEV0, set_dst_prog_fd))
goto done;
close_netns(nstoken);
@@ -468,9 +662,7 @@ static void test_vxlan_tunnel(void)
goto done;
/* ping test */
- err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV0);
- if (!ASSERT_OK(err, "test_ping"))
- goto done;
+ ping_dev0();
done:
/* delete vxlan tunnel */
@@ -488,11 +680,9 @@ static void test_ip6vxlan_tunnel(void)
int local_ip_map_fd = -1;
int set_src_prog_fd, get_src_prog_fd;
int set_dst_prog_fd;
- int key = 0, ifindex = -1;
+ int key = 0;
uint local_ip;
int err;
- DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
- .attach_point = BPF_TC_INGRESS);
/* add vxlan tunnel */
err = add_ip6vxlan_tunnel();
@@ -503,31 +693,17 @@ static void test_ip6vxlan_tunnel(void)
skel = test_tunnel_kern__open_and_load();
if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
goto done;
- ifindex = if_nametoindex(IP6VXLAN_TUNL_DEV1);
- if (!ASSERT_NEQ(ifindex, 0, "ip6vxlan11 ifindex"))
- goto done;
- tc_hook.ifindex = ifindex;
get_src_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_get_tunnel_src);
set_src_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_set_tunnel_src);
- if (!ASSERT_GE(set_src_prog_fd, 0, "bpf_program__fd"))
- goto done;
- if (!ASSERT_GE(get_src_prog_fd, 0, "bpf_program__fd"))
- goto done;
- if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd))
+ if (generic_attach(IP6VXLAN_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd))
goto done;
/* load and attach prog set_md to tunnel dev tc hook point at_ns0 */
nstoken = open_netns("at_ns0");
if (!ASSERT_OK_PTR(nstoken, "setns src"))
goto done;
- ifindex = if_nametoindex(IP6VXLAN_TUNL_DEV0);
- if (!ASSERT_NEQ(ifindex, 0, "ip6vxlan00 ifindex"))
- goto done;
- tc_hook.ifindex = ifindex;
set_dst_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_set_tunnel_dst);
- if (!ASSERT_GE(set_dst_prog_fd, 0, "bpf_program__fd"))
- goto done;
- if (attach_tc_prog(&tc_hook, -1, set_dst_prog_fd))
+ if (generic_attach_egr(IP6VXLAN_TUNL_DEV0, set_dst_prog_fd))
goto done;
close_netns(nstoken);
@@ -541,9 +717,7 @@ static void test_ip6vxlan_tunnel(void)
goto done;
/* ping test */
- err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV0);
- if (!ASSERT_OK(err, "test_ping"))
- goto done;
+ ping_dev0();
done:
/* delete ipv6 vxlan tunnel */
@@ -557,12 +731,8 @@ done:
static void test_ipip_tunnel(enum ipip_encap encap)
{
struct test_tunnel_kern *skel = NULL;
- struct nstoken *nstoken;
int set_src_prog_fd, get_src_prog_fd;
- int ifindex = -1;
int err;
- DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
- .attach_point = BPF_TC_INGRESS);
/* add ipip tunnel */
err = add_ipip_tunnel(encap);
@@ -573,10 +743,6 @@ static void test_ipip_tunnel(enum ipip_encap encap)
skel = test_tunnel_kern__open_and_load();
if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
goto done;
- ifindex = if_nametoindex(IPIP_TUNL_DEV1);
- if (!ASSERT_NEQ(ifindex, 0, "ipip11 ifindex"))
- goto done;
- tc_hook.ifindex = ifindex;
switch (encap) {
case FOU:
@@ -598,26 +764,11 @@ static void test_ipip_tunnel(enum ipip_encap encap)
skel->progs.ipip_set_tunnel);
}
- if (!ASSERT_GE(set_src_prog_fd, 0, "bpf_program__fd"))
- goto done;
- if (!ASSERT_GE(get_src_prog_fd, 0, "bpf_program__fd"))
- goto done;
- if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd))
+ if (generic_attach(IPIP_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd))
goto done;
- /* ping from root namespace test */
- err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV0);
- if (!ASSERT_OK(err, "test_ping"))
- goto done;
-
- /* ping from at_ns0 namespace test */
- nstoken = open_netns("at_ns0");
- if (!ASSERT_OK_PTR(nstoken, "setns"))
- goto done;
- err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV1);
- if (!ASSERT_OK(err, "test_ping"))
- goto done;
- close_netns(nstoken);
+ ping_dev0();
+ ping_dev1();
done:
/* delete ipip tunnel */
@@ -628,11 +779,8 @@ done:
static void test_xfrm_tunnel(void)
{
- DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
- .attach_point = BPF_TC_INGRESS);
LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
struct test_tunnel_kern *skel = NULL;
- struct nstoken *nstoken;
int xdp_prog_fd;
int tc_prog_fd;
int ifindex;
@@ -646,19 +794,16 @@ static void test_xfrm_tunnel(void)
if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
goto done;
- ifindex = if_nametoindex("veth1");
- if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex"))
- goto done;
/* attach tc prog to tunnel dev */
- tc_hook.ifindex = ifindex;
tc_prog_fd = bpf_program__fd(skel->progs.xfrm_get_state);
- if (!ASSERT_GE(tc_prog_fd, 0, "bpf_program__fd"))
- goto done;
- if (attach_tc_prog(&tc_hook, tc_prog_fd, -1))
+ if (generic_attach_igr("veth1", tc_prog_fd))
goto done;
/* attach xdp prog to tunnel dev */
+ ifindex = if_nametoindex("veth1");
+ if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex"))
+ goto done;
xdp_prog_fd = bpf_program__fd(skel->progs.xfrm_get_state_xdp);
if (!ASSERT_GE(xdp_prog_fd, 0, "bpf_program__fd"))
goto done;
@@ -666,14 +811,7 @@ static void test_xfrm_tunnel(void)
if (!ASSERT_OK(err, "bpf_xdp_attach"))
goto done;
- /* ping from at_ns0 namespace test */
- nstoken = open_netns("at_ns0");
- if (!ASSERT_OK_PTR(nstoken, "setns"))
- goto done;
- err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV1);
- close_netns(nstoken);
- if (!ASSERT_OK(err, "test_ping"))
- goto done;
+ ping_dev1();
if (!ASSERT_EQ(skel->bss->xfrm_reqid, 1, "req_id"))
goto done;
@@ -690,6 +828,281 @@ done:
test_tunnel_kern__destroy(skel);
}
+enum gre_test {
+ GRE,
+ GRE_NOKEY,
+ GRETAP,
+ GRETAP_NOKEY,
+};
+
+static void test_gre_tunnel(enum gre_test test)
+{
+ struct test_tunnel_kern *skel;
+ int set_fd, get_fd;
+ int err;
+
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ return;
+
+ switch (test) {
+ case GRE:
+ err = add_ipv4_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1, "gre", "seq");
+ set_fd = bpf_program__fd(skel->progs.gre_set_tunnel_no_key);
+ get_fd = bpf_program__fd(skel->progs.gre_get_tunnel);
+ break;
+ case GRE_NOKEY:
+ err = add_ipv4_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1, "gre", "seq key 2");
+ set_fd = bpf_program__fd(skel->progs.gre_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.gre_get_tunnel);
+ break;
+ case GRETAP:
+ err = add_ipv4_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1, "gretap", "seq");
+ set_fd = bpf_program__fd(skel->progs.gre_set_tunnel_no_key);
+ get_fd = bpf_program__fd(skel->progs.gre_get_tunnel);
+ break;
+ case GRETAP_NOKEY:
+ err = add_ipv4_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1, "gretap", "seq key 2");
+ set_fd = bpf_program__fd(skel->progs.gre_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.gre_get_tunnel);
+ break;
+ }
+ if (!ASSERT_OK(err, "add tunnel"))
+ goto done;
+
+ if (generic_attach(GRE_TUNL_DEV1, get_fd, set_fd))
+ goto done;
+
+ ping_dev0();
+ ping_dev1();
+
+done:
+ delete_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1);
+ test_tunnel_kern__destroy(skel);
+}
+
+enum ip6gre_test {
+ IP6GRE,
+ IP6GRETAP
+};
+
+static void test_ip6gre_tunnel(enum ip6gre_test test)
+{
+ struct test_tunnel_kern *skel;
+ int set_fd, get_fd;
+ int err;
+
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ return;
+
+ switch (test) {
+ case IP6GRE:
+ err = add_ipv6_tunnel(IP6GRE_TUNL_DEV0, IP6GRE_TUNL_DEV1,
+ "ip6gre", "flowlabel 0xbcdef key 2");
+ break;
+ case IP6GRETAP:
+ err = add_ipv6_tunnel(IP6GRE_TUNL_DEV0, IP6GRE_TUNL_DEV1,
+ "ip6gretap", "flowlabel 0xbcdef key 2");
+ break;
+ }
+ if (!ASSERT_OK(err, "add tunnel"))
+ goto done;
+
+ set_fd = bpf_program__fd(skel->progs.ip6gretap_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.ip6gretap_get_tunnel);
+ if (generic_attach(IP6GRE_TUNL_DEV1, get_fd, set_fd))
+ goto done;
+
+ ping6_veth0();
+ ping6_dev1();
+ ping_dev0();
+ ping_dev1();
+done:
+ delete_tunnel(IP6GRE_TUNL_DEV0, IP6GRE_TUNL_DEV1);
+ test_tunnel_kern__destroy(skel);
+}
+
+enum erspan_test {
+ V1,
+ V2
+};
+
+static void test_erspan_tunnel(enum erspan_test test)
+{
+ struct test_tunnel_kern *skel;
+ int set_fd, get_fd;
+ int err;
+
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ return;
+
+ switch (test) {
+ case V1:
+ err = add_ipv4_tunnel(ERSPAN_TUNL_DEV0, ERSPAN_TUNL_DEV1,
+ "erspan", "seq key 2 erspan_ver 1 erspan 123");
+ break;
+ case V2:
+ err = add_ipv4_tunnel(ERSPAN_TUNL_DEV0, ERSPAN_TUNL_DEV1,
+ "erspan",
+ "seq key 2 erspan_ver 2 erspan_dir egress erspan_hwid 3");
+ break;
+ }
+ if (!ASSERT_OK(err, "add tunnel"))
+ goto done;
+
+ set_fd = bpf_program__fd(skel->progs.erspan_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.erspan_get_tunnel);
+ if (generic_attach(ERSPAN_TUNL_DEV1, get_fd, set_fd))
+ goto done;
+
+ ping_dev0();
+ ping_dev1();
+done:
+ delete_tunnel(ERSPAN_TUNL_DEV0, ERSPAN_TUNL_DEV1);
+ test_tunnel_kern__destroy(skel);
+}
+
+static void test_ip6erspan_tunnel(enum erspan_test test)
+{
+ struct test_tunnel_kern *skel;
+ int set_fd, get_fd;
+ int err;
+
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ return;
+
+ switch (test) {
+ case V1:
+ err = add_ipv6_tunnel(IP6ERSPAN_TUNL_DEV0, IP6ERSPAN_TUNL_DEV1,
+ "ip6erspan", "seq key 2 erspan_ver 1 erspan 123");
+ break;
+ case V2:
+ err = add_ipv6_tunnel(IP6ERSPAN_TUNL_DEV0, IP6ERSPAN_TUNL_DEV1,
+ "ip6erspan",
+ "seq key 2 erspan_ver 2 erspan_dir egress erspan_hwid 7");
+ break;
+ }
+ if (!ASSERT_OK(err, "add tunnel"))
+ goto done;
+
+ set_fd = bpf_program__fd(skel->progs.ip4ip6erspan_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.ip4ip6erspan_get_tunnel);
+ if (generic_attach(IP6ERSPAN_TUNL_DEV1, get_fd, set_fd))
+ goto done;
+
+ ping6_veth0();
+ ping_dev1();
+done:
+ delete_tunnel(IP6ERSPAN_TUNL_DEV0, IP6ERSPAN_TUNL_DEV1);
+ test_tunnel_kern__destroy(skel);
+}
+
+static void test_geneve_tunnel(void)
+{
+ struct test_tunnel_kern *skel;
+ int set_fd, get_fd;
+ int err;
+
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ return;
+
+ err = add_geneve_tunnel(GENEVE_TUNL_DEV0, GENEVE_TUNL_DEV1,
+ "geneve", "dstport 6081");
+ if (!ASSERT_OK(err, "add tunnel"))
+ goto done;
+
+ set_fd = bpf_program__fd(skel->progs.geneve_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.geneve_get_tunnel);
+ if (generic_attach(GENEVE_TUNL_DEV1, get_fd, set_fd))
+ goto done;
+
+ ping_dev0();
+ ping_dev1();
+done:
+ delete_tunnel(GENEVE_TUNL_DEV0, GENEVE_TUNL_DEV1);
+ test_tunnel_kern__destroy(skel);
+}
+
+static void test_ip6geneve_tunnel(void)
+{
+ struct test_tunnel_kern *skel;
+ int set_fd, get_fd;
+ int err;
+
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ return;
+
+ err = add_ip6geneve_tunnel(IP6GENEVE_TUNL_DEV0, IP6GENEVE_TUNL_DEV1,
+ "geneve", "");
+ if (!ASSERT_OK(err, "add tunnel"))
+ goto done;
+
+ set_fd = bpf_program__fd(skel->progs.ip6geneve_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.ip6geneve_get_tunnel);
+ if (generic_attach(IP6GENEVE_TUNL_DEV1, get_fd, set_fd))
+ goto done;
+
+ ping_dev0();
+ ping_dev1();
+done:
+ delete_tunnel(IP6GENEVE_TUNL_DEV0, IP6GENEVE_TUNL_DEV1);
+ test_tunnel_kern__destroy(skel);
+}
+
+enum ip6tnl_test {
+ IPIP6,
+ IP6IP6
+};
+
+static void test_ip6tnl_tunnel(enum ip6tnl_test test)
+{
+ struct test_tunnel_kern *skel;
+ int set_fd, get_fd;
+ int err;
+
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ return;
+
+ err = add_ipv6_tunnel(IP6TNL_TUNL_DEV0, IP6TNL_TUNL_DEV1, "ip6tnl", "");
+ if (!ASSERT_OK(err, "add tunnel"))
+ goto done;
+
+ switch (test) {
+ case IPIP6:
+ set_fd = bpf_program__fd(skel->progs.ipip6_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.ipip6_get_tunnel);
+ break;
+ case IP6IP6:
+ set_fd = bpf_program__fd(skel->progs.ip6ip6_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.ip6ip6_get_tunnel);
+ break;
+ }
+ if (generic_attach(IP6TNL_TUNL_DEV1, get_fd, set_fd))
+ goto done;
+
+ ping6_veth0();
+ switch (test) {
+ case IPIP6:
+ ping_dev0();
+ ping_dev1();
+ break;
+ case IP6IP6:
+ ping6_dev0();
+ ping6_dev1();
+ break;
+ }
+
+done:
+ delete_tunnel(IP6TNL_TUNL_DEV0, IP6TNL_TUNL_DEV1);
+ test_tunnel_kern__destroy(skel);
+}
+
#define RUN_TEST(name, ...) \
({ \
if (test__start_subtest(#name)) { \
@@ -707,6 +1120,20 @@ static void *test_tunnel_run_tests(void *arg)
RUN_TEST(ipip_tunnel, FOU);
RUN_TEST(ipip_tunnel, GUE);
RUN_TEST(xfrm_tunnel);
+ RUN_TEST(gre_tunnel, GRE);
+ RUN_TEST(gre_tunnel, GRE_NOKEY);
+ RUN_TEST(gre_tunnel, GRETAP);
+ RUN_TEST(gre_tunnel, GRETAP_NOKEY);
+ RUN_TEST(ip6gre_tunnel, IP6GRE);
+ RUN_TEST(ip6gre_tunnel, IP6GRETAP);
+ RUN_TEST(erspan_tunnel, V1);
+ RUN_TEST(erspan_tunnel, V2);
+ RUN_TEST(ip6erspan_tunnel, V1);
+ RUN_TEST(ip6erspan_tunnel, V2);
+ RUN_TEST(geneve_tunnel);
+ RUN_TEST(ip6geneve_tunnel);
+ RUN_TEST(ip6tnl_tunnel, IPIP6);
+ RUN_TEST(ip6tnl_tunnel, IP6IP6);
return NULL;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_veristat.c b/tools/testing/selftests/bpf/prog_tests/test_veristat.c
new file mode 100644
index 000000000000..a95b42bf744a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_veristat.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <string.h>
+#include <stdio.h>
+
+#define __CHECK_STR(str, name) \
+ do { \
+ if (!ASSERT_HAS_SUBSTR(fix->output, (str), (name))) \
+ goto out; \
+ } while (0)
+
+struct fixture {
+ char tmpfile[80];
+ int fd;
+ char *output;
+ size_t sz;
+ char veristat[80];
+};
+
+static struct fixture *init_fixture(void)
+{
+ struct fixture *fix = malloc(sizeof(struct fixture));
+
+ /* for no_alu32 and cpuv4 veristat is in parent folder */
+ if (access("./veristat", F_OK) == 0)
+ strcpy(fix->veristat, "./veristat");
+ else if (access("../veristat", F_OK) == 0)
+ strcpy(fix->veristat, "../veristat");
+ else
+ PRINT_FAIL("Can't find veristat binary");
+
+ snprintf(fix->tmpfile, sizeof(fix->tmpfile), "/tmp/test_veristat.XXXXXX");
+ fix->fd = mkstemp(fix->tmpfile);
+ fix->sz = 1000000;
+ fix->output = malloc(fix->sz);
+ return fix;
+}
+
+static void teardown_fixture(struct fixture *fix)
+{
+ free(fix->output);
+ close(fix->fd);
+ remove(fix->tmpfile);
+ free(fix);
+}
+
+static void test_set_global_vars_succeeds(void)
+{
+ struct fixture *fix = init_fixture();
+
+ SYS(out,
+ "%s set_global_vars.bpf.o"\
+ " -G \"var_s64 = 0xf000000000000001\" "\
+ " -G \"var_u64 = 0xfedcba9876543210\" "\
+ " -G \"var_s32 = -0x80000000\" "\
+ " -G \"var_u32 = 0x76543210\" "\
+ " -G \"var_s16 = -32768\" "\
+ " -G \"var_u16 = 60652\" "\
+ " -G \"var_s8 = -128\" "\
+ " -G \"var_u8 = 255\" "\
+ " -G \"var_ea = EA2\" "\
+ " -G \"var_eb = EB2\" "\
+ " -G \"var_ec = EC2\" "\
+ " -G \"var_b = 1\" "\
+ "-vl2 > %s", fix->veristat, fix->tmpfile);
+
+ read(fix->fd, fix->output, fix->sz);
+ __CHECK_STR("_w=0xf000000000000001 ", "var_s64 = 0xf000000000000001");
+ __CHECK_STR("_w=0xfedcba9876543210 ", "var_u64 = 0xfedcba9876543210");
+ __CHECK_STR("_w=0x80000000 ", "var_s32 = -0x80000000");
+ __CHECK_STR("_w=0x76543210 ", "var_u32 = 0x76543210");
+ __CHECK_STR("_w=0x8000 ", "var_s16 = -32768");
+ __CHECK_STR("_w=0xecec ", "var_u16 = 60652");
+ __CHECK_STR("_w=128 ", "var_s8 = -128");
+ __CHECK_STR("_w=255 ", "var_u8 = 255");
+ __CHECK_STR("_w=11 ", "var_ea = EA2");
+ __CHECK_STR("_w=12 ", "var_eb = EB2");
+ __CHECK_STR("_w=13 ", "var_ec = EC2");
+ __CHECK_STR("_w=1 ", "var_b = 1");
+
+out:
+ teardown_fixture(fix);
+}
+
+static void test_set_global_vars_from_file_succeeds(void)
+{
+ struct fixture *fix = init_fixture();
+ char input_file[80];
+ const char *vars = "var_s16 = -32768\nvar_u16 = 60652";
+ int fd;
+
+ snprintf(input_file, sizeof(input_file), "/tmp/veristat_input.XXXXXX");
+ fd = mkstemp(input_file);
+ if (!ASSERT_GE(fd, 0, "valid fd"))
+ goto out;
+
+ write(fd, vars, strlen(vars));
+ syncfs(fd);
+ SYS(out, "%s set_global_vars.bpf.o -G \"@%s\" -vl2 > %s",
+ fix->veristat, input_file, fix->tmpfile);
+ read(fix->fd, fix->output, fix->sz);
+ __CHECK_STR("_w=0x8000 ", "var_s16 = -32768");
+ __CHECK_STR("_w=0xecec ", "var_u16 = 60652");
+
+out:
+ close(fd);
+ remove(input_file);
+ teardown_fixture(fix);
+}
+
+static void test_set_global_vars_out_of_range(void)
+{
+ struct fixture *fix = init_fixture();
+
+ SYS_FAIL(out,
+ "%s set_global_vars.bpf.o -G \"var_s32 = 2147483648\" -vl2 2> %s",
+ fix->veristat, fix->tmpfile);
+
+ read(fix->fd, fix->output, fix->sz);
+ __CHECK_STR("is out of range [-2147483648; 2147483647]", "out of range");
+
+out:
+ teardown_fixture(fix);
+}
+
+void test_veristat(void)
+{
+ if (test__start_subtest("set_global_vars_succeeds"))
+ test_set_global_vars_succeeds();
+
+ if (test__start_subtest("set_global_vars_out_of_range"))
+ test_set_global_vars_out_of_range();
+
+ if (test__start_subtest("set_global_vars_from_file_succeeds"))
+ test_set_global_vars_from_file_succeeds();
+}
+
+#undef __CHECK_STR
diff --git a/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c b/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c
index 8d75424fe6bc..3e98a1665936 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c
@@ -3,17 +3,50 @@
/* Create 3 namespaces with 3 veth peers, and forward packets in-between using
* native XDP
*
- * XDP_TX
- * NS1(veth11) NS2(veth22) NS3(veth33)
- * | | |
- * | | |
- * (veth1, (veth2, (veth3,
- * id:111) id:122) id:133)
- * ^ | ^ | ^ |
- * | | XDP_REDIRECT | | XDP_REDIRECT | |
- * | ------------------ ------------------ |
- * -----------------------------------------
- * XDP_REDIRECT
+ * Network topology:
+ * ---------- ---------- ----------
+ * | NS1 | | NS2 | | NS3 |
+ * | veth11 | | veth22 | | veth33 |
+ * ----|----- -----|---- -----|----
+ * | | |
+ * ----|------------------|----------------|----
+ * | veth1 veth2 veth3 |
+ * | |
+ * | NSO |
+ * ---------------------------------------------
+ *
+ * Test cases:
+ * - [test_xdp_veth_redirect] : ping veth33 from veth11
+ *
+ * veth11 veth22 veth33
+ * (XDP_PASS) (XDP_TX) (XDP_PASS)
+ * | | |
+ * | | |
+ * veth1 veth2 veth3
+ * (XDP_REDIRECT) (XDP_REDIRECT) (XDP_REDIRECT)
+ * ^ | ^ | ^ |
+ * | | | | | |
+ * | ------------------ ------------------ |
+ * -----------------------------------------
+ *
+ * - [test_xdp_veth_broadcast_redirect]: broadcast from veth11
+ * - IPv4 ping : BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS
+ * -> echo request received by all except veth11
+ * - IPv4 ping : BPF_F_BROADCAST
+ * -> echo request received by all veth
+ * - [test_xdp_veth_egress]:
+ * - all src mac should be the magic mac
+ *
+ * veth11 veth22 veth33
+ * (XDP_PASS) (XDP_PASS) (XDP_PASS)
+ * | | |
+ * | | |
+ * veth1 veth2 veth3
+ * (XDP_REDIRECT) (XDP_REDIRECT) (XDP_REDIRECT)
+ * | ^ ^
+ * | | |
+ * ----------------------------------------
+ *
*/
#define _GNU_SOURCE
@@ -22,192 +55,545 @@
#include "network_helpers.h"
#include "xdp_dummy.skel.h"
#include "xdp_redirect_map.skel.h"
+#include "xdp_redirect_multi_kern.skel.h"
#include "xdp_tx.skel.h"
+#include <uapi/linux/if_link.h>
#define VETH_PAIRS_COUNT 3
-#define NS_SUFFIX_LEN 6
-#define VETH_NAME_MAX_LEN 16
+#define VETH_NAME_MAX_LEN 32
+#define IP_MAX_LEN 16
#define IP_SRC "10.1.1.11"
#define IP_DST "10.1.1.33"
-#define IP_CMD_MAX_LEN 128
-
-struct skeletons {
- struct xdp_dummy *xdp_dummy;
- struct xdp_tx *xdp_tx;
- struct xdp_redirect_map *xdp_redirect_maps;
-};
+#define IP_NEIGH "10.1.1.253"
+#define PROG_NAME_MAX_LEN 128
+#define NS_NAME_MAX_LEN 32
struct veth_configuration {
char local_veth[VETH_NAME_MAX_LEN]; /* Interface in main namespace */
char remote_veth[VETH_NAME_MAX_LEN]; /* Peer interface in dedicated namespace*/
- const char *namespace; /* Namespace for the remote veth */
- char next_veth[VETH_NAME_MAX_LEN]; /* Local interface to redirect traffic to */
- char *remote_addr; /* IP address of the remote veth */
+ char namespace[NS_NAME_MAX_LEN]; /* Namespace for the remote veth */
+ int next_veth; /* Local interface to redirect traffic to */
+ char remote_addr[IP_MAX_LEN]; /* IP address of the remote veth */
};
-static struct veth_configuration config[VETH_PAIRS_COUNT] = {
- {
- .local_veth = "veth1",
- .remote_veth = "veth11",
- .next_veth = "veth2",
- .remote_addr = IP_SRC,
- .namespace = "ns-veth11"
- },
- {
- .local_veth = "veth2",
- .remote_veth = "veth22",
- .next_veth = "veth3",
- .remote_addr = NULL,
- .namespace = "ns-veth22"
- },
+struct net_configuration {
+ char ns0_name[NS_NAME_MAX_LEN];
+ struct veth_configuration veth_cfg[VETH_PAIRS_COUNT];
+};
+
+static const struct net_configuration default_config = {
+ .ns0_name = "ns0-",
{
- .local_veth = "veth3",
- .remote_veth = "veth33",
- .next_veth = "veth1",
- .remote_addr = IP_DST,
- .namespace = "ns-veth33"
+ {
+ .local_veth = "veth1-",
+ .remote_veth = "veth11",
+ .next_veth = 1,
+ .remote_addr = IP_SRC,
+ .namespace = "ns-veth11-"
+ },
+ {
+ .local_veth = "veth2-",
+ .remote_veth = "veth22",
+ .next_veth = 2,
+ .remote_addr = "",
+ .namespace = "ns-veth22-"
+ },
+ {
+ .local_veth = "veth3-",
+ .remote_veth = "veth33",
+ .next_veth = 0,
+ .remote_addr = IP_DST,
+ .namespace = "ns-veth33-"
+ }
}
};
-static int attach_programs_to_veth_pair(struct skeletons *skeletons, int index)
+struct prog_configuration {
+ char local_name[PROG_NAME_MAX_LEN]; /* BPF prog to attach to local_veth */
+ char remote_name[PROG_NAME_MAX_LEN]; /* BPF prog to attach to remote_veth */
+ u32 local_flags; /* XDP flags to use on local_veth */
+ u32 remote_flags; /* XDP flags to use on remote_veth */
+};
+
+static int attach_programs_to_veth_pair(struct bpf_object **objs, size_t nb_obj,
+ struct net_configuration *net_config,
+ struct prog_configuration *prog, int index)
{
struct bpf_program *local_prog, *remote_prog;
- struct bpf_link **local_link, **remote_link;
struct nstoken *nstoken;
- struct bpf_link *link;
- int interface;
-
- switch (index) {
- case 0:
- local_prog = skeletons->xdp_redirect_maps->progs.xdp_redirect_map_0;
- local_link = &skeletons->xdp_redirect_maps->links.xdp_redirect_map_0;
- remote_prog = skeletons->xdp_dummy->progs.xdp_dummy_prog;
- remote_link = &skeletons->xdp_dummy->links.xdp_dummy_prog;
- break;
- case 1:
- local_prog = skeletons->xdp_redirect_maps->progs.xdp_redirect_map_1;
- local_link = &skeletons->xdp_redirect_maps->links.xdp_redirect_map_1;
- remote_prog = skeletons->xdp_tx->progs.xdp_tx;
- remote_link = &skeletons->xdp_tx->links.xdp_tx;
- break;
- case 2:
- local_prog = skeletons->xdp_redirect_maps->progs.xdp_redirect_map_2;
- local_link = &skeletons->xdp_redirect_maps->links.xdp_redirect_map_2;
- remote_prog = skeletons->xdp_dummy->progs.xdp_dummy_prog;
- remote_link = &skeletons->xdp_dummy->links.xdp_dummy_prog;
- break;
+ int interface, ret, i;
+
+ for (i = 0; i < nb_obj; i++) {
+ local_prog = bpf_object__find_program_by_name(objs[i], prog[index].local_name);
+ if (local_prog)
+ break;
}
- interface = if_nametoindex(config[index].local_veth);
+ if (!ASSERT_OK_PTR(local_prog, "find local program"))
+ return -1;
+
+ for (i = 0; i < nb_obj; i++) {
+ remote_prog = bpf_object__find_program_by_name(objs[i], prog[index].remote_name);
+ if (remote_prog)
+ break;
+ }
+ if (!ASSERT_OK_PTR(remote_prog, "find remote program"))
+ return -1;
+
+ interface = if_nametoindex(net_config->veth_cfg[index].local_veth);
if (!ASSERT_NEQ(interface, 0, "non zero interface index"))
return -1;
- link = bpf_program__attach_xdp(local_prog, interface);
- if (!ASSERT_OK_PTR(link, "attach xdp program to local veth"))
+
+ ret = bpf_xdp_attach(interface, bpf_program__fd(local_prog),
+ prog[index].local_flags, NULL);
+ if (!ASSERT_OK(ret, "attach xdp program to local veth"))
return -1;
- *local_link = link;
- nstoken = open_netns(config[index].namespace);
+
+ nstoken = open_netns(net_config->veth_cfg[index].namespace);
if (!ASSERT_OK_PTR(nstoken, "switch to remote veth namespace"))
return -1;
- interface = if_nametoindex(config[index].remote_veth);
+
+ interface = if_nametoindex(net_config->veth_cfg[index].remote_veth);
if (!ASSERT_NEQ(interface, 0, "non zero interface index")) {
close_netns(nstoken);
return -1;
}
- link = bpf_program__attach_xdp(remote_prog, interface);
- *remote_link = link;
- close_netns(nstoken);
- if (!ASSERT_OK_PTR(link, "attach xdp program to remote veth"))
+
+ ret = bpf_xdp_attach(interface, bpf_program__fd(remote_prog),
+ prog[index].remote_flags, NULL);
+ if (!ASSERT_OK(ret, "attach xdp program to remote veth")) {
+ close_netns(nstoken);
return -1;
+ }
+ close_netns(nstoken);
return 0;
}
-static int configure_network(struct skeletons *skeletons)
+static int create_network(struct net_configuration *net_config)
{
- int interface_id;
- int map_fd;
- int err;
- int i = 0;
+ struct nstoken *nstoken = NULL;
+ int i, err;
+
+ memcpy(net_config, &default_config, sizeof(struct net_configuration));
+
+ /* Create unique namespaces */
+ err = append_tid(net_config->ns0_name, NS_NAME_MAX_LEN);
+ if (!ASSERT_OK(err, "append TID to ns0 name"))
+ goto fail;
+ SYS(fail, "ip netns add %s", net_config->ns0_name);
- /* First create and configure all interfaces */
for (i = 0; i < VETH_PAIRS_COUNT; i++) {
- SYS(fail, "ip netns add %s", config[i].namespace);
- SYS(fail, "ip link add %s type veth peer name %s netns %s",
- config[i].local_veth, config[i].remote_veth, config[i].namespace);
- SYS(fail, "ip link set dev %s up", config[i].local_veth);
- if (config[i].remote_addr)
- SYS(fail, "ip -n %s addr add %s/24 dev %s", config[i].namespace,
- config[i].remote_addr, config[i].remote_veth);
- SYS(fail, "ip -n %s link set dev %s up", config[i].namespace,
- config[i].remote_veth);
+ err = append_tid(net_config->veth_cfg[i].namespace, NS_NAME_MAX_LEN);
+ if (!ASSERT_OK(err, "append TID to ns name"))
+ goto fail;
+ SYS(fail, "ip netns add %s", net_config->veth_cfg[i].namespace);
}
- /* Then configure the redirect map and attach programs to interfaces */
- map_fd = bpf_map__fd(skeletons->xdp_redirect_maps->maps.tx_port);
- if (!ASSERT_GE(map_fd, 0, "open redirect map"))
+ /* Create interfaces */
+ nstoken = open_netns(net_config->ns0_name);
+ if (!nstoken)
goto fail;
+
for (i = 0; i < VETH_PAIRS_COUNT; i++) {
- interface_id = if_nametoindex(config[i].next_veth);
- if (!ASSERT_NEQ(interface_id, 0, "non zero interface index"))
- goto fail;
- err = bpf_map_update_elem(map_fd, &i, &interface_id, BPF_ANY);
- if (!ASSERT_OK(err, "configure interface redirection through map"))
- goto fail;
- if (attach_programs_to_veth_pair(skeletons, i))
- goto fail;
+ SYS(fail, "ip link add %s type veth peer name %s netns %s",
+ net_config->veth_cfg[i].local_veth, net_config->veth_cfg[i].remote_veth,
+ net_config->veth_cfg[i].namespace);
+ SYS(fail, "ip link set dev %s up", net_config->veth_cfg[i].local_veth);
+ if (net_config->veth_cfg[i].remote_addr[0])
+ SYS(fail, "ip -n %s addr add %s/24 dev %s",
+ net_config->veth_cfg[i].namespace,
+ net_config->veth_cfg[i].remote_addr,
+ net_config->veth_cfg[i].remote_veth);
+ SYS(fail, "ip -n %s link set dev %s up", net_config->veth_cfg[i].namespace,
+ net_config->veth_cfg[i].remote_veth);
}
+ close_netns(nstoken);
return 0;
fail:
+ close_netns(nstoken);
return -1;
}
-static void cleanup_network(void)
+static void cleanup_network(struct net_configuration *net_config)
{
int i;
- /* Deleting namespaces is enough to automatically remove veth pairs as well
- */
+ SYS_NOFAIL("ip netns del %s", net_config->ns0_name);
for (i = 0; i < VETH_PAIRS_COUNT; i++)
- SYS_NOFAIL("ip netns del %s", config[i].namespace);
+ SYS_NOFAIL("ip netns del %s", net_config->veth_cfg[i].namespace);
}
-static int check_ping(struct skeletons *skeletons)
+#define VETH_REDIRECT_SKEL_NB 3
+static void xdp_veth_redirect(u32 flags)
{
+ struct prog_configuration ping_config[VETH_PAIRS_COUNT] = {
+ {
+ .local_name = "xdp_redirect_map_0",
+ .remote_name = "xdp_dummy_prog",
+ .local_flags = flags,
+ .remote_flags = flags,
+ },
+ {
+ .local_name = "xdp_redirect_map_1",
+ .remote_name = "xdp_tx",
+ .local_flags = flags,
+ .remote_flags = flags,
+ },
+ {
+ .local_name = "xdp_redirect_map_2",
+ .remote_name = "xdp_dummy_prog",
+ .local_flags = flags,
+ .remote_flags = flags,
+ }
+ };
+ struct bpf_object *bpf_objs[VETH_REDIRECT_SKEL_NB];
+ struct xdp_redirect_map *xdp_redirect_map;
+ struct net_configuration net_config;
+ struct nstoken *nstoken = NULL;
+ struct xdp_dummy *xdp_dummy;
+ struct xdp_tx *xdp_tx;
+ int map_fd;
+ int i;
+
+ xdp_dummy = xdp_dummy__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_dummy, "xdp_dummy__open_and_load"))
+ return;
+
+ xdp_tx = xdp_tx__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_tx, "xdp_tx__open_and_load"))
+ goto destroy_xdp_dummy;
+
+ xdp_redirect_map = xdp_redirect_map__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_redirect_map, "xdp_redirect_map__open_and_load"))
+ goto destroy_xdp_tx;
+
+ if (!ASSERT_OK(create_network(&net_config), "create network"))
+ goto destroy_xdp_redirect_map;
+
+ /* Then configure the redirect map and attach programs to interfaces */
+ map_fd = bpf_map__fd(xdp_redirect_map->maps.tx_port);
+ if (!ASSERT_OK_FD(map_fd, "open redirect map"))
+ goto destroy_xdp_redirect_map;
+
+ bpf_objs[0] = xdp_dummy->obj;
+ bpf_objs[1] = xdp_tx->obj;
+ bpf_objs[2] = xdp_redirect_map->obj;
+
+ nstoken = open_netns(net_config.ns0_name);
+ if (!ASSERT_OK_PTR(nstoken, "open NS0"))
+ goto destroy_xdp_redirect_map;
+
+ for (i = 0; i < VETH_PAIRS_COUNT; i++) {
+ int next_veth = net_config.veth_cfg[i].next_veth;
+ int interface_id;
+ int err;
+
+ interface_id = if_nametoindex(net_config.veth_cfg[next_veth].local_veth);
+ if (!ASSERT_NEQ(interface_id, 0, "non zero interface index"))
+ goto destroy_xdp_redirect_map;
+ err = bpf_map_update_elem(map_fd, &i, &interface_id, BPF_ANY);
+ if (!ASSERT_OK(err, "configure interface redirection through map"))
+ goto destroy_xdp_redirect_map;
+ if (attach_programs_to_veth_pair(bpf_objs, VETH_REDIRECT_SKEL_NB,
+ &net_config, ping_config, i))
+ goto destroy_xdp_redirect_map;
+ }
+
/* Test: if all interfaces are properly configured, we must be able to ping
* veth33 from veth11
*/
- return SYS_NOFAIL("ip netns exec %s ping -c 1 -W 1 %s > /dev/null",
- config[0].namespace, IP_DST);
+ ASSERT_OK(SYS_NOFAIL("ip netns exec %s ping -c 1 -W 1 %s > /dev/null",
+ net_config.veth_cfg[0].namespace, IP_DST), "ping");
+
+destroy_xdp_redirect_map:
+ close_netns(nstoken);
+ xdp_redirect_map__destroy(xdp_redirect_map);
+destroy_xdp_tx:
+ xdp_tx__destroy(xdp_tx);
+destroy_xdp_dummy:
+ xdp_dummy__destroy(xdp_dummy);
+
+ cleanup_network(&net_config);
}
-void test_xdp_veth_redirect(void)
+#define BROADCAST_REDIRECT_SKEL_NB 2
+static void xdp_veth_broadcast_redirect(u32 attach_flags, u64 redirect_flags)
{
- struct skeletons skeletons = {};
+ struct prog_configuration prog_cfg[VETH_PAIRS_COUNT] = {
+ {
+ .local_name = "xdp_redirect_map_multi_prog",
+ .remote_name = "xdp_count_0",
+ .local_flags = attach_flags,
+ .remote_flags = attach_flags,
+ },
+ {
+ .local_name = "xdp_redirect_map_multi_prog",
+ .remote_name = "xdp_count_1",
+ .local_flags = attach_flags,
+ .remote_flags = attach_flags,
+ },
+ {
+ .local_name = "xdp_redirect_map_multi_prog",
+ .remote_name = "xdp_count_2",
+ .local_flags = attach_flags,
+ .remote_flags = attach_flags,
+ }
+ };
+ struct bpf_object *bpf_objs[BROADCAST_REDIRECT_SKEL_NB];
+ struct xdp_redirect_multi_kern *xdp_redirect_multi_kern;
+ struct xdp_redirect_map *xdp_redirect_map;
+ struct bpf_devmap_val devmap_val = {};
+ struct net_configuration net_config;
+ struct nstoken *nstoken = NULL;
+ u16 protocol = ETH_P_IP;
+ int group_map;
+ int flags_map;
+ int cnt_map;
+ u64 cnt = 0;
+ int i, err;
- skeletons.xdp_dummy = xdp_dummy__open_and_load();
- if (!ASSERT_OK_PTR(skeletons.xdp_dummy, "xdp_dummy__open_and_load"))
+ xdp_redirect_multi_kern = xdp_redirect_multi_kern__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_redirect_multi_kern, "xdp_redirect_multi_kern__open_and_load"))
return;
- skeletons.xdp_tx = xdp_tx__open_and_load();
- if (!ASSERT_OK_PTR(skeletons.xdp_tx, "xdp_tx__open_and_load"))
+ xdp_redirect_map = xdp_redirect_map__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_redirect_map, "xdp_redirect_map__open_and_load"))
+ goto destroy_xdp_redirect_multi_kern;
+
+ if (!ASSERT_OK(create_network(&net_config), "create network"))
+ goto destroy_xdp_redirect_map;
+
+ group_map = bpf_map__fd(xdp_redirect_multi_kern->maps.map_all);
+ if (!ASSERT_OK_FD(group_map, "open map_all"))
+ goto destroy_xdp_redirect_map;
+
+ flags_map = bpf_map__fd(xdp_redirect_multi_kern->maps.redirect_flags);
+ if (!ASSERT_OK_FD(group_map, "open map_all"))
+ goto destroy_xdp_redirect_map;
+
+ err = bpf_map_update_elem(flags_map, &protocol, &redirect_flags, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "init IP count"))
+ goto destroy_xdp_redirect_map;
+
+ cnt_map = bpf_map__fd(xdp_redirect_map->maps.rxcnt);
+ if (!ASSERT_OK_FD(cnt_map, "open rxcnt map"))
+ goto destroy_xdp_redirect_map;
+
+ bpf_objs[0] = xdp_redirect_multi_kern->obj;
+ bpf_objs[1] = xdp_redirect_map->obj;
+
+ nstoken = open_netns(net_config.ns0_name);
+ if (!ASSERT_OK_PTR(nstoken, "open NS0"))
+ goto destroy_xdp_redirect_map;
+
+ for (i = 0; i < VETH_PAIRS_COUNT; i++) {
+ int ifindex = if_nametoindex(net_config.veth_cfg[i].local_veth);
+
+ if (attach_programs_to_veth_pair(bpf_objs, BROADCAST_REDIRECT_SKEL_NB,
+ &net_config, prog_cfg, i))
+ goto destroy_xdp_redirect_map;
+
+ SYS(destroy_xdp_redirect_map,
+ "ip -n %s neigh add %s lladdr 00:00:00:00:00:01 dev %s",
+ net_config.veth_cfg[i].namespace, IP_NEIGH, net_config.veth_cfg[i].remote_veth);
+
+ devmap_val.ifindex = ifindex;
+ err = bpf_map_update_elem(group_map, &ifindex, &devmap_val, 0);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ goto destroy_xdp_redirect_map;
+
+ }
+
+ SYS_NOFAIL("ip netns exec %s ping %s -i 0.1 -c 4 -W1 > /dev/null ",
+ net_config.veth_cfg[0].namespace, IP_NEIGH);
+
+ for (i = 0; i < VETH_PAIRS_COUNT; i++) {
+ err = bpf_map_lookup_elem(cnt_map, &i, &cnt);
+ if (!ASSERT_OK(err, "get IP cnt"))
+ goto destroy_xdp_redirect_map;
+
+ if (redirect_flags & BPF_F_EXCLUDE_INGRESS)
+ /* veth11 shouldn't receive the ICMP requests;
+ * others should
+ */
+ ASSERT_EQ(cnt, i ? 4 : 0, "compare IP cnt");
+ else
+ /* All remote veth should receive the ICMP requests */
+ ASSERT_EQ(cnt, 4, "compare IP cnt");
+ }
+
+destroy_xdp_redirect_map:
+ close_netns(nstoken);
+ xdp_redirect_map__destroy(xdp_redirect_map);
+destroy_xdp_redirect_multi_kern:
+ xdp_redirect_multi_kern__destroy(xdp_redirect_multi_kern);
+
+ cleanup_network(&net_config);
+}
+
+#define VETH_EGRESS_SKEL_NB 3
+static void xdp_veth_egress(u32 flags)
+{
+ struct prog_configuration prog_cfg[VETH_PAIRS_COUNT] = {
+ {
+ .local_name = "xdp_redirect_map_all_prog",
+ .remote_name = "xdp_dummy_prog",
+ .local_flags = flags,
+ .remote_flags = flags,
+ },
+ {
+ .local_name = "xdp_redirect_map_all_prog",
+ .remote_name = "store_mac_1",
+ .local_flags = flags,
+ .remote_flags = flags,
+ },
+ {
+ .local_name = "xdp_redirect_map_all_prog",
+ .remote_name = "store_mac_2",
+ .local_flags = flags,
+ .remote_flags = flags,
+ }
+ };
+ const char magic_mac[6] = { 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF};
+ struct xdp_redirect_multi_kern *xdp_redirect_multi_kern;
+ struct bpf_object *bpf_objs[VETH_EGRESS_SKEL_NB];
+ struct xdp_redirect_map *xdp_redirect_map;
+ struct bpf_devmap_val devmap_val = {};
+ struct net_configuration net_config;
+ int mac_map, egress_map, res_map;
+ struct nstoken *nstoken = NULL;
+ struct xdp_dummy *xdp_dummy;
+ int err;
+ int i;
+
+ xdp_dummy = xdp_dummy__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_dummy, "xdp_dummy__open_and_load"))
+ return;
+
+ xdp_redirect_multi_kern = xdp_redirect_multi_kern__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_redirect_multi_kern, "xdp_redirect_multi_kern__open_and_load"))
goto destroy_xdp_dummy;
- skeletons.xdp_redirect_maps = xdp_redirect_map__open_and_load();
- if (!ASSERT_OK_PTR(skeletons.xdp_redirect_maps, "xdp_redirect_map__open_and_load"))
- goto destroy_xdp_tx;
+ xdp_redirect_map = xdp_redirect_map__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_redirect_map, "xdp_redirect_map__open_and_load"))
+ goto destroy_xdp_redirect_multi_kern;
- if (configure_network(&skeletons))
+ if (!ASSERT_OK(create_network(&net_config), "create network"))
goto destroy_xdp_redirect_map;
- ASSERT_OK(check_ping(&skeletons), "ping");
+ mac_map = bpf_map__fd(xdp_redirect_multi_kern->maps.mac_map);
+ if (!ASSERT_OK_FD(mac_map, "open mac_map"))
+ goto destroy_xdp_redirect_map;
+
+ egress_map = bpf_map__fd(xdp_redirect_multi_kern->maps.map_egress);
+ if (!ASSERT_OK_FD(egress_map, "open map_egress"))
+ goto destroy_xdp_redirect_map;
+
+ devmap_val.bpf_prog.fd = bpf_program__fd(xdp_redirect_multi_kern->progs.xdp_devmap_prog);
+
+ bpf_objs[0] = xdp_dummy->obj;
+ bpf_objs[1] = xdp_redirect_multi_kern->obj;
+ bpf_objs[2] = xdp_redirect_map->obj;
+
+ nstoken = open_netns(net_config.ns0_name);
+ if (!ASSERT_OK_PTR(nstoken, "open NS0"))
+ goto destroy_xdp_redirect_map;
+
+ for (i = 0; i < VETH_PAIRS_COUNT; i++) {
+ int ifindex = if_nametoindex(net_config.veth_cfg[i].local_veth);
+
+ SYS(destroy_xdp_redirect_map,
+ "ip -n %s neigh add %s lladdr 00:00:00:00:00:01 dev %s",
+ net_config.veth_cfg[i].namespace, IP_NEIGH, net_config.veth_cfg[i].remote_veth);
+
+ if (attach_programs_to_veth_pair(bpf_objs, VETH_REDIRECT_SKEL_NB,
+ &net_config, prog_cfg, i))
+ goto destroy_xdp_redirect_map;
+
+ err = bpf_map_update_elem(mac_map, &ifindex, magic_mac, 0);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ goto destroy_xdp_redirect_map;
+
+ devmap_val.ifindex = ifindex;
+ err = bpf_map_update_elem(egress_map, &ifindex, &devmap_val, 0);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ goto destroy_xdp_redirect_map;
+ }
+
+ SYS_NOFAIL("ip netns exec %s ping %s -i 0.1 -c 4 -W1 > /dev/null ",
+ net_config.veth_cfg[0].namespace, IP_NEIGH);
+
+ res_map = bpf_map__fd(xdp_redirect_map->maps.rx_mac);
+ if (!ASSERT_OK_FD(res_map, "open rx_map"))
+ goto destroy_xdp_redirect_map;
+
+ for (i = 0; i < 2; i++) {
+ u32 key = i;
+ u64 res;
+
+ err = bpf_map_lookup_elem(res_map, &key, &res);
+ if (!ASSERT_OK(err, "get MAC res"))
+ goto destroy_xdp_redirect_map;
+
+ ASSERT_STRNEQ((const char *)&res, magic_mac, ETH_ALEN, "compare mac");
+ }
destroy_xdp_redirect_map:
- xdp_redirect_map__destroy(skeletons.xdp_redirect_maps);
-destroy_xdp_tx:
- xdp_tx__destroy(skeletons.xdp_tx);
+ close_netns(nstoken);
+ xdp_redirect_map__destroy(xdp_redirect_map);
+destroy_xdp_redirect_multi_kern:
+ xdp_redirect_multi_kern__destroy(xdp_redirect_multi_kern);
destroy_xdp_dummy:
- xdp_dummy__destroy(skeletons.xdp_dummy);
+ xdp_dummy__destroy(xdp_dummy);
+
+ cleanup_network(&net_config);
+}
+
+void test_xdp_veth_redirect(void)
+{
+ if (test__start_subtest("0"))
+ xdp_veth_redirect(0);
+
+ if (test__start_subtest("DRV_MODE"))
+ xdp_veth_redirect(XDP_FLAGS_DRV_MODE);
+
+ if (test__start_subtest("SKB_MODE"))
+ xdp_veth_redirect(XDP_FLAGS_SKB_MODE);
+}
+
+void test_xdp_veth_broadcast_redirect(void)
+{
+ if (test__start_subtest("0/BROADCAST"))
+ xdp_veth_broadcast_redirect(0, BPF_F_BROADCAST);
+
+ if (test__start_subtest("0/(BROADCAST | EXCLUDE_INGRESS)"))
+ xdp_veth_broadcast_redirect(0, BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
+
+ if (test__start_subtest("DRV_MODE/BROADCAST"))
+ xdp_veth_broadcast_redirect(XDP_FLAGS_DRV_MODE, BPF_F_BROADCAST);
+
+ if (test__start_subtest("DRV_MODE/(BROADCAST | EXCLUDE_INGRESS)"))
+ xdp_veth_broadcast_redirect(XDP_FLAGS_DRV_MODE,
+ BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
+
+ if (test__start_subtest("SKB_MODE/BROADCAST"))
+ xdp_veth_broadcast_redirect(XDP_FLAGS_SKB_MODE, BPF_F_BROADCAST);
+
+ if (test__start_subtest("SKB_MODE/(BROADCAST | EXCLUDE_INGRESS)"))
+ xdp_veth_broadcast_redirect(XDP_FLAGS_SKB_MODE,
+ BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
+}
+
+void test_xdp_veth_egress(void)
+{
+ if (test__start_subtest("0/egress"))
+ xdp_veth_egress(0);
+
+ if (test__start_subtest("DRV_MODE/egress"))
+ xdp_veth_egress(XDP_FLAGS_DRV_MODE);
- cleanup_network();
+ if (test__start_subtest("SKB_MODE/egress"))
+ xdp_veth_egress(XDP_FLAGS_SKB_MODE);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c
index c3ab9b6fb069..f9392df23f8a 100644
--- a/tools/testing/selftests/bpf/prog_tests/token.c
+++ b/tools/testing/selftests/bpf/prog_tests/token.c
@@ -19,6 +19,7 @@
#include "priv_prog.skel.h"
#include "dummy_st_ops_success.skel.h"
#include "token_lsm.skel.h"
+#include "priv_freplace_prog.skel.h"
static inline int sys_mount(const char *dev_name, const char *dir_name,
const char *type, unsigned long flags,
@@ -788,6 +789,84 @@ static int userns_obj_priv_prog(int mnt_fd, struct token_lsm *lsm_skel)
return 0;
}
+static int userns_obj_priv_freplace_setup(int mnt_fd, struct priv_freplace_prog **fr_skel,
+ struct priv_prog **skel, int *tgt_fd)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ int err;
+ char buf[256];
+
+ /* use bpf_token_path to provide BPF FS path */
+ snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd);
+ opts.bpf_token_path = buf;
+ *skel = priv_prog__open_opts(&opts);
+ if (!ASSERT_OK_PTR(*skel, "priv_prog__open_opts"))
+ return -EINVAL;
+ err = priv_prog__load(*skel);
+ if (!ASSERT_OK(err, "priv_prog__load"))
+ return -EINVAL;
+
+ *fr_skel = priv_freplace_prog__open_opts(&opts);
+ if (!ASSERT_OK_PTR(*skel, "priv_freplace_prog__open_opts"))
+ return -EINVAL;
+
+ *tgt_fd = bpf_program__fd((*skel)->progs.xdp_prog1);
+ return 0;
+}
+
+/* Verify that freplace works from user namespace, because bpf token is loaded
+ * in bpf_object__prepare
+ */
+static int userns_obj_priv_freplace_prog(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ struct priv_freplace_prog *fr_skel = NULL;
+ struct priv_prog *skel = NULL;
+ int err, tgt_fd;
+
+ err = userns_obj_priv_freplace_setup(mnt_fd, &fr_skel, &skel, &tgt_fd);
+ if (!ASSERT_OK(err, "setup"))
+ goto out;
+
+ err = bpf_object__prepare(fr_skel->obj);
+ if (!ASSERT_OK(err, "freplace__prepare"))
+ goto out;
+
+ err = bpf_program__set_attach_target(fr_skel->progs.new_xdp_prog2, tgt_fd, "xdp_prog1");
+ if (!ASSERT_OK(err, "set_attach_target"))
+ goto out;
+
+ err = priv_freplace_prog__load(fr_skel);
+ ASSERT_OK(err, "priv_freplace_prog__load");
+
+out:
+ priv_freplace_prog__destroy(fr_skel);
+ priv_prog__destroy(skel);
+ return err;
+}
+
+/* Verify that replace fails to set attach target from user namespace without bpf token */
+static int userns_obj_priv_freplace_prog_fail(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ struct priv_freplace_prog *fr_skel = NULL;
+ struct priv_prog *skel = NULL;
+ int err, tgt_fd;
+
+ err = userns_obj_priv_freplace_setup(mnt_fd, &fr_skel, &skel, &tgt_fd);
+ if (!ASSERT_OK(err, "setup"))
+ goto out;
+
+ err = bpf_program__set_attach_target(fr_skel->progs.new_xdp_prog2, tgt_fd, "xdp_prog1");
+ if (ASSERT_ERR(err, "attach fails"))
+ err = 0;
+ else
+ err = -EINVAL;
+
+out:
+ priv_freplace_prog__destroy(fr_skel);
+ priv_prog__destroy(skel);
+ return err;
+}
+
/* this test is called with BPF FS that doesn't delegate BPF_BTF_LOAD command,
* which should cause struct_ops application to fail, as BTF won't be uploaded
* into the kernel, even if STRUCT_OPS programs themselves are allowed
@@ -1004,12 +1083,28 @@ void test_token(void)
if (test__start_subtest("obj_priv_prog")) {
struct bpffs_opts opts = {
.cmds = bit(BPF_PROG_LOAD),
- .progs = bit(BPF_PROG_TYPE_KPROBE),
+ .progs = bit(BPF_PROG_TYPE_XDP),
.attachs = ~0ULL,
};
subtest_userns(&opts, userns_obj_priv_prog);
}
+ if (test__start_subtest("obj_priv_freplace_prog")) {
+ struct bpffs_opts opts = {
+ .cmds = bit(BPF_BTF_LOAD) | bit(BPF_PROG_LOAD) | bit(BPF_BTF_GET_FD_BY_ID),
+ .progs = bit(BPF_PROG_TYPE_EXT) | bit(BPF_PROG_TYPE_XDP),
+ .attachs = ~0ULL,
+ };
+ subtest_userns(&opts, userns_obj_priv_freplace_prog);
+ }
+ if (test__start_subtest("obj_priv_freplace_prog_fail")) {
+ struct bpffs_opts opts = {
+ .cmds = bit(BPF_BTF_LOAD) | bit(BPF_PROG_LOAD) | bit(BPF_BTF_GET_FD_BY_ID),
+ .progs = bit(BPF_PROG_TYPE_EXT) | bit(BPF_PROG_TYPE_XDP),
+ .attachs = ~0ULL,
+ };
+ subtest_userns(&opts, userns_obj_priv_freplace_prog_fail);
+ }
if (test__start_subtest("obj_priv_btf_fail")) {
struct bpffs_opts opts = {
/* disallow BTF loading */
diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c
index 56ed1eb9b527..495d66414b57 100644
--- a/tools/testing/selftests/bpf/prog_tests/usdt.c
+++ b/tools/testing/selftests/bpf/prog_tests/usdt.c
@@ -45,7 +45,7 @@ static void subtest_basic_usdt(void)
LIBBPF_OPTS(bpf_usdt_opts, opts);
struct test_usdt *skel;
struct test_usdt__bss *bss;
- int err;
+ int err, i;
skel = test_usdt__open_and_load();
if (!ASSERT_OK_PTR(skel, "skel_open"))
@@ -75,6 +75,7 @@ static void subtest_basic_usdt(void)
ASSERT_EQ(bss->usdt0_cookie, 0xcafedeadbeeffeed, "usdt0_cookie");
ASSERT_EQ(bss->usdt0_arg_cnt, 0, "usdt0_arg_cnt");
ASSERT_EQ(bss->usdt0_arg_ret, -ENOENT, "usdt0_arg_ret");
+ ASSERT_EQ(bss->usdt0_arg_size, -ENOENT, "usdt0_arg_size");
/* auto-attached usdt3 gets default zero cookie value */
ASSERT_EQ(bss->usdt3_cookie, 0, "usdt3_cookie");
@@ -86,6 +87,9 @@ static void subtest_basic_usdt(void)
ASSERT_EQ(bss->usdt3_args[0], 1, "usdt3_arg1");
ASSERT_EQ(bss->usdt3_args[1], 42, "usdt3_arg2");
ASSERT_EQ(bss->usdt3_args[2], (uintptr_t)&bla, "usdt3_arg3");
+ ASSERT_EQ(bss->usdt3_arg_sizes[0], 4, "usdt3_arg1_size");
+ ASSERT_EQ(bss->usdt3_arg_sizes[1], 8, "usdt3_arg2_size");
+ ASSERT_EQ(bss->usdt3_arg_sizes[2], 8, "usdt3_arg3_size");
/* auto-attached usdt12 gets default zero cookie value */
ASSERT_EQ(bss->usdt12_cookie, 0, "usdt12_cookie");
@@ -104,6 +108,11 @@ static void subtest_basic_usdt(void)
ASSERT_EQ(bss->usdt12_args[10], nums[idx], "usdt12_arg11");
ASSERT_EQ(bss->usdt12_args[11], t1.y, "usdt12_arg12");
+ int usdt12_expected_arg_sizes[12] = { 4, 4, 8, 8, 4, 8, 8, 8, 4, 2, 2, 1 };
+
+ for (i = 0; i < 12; i++)
+ ASSERT_EQ(bss->usdt12_arg_sizes[i], usdt12_expected_arg_sizes[i], "usdt12_arg_size");
+
/* trigger_func() is marked __always_inline, so USDT invocations will be
* inlined in two different places, meaning that each USDT will have
* at least 2 different places to be attached to. This verifies that
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index 8a0e1ff8a2dc..e66a57970d28 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -45,6 +45,7 @@
#include "verifier_ldsx.skel.h"
#include "verifier_leak_ptr.skel.h"
#include "verifier_linked_scalars.skel.h"
+#include "verifier_load_acquire.skel.h"
#include "verifier_loops1.skel.h"
#include "verifier_lwt.skel.h"
#include "verifier_map_in_map.skel.h"
@@ -80,6 +81,7 @@
#include "verifier_spill_fill.skel.h"
#include "verifier_spin_lock.skel.h"
#include "verifier_stack_ptr.skel.h"
+#include "verifier_store_release.skel.h"
#include "verifier_subprog_precision.skel.h"
#include "verifier_subreg.skel.h"
#include "verifier_tailcall_jit.skel.h"
@@ -121,7 +123,7 @@ static void run_tests_aux(const char *skel_name,
/* test_verifier tests are executed w/o CAP_SYS_ADMIN, do the same here */
err = cap_disable_effective(1ULL << CAP_SYS_ADMIN, &old_caps);
if (err) {
- PRINT_FAIL("failed to drop CAP_SYS_ADMIN: %i, %s\n", err, strerror(err));
+ PRINT_FAIL("failed to drop CAP_SYS_ADMIN: %i, %s\n", err, strerror(-err));
return;
}
@@ -131,7 +133,7 @@ static void run_tests_aux(const char *skel_name,
err = cap_enable_effective(old_caps, NULL);
if (err)
- PRINT_FAIL("failed to restore CAP_SYS_ADMIN: %i, %s\n", err, strerror(err));
+ PRINT_FAIL("failed to restore CAP_SYS_ADMIN: %i, %s\n", err, strerror(-err));
}
#define RUN(skel) run_tests_aux(#skel, skel##__elf_bytes, NULL)
@@ -173,6 +175,7 @@ void test_verifier_int_ptr(void) { RUN(verifier_int_ptr); }
void test_verifier_iterating_callbacks(void) { RUN(verifier_iterating_callbacks); }
void test_verifier_jeq_infer_not_null(void) { RUN(verifier_jeq_infer_not_null); }
void test_verifier_jit_convergence(void) { RUN(verifier_jit_convergence); }
+void test_verifier_load_acquire(void) { RUN(verifier_load_acquire); }
void test_verifier_ld_ind(void) { RUN(verifier_ld_ind); }
void test_verifier_ldsx(void) { RUN(verifier_ldsx); }
void test_verifier_leak_ptr(void) { RUN(verifier_leak_ptr); }
@@ -211,6 +214,7 @@ void test_verifier_sockmap_mutate(void) { RUN(verifier_sockmap_mutate); }
void test_verifier_spill_fill(void) { RUN(verifier_spill_fill); }
void test_verifier_spin_lock(void) { RUN(verifier_spin_lock); }
void test_verifier_stack_ptr(void) { RUN(verifier_stack_ptr); }
+void test_verifier_store_release(void) { RUN(verifier_store_release); }
void test_verifier_subprog_precision(void) { RUN(verifier_subprog_precision); }
void test_verifier_subreg(void) { RUN(verifier_subreg); }
void test_verifier_tailcall_jit(void) { RUN(verifier_tailcall_jit); }
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
index 937da9b7532a..b9d9f0a502ce 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
@@ -4,12 +4,20 @@
#include "test_xdp_context_test_run.skel.h"
#include "test_xdp_meta.skel.h"
-#define TX_ADDR "10.0.0.1"
-#define RX_ADDR "10.0.0.2"
#define RX_NAME "veth0"
#define TX_NAME "veth1"
#define TX_NETNS "xdp_context_tx"
#define RX_NETNS "xdp_context_rx"
+#define TAP_NAME "tap0"
+#define TAP_NETNS "xdp_context_tuntap"
+
+#define TEST_PAYLOAD_LEN 32
+static const __u8 test_payload[TEST_PAYLOAD_LEN] = {
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
+ 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
+ 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
+};
void test_xdp_context_error(int prog_fd, struct bpf_test_run_opts opts,
__u32 data_meta, __u32 data, __u32 data_end,
@@ -112,7 +120,59 @@ void test_xdp_context_test_run(void)
test_xdp_context_test_run__destroy(skel);
}
-void test_xdp_context_functional(void)
+static int send_test_packet(int ifindex)
+{
+ int n, sock = -1;
+ __u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN];
+
+ /* The ethernet header is not relevant for this test and doesn't need to
+ * be meaningful.
+ */
+ struct ethhdr eth = { 0 };
+
+ memcpy(packet, &eth, sizeof(eth));
+ memcpy(packet + sizeof(eth), test_payload, TEST_PAYLOAD_LEN);
+
+ sock = socket(AF_PACKET, SOCK_RAW, IPPROTO_RAW);
+ if (!ASSERT_GE(sock, 0, "socket"))
+ goto err;
+
+ struct sockaddr_ll saddr = {
+ .sll_family = PF_PACKET,
+ .sll_ifindex = ifindex,
+ .sll_halen = ETH_ALEN
+ };
+ n = sendto(sock, packet, sizeof(packet), 0, (struct sockaddr *)&saddr,
+ sizeof(saddr));
+ if (!ASSERT_EQ(n, sizeof(packet), "sendto"))
+ goto err;
+
+ close(sock);
+ return 0;
+
+err:
+ if (sock >= 0)
+ close(sock);
+ return -1;
+}
+
+static void assert_test_result(struct test_xdp_meta *skel)
+{
+ int err;
+ __u32 map_key = 0;
+ __u8 map_value[TEST_PAYLOAD_LEN];
+
+ err = bpf_map__lookup_elem(skel->maps.test_result, &map_key,
+ sizeof(map_key), &map_value,
+ TEST_PAYLOAD_LEN, BPF_ANY);
+ if (!ASSERT_OK(err, "lookup test_result"))
+ return;
+
+ ASSERT_MEMEQ(&map_value, &test_payload, TEST_PAYLOAD_LEN,
+ "test_result map contains test payload");
+}
+
+void test_xdp_context_veth(void)
{
LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS);
LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
@@ -120,7 +180,7 @@ void test_xdp_context_functional(void)
struct bpf_program *tc_prog, *xdp_prog;
struct test_xdp_meta *skel = NULL;
struct nstoken *nstoken = NULL;
- int rx_ifindex;
+ int rx_ifindex, tx_ifindex;
int ret;
tx_ns = netns_new(TX_NETNS, false);
@@ -138,7 +198,6 @@ void test_xdp_context_functional(void)
if (!ASSERT_OK_PTR(nstoken, "setns rx_ns"))
goto close;
- SYS(close, "ip addr add " RX_ADDR "/24 dev " RX_NAME);
SYS(close, "ip link set dev " RX_NAME " up");
skel = test_xdp_meta__open_and_load();
@@ -179,9 +238,17 @@ void test_xdp_context_functional(void)
if (!ASSERT_OK_PTR(nstoken, "setns tx_ns"))
goto close;
- SYS(close, "ip addr add " TX_ADDR "/24 dev " TX_NAME);
SYS(close, "ip link set dev " TX_NAME " up");
- ASSERT_OK(SYS_NOFAIL("ping -c 1 " RX_ADDR), "ping");
+
+ tx_ifindex = if_nametoindex(TX_NAME);
+ if (!ASSERT_GE(tx_ifindex, 0, "if_nametoindex tx"))
+ goto close;
+
+ ret = send_test_packet(tx_ifindex);
+ if (!ASSERT_OK(ret, "send_test_packet"))
+ goto close;
+
+ assert_test_result(skel);
close:
close_netns(nstoken);
@@ -190,3 +257,67 @@ close:
netns_free(tx_ns);
}
+void test_xdp_context_tuntap(void)
+{
+ LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS);
+ LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
+ struct netns_obj *ns = NULL;
+ struct test_xdp_meta *skel = NULL;
+ __u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN];
+ int tap_fd = -1;
+ int tap_ifindex;
+ int ret;
+
+ ns = netns_new(TAP_NETNS, true);
+ if (!ASSERT_OK_PTR(ns, "create and open ns"))
+ return;
+
+ tap_fd = open_tuntap(TAP_NAME, true);
+ if (!ASSERT_GE(tap_fd, 0, "open_tuntap"))
+ goto close;
+
+ SYS(close, "ip link set dev " TAP_NAME " up");
+
+ skel = test_xdp_meta__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open and load skeleton"))
+ goto close;
+
+ tap_ifindex = if_nametoindex(TAP_NAME);
+ if (!ASSERT_GE(tap_ifindex, 0, "if_nametoindex"))
+ goto close;
+
+ tc_hook.ifindex = tap_ifindex;
+ ret = bpf_tc_hook_create(&tc_hook);
+ if (!ASSERT_OK(ret, "bpf_tc_hook_create"))
+ goto close;
+
+ tc_opts.prog_fd = bpf_program__fd(skel->progs.ing_cls);
+ ret = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach"))
+ goto close;
+
+ ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(skel->progs.ing_xdp),
+ 0, NULL);
+ if (!ASSERT_GE(ret, 0, "bpf_xdp_attach"))
+ goto close;
+
+ /* The ethernet header is not relevant for this test and doesn't need to
+ * be meaningful.
+ */
+ struct ethhdr eth = { 0 };
+
+ memcpy(packet, &eth, sizeof(eth));
+ memcpy(packet + sizeof(eth), test_payload, TEST_PAYLOAD_LEN);
+
+ ret = write(tap_fd, packet, sizeof(packet));
+ if (!ASSERT_EQ(ret, sizeof(packet), "write packet"))
+ goto close;
+
+ assert_test_result(skel);
+
+close:
+ if (tap_fd >= 0)
+ close(tap_fd);
+ test_xdp_meta__destroy(skel);
+ netns_free(ns);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
index c7f74f068e78..df27535995af 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
@@ -52,10 +52,10 @@ static void test_xdp_with_cpumap_helpers(void)
ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to cpumap entry prog_id");
/* send a packet to trigger any potential bugs in there */
- char data[10] = {};
+ char data[ETH_HLEN] = {};
DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
.data_in = &data,
- .data_size_in = 10,
+ .data_size_in = sizeof(data),
.flags = BPF_F_TEST_XDP_LIVE_FRAMES,
.repeat = 1,
);
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
index 27ffed17d4be..461ab18705d5 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
@@ -23,7 +23,7 @@ static void test_xdp_with_devmap_helpers(void)
__u32 len = sizeof(info);
int err, dm_fd, dm_fd_redir, map_fd;
struct nstoken *nstoken = NULL;
- char data[10] = {};
+ char data[ETH_HLEN] = {};
__u32 idx = 0;
SYS(out_close, "ip netns add %s", TEST_NS);
@@ -58,7 +58,7 @@ static void test_xdp_with_devmap_helpers(void)
/* send a packet to trigger any potential bugs in there */
DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
.data_in = &data,
- .data_size_in = 10,
+ .data_size_in = sizeof(data),
.flags = BPF_F_TEST_XDP_LIVE_FRAMES,
.repeat = 1,
);
@@ -158,7 +158,7 @@ static void test_xdp_with_devmap_helpers_veth(void)
struct nstoken *nstoken = NULL;
__u32 len = sizeof(info);
int err, dm_fd, dm_fd_redir, map_fd, ifindex_dst;
- char data[10] = {};
+ char data[ETH_HLEN] = {};
__u32 idx = 0;
SYS(out_close, "ip netns add %s", TEST_NS);
@@ -208,7 +208,7 @@ static void test_xdp_with_devmap_helpers_veth(void)
/* send a packet to trigger any potential bugs in there */
DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
.data_in = &data,
- .data_size_in = 10,
+ .data_size_in = sizeof(data),
.flags = BPF_F_TEST_XDP_LIVE_FRAMES,
.repeat = 1,
);
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_vlan.c b/tools/testing/selftests/bpf/prog_tests/xdp_vlan.c
new file mode 100644
index 000000000000..18dd25344de7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_vlan.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Network topology:
+ * ----------- -----------
+ * | NS1 | | NS2 |
+ * | veth0 -|--------|- veth0 |
+ * ----------- -----------
+ *
+ */
+
+#define _GNU_SOURCE
+#include <net/if.h>
+#include <uapi/linux/if_link.h>
+
+#include "network_helpers.h"
+#include "test_progs.h"
+#include "test_xdp_vlan.skel.h"
+
+
+#define VETH_NAME "veth0"
+#define NS_MAX_SIZE 32
+#define NS1_NAME "ns-xdp-vlan-1-"
+#define NS2_NAME "ns-xdp-vlan-2-"
+#define NS1_IP_ADDR "100.64.10.1"
+#define NS2_IP_ADDR "100.64.10.2"
+#define VLAN_ID 4011
+
+static int setup_network(char *ns1, char *ns2)
+{
+ if (!ASSERT_OK(append_tid(ns1, NS_MAX_SIZE), "create ns1 name"))
+ goto fail;
+ if (!ASSERT_OK(append_tid(ns2, NS_MAX_SIZE), "create ns2 name"))
+ goto fail;
+
+ SYS(fail, "ip netns add %s", ns1);
+ SYS(fail, "ip netns add %s", ns2);
+ SYS(fail, "ip -n %s link add %s type veth peer name %s netns %s",
+ ns1, VETH_NAME, VETH_NAME, ns2);
+
+ /* NOTICE: XDP require VLAN header inside packet payload
+ * - Thus, disable VLAN offloading driver features
+ */
+ SYS(fail, "ip netns exec %s ethtool -K %s rxvlan off txvlan off", ns1, VETH_NAME);
+ SYS(fail, "ip netns exec %s ethtool -K %s rxvlan off txvlan off", ns2, VETH_NAME);
+
+ /* NS1 configuration */
+ SYS(fail, "ip -n %s addr add %s/24 dev %s", ns1, NS1_IP_ADDR, VETH_NAME);
+ SYS(fail, "ip -n %s link set %s up", ns1, VETH_NAME);
+
+ /* NS2 configuration */
+ SYS(fail, "ip -n %s link add link %s name %s.%d type vlan id %d",
+ ns2, VETH_NAME, VETH_NAME, VLAN_ID, VLAN_ID);
+ SYS(fail, "ip -n %s addr add %s/24 dev %s.%d", ns2, NS2_IP_ADDR, VETH_NAME, VLAN_ID);
+ SYS(fail, "ip -n %s link set %s up", ns2, VETH_NAME);
+ SYS(fail, "ip -n %s link set %s.%d up", ns2, VETH_NAME, VLAN_ID);
+
+ /* At this point ping should fail because VLAN tags are only used by NS2 */
+ return !SYS_NOFAIL("ip netns exec %s ping -W 1 -c1 %s", ns2, NS1_IP_ADDR);
+
+fail:
+ return -1;
+}
+
+static void cleanup_network(const char *ns1, const char *ns2)
+{
+ SYS_NOFAIL("ip netns del %s", ns1);
+ SYS_NOFAIL("ip netns del %s", ns2);
+}
+
+static void xdp_vlan(struct bpf_program *xdp, struct bpf_program *tc, u32 flags)
+{
+ LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_EGRESS);
+ LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
+ char ns1[NS_MAX_SIZE] = NS1_NAME;
+ char ns2[NS_MAX_SIZE] = NS2_NAME;
+ struct nstoken *nstoken = NULL;
+ int interface;
+ int ret;
+
+ if (!ASSERT_OK(setup_network(ns1, ns2), "setup network"))
+ goto cleanup;
+
+ nstoken = open_netns(ns1);
+ if (!ASSERT_OK_PTR(nstoken, "open NS1"))
+ goto cleanup;
+
+ interface = if_nametoindex(VETH_NAME);
+ if (!ASSERT_NEQ(interface, 0, "get interface index"))
+ goto cleanup;
+
+ ret = bpf_xdp_attach(interface, bpf_program__fd(xdp), flags, NULL);
+ if (!ASSERT_OK(ret, "attach xdp_vlan_change"))
+ goto cleanup;
+
+ tc_hook.ifindex = interface;
+ ret = bpf_tc_hook_create(&tc_hook);
+ if (!ASSERT_OK(ret, "bpf_tc_hook_create"))
+ goto detach_xdp;
+
+ /* Now we'll use BPF programs to pop/push the VLAN tags */
+ tc_opts.prog_fd = bpf_program__fd(tc);
+ ret = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach"))
+ goto detach_xdp;
+
+ close_netns(nstoken);
+ nstoken = NULL;
+
+ /* Now the namespaces can reach each-other, test with pings */
+ SYS(detach_tc, "ip netns exec %s ping -i 0.2 -W 2 -c 2 %s > /dev/null", ns1, NS2_IP_ADDR);
+ SYS(detach_tc, "ip netns exec %s ping -i 0.2 -W 2 -c 2 %s > /dev/null", ns2, NS1_IP_ADDR);
+
+
+detach_tc:
+ bpf_tc_detach(&tc_hook, &tc_opts);
+detach_xdp:
+ bpf_xdp_detach(interface, flags, NULL);
+cleanup:
+ close_netns(nstoken);
+ cleanup_network(ns1, ns2);
+}
+
+/* First test: Remove VLAN by setting VLAN ID 0, using "xdp_vlan_change"
+ * egress use TC to add back VLAN tag 4011
+ */
+void test_xdp_vlan_change(void)
+{
+ struct test_xdp_vlan *skel;
+
+ skel = test_xdp_vlan__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "xdp_vlan__open_and_load"))
+ return;
+
+ if (test__start_subtest("0"))
+ xdp_vlan(skel->progs.xdp_vlan_change, skel->progs.tc_vlan_push, 0);
+
+ if (test__start_subtest("DRV_MODE"))
+ xdp_vlan(skel->progs.xdp_vlan_change, skel->progs.tc_vlan_push,
+ XDP_FLAGS_DRV_MODE);
+
+ if (test__start_subtest("SKB_MODE"))
+ xdp_vlan(skel->progs.xdp_vlan_change, skel->progs.tc_vlan_push,
+ XDP_FLAGS_SKB_MODE);
+
+ test_xdp_vlan__destroy(skel);
+}
+
+/* Second test: XDP prog fully remove vlan header
+ *
+ * Catch kernel bug for generic-XDP, that doesn't allow us to
+ * remove a VLAN header, because skb->protocol still contain VLAN
+ * ETH_P_8021Q indication, and this cause overwriting of our changes.
+ */
+void test_xdp_vlan_remove(void)
+{
+ struct test_xdp_vlan *skel;
+
+ skel = test_xdp_vlan__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "xdp_vlan__open_and_load"))
+ return;
+
+ if (test__start_subtest("0"))
+ xdp_vlan(skel->progs.xdp_vlan_remove_outer2, skel->progs.tc_vlan_push, 0);
+
+ if (test__start_subtest("DRV_MODE"))
+ xdp_vlan(skel->progs.xdp_vlan_remove_outer2, skel->progs.tc_vlan_push,
+ XDP_FLAGS_DRV_MODE);
+
+ if (test__start_subtest("SKB_MODE"))
+ xdp_vlan(skel->progs.xdp_vlan_remove_outer2, skel->progs.tc_vlan_push,
+ XDP_FLAGS_SKB_MODE);
+
+ test_xdp_vlan__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/arena_atomics.c b/tools/testing/selftests/bpf/progs/arena_atomics.c
index 40dd57fca5cc..a52feff98112 100644
--- a/tools/testing/selftests/bpf/progs/arena_atomics.c
+++ b/tools/testing/selftests/bpf/progs/arena_atomics.c
@@ -6,6 +6,8 @@
#include <stdbool.h>
#include <stdatomic.h>
#include "bpf_arena_common.h"
+#include "../../../include/linux/filter.h"
+#include "bpf_misc.h"
struct {
__uint(type, BPF_MAP_TYPE_ARENA);
@@ -19,9 +21,17 @@ struct {
} arena SEC(".maps");
#if defined(ENABLE_ATOMICS_TESTS) && defined(__BPF_FEATURE_ADDR_SPACE_CAST)
-bool skip_tests __attribute((__section__(".data"))) = false;
+bool skip_all_tests __attribute((__section__(".data"))) = false;
#else
-bool skip_tests = true;
+bool skip_all_tests = true;
+#endif
+
+#if defined(ENABLE_ATOMICS_TESTS) && \
+ defined(__BPF_FEATURE_ADDR_SPACE_CAST) && \
+ (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86))
+bool skip_lacq_srel_tests __attribute((__section__(".data"))) = false;
+#else
+bool skip_lacq_srel_tests = true;
#endif
__u32 pid = 0;
@@ -274,4 +284,111 @@ int uaf(const void *ctx)
return 0;
}
+#if __clang_major__ >= 18
+__u8 __arena_global load_acquire8_value = 0x12;
+__u16 __arena_global load_acquire16_value = 0x1234;
+__u32 __arena_global load_acquire32_value = 0x12345678;
+__u64 __arena_global load_acquire64_value = 0x1234567890abcdef;
+
+__u8 __arena_global load_acquire8_result = 0;
+__u16 __arena_global load_acquire16_result = 0;
+__u32 __arena_global load_acquire32_result = 0;
+__u64 __arena_global load_acquire64_result = 0;
+#else
+/* clang-17 crashes if the .addr_space.1 ELF section has holes. Work around
+ * this issue by defining the below variables as 64-bit.
+ */
+__u64 __arena_global load_acquire8_value;
+__u64 __arena_global load_acquire16_value;
+__u64 __arena_global load_acquire32_value;
+__u64 __arena_global load_acquire64_value;
+
+__u64 __arena_global load_acquire8_result;
+__u64 __arena_global load_acquire16_result;
+__u64 __arena_global load_acquire32_result;
+__u64 __arena_global load_acquire64_result;
+#endif
+
+SEC("raw_tp/sys_enter")
+int load_acquire(const void *ctx)
+{
+#if defined(ENABLE_ATOMICS_TESTS) && \
+ defined(__BPF_FEATURE_ADDR_SPACE_CAST) && \
+ (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86))
+
+#define LOAD_ACQUIRE_ARENA(SIZEOP, SIZE, SRC, DST) \
+ { asm volatile ( \
+ "r1 = %[" #SRC "] ll;" \
+ "r1 = addr_space_cast(r1, 0x0, 0x1);" \
+ ".8byte %[load_acquire_insn];" \
+ "r3 = %[" #DST "] ll;" \
+ "r3 = addr_space_cast(r3, 0x0, 0x1);" \
+ "*(" #SIZE " *)(r3 + 0) = r2;" \
+ : \
+ : __imm_addr(SRC), \
+ __imm_insn(load_acquire_insn, \
+ BPF_ATOMIC_OP(BPF_##SIZEOP, BPF_LOAD_ACQ, \
+ BPF_REG_2, BPF_REG_1, 0)), \
+ __imm_addr(DST) \
+ : __clobber_all); } \
+
+ LOAD_ACQUIRE_ARENA(B, u8, load_acquire8_value, load_acquire8_result)
+ LOAD_ACQUIRE_ARENA(H, u16, load_acquire16_value,
+ load_acquire16_result)
+ LOAD_ACQUIRE_ARENA(W, u32, load_acquire32_value,
+ load_acquire32_result)
+ LOAD_ACQUIRE_ARENA(DW, u64, load_acquire64_value,
+ load_acquire64_result)
+#undef LOAD_ACQUIRE_ARENA
+
+#endif
+ return 0;
+}
+
+#if __clang_major__ >= 18
+__u8 __arena_global store_release8_result = 0;
+__u16 __arena_global store_release16_result = 0;
+__u32 __arena_global store_release32_result = 0;
+__u64 __arena_global store_release64_result = 0;
+#else
+/* clang-17 crashes if the .addr_space.1 ELF section has holes. Work around
+ * this issue by defining the below variables as 64-bit.
+ */
+__u64 __arena_global store_release8_result;
+__u64 __arena_global store_release16_result;
+__u64 __arena_global store_release32_result;
+__u64 __arena_global store_release64_result;
+#endif
+
+SEC("raw_tp/sys_enter")
+int store_release(const void *ctx)
+{
+#if defined(ENABLE_ATOMICS_TESTS) && \
+ defined(__BPF_FEATURE_ADDR_SPACE_CAST) && \
+ (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86))
+
+#define STORE_RELEASE_ARENA(SIZEOP, DST, VAL) \
+ { asm volatile ( \
+ "r1 = " VAL ";" \
+ "r2 = %[" #DST "] ll;" \
+ "r2 = addr_space_cast(r2, 0x0, 0x1);" \
+ ".8byte %[store_release_insn];" \
+ : \
+ : __imm_addr(DST), \
+ __imm_insn(store_release_insn, \
+ BPF_ATOMIC_OP(BPF_##SIZEOP, BPF_STORE_REL, \
+ BPF_REG_2, BPF_REG_1, 0)) \
+ : __clobber_all); } \
+
+ STORE_RELEASE_ARENA(B, store_release8_result, "0x12")
+ STORE_RELEASE_ARENA(H, store_release16_result, "0x1234")
+ STORE_RELEASE_ARENA(W, store_release32_result, "0x12345678")
+ STORE_RELEASE_ARENA(DW, store_release64_result,
+ "0x1234567890abcdef ll")
+#undef STORE_RELEASE_ARENA
+
+#endif
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/arena_spin_lock.c b/tools/testing/selftests/bpf/progs/arena_spin_lock.c
new file mode 100644
index 000000000000..c4500c37f85e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/arena_spin_lock.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "bpf_arena_spin_lock.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, 100); /* number of pages */
+#ifdef __TARGET_ARCH_arm64
+ __ulong(map_extra, 0x1ull << 32); /* start of mmap() region */
+#else
+ __ulong(map_extra, 0x1ull << 44); /* start of mmap() region */
+#endif
+} arena SEC(".maps");
+
+int cs_count;
+
+#if defined(ENABLE_ATOMICS_TESTS) && defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+arena_spinlock_t __arena lock;
+int test_skip = 1;
+#else
+int test_skip = 2;
+#endif
+
+int counter;
+int limit;
+
+SEC("tc")
+int prog(void *ctx)
+{
+ int ret = -2;
+
+#if defined(ENABLE_ATOMICS_TESTS) && defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ unsigned long flags;
+
+ if ((ret = arena_spin_lock_irqsave(&lock, flags)))
+ return ret;
+ if (counter != limit)
+ counter++;
+ bpf_repeat(cs_count);
+ ret = 0;
+ arena_spin_unlock_irqrestore(&lock, flags);
+#endif
+ return ret;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c b/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c
index bc10c4e4b4fa..966ee5a7b066 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c
@@ -9,6 +9,13 @@ char _license[] SEC("license") = "GPL";
uint32_t tid = 0;
int num_unknown_tid = 0;
int num_known_tid = 0;
+void *user_ptr = 0;
+void *user_ptr_long = 0;
+uint32_t pid = 0;
+
+static char big_str1[5000];
+static char big_str2[5005];
+static char big_str3[4996];
SEC("iter/task")
int dump_task(struct bpf_iter__task *ctx)
@@ -35,7 +42,9 @@ int dump_task(struct bpf_iter__task *ctx)
}
int num_expected_failure_copy_from_user_task = 0;
+int num_expected_failure_copy_from_user_task_str = 0;
int num_success_copy_from_user_task = 0;
+int num_success_copy_from_user_task_str = 0;
SEC("iter.s/task")
int dump_task_sleepable(struct bpf_iter__task *ctx)
@@ -44,6 +53,9 @@ int dump_task_sleepable(struct bpf_iter__task *ctx)
struct task_struct *task = ctx->task;
static const char info[] = " === END ===";
struct pt_regs *regs;
+ char task_str1[10] = "aaaaaaaaaa";
+ char task_str2[10], task_str3[10];
+ char task_str4[20] = "aaaaaaaaaaaaaaaaaaaa";
void *ptr;
uint32_t user_data = 0;
int ret;
@@ -78,8 +90,106 @@ int dump_task_sleepable(struct bpf_iter__task *ctx)
BPF_SEQ_PRINTF(seq, "%s\n", info);
return 0;
}
+
++num_success_copy_from_user_task;
+ /* Read an invalid pointer and ensure we get an error */
+ ptr = NULL;
+ ret = bpf_copy_from_user_task_str((char *)task_str1, sizeof(task_str1), ptr, task, 0);
+ if (ret >= 0 || task_str1[9] != 'a' || task_str1[0] != '\0') {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* Read an invalid pointer and ensure we get error with pad zeros flag */
+ ptr = NULL;
+ ret = bpf_copy_from_user_task_str((char *)task_str1, sizeof(task_str1),
+ ptr, task, BPF_F_PAD_ZEROS);
+ if (ret >= 0 || task_str1[9] != '\0' || task_str1[0] != '\0') {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ ++num_expected_failure_copy_from_user_task_str;
+
+ /* Same length as the string */
+ ret = bpf_copy_from_user_task_str((char *)task_str2, 10, user_ptr, task, 0);
+ /* only need to do the task pid check once */
+ if (bpf_strncmp(task_str2, 10, "test_data\0") != 0 || ret != 10 || task->tgid != pid) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* Shorter length than the string */
+ ret = bpf_copy_from_user_task_str((char *)task_str3, 2, user_ptr, task, 0);
+ if (bpf_strncmp(task_str3, 2, "t\0") != 0 || ret != 2) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* Longer length than the string */
+ ret = bpf_copy_from_user_task_str((char *)task_str4, 20, user_ptr, task, 0);
+ if (bpf_strncmp(task_str4, 10, "test_data\0") != 0 || ret != 10
+ || task_str4[sizeof(task_str4) - 1] != 'a') {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* Longer length than the string with pad zeros flag */
+ ret = bpf_copy_from_user_task_str((char *)task_str4, 20, user_ptr, task, BPF_F_PAD_ZEROS);
+ if (bpf_strncmp(task_str4, 10, "test_data\0") != 0 || ret != 10
+ || task_str4[sizeof(task_str4) - 1] != '\0') {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* Longer length than the string past a page boundary */
+ ret = bpf_copy_from_user_task_str(big_str1, 5000, user_ptr, task, 0);
+ if (bpf_strncmp(big_str1, 10, "test_data\0") != 0 || ret != 10) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* String that crosses a page boundary */
+ ret = bpf_copy_from_user_task_str(big_str1, 5000, user_ptr_long, task, BPF_F_PAD_ZEROS);
+ if (bpf_strncmp(big_str1, 4, "baba") != 0 || ret != 5000
+ || bpf_strncmp(big_str1 + 4996, 4, "bab\0") != 0) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ for (int i = 0; i < 4999; ++i) {
+ if (i % 2 == 0) {
+ if (big_str1[i] != 'b') {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+ } else {
+ if (big_str1[i] != 'a') {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+ }
+ }
+
+ /* Longer length than the string that crosses a page boundary */
+ ret = bpf_copy_from_user_task_str(big_str2, 5005, user_ptr_long, task, BPF_F_PAD_ZEROS);
+ if (bpf_strncmp(big_str2, 4, "baba") != 0 || ret != 5000
+ || bpf_strncmp(big_str2 + 4996, 5, "bab\0\0") != 0) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* Shorter length than the string that crosses a page boundary */
+ ret = bpf_copy_from_user_task_str(big_str3, 4996, user_ptr_long, task, 0);
+ if (bpf_strncmp(big_str3, 4, "baba") != 0 || ret != 4996
+ || bpf_strncmp(big_str3 + 4992, 4, "bab\0") != 0) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ ++num_success_copy_from_user_task_str;
+
if (ctx->meta->seq_num == 0)
BPF_SEQ_PRINTF(seq, " tgid gid data\n");
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
index d22449c69363..164640db3a29 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
@@ -99,10 +99,10 @@ static int dump_tcp_sock(struct seq_file *seq, struct tcp_sock *tp,
icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
timer_active = 1;
- timer_expires = icsk->icsk_timeout;
+ timer_expires = icsk->icsk_retransmit_timer.expires;
} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
timer_active = 4;
- timer_expires = icsk->icsk_timeout;
+ timer_expires = icsk->icsk_retransmit_timer.expires;
} else if (timer_pending(&sp->sk_timer)) {
timer_active = 2;
timer_expires = sp->sk_timer.expires;
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
index 8b072666f9d9..591c703f5032 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
@@ -99,10 +99,10 @@ static int dump_tcp6_sock(struct seq_file *seq, struct tcp6_sock *tp,
icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
timer_active = 1;
- timer_expires = icsk->icsk_timeout;
+ timer_expires = icsk->icsk_retransmit_timer.expires;
} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
timer_active = 4;
- timer_expires = icsk->icsk_timeout;
+ timer_expires = icsk->icsk_retransmit_timer.expires;
} else if (timer_pending(&sp->sk_timer)) {
timer_active = 2;
timer_expires = sp->sk_timer.expires;
diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
index f45f4352feeb..13a2e22f5465 100644
--- a/tools/testing/selftests/bpf/progs/bpf_misc.h
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -135,6 +135,8 @@
#define __arch_arm64 __arch("ARM64")
#define __arch_riscv64 __arch("RISCV64")
#define __caps_unpriv(caps) __attribute__((btf_decl_tag("comment:test_caps_unpriv=" EXPAND_QUOTE(caps))))
+#define __load_if_JITed() __attribute__((btf_decl_tag("comment:load_mode=jited")))
+#define __load_if_no_JITed() __attribute__((btf_decl_tag("comment:load_mode=no_jited")))
/* Define common capabilities tested using __caps_unpriv */
#define CAP_NET_ADMIN 12
@@ -172,6 +174,9 @@
#elif defined(__TARGET_ARCH_riscv)
#define SYSCALL_WRAPPER 1
#define SYS_PREFIX "__riscv_"
+#elif defined(__TARGET_ARCH_powerpc)
+#define SYSCALL_WRAPPER 1
+#define SYS_PREFIX ""
#else
#define SYSCALL_WRAPPER 0
#define SYS_PREFIX "__se_"
@@ -208,4 +213,21 @@
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
+#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
+ defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
+ defined(__TARGET_ARCH_loongarch)) && \
+ __clang_major__ >= 18
+#define CAN_USE_GOTOL
+#endif
+
+#if _clang_major__ >= 18
+#define CAN_USE_BPF_ST
+#endif
+
+#if __clang_major__ >= 18 && defined(ENABLE_ATOMICS_TESTS) && \
+ (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86))
+#define CAN_USE_LOAD_ACQ_STORE_REL
+#endif
+
#endif
diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
index 59843b430f76..659694162739 100644
--- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
+++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
@@ -15,7 +15,11 @@
#define SO_KEEPALIVE 9
#define SO_PRIORITY 12
#define SO_REUSEPORT 15
+#if defined(__TARGET_ARCH_powerpc)
+#define SO_RCVLOWAT 16
+#else
#define SO_RCVLOWAT 18
+#endif
#define SO_BINDTODEVICE 25
#define SO_MARK 36
#define SO_MAX_PACING_RATE 47
@@ -49,6 +53,7 @@
#define TCP_SAVED_SYN 28
#define TCP_CA_NAME_MAX 16
#define TCP_NAGLE_OFF 1
+#define TCP_RTO_MAX_MS 44
#define TCP_ECN_OK 1
#define TCP_ECN_QUEUE_CWR 2
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_bad_signed_arr_elem_sz.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_bad_signed_arr_elem_sz.c
new file mode 100644
index 000000000000..21a560427b10
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_bad_signed_arr_elem_sz.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_arrays___err_bad_signed_arr_elem_sz x) {}
diff --git a/tools/testing/selftests/bpf/progs/cgroup_preorder.c b/tools/testing/selftests/bpf/progs/cgroup_preorder.c
new file mode 100644
index 000000000000..4ef6202baa0a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_preorder.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+unsigned int idx;
+__u8 result[4];
+
+SEC("cgroup/getsockopt")
+int child(struct bpf_sockopt *ctx)
+{
+ if (idx < 4)
+ result[idx++] = 1;
+ return 1;
+}
+
+SEC("cgroup/getsockopt")
+int child_2(struct bpf_sockopt *ctx)
+{
+ if (idx < 4)
+ result[idx++] = 2;
+ return 1;
+}
+
+SEC("cgroup/getsockopt")
+int parent(struct bpf_sockopt *ctx)
+{
+ if (idx < 4)
+ result[idx++] = 3;
+ return 1;
+}
+
+SEC("cgroup/getsockopt")
+int parent_2(struct bpf_sockopt *ctx)
+{
+ if (idx < 4)
+ result[idx++] = 4;
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/changes_pkt_data.c b/tools/testing/selftests/bpf/progs/changes_pkt_data.c
deleted file mode 100644
index 43cada48b28a..000000000000
--- a/tools/testing/selftests/bpf/progs/changes_pkt_data.c
+++ /dev/null
@@ -1,39 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-#include <linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-
-__noinline
-long changes_pkt_data(struct __sk_buff *sk)
-{
- return bpf_skb_pull_data(sk, 0);
-}
-
-__noinline __weak
-long does_not_change_pkt_data(struct __sk_buff *sk)
-{
- return 0;
-}
-
-SEC("?tc")
-int main_with_subprogs(struct __sk_buff *sk)
-{
- changes_pkt_data(sk);
- does_not_change_pkt_data(sk);
- return 0;
-}
-
-SEC("?tc")
-int main_changes(struct __sk_buff *sk)
-{
- bpf_skb_pull_data(sk, 0);
- return 0;
-}
-
-SEC("?tc")
-int main_does_not_change(struct __sk_buff *sk)
-{
- return 0;
-}
-
-char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/compute_live_registers.c b/tools/testing/selftests/bpf/progs/compute_live_registers.c
new file mode 100644
index 000000000000..f3d79aecbf93
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/compute_live_registers.c
@@ -0,0 +1,424 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
+#include "bpf_arena_common.h"
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} test_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, 1);
+} arena SEC(".maps");
+
+SEC("socket")
+__log_level(2)
+__msg(" 0: .......... (b7) r0 = 42")
+__msg(" 1: 0......... (bf) r1 = r0")
+__msg(" 2: .1........ (bf) r2 = r1")
+__msg(" 3: ..2....... (bf) r3 = r2")
+__msg(" 4: ...3...... (bf) r4 = r3")
+__msg(" 5: ....4..... (bf) r5 = r4")
+__msg(" 6: .....5.... (bf) r6 = r5")
+__msg(" 7: ......6... (bf) r7 = r6")
+__msg(" 8: .......7.. (bf) r8 = r7")
+__msg(" 9: ........8. (bf) r9 = r8")
+__msg("10: .........9 (bf) r0 = r9")
+__msg("11: 0......... (95) exit")
+__naked void assign_chain(void)
+{
+ asm volatile (
+ "r0 = 42;"
+ "r1 = r0;"
+ "r2 = r1;"
+ "r3 = r2;"
+ "r4 = r3;"
+ "r5 = r4;"
+ "r6 = r5;"
+ "r7 = r6;"
+ "r8 = r7;"
+ "r9 = r8;"
+ "r0 = r9;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("0: .......... (b7) r1 = 7")
+__msg("1: .1........ (07) r1 += 7")
+__msg("2: .......... (b7) r2 = 7")
+__msg("3: ..2....... (b7) r3 = 42")
+__msg("4: ..23...... (0f) r2 += r3")
+__msg("5: .......... (b7) r0 = 0")
+__msg("6: 0......... (95) exit")
+__naked void arithmetics(void)
+{
+ asm volatile (
+ "r1 = 7;"
+ "r1 += 7;"
+ "r2 = 7;"
+ "r3 = 42;"
+ "r2 += r3;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+#ifdef CAN_USE_BPF_ST
+SEC("socket")
+__log_level(2)
+__msg(" 1: .1........ (07) r1 += -8")
+__msg(" 2: .1........ (7a) *(u64 *)(r1 +0) = 7")
+__msg(" 3: .1........ (b7) r2 = 42")
+__msg(" 4: .12....... (7b) *(u64 *)(r1 +0) = r2")
+__msg(" 5: .12....... (7b) *(u64 *)(r1 +0) = r2")
+__msg(" 6: .......... (b7) r0 = 0")
+__naked void store(void)
+{
+ asm volatile (
+ "r1 = r10;"
+ "r1 += -8;"
+ "*(u64 *)(r1 +0) = 7;"
+ "r2 = 42;"
+ "*(u64 *)(r1 +0) = r2;"
+ "*(u64 *)(r1 +0) = r2;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+#endif
+
+SEC("socket")
+__log_level(2)
+__msg("1: ....4..... (07) r4 += -8")
+__msg("2: ....4..... (79) r5 = *(u64 *)(r4 +0)")
+__msg("3: ....45.... (07) r4 += -8")
+__naked void load(void)
+{
+ asm volatile (
+ "r4 = r10;"
+ "r4 += -8;"
+ "r5 = *(u64 *)(r4 +0);"
+ "r4 += -8;"
+ "r0 = r5;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("0: .1........ (61) r2 = *(u32 *)(r1 +0)")
+__msg("1: ..2....... (d4) r2 = le64 r2")
+__msg("2: ..2....... (bf) r0 = r2")
+__naked void endian(void)
+{
+ asm volatile (
+ "r2 = *(u32 *)(r1 +0);"
+ "r2 = le64 r2;"
+ "r0 = r2;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg(" 8: 0......... (b7) r1 = 1")
+__msg(" 9: 01........ (db) r1 = atomic64_fetch_add((u64 *)(r0 +0), r1)")
+__msg("10: 01........ (c3) lock *(u32 *)(r0 +0) += r1")
+__msg("11: 01........ (db) r1 = atomic64_xchg((u64 *)(r0 +0), r1)")
+__msg("12: 01........ (bf) r2 = r0")
+__msg("13: .12....... (bf) r0 = r1")
+__msg("14: 012....... (db) r0 = atomic64_cmpxchg((u64 *)(r2 +0), r0, r1)")
+__naked void atomic(void)
+{
+ asm volatile (
+ "r2 = r10;"
+ "r2 += -8;"
+ "r1 = 0;"
+ "*(u64 *)(r2 +0) = r1;"
+ "r1 = %[test_map] ll;"
+ "call %[bpf_map_lookup_elem];"
+ "if r0 == 0 goto 1f;"
+ "r1 = 1;"
+ "r1 = atomic_fetch_add((u64 *)(r0 +0), r1);"
+ ".8byte %[add_nofetch];" /* same as "lock *(u32 *)(r0 +0) += r1;" */
+ "r1 = xchg_64(r0 + 0, r1);"
+ "r2 = r0;"
+ "r0 = r1;"
+ "r0 = cmpxchg_64(r2 + 0, r0, r1);"
+ "1: exit;"
+ :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(test_map),
+ __imm_insn(add_nofetch, BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_0, BPF_REG_1, 0))
+ : __clobber_all);
+}
+
+#ifdef CAN_USE_LOAD_ACQ_STORE_REL
+
+SEC("socket")
+__log_level(2)
+__msg("2: .12....... (db) store_release((u64 *)(r2 -8), r1)")
+__msg("3: .......... (bf) r3 = r10")
+__msg("4: ...3...... (db) r4 = load_acquire((u64 *)(r3 -8))")
+__naked void atomic_load_acq_store_rel(void)
+{
+ asm volatile (
+ "r1 = 42;"
+ "r2 = r10;"
+ ".8byte %[store_release_insn];" /* store_release((u64 *)(r2 - 8), r1); */
+ "r3 = r10;"
+ ".8byte %[load_acquire_insn];" /* r4 = load_acquire((u64 *)(r3 + 0)); */
+ "r0 = r4;"
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_2, BPF_REG_1, -8)),
+ __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_4, BPF_REG_3, -8))
+ : __clobber_all);
+}
+
+#endif /* CAN_USE_LOAD_ACQ_STORE_REL */
+
+SEC("socket")
+__log_level(2)
+__msg("4: .12....7.. (85) call bpf_trace_printk#6")
+__msg("5: 0......7.. (0f) r0 += r7")
+__naked void regular_call(void)
+{
+ asm volatile (
+ "r7 = 1;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 1;"
+ "call %[bpf_trace_printk];"
+ "r0 += r7;"
+ "exit;"
+ :
+ : __imm(bpf_trace_printk)
+ : __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("2: 012....... (25) if r1 > 0x7 goto pc+1")
+__msg("3: ..2....... (bf) r0 = r2")
+__naked void if1(void)
+{
+ asm volatile (
+ "r0 = 1;"
+ "r2 = 2;"
+ "if r1 > 0x7 goto +1;"
+ "r0 = r2;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("3: 0123...... (2d) if r1 > r3 goto pc+1")
+__msg("4: ..2....... (bf) r0 = r2")
+__naked void if2(void)
+{
+ asm volatile (
+ "r0 = 1;"
+ "r2 = 2;"
+ "r3 = 7;"
+ "if r1 > r3 goto +1;"
+ "r0 = r2;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("0: .......... (b7) r1 = 0")
+__msg("1: .1........ (b7) r2 = 7")
+__msg("2: .12....... (25) if r1 > 0x7 goto pc+4")
+__msg("3: .12....... (07) r1 += 1")
+__msg("4: .12....... (27) r2 *= 2")
+__msg("5: .12....... (05) goto pc+0")
+__msg("6: .12....... (05) goto pc-5")
+__msg("7: .......... (b7) r0 = 0")
+__msg("8: 0......... (95) exit")
+__naked void loop(void)
+{
+ asm volatile (
+ "r1 = 0;"
+ "r2 = 7;"
+ "if r1 > 0x7 goto +4;"
+ "r1 += 1;"
+ "r2 *= 2;"
+ "goto +0;"
+ "goto -5;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_trace_printk)
+ : __clobber_all);
+}
+
+#ifdef CAN_USE_GOTOL
+SEC("socket")
+__log_level(2)
+__msg("2: .123...... (25) if r1 > 0x7 goto pc+2")
+__msg("3: ..2....... (bf) r0 = r2")
+__msg("4: 0......... (06) gotol pc+1")
+__msg("5: ...3...... (bf) r0 = r3")
+__msg("6: 0......... (95) exit")
+__naked void gotol(void)
+{
+ asm volatile (
+ "r2 = 42;"
+ "r3 = 24;"
+ "if r1 > 0x7 goto +2;"
+ "r0 = r2;"
+ "gotol +1;"
+ "r0 = r3;"
+ "exit;"
+ :
+ : __imm(bpf_trace_printk)
+ : __clobber_all);
+}
+#endif
+
+SEC("socket")
+__log_level(2)
+__msg("0: .......... (b7) r1 = 1")
+__msg("1: .1........ (e5) may_goto pc+1")
+__msg("2: .......... (05) goto pc-3")
+__msg("3: .1........ (bf) r0 = r1")
+__msg("4: 0......... (95) exit")
+__naked void may_goto(void)
+{
+ asm volatile (
+ "1: r1 = 1;"
+ ".8byte %[may_goto];"
+ "goto 1b;"
+ "r0 = r1;"
+ "exit;"
+ :
+ : __imm(bpf_get_smp_processor_id),
+ __imm_insn(may_goto, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, +1 /* offset */, 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("1: 0......... (18) r2 = 0x7")
+__msg("3: 0.2....... (0f) r0 += r2")
+__naked void ldimm64(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "r2 = 0x7 ll;"
+ "r0 += r2;"
+ "exit;"
+ :
+ :: __clobber_all);
+}
+
+/* No rules specific for LD_ABS/LD_IND, default behaviour kicks in */
+SEC("socket")
+__log_level(2)
+__msg("2: 0123456789 (30) r0 = *(u8 *)skb[42]")
+__msg("3: 012.456789 (0f) r7 += r0")
+__msg("4: 012.456789 (b7) r3 = 42")
+__msg("5: 0123456789 (50) r0 = *(u8 *)skb[r3 + 0]")
+__msg("6: 0......7.. (0f) r7 += r0")
+__naked void ldabs(void)
+{
+ asm volatile (
+ "r6 = r1;"
+ "r7 = 0;"
+ "r0 = *(u8 *)skb[42];"
+ "r7 += r0;"
+ "r3 = 42;"
+ ".8byte %[ld_ind];" /* same as "r0 = *(u8 *)skb[r3];" */
+ "r7 += r0;"
+ "r0 = r7;"
+ "exit;"
+ :
+ : __imm_insn(ld_ind, BPF_LD_IND(BPF_B, BPF_REG_3, 0))
+ : __clobber_all);
+}
+
+
+#ifdef __BPF_FEATURE_ADDR_SPACE_CAST
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__log_level(2)
+__msg(" 6: .12345.... (85) call bpf_arena_alloc_pages")
+__msg(" 7: 0......... (bf) r1 = addr_space_cast(r0, 0, 1)")
+__msg(" 8: .1........ (b7) r2 = 42")
+__naked void addr_space_cast(void)
+{
+ asm volatile (
+ "r1 = %[arena] ll;"
+ "r2 = 0;"
+ "r3 = 1;"
+ "r4 = 0;"
+ "r5 = 0;"
+ "call %[bpf_arena_alloc_pages];"
+ "r1 = addr_space_cast(r0, 0, 1);"
+ "r2 = 42;"
+ "*(u64 *)(r1 +0) = r2;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_arena_alloc_pages),
+ __imm_addr(arena)
+ : __clobber_all);
+}
+#endif
+
+static __used __naked int aux1(void)
+{
+ asm volatile (
+ "r0 = r1;"
+ "r0 += r2;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("0: ....45.... (b7) r1 = 1")
+__msg("1: .1..45.... (b7) r2 = 2")
+__msg("2: .12.45.... (b7) r3 = 3")
+/* Conservative liveness for subprog parameters. */
+__msg("3: .12345.... (85) call pc+2")
+__msg("4: .......... (b7) r0 = 0")
+__msg("5: 0......... (95) exit")
+__msg("6: .12....... (bf) r0 = r1")
+__msg("7: 0.2....... (0f) r0 += r2")
+/* Conservative liveness for subprog return value. */
+__msg("8: 0......... (95) exit")
+__naked void subprog1(void)
+{
+ asm volatile (
+ "r1 = 1;"
+ "r2 = 2;"
+ "r3 = 3;"
+ "call aux1;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/* to retain debug info for BTF generation */
+void kfunc_root(void)
+{
+ bpf_arena_alloc_pages(0, 0, 0, 0, 0);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/connect4_dropper.c b/tools/testing/selftests/bpf/progs/connect4_dropper.c
index d3f4c5e4fb69..a3819a5d09c8 100644
--- a/tools/testing/selftests/bpf/progs/connect4_dropper.c
+++ b/tools/testing/selftests/bpf/progs/connect4_dropper.c
@@ -13,12 +13,14 @@
#define VERDICT_REJECT 0
#define VERDICT_PROCEED 1
+int port;
+
SEC("cgroup/connect4")
int connect_v4_dropper(struct bpf_sock_addr *ctx)
{
if (ctx->type != SOCK_STREAM)
return VERDICT_PROCEED;
- if (ctx->user_port == bpf_htons(60120))
+ if (ctx->user_port == bpf_htons(port))
return VERDICT_REJECT;
return VERDICT_PROCEED;
}
diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h
index fd8e1b4c6762..5760ae015e09 100644
--- a/tools/testing/selftests/bpf/progs/core_reloc_types.h
+++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h
@@ -347,6 +347,7 @@ struct core_reloc_nesting___err_too_deep {
*/
struct core_reloc_arrays_output {
int a2;
+ int a3;
char b123;
int c1c;
int d00d;
@@ -455,6 +456,15 @@ struct core_reloc_arrays___err_bad_zero_sz_arr {
struct core_reloc_arrays_substruct d[1][2];
};
+struct core_reloc_arrays___err_bad_signed_arr_elem_sz {
+ /* int -> short (signed!): not supported case */
+ short a[5];
+ char b[2][3][4];
+ struct core_reloc_arrays_substruct c[3];
+ struct core_reloc_arrays_substruct d[1][2];
+ struct core_reloc_arrays_substruct f[][2];
+};
+
/*
* PRIMITIVES
*/
diff --git a/tools/testing/selftests/bpf/progs/cpumask_common.h b/tools/testing/selftests/bpf/progs/cpumask_common.h
index 4ece7873ba60..86085b79f5ca 100644
--- a/tools/testing/selftests/bpf/progs/cpumask_common.h
+++ b/tools/testing/selftests/bpf/progs/cpumask_common.h
@@ -61,6 +61,7 @@ u32 bpf_cpumask_any_distribute(const struct cpumask *src) __ksym __weak;
u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1,
const struct cpumask *src2) __ksym __weak;
u32 bpf_cpumask_weight(const struct cpumask *cpumask) __ksym __weak;
+int bpf_cpumask_populate(struct cpumask *cpumask, void *src, size_t src__sz) __ksym __weak;
void bpf_rcu_read_lock(void) __ksym __weak;
void bpf_rcu_read_unlock(void) __ksym __weak;
diff --git a/tools/testing/selftests/bpf/progs/cpumask_failure.c b/tools/testing/selftests/bpf/progs/cpumask_failure.c
index b40b52548ffb..8a2fd596c8a3 100644
--- a/tools/testing/selftests/bpf/progs/cpumask_failure.c
+++ b/tools/testing/selftests/bpf/progs/cpumask_failure.c
@@ -222,3 +222,41 @@ int BPF_PROG(test_invalid_nested_array, struct task_struct *task, u64 clone_flag
return 0;
}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("type=scalar expected=fp")
+int BPF_PROG(test_populate_invalid_destination, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *invalid = (struct bpf_cpumask *)0x123456;
+ u64 bits;
+ int ret;
+
+ ret = bpf_cpumask_populate((struct cpumask *)invalid, &bits, sizeof(bits));
+ if (!ret)
+ err = 2;
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("leads to invalid memory access")
+int BPF_PROG(test_populate_invalid_source, struct task_struct *task, u64 clone_flags)
+{
+ void *garbage = (void *)0x123456;
+ struct bpf_cpumask *local;
+ int ret;
+
+ local = create_cpumask();
+ if (!local) {
+ err = 1;
+ return 0;
+ }
+
+ ret = bpf_cpumask_populate((struct cpumask *)local, garbage, 8);
+ if (!ret)
+ err = 2;
+
+ bpf_cpumask_release(local);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/cpumask_success.c b/tools/testing/selftests/bpf/progs/cpumask_success.c
index 80ee469b0b60..0e04c31b91c0 100644
--- a/tools/testing/selftests/bpf/progs/cpumask_success.c
+++ b/tools/testing/selftests/bpf/progs/cpumask_success.c
@@ -749,7 +749,6 @@ out:
}
SEC("tp_btf/task_newtask")
-__success
int BPF_PROG(test_refcount_null_tracking, struct task_struct *task, u64 clone_flags)
{
struct bpf_cpumask *mask1, *mask2;
@@ -770,3 +769,122 @@ free_masks_return:
bpf_cpumask_release(mask2);
return 0;
}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_populate_reject_small_mask, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *local;
+ u8 toofewbits;
+ int ret;
+
+ if (!is_test_task())
+ return 0;
+
+ local = create_cpumask();
+ if (!local)
+ return 0;
+
+ /* The kfunc should prevent this operation */
+ ret = bpf_cpumask_populate((struct cpumask *)local, &toofewbits, sizeof(toofewbits));
+ if (ret != -EACCES)
+ err = 2;
+
+ bpf_cpumask_release(local);
+
+ return 0;
+}
+
+/* Mask is guaranteed to be large enough for bpf_cpumask_t. */
+#define CPUMASK_TEST_MASKLEN (sizeof(cpumask_t))
+
+/* Add an extra word for the test_populate_reject_unaligned test. */
+u64 bits[CPUMASK_TEST_MASKLEN / 8 + 1];
+extern bool CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS __kconfig __weak;
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_populate_reject_unaligned, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *mask;
+ char *src;
+ int ret;
+
+ if (!is_test_task())
+ return 0;
+
+ /* Skip if unaligned accesses are fine for this arch. */
+ if (CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+ return 0;
+
+ mask = bpf_cpumask_create();
+ if (!mask) {
+ err = 1;
+ return 0;
+ }
+
+ /* Misalign the source array by a byte. */
+ src = &((char *)bits)[1];
+
+ ret = bpf_cpumask_populate((struct cpumask *)mask, src, CPUMASK_TEST_MASKLEN);
+ if (ret != -EINVAL)
+ err = 2;
+
+ bpf_cpumask_release(mask);
+
+ return 0;
+}
+
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_populate, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *mask;
+ bool bit;
+ int ret;
+ int i;
+
+ if (!is_test_task())
+ return 0;
+
+ /* Set only odd bits. */
+ __builtin_memset(bits, 0xaa, CPUMASK_TEST_MASKLEN);
+
+ mask = bpf_cpumask_create();
+ if (!mask) {
+ err = 1;
+ return 0;
+ }
+
+ /* Pass the entire bits array, the kfunc will only copy the valid bits. */
+ ret = bpf_cpumask_populate((struct cpumask *)mask, bits, CPUMASK_TEST_MASKLEN);
+ if (ret) {
+ err = 2;
+ goto out;
+ }
+
+ /*
+ * Test is there to appease the verifier. We cannot directly
+ * access NR_CPUS, the upper bound for nr_cpus, so we infer
+ * it from the size of cpumask_t.
+ */
+ if (nr_cpus < 0 || nr_cpus >= CPUMASK_TEST_MASKLEN * 8) {
+ err = 3;
+ goto out;
+ }
+
+ bpf_for(i, 0, nr_cpus) {
+ /* Odd-numbered bits should be set, even ones unset. */
+ bit = bpf_cpumask_test_cpu(i, (const struct cpumask *)mask);
+ if (bit == (i % 2 != 0))
+ continue;
+
+ err = 4;
+ break;
+ }
+
+out:
+ bpf_cpumask_release(mask);
+
+ return 0;
+}
+
+#undef CPUMASK_TEST_MASKLEN
diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c
index bfcc85686cf0..e1fba28e4a86 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_success.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_success.c
@@ -1,20 +1,19 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2022 Facebook */
+#include <vmlinux.h>
#include <string.h>
#include <stdbool.h>
-#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_misc.h"
-#include "bpf_kfuncs.h"
#include "errno.h"
char _license[] SEC("license") = "GPL";
int pid, err, val;
-struct sample {
+struct ringbuf_sample {
int pid;
int seq;
long value;
@@ -121,7 +120,7 @@ int test_dynptr_data(void *ctx)
static int ringbuf_callback(__u32 index, void *data)
{
- struct sample *sample;
+ struct ringbuf_sample *sample;
struct bpf_dynptr *ptr = (struct bpf_dynptr *)data;
@@ -138,7 +137,7 @@ SEC("?tp/syscalls/sys_enter_nanosleep")
int test_ringbuf(void *ctx)
{
struct bpf_dynptr ptr;
- struct sample *sample;
+ struct ringbuf_sample *sample;
if (bpf_get_current_pid_tgid() >> 32 != pid)
return 0;
@@ -567,3 +566,117 @@ int BPF_PROG(test_dynptr_skb_tp_btf, void *skb, void *location)
return 1;
}
+
+static inline int bpf_memcmp(const char *a, const char *b, u32 size)
+{
+ int i;
+
+ bpf_for(i, 0, size) {
+ if (a[i] != b[i])
+ return a[i] < b[i] ? -1 : 1;
+ }
+ return 0;
+}
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int test_dynptr_copy(void *ctx)
+{
+ char data[] = "hello there, world!!";
+ char buf[32] = {'\0'};
+ __u32 sz = sizeof(data);
+ struct bpf_dynptr src, dst;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, sz, 0, &src);
+ bpf_ringbuf_reserve_dynptr(&ringbuf, sz, 0, &dst);
+
+ /* Test basic case of copying contiguous memory backed dynptrs */
+ err = bpf_dynptr_write(&src, 0, data, sz, 0);
+ err = err ?: bpf_dynptr_copy(&dst, 0, &src, 0, sz);
+ err = err ?: bpf_dynptr_read(buf, sz, &dst, 0, 0);
+ err = err ?: bpf_memcmp(data, buf, sz);
+
+ /* Test that offsets are handled correctly */
+ err = err ?: bpf_dynptr_copy(&dst, 3, &src, 5, sz - 5);
+ err = err ?: bpf_dynptr_read(buf, sz - 5, &dst, 3, 0);
+ err = err ?: bpf_memcmp(data + 5, buf, sz - 5);
+
+ bpf_ringbuf_discard_dynptr(&src, 0);
+ bpf_ringbuf_discard_dynptr(&dst, 0);
+ return 0;
+}
+
+SEC("xdp")
+int test_dynptr_copy_xdp(struct xdp_md *xdp)
+{
+ struct bpf_dynptr ptr_buf, ptr_xdp;
+ char data[] = "qwertyuiopasdfghjkl";
+ char buf[32] = {'\0'};
+ __u32 len = sizeof(data);
+ int i, chunks = 200;
+
+ /* ptr_xdp is backed by non-contiguous memory */
+ bpf_dynptr_from_xdp(xdp, 0, &ptr_xdp);
+ bpf_ringbuf_reserve_dynptr(&ringbuf, len * chunks, 0, &ptr_buf);
+
+ /* Destination dynptr is backed by non-contiguous memory */
+ bpf_for(i, 0, chunks) {
+ err = bpf_dynptr_write(&ptr_buf, i * len, data, len, 0);
+ if (err)
+ goto out;
+ }
+
+ err = bpf_dynptr_copy(&ptr_xdp, 0, &ptr_buf, 0, len * chunks);
+ if (err)
+ goto out;
+
+ bpf_for(i, 0, chunks) {
+ __builtin_memset(buf, 0, sizeof(buf));
+ err = bpf_dynptr_read(&buf, len, &ptr_xdp, i * len, 0);
+ if (err)
+ goto out;
+ if (bpf_memcmp(data, buf, len) != 0)
+ goto out;
+ }
+
+ /* Source dynptr is backed by non-contiguous memory */
+ __builtin_memset(buf, 0, sizeof(buf));
+ bpf_for(i, 0, chunks) {
+ err = bpf_dynptr_write(&ptr_buf, i * len, buf, len, 0);
+ if (err)
+ goto out;
+ }
+
+ err = bpf_dynptr_copy(&ptr_buf, 0, &ptr_xdp, 0, len * chunks);
+ if (err)
+ goto out;
+
+ bpf_for(i, 0, chunks) {
+ __builtin_memset(buf, 0, sizeof(buf));
+ err = bpf_dynptr_read(&buf, len, &ptr_buf, i * len, 0);
+ if (err)
+ goto out;
+ if (bpf_memcmp(data, buf, len) != 0)
+ goto out;
+ }
+
+ /* Both source and destination dynptrs are backed by non-contiguous memory */
+ err = bpf_dynptr_copy(&ptr_xdp, 2, &ptr_xdp, len, len * (chunks - 1));
+ if (err)
+ goto out;
+
+ bpf_for(i, 0, chunks - 1) {
+ __builtin_memset(buf, 0, sizeof(buf));
+ err = bpf_dynptr_read(&buf, len, &ptr_xdp, 2 + i * len, 0);
+ if (err)
+ goto out;
+ if (bpf_memcmp(data, buf, len) != 0)
+ goto out;
+ }
+
+ if (bpf_dynptr_copy(&ptr_xdp, 2000, &ptr_xdp, 0, len * chunks) != -E2BIG)
+ err = 1;
+
+out:
+ bpf_ringbuf_discard_dynptr(&ptr_buf, 0);
+ return XDP_DROP;
+}
diff --git a/tools/testing/selftests/bpf/progs/fexit_noreturns.c b/tools/testing/selftests/bpf/progs/fexit_noreturns.c
new file mode 100644
index 000000000000..54654539f550
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/fexit_noreturns.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC("fexit/do_exit")
+__failure __msg("Attaching fexit/fmod_ret to __noreturn functions is rejected.")
+int BPF_PROG(noreturns)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/find_vma.c b/tools/testing/selftests/bpf/progs/find_vma.c
index 38034fb82530..02b82774469c 100644
--- a/tools/testing/selftests/bpf/progs/find_vma.c
+++ b/tools/testing/selftests/bpf/progs/find_vma.c
@@ -25,7 +25,7 @@ static long check_vma(struct task_struct *task, struct vm_area_struct *vma,
{
if (vma->vm_file)
bpf_probe_read_kernel_str(d_iname, DNAME_INLINE_LEN - 1,
- vma->vm_file->f_path.dentry->d_iname);
+ vma->vm_file->f_path.dentry->d_shortname.string);
/* check for VM_EXEC */
if (vma->vm_flags & VM_EXEC)
diff --git a/tools/testing/selftests/bpf/progs/irq.c b/tools/testing/selftests/bpf/progs/irq.c
index b0b53d980964..74d912b22de9 100644
--- a/tools/testing/selftests/bpf/progs/irq.c
+++ b/tools/testing/selftests/bpf/progs/irq.c
@@ -11,6 +11,9 @@ extern void bpf_local_irq_save(unsigned long *) __weak __ksym;
extern void bpf_local_irq_restore(unsigned long *) __weak __ksym;
extern int bpf_copy_from_user_str(void *dst, u32 dst__sz, const void *unsafe_ptr__ign, u64 flags) __weak __ksym;
+struct bpf_res_spin_lock lockA __hidden SEC(".data.A");
+struct bpf_res_spin_lock lockB __hidden SEC(".data.B");
+
SEC("?tc")
__failure __msg("arg#0 doesn't point to an irq flag on stack")
int irq_save_bad_arg(struct __sk_buff *ctx)
@@ -222,7 +225,7 @@ int __noinline global_local_irq_balance(void)
}
SEC("?tc")
-__failure __msg("global function calls are not allowed with IRQs disabled")
+__success
int irq_global_subprog(struct __sk_buff *ctx)
{
unsigned long flags;
@@ -441,4 +444,123 @@ int irq_ooo_refs_array(struct __sk_buff *ctx)
return 0;
}
+int __noinline
+global_subprog(int i)
+{
+ if (i)
+ bpf_printk("%p", &i);
+ return i;
+}
+
+int __noinline
+global_sleepable_helper_subprog(int i)
+{
+ if (i)
+ bpf_copy_from_user(&i, sizeof(i), NULL);
+ return i;
+}
+
+int __noinline
+global_sleepable_kfunc_subprog(int i)
+{
+ if (i)
+ bpf_copy_from_user_str(&i, sizeof(i), NULL, 0);
+ global_subprog(i);
+ return i;
+}
+
+int __noinline
+global_subprog_calling_sleepable_global(int i)
+{
+ if (!i)
+ global_sleepable_kfunc_subprog(i);
+ return i;
+}
+
+SEC("?syscall")
+__success
+int irq_non_sleepable_global_subprog(void *ctx)
+{
+ unsigned long flags;
+
+ bpf_local_irq_save(&flags);
+ global_subprog(0);
+ bpf_local_irq_restore(&flags);
+ return 0;
+}
+
+SEC("?syscall")
+__failure __msg("global functions that may sleep are not allowed in non-sleepable context")
+int irq_sleepable_helper_global_subprog(void *ctx)
+{
+ unsigned long flags;
+
+ bpf_local_irq_save(&flags);
+ global_sleepable_helper_subprog(0);
+ bpf_local_irq_restore(&flags);
+ return 0;
+}
+
+SEC("?syscall")
+__failure __msg("global functions that may sleep are not allowed in non-sleepable context")
+int irq_sleepable_global_subprog_indirect(void *ctx)
+{
+ unsigned long flags;
+
+ bpf_local_irq_save(&flags);
+ global_subprog_calling_sleepable_global(0);
+ bpf_local_irq_restore(&flags);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("cannot restore irq state out of order")
+int irq_ooo_lock_cond_inv(struct __sk_buff *ctx)
+{
+ unsigned long flags1, flags2;
+
+ if (bpf_res_spin_lock_irqsave(&lockA, &flags1))
+ return 0;
+ if (bpf_res_spin_lock_irqsave(&lockB, &flags2)) {
+ bpf_res_spin_unlock_irqrestore(&lockA, &flags1);
+ return 0;
+ }
+
+ bpf_res_spin_unlock_irqrestore(&lockB, &flags1);
+ bpf_res_spin_unlock_irqrestore(&lockA, &flags2);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("function calls are not allowed")
+int irq_wrong_kfunc_class_1(struct __sk_buff *ctx)
+{
+ unsigned long flags1;
+
+ if (bpf_res_spin_lock_irqsave(&lockA, &flags1))
+ return 0;
+ /* For now, bpf_local_irq_restore is not allowed in critical section,
+ * but this test ensures error will be caught with kfunc_class when it's
+ * opened up. Tested by temporarily permitting this kfunc in critical
+ * section.
+ */
+ bpf_local_irq_restore(&flags1);
+ bpf_res_spin_unlock_irqrestore(&lockA, &flags1);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("function calls are not allowed")
+int irq_wrong_kfunc_class_2(struct __sk_buff *ctx)
+{
+ unsigned long flags1, flags2;
+
+ bpf_local_irq_save(&flags1);
+ if (bpf_res_spin_lock_irqsave(&lockA, &flags2))
+ return 0;
+ bpf_local_irq_restore(&flags2);
+ bpf_res_spin_unlock_irqrestore(&lockA, &flags1);
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c
index 190822b2f08b..427b72954b87 100644
--- a/tools/testing/selftests/bpf/progs/iters.c
+++ b/tools/testing/selftests/bpf/progs/iters.c
@@ -7,6 +7,8 @@
#include "bpf_misc.h"
#include "bpf_compiler.h"
+#define unlikely(x) __builtin_expect(!!(x), 0)
+
static volatile int zero = 0;
int my_pid;
@@ -1175,6 +1177,122 @@ __naked int loop_state_deps2(void)
}
SEC("?raw_tp")
+__failure
+__msg("math between fp pointer and register with unbounded")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked int loop_state_deps3(void)
+{
+ /* This is equivalent to a C program below.
+ *
+ * if (random() != 24) { // assume false branch is placed first
+ * i = iter_new(); // fp[-8]
+ * while (iter_next(i));
+ * iter_destroy(i);
+ * return;
+ * }
+ *
+ * for (i = 10; i > 0; i--); // increase dfs_depth for child states
+ *
+ * i = iter_new(); // fp[-8]
+ * b = -24; // r8
+ * for (;;) { // checkpoint (L)
+ * if (iter_next(i)) // checkpoint (N)
+ * break;
+ * if (random() == 77) { // assume false branch is placed first
+ * *(u64 *)(r10 + b) = 7; // this is not safe when b == -25
+ * iter_destroy(i);
+ * return;
+ * }
+ * if (random() == 42) { // assume false branch is placed first
+ * b = -25;
+ * }
+ * }
+ * iter_destroy(i);
+ *
+ * In case of a buggy verifier first loop might poison
+ * env->cur_state->loop_entry with a state having 0 branches
+ * and small dfs_depth. This would trigger NOT_EXACT states
+ * comparison for some states within second loop.
+ * Specifically, checkpoint (L) might be problematic if:
+ * - branch with '*(u64 *)(r10 + b) = 7' is not explored yet;
+ * - checkpoint (L) is first reached in state {b=-24};
+ * - traversal is pruned at checkpoint (N) setting checkpoint's (L)
+ * branch count to 0, thus making it eligible for use in pruning;
+ * - checkpoint (L) is next reached in state {b=-25},
+ * this would cause NOT_EXACT comparison with a state {b=-24}
+ * while 'b' is not marked precise yet.
+ */
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == 24 goto 2f;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 5;"
+ "call %[bpf_iter_num_new];"
+ "1:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 != 0 goto 1b;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ "2:"
+ /* loop to increase dfs_depth */
+ "r0 = 10;"
+ "3:"
+ "r0 -= 1;"
+ "if r0 != 0 goto 3b;"
+ /* end of loop */
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "r8 = -24;"
+ "main_loop_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto main_loop_end_%=;"
+ /* first if */
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == 77 goto unsafe_write_%=;"
+ /* second if */
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == 42 goto poison_r8_%=;"
+ /* iterate */
+ "goto main_loop_%=;"
+ "main_loop_end_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+
+ "unsafe_write_%=:"
+ "r0 = r10;"
+ "r0 += r8;"
+ "r1 = 7;"
+ "*(u64 *)(r0 + 0) = r1;"
+ "goto main_loop_end_%=;"
+
+ "poison_r8_%=:"
+ "r8 = -25;"
+ "goto main_loop_%=;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_next),
+ __imm(bpf_iter_num_destroy)
+ : __clobber_all
+ );
+}
+
+SEC("?raw_tp")
__success
__naked int triple_continue(void)
{
@@ -1512,4 +1630,25 @@ int iter_destroy_bad_arg(const void *ctx)
return 0;
}
+SEC("raw_tp")
+__success
+int clean_live_states(const void *ctx)
+{
+ char buf[1];
+ int i, j, k, l, m, n, o;
+
+ bpf_for(i, 0, 10)
+ bpf_for(j, 0, 10)
+ bpf_for(k, 0, 10)
+ bpf_for(l, 0, 10)
+ bpf_for(m, 0, 10)
+ bpf_for(n, 0, 10)
+ bpf_for(o, 0, 10) {
+ if (unlikely(bpf_get_prandom_u32()))
+ buf[0] = 42;
+ bpf_printk("%s", buf);
+ }
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/net_timestamping.c b/tools/testing/selftests/bpf/progs/net_timestamping.c
new file mode 100644
index 000000000000..b4c2f0f2be11
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/net_timestamping.c
@@ -0,0 +1,248 @@
+#include "vmlinux.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_kfuncs.h"
+#include <errno.h>
+
+__u32 monitored_pid = 0;
+
+int nr_active;
+int nr_snd;
+int nr_passive;
+int nr_sched;
+int nr_txsw;
+int nr_ack;
+
+struct sk_stg {
+ __u64 sendmsg_ns; /* record ts when sendmsg is called */
+};
+
+struct sk_tskey {
+ u64 cookie;
+ u32 tskey;
+};
+
+struct delay_info {
+ u64 sendmsg_ns; /* record ts when sendmsg is called */
+ u32 sched_delay; /* SCHED_CB - sendmsg_ns */
+ u32 snd_sw_delay; /* SND_SW_CB - SCHED_CB */
+ u32 ack_delay; /* ACK_CB - SND_SW_CB */
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct sk_stg);
+} sk_stg_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, struct sk_tskey);
+ __type(value, struct delay_info);
+ __uint(max_entries, 1024);
+} time_map SEC(".maps");
+
+static u64 delay_tolerance_nsec = 10000000000; /* 10 second as an example */
+
+extern int bpf_sock_ops_enable_tx_tstamp(struct bpf_sock_ops_kern *skops, u64 flags) __ksym;
+
+static int bpf_test_sockopt(void *ctx, const struct sock *sk, int expected)
+{
+ int tmp, new = SK_BPF_CB_TX_TIMESTAMPING;
+ int opt = SK_BPF_CB_FLAGS;
+ int level = SOL_SOCKET;
+
+ if (bpf_setsockopt(ctx, level, opt, &new, sizeof(new)) != expected)
+ return 1;
+
+ if (bpf_getsockopt(ctx, level, opt, &tmp, sizeof(tmp)) != expected ||
+ (!expected && tmp != new))
+ return 1;
+
+ return 0;
+}
+
+static bool bpf_test_access_sockopt(void *ctx, const struct sock *sk)
+{
+ if (bpf_test_sockopt(ctx, sk, -EOPNOTSUPP))
+ return true;
+ return false;
+}
+
+static bool bpf_test_access_load_hdr_opt(struct bpf_sock_ops *skops)
+{
+ u8 opt[3] = {0};
+ int load_flags = 0;
+ int ret;
+
+ ret = bpf_load_hdr_opt(skops, opt, sizeof(opt), load_flags);
+ if (ret != -EOPNOTSUPP)
+ return true;
+
+ return false;
+}
+
+static bool bpf_test_access_cb_flags_set(struct bpf_sock_ops *skops)
+{
+ int ret;
+
+ ret = bpf_sock_ops_cb_flags_set(skops, 0);
+ if (ret != -EOPNOTSUPP)
+ return true;
+
+ return false;
+}
+
+/* In the timestamping callbacks, we're not allowed to call the following
+ * BPF CALLs for the safety concern. Return false if expected.
+ */
+static bool bpf_test_access_bpf_calls(struct bpf_sock_ops *skops,
+ const struct sock *sk)
+{
+ if (bpf_test_access_sockopt(skops, sk))
+ return true;
+
+ if (bpf_test_access_load_hdr_opt(skops))
+ return true;
+
+ if (bpf_test_access_cb_flags_set(skops))
+ return true;
+
+ return false;
+}
+
+static bool bpf_test_delay(struct bpf_sock_ops *skops, const struct sock *sk)
+{
+ struct bpf_sock_ops_kern *skops_kern;
+ u64 timestamp = bpf_ktime_get_ns();
+ struct skb_shared_info *shinfo;
+ struct delay_info dinfo = {0};
+ struct sk_tskey key = {0};
+ struct delay_info *val;
+ struct sk_buff *skb;
+ struct sk_stg *stg;
+ u64 prior_ts, delay;
+
+ if (bpf_test_access_bpf_calls(skops, sk))
+ return false;
+
+ skops_kern = bpf_cast_to_kern_ctx(skops);
+ skb = skops_kern->skb;
+ shinfo = bpf_core_cast(skb->head + skb->end, struct skb_shared_info);
+
+ key.cookie = bpf_get_socket_cookie(skops);
+ if (!key.cookie)
+ return false;
+
+ if (skops->op == BPF_SOCK_OPS_TSTAMP_SENDMSG_CB) {
+ stg = bpf_sk_storage_get(&sk_stg_map, (void *)sk, 0, 0);
+ if (!stg)
+ return false;
+ dinfo.sendmsg_ns = stg->sendmsg_ns;
+ bpf_sock_ops_enable_tx_tstamp(skops_kern, 0);
+ key.tskey = shinfo->tskey;
+ if (!key.tskey)
+ return false;
+ bpf_map_update_elem(&time_map, &key, &dinfo, BPF_ANY);
+ return true;
+ }
+
+ key.tskey = shinfo->tskey;
+ if (!key.tskey)
+ return false;
+
+ val = bpf_map_lookup_elem(&time_map, &key);
+ if (!val)
+ return false;
+
+ switch (skops->op) {
+ case BPF_SOCK_OPS_TSTAMP_SCHED_CB:
+ val->sched_delay = timestamp - val->sendmsg_ns;
+ delay = val->sched_delay;
+ break;
+ case BPF_SOCK_OPS_TSTAMP_SND_SW_CB:
+ prior_ts = val->sched_delay + val->sendmsg_ns;
+ val->snd_sw_delay = timestamp - prior_ts;
+ delay = val->snd_sw_delay;
+ break;
+ case BPF_SOCK_OPS_TSTAMP_ACK_CB:
+ prior_ts = val->snd_sw_delay + val->sched_delay + val->sendmsg_ns;
+ val->ack_delay = timestamp - prior_ts;
+ delay = val->ack_delay;
+ break;
+ }
+
+ if (delay >= delay_tolerance_nsec)
+ return false;
+
+ /* Since it's the last one, remove from the map after latency check */
+ if (skops->op == BPF_SOCK_OPS_TSTAMP_ACK_CB)
+ bpf_map_delete_elem(&time_map, &key);
+
+ return true;
+}
+
+SEC("fentry/tcp_sendmsg_locked")
+int BPF_PROG(trace_tcp_sendmsg_locked, struct sock *sk, struct msghdr *msg,
+ size_t size)
+{
+ __u32 pid = bpf_get_current_pid_tgid() >> 32;
+ u64 timestamp = bpf_ktime_get_ns();
+ u32 flag = sk->sk_bpf_cb_flags;
+ struct sk_stg *stg;
+
+ if (pid != monitored_pid || !flag)
+ return 0;
+
+ stg = bpf_sk_storage_get(&sk_stg_map, sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!stg)
+ return 0;
+
+ stg->sendmsg_ns = timestamp;
+ nr_snd += 1;
+ return 0;
+}
+
+SEC("sockops")
+int skops_sockopt(struct bpf_sock_ops *skops)
+{
+ struct bpf_sock *bpf_sk = skops->sk;
+ const struct sock *sk;
+
+ if (!bpf_sk)
+ return 1;
+
+ sk = (struct sock *)bpf_skc_to_tcp_sock(bpf_sk);
+ if (!sk)
+ return 1;
+
+ switch (skops->op) {
+ case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+ nr_active += !bpf_test_sockopt(skops, sk, 0);
+ break;
+ case BPF_SOCK_OPS_TSTAMP_SENDMSG_CB:
+ if (bpf_test_delay(skops, sk))
+ nr_snd += 1;
+ break;
+ case BPF_SOCK_OPS_TSTAMP_SCHED_CB:
+ if (bpf_test_delay(skops, sk))
+ nr_sched += 1;
+ break;
+ case BPF_SOCK_OPS_TSTAMP_SND_SW_CB:
+ if (bpf_test_delay(skops, sk))
+ nr_txsw += 1;
+ break;
+ case BPF_SOCK_OPS_TSTAMP_ACK_CB:
+ if (bpf_test_delay(skops, sk))
+ nr_ack += 1;
+ break;
+ }
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/netns_cookie_prog.c b/tools/testing/selftests/bpf/progs/netns_cookie_prog.c
index c6edf8dbefeb..94040714af18 100644
--- a/tools/testing/selftests/bpf/progs/netns_cookie_prog.c
+++ b/tools/testing/selftests/bpf/progs/netns_cookie_prog.c
@@ -28,6 +28,7 @@ struct {
} sock_map SEC(".maps");
int tcx_init_netns_cookie, tcx_netns_cookie;
+int cgroup_skb_init_netns_cookie, cgroup_skb_netns_cookie;
SEC("sockops")
int get_netns_cookie_sockops(struct bpf_sock_ops *ctx)
@@ -91,4 +92,12 @@ int get_netns_cookie_tcx(struct __sk_buff *skb)
return TCX_PASS;
}
+SEC("cgroup_skb/ingress")
+int get_netns_cookie_cgroup_skb(struct __sk_buff *skb)
+{
+ cgroup_skb_init_netns_cookie = bpf_get_netns_cookie(NULL);
+ cgroup_skb_netns_cookie = bpf_get_netns_cookie(skb);
+ return SK_PASS;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/preempt_lock.c b/tools/testing/selftests/bpf/progs/preempt_lock.c
index 6c5797bf0ead..7d04254e61f1 100644
--- a/tools/testing/selftests/bpf/progs/preempt_lock.c
+++ b/tools/testing/selftests/bpf/progs/preempt_lock.c
@@ -134,7 +134,7 @@ int __noinline preempt_global_subprog(void)
}
SEC("?tc")
-__failure __msg("global function calls are not allowed with preemption disabled")
+__success
int preempt_global_subprog_test(struct __sk_buff *ctx)
{
preempt_disable();
@@ -143,4 +143,70 @@ int preempt_global_subprog_test(struct __sk_buff *ctx)
return 0;
}
+int __noinline
+global_subprog(int i)
+{
+ if (i)
+ bpf_printk("%p", &i);
+ return i;
+}
+
+int __noinline
+global_sleepable_helper_subprog(int i)
+{
+ if (i)
+ bpf_copy_from_user(&i, sizeof(i), NULL);
+ return i;
+}
+
+int __noinline
+global_sleepable_kfunc_subprog(int i)
+{
+ if (i)
+ bpf_copy_from_user_str(&i, sizeof(i), NULL, 0);
+ global_subprog(i);
+ return i;
+}
+
+int __noinline
+global_subprog_calling_sleepable_global(int i)
+{
+ if (!i)
+ global_sleepable_kfunc_subprog(i);
+ return i;
+}
+
+SEC("?syscall")
+__failure __msg("global functions that may sleep are not allowed in non-sleepable context")
+int preempt_global_sleepable_helper_subprog(struct __sk_buff *ctx)
+{
+ preempt_disable();
+ if (ctx->mark)
+ global_sleepable_helper_subprog(ctx->mark);
+ preempt_enable();
+ return 0;
+}
+
+SEC("?syscall")
+__failure __msg("global functions that may sleep are not allowed in non-sleepable context")
+int preempt_global_sleepable_kfunc_subprog(struct __sk_buff *ctx)
+{
+ preempt_disable();
+ if (ctx->mark)
+ global_sleepable_kfunc_subprog(ctx->mark);
+ preempt_enable();
+ return 0;
+}
+
+SEC("?syscall")
+__failure __msg("global functions that may sleep are not allowed in non-sleepable context")
+int preempt_global_sleepable_subprog_indirect(struct __sk_buff *ctx)
+{
+ preempt_disable();
+ if (ctx->mark)
+ global_subprog_calling_sleepable_global(ctx->mark);
+ preempt_enable();
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/prepare.c b/tools/testing/selftests/bpf/progs/prepare.c
new file mode 100644
index 000000000000..1f1dd547e4ee
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/prepare.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+//#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+int err;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 4096);
+} ringbuf SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+} array_map SEC(".maps");
+
+SEC("cgroup_skb/egress")
+int program(struct __sk_buff *skb)
+{
+ err = 0;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/priv_freplace_prog.c b/tools/testing/selftests/bpf/progs/priv_freplace_prog.c
new file mode 100644
index 000000000000..ccf1b04010ba
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/priv_freplace_prog.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("freplace/xdp_prog1")
+int new_xdp_prog2(struct xdp_md *xd)
+{
+ return XDP_DROP;
+}
diff --git a/tools/testing/selftests/bpf/progs/priv_prog.c b/tools/testing/selftests/bpf/progs/priv_prog.c
index 3c7b2b618c8a..725e29595079 100644
--- a/tools/testing/selftests/bpf/progs/priv_prog.c
+++ b/tools/testing/selftests/bpf/progs/priv_prog.c
@@ -6,8 +6,8 @@
char _license[] SEC("license") = "GPL";
-SEC("kprobe")
-int kprobe_prog(void *ctx)
+SEC("xdp")
+int xdp_prog1(struct xdp_md *xdp)
{
- return 1;
+ return XDP_DROP;
}
diff --git a/tools/testing/selftests/bpf/progs/pro_epilogue_with_kfunc.c b/tools/testing/selftests/bpf/progs/pro_epilogue_with_kfunc.c
new file mode 100644
index 000000000000..a5a8f08ac8fb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pro_epilogue_with_kfunc.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "../test_kmods/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+char _license[] SEC("license") = "GPL";
+
+void __kfunc_btf_root(void)
+{
+ bpf_kfunc_st_ops_inc10(NULL);
+}
+
+static __noinline __used int subprog(struct st_ops_args *args)
+{
+ args->a += 1;
+ return args->a;
+}
+
+__success
+/* prologue */
+__xlated("0: r8 = r1")
+__xlated("1: r1 = 0")
+__xlated("2: call kernel-function")
+__xlated("3: if r0 != 0x0 goto pc+5")
+__xlated("4: r6 = *(u64 *)(r8 +0)")
+__xlated("5: r7 = *(u64 *)(r6 +0)")
+__xlated("6: r7 += 1000")
+__xlated("7: *(u64 *)(r6 +0) = r7")
+__xlated("8: goto pc+2")
+__xlated("9: r1 = r0")
+__xlated("10: call kernel-function")
+__xlated("11: r1 = r8")
+/* save __u64 *ctx to stack */
+__xlated("12: *(u64 *)(r10 -8) = r1")
+/* main prog */
+__xlated("13: r1 = *(u64 *)(r1 +0)")
+__xlated("14: r6 = r1")
+__xlated("15: call kernel-function")
+__xlated("16: r1 = r6")
+__xlated("17: call pc+")
+/* epilogue */
+__xlated("18: r1 = 0")
+__xlated("19: r6 = 0")
+__xlated("20: call kernel-function")
+__xlated("21: if r0 != 0x0 goto pc+6")
+__xlated("22: r1 = *(u64 *)(r10 -8)")
+__xlated("23: r1 = *(u64 *)(r1 +0)")
+__xlated("24: r6 = *(u64 *)(r1 +0)")
+__xlated("25: r6 += 10000")
+__xlated("26: *(u64 *)(r1 +0) = r6")
+__xlated("27: goto pc+2")
+__xlated("28: r1 = r0")
+__xlated("29: call kernel-function")
+__xlated("30: r0 = r6")
+__xlated("31: r0 *= 2")
+__xlated("32: exit")
+SEC("struct_ops/test_pro_epilogue")
+__naked int test_kfunc_pro_epilogue(void)
+{
+ asm volatile (
+ "r1 = *(u64 *)(r1 +0);"
+ "r6 = r1;"
+ "call %[bpf_kfunc_st_ops_inc10];"
+ "r1 = r6;"
+ "call subprog;"
+ "exit;"
+ :
+ : __imm(bpf_kfunc_st_ops_inc10)
+ : __clobber_all);
+}
+
+SEC("syscall")
+__retval(22022) /* (PROLOGUE_A [1000] + KFUNC_INC10 + SUBPROG_A [1] + EPILOGUE_A [10000]) * 2 */
+int syscall_pro_epilogue(void *ctx)
+{
+ struct st_ops_args args = {};
+
+ return bpf_kfunc_st_ops_test_pro_epilogue(&args);
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_st_ops pro_epilogue_with_kfunc = {
+ .test_pro_epilogue = (void *)test_kfunc_pro_epilogue,
+};
diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
index ab3a532b7dd6..43637ee2cdcd 100644
--- a/tools/testing/selftests/bpf/progs/rcu_read_lock.c
+++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
@@ -242,7 +242,8 @@ out:
}
SEC("?lsm.s/bpf")
-int BPF_PROG(inproper_sleepable_kfunc, int cmd, union bpf_attr *attr, unsigned int size)
+int BPF_PROG(inproper_sleepable_kfunc, int cmd, union bpf_attr *attr, unsigned int size,
+ bool kernel)
{
struct bpf_key *bkey;
@@ -439,3 +440,61 @@ int rcu_read_lock_global_subprog_unlock(void *ctx)
ret += global_subprog_unlock(ret);
return 0;
}
+
+int __noinline
+global_sleepable_helper_subprog(int i)
+{
+ if (i)
+ bpf_copy_from_user(&i, sizeof(i), NULL);
+ return i;
+}
+
+int __noinline
+global_sleepable_kfunc_subprog(int i)
+{
+ if (i)
+ bpf_copy_from_user_str(&i, sizeof(i), NULL, 0);
+ global_subprog(i);
+ return i;
+}
+
+int __noinline
+global_subprog_calling_sleepable_global(int i)
+{
+ if (!i)
+ global_sleepable_kfunc_subprog(i);
+ return i;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int rcu_read_lock_sleepable_helper_global_subprog(void *ctx)
+{
+ volatile int ret = 0;
+
+ bpf_rcu_read_lock();
+ ret += global_sleepable_helper_subprog(ret);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int rcu_read_lock_sleepable_kfunc_global_subprog(void *ctx)
+{
+ volatile int ret = 0;
+
+ bpf_rcu_read_lock();
+ ret += global_sleepable_kfunc_subprog(ret);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int rcu_read_lock_sleepable_global_subprog_indirect(void *ctx)
+{
+ volatile int ret = 0;
+
+ bpf_rcu_read_lock();
+ ret += global_subprog_calling_sleepable_global(ret);
+ bpf_rcu_read_unlock();
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/read_vsyscall.c b/tools/testing/selftests/bpf/progs/read_vsyscall.c
index 39ebef430059..395591374d4f 100644
--- a/tools/testing/selftests/bpf/progs/read_vsyscall.c
+++ b/tools/testing/selftests/bpf/progs/read_vsyscall.c
@@ -8,14 +8,16 @@
int target_pid = 0;
void *user_ptr = 0;
-int read_ret[9];
+int read_ret[10];
char _license[] SEC("license") = "GPL";
/*
- * This is the only kfunc, the others are helpers
+ * These are the kfuncs, the others are helpers
*/
int bpf_copy_from_user_str(void *dst, u32, const void *, u64) __weak __ksym;
+int bpf_copy_from_user_task_str(void *dst, u32, const void *,
+ struct task_struct *, u64) __weak __ksym;
SEC("fentry/" SYS_PREFIX "sys_nanosleep")
int do_probe_read(void *ctx)
@@ -47,6 +49,11 @@ int do_copy_from_user(void *ctx)
read_ret[7] = bpf_copy_from_user_task(buf, sizeof(buf), user_ptr,
bpf_get_current_task_btf(), 0);
read_ret[8] = bpf_copy_from_user_str((char *)buf, sizeof(buf), user_ptr, 0);
+ read_ret[9] = bpf_copy_from_user_task_str((char *)buf,
+ sizeof(buf),
+ user_ptr,
+ bpf_get_current_task_btf(),
+ 0);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/res_spin_lock.c b/tools/testing/selftests/bpf/progs/res_spin_lock.c
new file mode 100644
index 000000000000..b33385dfbd35
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/res_spin_lock.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024-2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define EDEADLK 35
+#define ETIMEDOUT 110
+
+struct arr_elem {
+ struct bpf_res_spin_lock lock;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 64);
+ __type(key, int);
+ __type(value, struct arr_elem);
+} arrmap SEC(".maps");
+
+struct bpf_res_spin_lock lockA __hidden SEC(".data.A");
+struct bpf_res_spin_lock lockB __hidden SEC(".data.B");
+
+SEC("tc")
+int res_spin_lock_test(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem1, *elem2;
+ int r;
+
+ elem1 = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem1)
+ return -1;
+ elem2 = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem2)
+ return -1;
+
+ r = bpf_res_spin_lock(&elem1->lock);
+ if (r)
+ return r;
+ if (!bpf_res_spin_lock(&elem2->lock)) {
+ bpf_res_spin_unlock(&elem2->lock);
+ bpf_res_spin_unlock(&elem1->lock);
+ return -1;
+ }
+ bpf_res_spin_unlock(&elem1->lock);
+ return 0;
+}
+
+SEC("tc")
+int res_spin_lock_test_AB(struct __sk_buff *ctx)
+{
+ int r;
+
+ r = bpf_res_spin_lock(&lockA);
+ if (r)
+ return !r;
+ /* Only unlock if we took the lock. */
+ if (!bpf_res_spin_lock(&lockB))
+ bpf_res_spin_unlock(&lockB);
+ bpf_res_spin_unlock(&lockA);
+ return 0;
+}
+
+int err;
+
+SEC("tc")
+int res_spin_lock_test_BA(struct __sk_buff *ctx)
+{
+ int r;
+
+ r = bpf_res_spin_lock(&lockB);
+ if (r)
+ return !r;
+ if (!bpf_res_spin_lock(&lockA))
+ bpf_res_spin_unlock(&lockA);
+ else
+ err = -EDEADLK;
+ bpf_res_spin_unlock(&lockB);
+ return err ?: 0;
+}
+
+SEC("tc")
+int res_spin_lock_test_held_lock_max(struct __sk_buff *ctx)
+{
+ struct bpf_res_spin_lock *locks[48] = {};
+ struct arr_elem *e;
+ u64 time_beg, time;
+ int ret = 0, i;
+
+ _Static_assert(ARRAY_SIZE(((struct rqspinlock_held){}).locks) == 31,
+ "RES_NR_HELD assumed to be 31");
+
+ for (i = 0; i < 34; i++) {
+ int key = i;
+
+ /* We cannot pass in i as it will get spilled/filled by the compiler and
+ * loses bounds in verifier state.
+ */
+ e = bpf_map_lookup_elem(&arrmap, &key);
+ if (!e)
+ return 1;
+ locks[i] = &e->lock;
+ }
+
+ for (; i < 48; i++) {
+ int key = i - 2;
+
+ /* We cannot pass in i as it will get spilled/filled by the compiler and
+ * loses bounds in verifier state.
+ */
+ e = bpf_map_lookup_elem(&arrmap, &key);
+ if (!e)
+ return 1;
+ locks[i] = &e->lock;
+ }
+
+ time_beg = bpf_ktime_get_ns();
+ for (i = 0; i < 34; i++) {
+ if (bpf_res_spin_lock(locks[i]))
+ goto end;
+ }
+
+ /* Trigger AA, after exhausting entries in the held lock table. This
+ * time, only the timeout can save us, as AA detection won't succeed.
+ */
+ if (!bpf_res_spin_lock(locks[34])) {
+ bpf_res_spin_unlock(locks[34]);
+ ret = 1;
+ goto end;
+ }
+
+end:
+ for (i = i - 1; i >= 0; i--)
+ bpf_res_spin_unlock(locks[i]);
+ time = bpf_ktime_get_ns() - time_beg;
+ /* Time spent should be easily above our limit (1/4 s), since AA
+ * detection won't be expedited due to lack of held lock entry.
+ */
+ return ret ?: (time > 1000000000 / 4 ? 0 : 1);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/res_spin_lock_fail.c b/tools/testing/selftests/bpf/progs/res_spin_lock_fail.c
new file mode 100644
index 000000000000..330682a88c16
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/res_spin_lock_fail.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024-2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+struct arr_elem {
+ struct bpf_res_spin_lock lock;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct arr_elem);
+} arrmap SEC(".maps");
+
+long value;
+
+struct bpf_spin_lock lock __hidden SEC(".data.A");
+struct bpf_res_spin_lock res_lock __hidden SEC(".data.B");
+
+SEC("?tc")
+__failure __msg("point to map value or allocated object")
+int res_spin_lock_arg(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem)
+ return 0;
+ bpf_res_spin_lock((struct bpf_res_spin_lock *)bpf_core_cast(&elem->lock, struct __sk_buff));
+ bpf_res_spin_lock(&elem->lock);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("AA deadlock detected")
+int res_spin_lock_AA(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem)
+ return 0;
+ bpf_res_spin_lock(&elem->lock);
+ bpf_res_spin_lock(&elem->lock);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("AA deadlock detected")
+int res_spin_lock_cond_AA(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem)
+ return 0;
+ if (bpf_res_spin_lock(&elem->lock))
+ return 0;
+ bpf_res_spin_lock(&elem->lock);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("unlock of different lock")
+int res_spin_lock_mismatch_1(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem)
+ return 0;
+ if (bpf_res_spin_lock(&elem->lock))
+ return 0;
+ bpf_res_spin_unlock(&res_lock);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("unlock of different lock")
+int res_spin_lock_mismatch_2(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem)
+ return 0;
+ if (bpf_res_spin_lock(&res_lock))
+ return 0;
+ bpf_res_spin_unlock(&elem->lock);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("unlock of different lock")
+int res_spin_lock_irq_mismatch_1(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+ unsigned long f1;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem)
+ return 0;
+ bpf_local_irq_save(&f1);
+ if (bpf_res_spin_lock(&res_lock))
+ return 0;
+ bpf_res_spin_unlock_irqrestore(&res_lock, &f1);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("unlock of different lock")
+int res_spin_lock_irq_mismatch_2(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+ unsigned long f1;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem)
+ return 0;
+ if (bpf_res_spin_lock_irqsave(&res_lock, &f1))
+ return 0;
+ bpf_res_spin_unlock(&res_lock);
+ return 0;
+}
+
+SEC("?tc")
+__success
+int res_spin_lock_ooo(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem)
+ return 0;
+ if (bpf_res_spin_lock(&res_lock))
+ return 0;
+ if (bpf_res_spin_lock(&elem->lock)) {
+ bpf_res_spin_unlock(&res_lock);
+ return 0;
+ }
+ bpf_res_spin_unlock(&elem->lock);
+ bpf_res_spin_unlock(&res_lock);
+ return 0;
+}
+
+SEC("?tc")
+__success
+int res_spin_lock_ooo_irq(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+ unsigned long f1, f2;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem)
+ return 0;
+ if (bpf_res_spin_lock_irqsave(&res_lock, &f1))
+ return 0;
+ if (bpf_res_spin_lock_irqsave(&elem->lock, &f2)) {
+ bpf_res_spin_unlock_irqrestore(&res_lock, &f1);
+ /* We won't have a unreleased IRQ flag error here. */
+ return 0;
+ }
+ bpf_res_spin_unlock_irqrestore(&elem->lock, &f2);
+ bpf_res_spin_unlock_irqrestore(&res_lock, &f1);
+ return 0;
+}
+
+struct bpf_res_spin_lock lock1 __hidden SEC(".data.OO1");
+struct bpf_res_spin_lock lock2 __hidden SEC(".data.OO2");
+
+SEC("?tc")
+__failure __msg("bpf_res_spin_unlock cannot be out of order")
+int res_spin_lock_ooo_unlock(struct __sk_buff *ctx)
+{
+ if (bpf_res_spin_lock(&lock1))
+ return 0;
+ if (bpf_res_spin_lock(&lock2)) {
+ bpf_res_spin_unlock(&lock1);
+ return 0;
+ }
+ bpf_res_spin_unlock(&lock1);
+ bpf_res_spin_unlock(&lock2);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("off 1 doesn't point to 'struct bpf_res_spin_lock' that is at 0")
+int res_spin_lock_bad_off(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem)
+ return 0;
+ bpf_res_spin_lock((void *)&elem->lock + 1);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("R1 doesn't have constant offset. bpf_res_spin_lock has to be at the constant offset")
+int res_spin_lock_var_off(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+ u64 val = value;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem) {
+ // FIXME: Only inline assembly use in assert macro doesn't emit
+ // BTF definition.
+ bpf_throw(0);
+ return 0;
+ }
+ bpf_assert_range(val, 0, 40);
+ bpf_res_spin_lock((void *)&value + val);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("map 'res_spin.bss' has no valid bpf_res_spin_lock")
+int res_spin_lock_no_lock_map(struct __sk_buff *ctx)
+{
+ bpf_res_spin_lock((void *)&value + 1);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("local 'kptr' has no valid bpf_res_spin_lock")
+int res_spin_lock_no_lock_kptr(struct __sk_buff *ctx)
+{
+ struct { int i; } *p = bpf_obj_new(typeof(*p));
+
+ if (!p)
+ return 0;
+ bpf_res_spin_lock((void *)p);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/set_global_vars.c b/tools/testing/selftests/bpf/progs/set_global_vars.c
new file mode 100644
index 000000000000..9adb5ba4cd4d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/set_global_vars.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include "bpf_experimental.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include <stdbool.h>
+
+char _license[] SEC("license") = "GPL";
+
+enum Enum { EA1 = 0, EA2 = 11 };
+enum Enumu64 {EB1 = 0llu, EB2 = 12llu };
+enum Enums64 { EC1 = 0ll, EC2 = 13ll };
+
+const volatile __s64 var_s64 = -1;
+const volatile __u64 var_u64 = 0;
+const volatile __s32 var_s32 = -1;
+const volatile __u32 var_u32 = 0;
+const volatile __s16 var_s16 = -1;
+const volatile __u16 var_u16 = 0;
+const volatile __s8 var_s8 = -1;
+const volatile __u8 var_u8 = 0;
+const volatile enum Enum var_ea = EA1;
+const volatile enum Enumu64 var_eb = EB1;
+const volatile enum Enums64 var_ec = EC1;
+const volatile bool var_b = false;
+
+char arr[4] = {0};
+
+SEC("socket")
+int test_set_globals(void *ctx)
+{
+ volatile __s8 a;
+
+ a = var_s64;
+ a = var_u64;
+ a = var_s32;
+ a = var_u32;
+ a = var_s16;
+ a = var_u16;
+ a = var_s8;
+ a = var_u8;
+ a = var_ea;
+ a = var_eb;
+ a = var_ec;
+ a = var_b;
+ return a;
+}
diff --git a/tools/testing/selftests/bpf/progs/setget_sockopt.c b/tools/testing/selftests/bpf/progs/setget_sockopt.c
index 6dd4318debbf..0107a24b7522 100644
--- a/tools/testing/selftests/bpf/progs/setget_sockopt.c
+++ b/tools/testing/selftests/bpf/progs/setget_sockopt.c
@@ -61,6 +61,9 @@ static const struct sockopt_test sol_tcp_tests[] = {
{ .opt = TCP_NOTSENT_LOWAT, .new = 1314, .expected = 1314, },
{ .opt = TCP_BPF_SOCK_OPS_CB_FLAGS, .new = BPF_SOCK_OPS_ALL_CB_FLAGS,
.expected = BPF_SOCK_OPS_ALL_CB_FLAGS, },
+ { .opt = TCP_BPF_DELACK_MAX, .new = 30000, .expected = 30000, },
+ { .opt = TCP_BPF_RTO_MIN, .new = 30000, .expected = 30000, },
+ { .opt = TCP_RTO_MAX_MS, .new = 2000, .expected = 2000, },
{ .opt = 0, },
};
diff --git a/tools/testing/selftests/bpf/progs/strncmp_bench.c b/tools/testing/selftests/bpf/progs/strncmp_bench.c
index 18373a7df76e..f47bf88f8d2a 100644
--- a/tools/testing/selftests/bpf/progs/strncmp_bench.c
+++ b/tools/testing/selftests/bpf/progs/strncmp_bench.c
@@ -35,7 +35,10 @@ static __always_inline int local_strncmp(const char *s1, unsigned int sz,
SEC("tp/syscalls/sys_enter_getpgid")
int strncmp_no_helper(void *ctx)
{
- if (local_strncmp(str, cmp_str_len + 1, target) < 0)
+ const char *target_str = target;
+
+ barrier_var(target_str);
+ if (local_strncmp(str, cmp_str_len + 1, target_str) < 0)
__sync_add_and_fetch(&hits, 1);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c
new file mode 100644
index 000000000000..36386b3c23a1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c
@@ -0,0 +1,30 @@
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+void bpf_task_release(struct task_struct *p) __ksym;
+
+/* This test struct_ops BPF programs returning referenced kptr. The verifier should
+ * allow a referenced kptr or a NULL pointer to be returned. A referenced kptr to task
+ * here is acquried automatically as the task argument is tagged with "__ref".
+ */
+SEC("struct_ops/test_return_ref_kptr")
+struct task_struct *BPF_PROG(kptr_return, int dummy,
+ struct task_struct *task, struct cgroup *cgrp)
+{
+ if (dummy % 2) {
+ bpf_task_release(task);
+ return NULL;
+ }
+ return task;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_kptr_return = {
+ .test_return_ref_kptr = (void *)kptr_return,
+};
+
+
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__invalid_scalar.c b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__invalid_scalar.c
new file mode 100644
index 000000000000..caeea158ef69
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__invalid_scalar.c
@@ -0,0 +1,26 @@
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym;
+void bpf_task_release(struct task_struct *p) __ksym;
+
+/* This test struct_ops BPF programs returning referenced kptr. The verifier should
+ * reject programs returning a non-zero scalar value.
+ */
+SEC("struct_ops/test_return_ref_kptr")
+__failure __msg("At program exit the register R0 has smin=1 smax=1 should have been in [0, 0]")
+struct task_struct *BPF_PROG(kptr_return_fail__invalid_scalar, int dummy,
+ struct task_struct *task, struct cgroup *cgrp)
+{
+ bpf_task_release(task);
+ return (struct task_struct *)1;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_kptr_return = {
+ .test_return_ref_kptr = (void *)kptr_return_fail__invalid_scalar,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__local_kptr.c b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__local_kptr.c
new file mode 100644
index 000000000000..b8b4f05c3d7f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__local_kptr.c
@@ -0,0 +1,34 @@
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_experimental.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym;
+void bpf_task_release(struct task_struct *p) __ksym;
+
+/* This test struct_ops BPF programs returning referenced kptr. The verifier should
+ * reject programs returning a local kptr.
+ */
+SEC("struct_ops/test_return_ref_kptr")
+__failure __msg("At program exit the register R0 is not a known value (ptr_or_null_)")
+struct task_struct *BPF_PROG(kptr_return_fail__local_kptr, int dummy,
+ struct task_struct *task, struct cgroup *cgrp)
+{
+ struct task_struct *t;
+
+ bpf_task_release(task);
+
+ t = bpf_obj_new(typeof(*task));
+ if (!t)
+ return NULL;
+
+ return t;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_kptr_return = {
+ .test_return_ref_kptr = (void *)kptr_return_fail__local_kptr,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__nonzero_offset.c b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__nonzero_offset.c
new file mode 100644
index 000000000000..7ddeb28c2329
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__nonzero_offset.c
@@ -0,0 +1,25 @@
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym;
+void bpf_task_release(struct task_struct *p) __ksym;
+
+/* This test struct_ops BPF programs returning referenced kptr. The verifier should
+ * reject programs returning a modified referenced kptr.
+ */
+SEC("struct_ops/test_return_ref_kptr")
+__failure __msg("dereference of modified trusted_ptr_ ptr R0 off={{[0-9]+}} disallowed")
+struct task_struct *BPF_PROG(kptr_return_fail__nonzero_offset, int dummy,
+ struct task_struct *task, struct cgroup *cgrp)
+{
+ return (struct task_struct *)&task->jobctl;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_kptr_return = {
+ .test_return_ref_kptr = (void *)kptr_return_fail__nonzero_offset,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__wrong_type.c b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__wrong_type.c
new file mode 100644
index 000000000000..6a2dd5367802
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__wrong_type.c
@@ -0,0 +1,30 @@
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym;
+void bpf_task_release(struct task_struct *p) __ksym;
+
+/* This test struct_ops BPF programs returning referenced kptr. The verifier should
+ * reject programs returning a referenced kptr of the wrong type.
+ */
+SEC("struct_ops/test_return_ref_kptr")
+__failure __msg("At program exit the register R0 is not a known value (ptr_or_null_)")
+struct task_struct *BPF_PROG(kptr_return_fail__wrong_type, int dummy,
+ struct task_struct *task, struct cgroup *cgrp)
+{
+ struct task_struct *ret;
+
+ ret = (struct task_struct *)bpf_cgroup_acquire(cgrp);
+ bpf_task_release(task);
+
+ return ret;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_kptr_return = {
+ .test_return_ref_kptr = (void *)kptr_return_fail__wrong_type,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_refcounted.c b/tools/testing/selftests/bpf/progs/struct_ops_refcounted.c
new file mode 100644
index 000000000000..76dcb6089d7f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_refcounted.c
@@ -0,0 +1,31 @@
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+__attribute__((nomerge)) extern void bpf_task_release(struct task_struct *p) __ksym;
+
+/* This is a test BPF program that uses struct_ops to access a referenced
+ * kptr argument. This is a test for the verifier to ensure that it
+ * 1) recongnizes the task as a referenced object (i.e., ref_obj_id > 0), and
+ * 2) the same reference can be acquired from multiple paths as long as it
+ * has not been released.
+ */
+SEC("struct_ops/test_refcounted")
+int BPF_PROG(refcounted, int dummy, struct task_struct *task)
+{
+ if (dummy == 1)
+ bpf_task_release(task);
+ else
+ bpf_task_release(task);
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_refcounted = {
+ .test_refcounted = (void *)refcounted,
+};
+
+
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__global_subprog.c b/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__global_subprog.c
new file mode 100644
index 000000000000..ae074aa62852
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__global_subprog.c
@@ -0,0 +1,39 @@
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+extern void bpf_task_release(struct task_struct *p) __ksym;
+
+__noinline int subprog_release(__u64 *ctx __arg_ctx)
+{
+ struct task_struct *task = (struct task_struct *)ctx[1];
+ int dummy = (int)ctx[0];
+
+ bpf_task_release(task);
+
+ return dummy + 1;
+}
+
+/* Test that the verifier rejects a program that contains a global
+ * subprogram with referenced kptr arguments
+ */
+SEC("struct_ops/test_refcounted")
+__failure __log_level(2)
+__msg("Validating subprog_release() func#1...")
+__msg("invalid bpf_context access off=8. Reference may already be released")
+int refcounted_fail__global_subprog(unsigned long long *ctx)
+{
+ struct task_struct *task = (struct task_struct *)ctx[1];
+
+ bpf_task_release(task);
+
+ return subprog_release(ctx);
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_ref_acquire = {
+ .test_refcounted = (void *)refcounted_fail__global_subprog,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__ref_leak.c b/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__ref_leak.c
new file mode 100644
index 000000000000..e945b1a04294
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__ref_leak.c
@@ -0,0 +1,22 @@
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+/* Test that the verifier rejects a program that acquires a referenced
+ * kptr through context without releasing the reference
+ */
+SEC("struct_ops/test_refcounted")
+__failure __msg("Unreleased reference id=1 alloc_insn=0")
+int BPF_PROG(refcounted_fail__ref_leak, int dummy,
+ struct task_struct *task)
+{
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_ref_acquire = {
+ .test_refcounted = (void *)refcounted_fail__ref_leak,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__tail_call.c b/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__tail_call.c
new file mode 100644
index 000000000000..3b125025a1f2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__tail_call.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} prog_array SEC(".maps");
+
+/* Test that the verifier rejects a program with referenced kptr arguments
+ * that tail call
+ */
+SEC("struct_ops/test_refcounted")
+__failure __msg("program with __ref argument cannot tail call")
+int refcounted_fail__tail_call(unsigned long long *ctx)
+{
+ struct task_struct *task = (struct task_struct *)ctx[1];
+
+ bpf_task_release(task);
+ bpf_tail_call(ctx, &prog_array, 0);
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_ref_acquire = {
+ .test_refcounted = (void *)refcounted_fail__tail_call,
+};
+
diff --git a/tools/testing/selftests/bpf/progs/summarization.c b/tools/testing/selftests/bpf/progs/summarization.c
new file mode 100644
index 000000000000..f89effe82c9e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/summarization.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+__noinline
+long changes_pkt_data(struct __sk_buff *sk)
+{
+ return bpf_skb_pull_data(sk, 0);
+}
+
+__noinline __weak
+long does_not_change_pkt_data(struct __sk_buff *sk)
+{
+ return 0;
+}
+
+SEC("?tc")
+int main_changes_with_subprogs(struct __sk_buff *sk)
+{
+ changes_pkt_data(sk);
+ does_not_change_pkt_data(sk);
+ return 0;
+}
+
+SEC("?tc")
+int main_changes(struct __sk_buff *sk)
+{
+ bpf_skb_pull_data(sk, 0);
+ return 0;
+}
+
+SEC("?tc")
+int main_does_not_change(struct __sk_buff *sk)
+{
+ return 0;
+}
+
+__noinline
+long might_sleep(struct pt_regs *ctx __arg_ctx)
+{
+ int i;
+
+ bpf_copy_from_user(&i, sizeof(i), NULL);
+ return i;
+}
+
+__noinline __weak
+long does_not_sleep(struct pt_regs *ctx __arg_ctx)
+{
+ return 0;
+}
+
+SEC("?uprobe.s")
+int main_might_sleep_with_subprogs(struct pt_regs *ctx)
+{
+ might_sleep(ctx);
+ does_not_sleep(ctx);
+ return 0;
+}
+
+SEC("?uprobe.s")
+int main_might_sleep(struct pt_regs *ctx)
+{
+ int i;
+
+ bpf_copy_from_user(&i, sizeof(i), NULL);
+ return i;
+}
+
+SEC("?uprobe.s")
+int main_does_not_sleep(struct pt_regs *ctx)
+{
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c b/tools/testing/selftests/bpf/progs/summarization_freplace.c
index f9a622705f1b..935f00e0e9ea 100644
--- a/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c
+++ b/tools/testing/selftests/bpf/progs/summarization_freplace.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/bpf.h>
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
SEC("?freplace")
@@ -15,4 +15,19 @@ long does_not_change_pkt_data(struct __sk_buff *sk)
return 0;
}
+SEC("?freplace")
+long might_sleep(struct pt_regs *ctx)
+{
+ int i;
+
+ bpf_copy_from_user(&i, sizeof(i), NULL);
+ return i;
+}
+
+SEC("?freplace")
+long does_not_sleep(struct pt_regs *ctx)
+{
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c b/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c
index 44628865fe1d..4fee0fdc7607 100644
--- a/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c
+++ b/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c
@@ -51,13 +51,13 @@ out:
}
SEC("lsm/bpf")
-int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size)
+int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
{
return bpf_link_create_verify(cmd);
}
SEC("lsm.s/bpf")
-int BPF_PROG(lsm_s_run, int cmd, union bpf_attr *attr, unsigned int size)
+int BPF_PROG(lsm_s_run, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
{
return bpf_link_create_verify(cmd);
}
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c b/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c
index 51b3f79df523..448403634eea 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c
@@ -15,6 +15,7 @@ struct {
struct core_reloc_arrays_output {
int a2;
+ int a3;
char b123;
int c1c;
int d00d;
@@ -41,6 +42,7 @@ int test_core_arrays(void *ctx)
{
struct core_reloc_arrays *in = (void *)&data.in;
struct core_reloc_arrays_output *out = (void *)&data.out;
+ int *a;
if (CORE_READ(&out->a2, &in->a[2]))
return 1;
@@ -53,6 +55,9 @@ int test_core_arrays(void *ctx)
if (CORE_READ(&out->f01c, &in->f[0][1].c))
return 1;
+ a = __builtin_preserve_access_index(({ in->a; }));
+ out->a3 = a[0] + a[1] + a[2] + a[3];
+
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_get_xattr.c b/tools/testing/selftests/bpf/progs/test_get_xattr.c
index 66e737720f7c..54305f4c9f2d 100644
--- a/tools/testing/selftests/bpf/progs/test_get_xattr.c
+++ b/tools/testing/selftests/bpf/progs/test_get_xattr.c
@@ -6,6 +6,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_kfuncs.h"
+#include "bpf_misc.h"
char _license[] SEC("license") = "GPL";
@@ -17,12 +18,23 @@ static const char expected_value[] = "hello";
char value1[32];
char value2[32];
+/* Matches caller of test_get_xattr() in prog_tests/fs_kfuncs.c */
+static const char xattr_names[][64] = {
+ /* The following work. */
+ "user.kfuncs",
+ "security.bpf.xxx",
+
+ /* The following do not work. */
+ "security.bpf",
+ "security.selinux"
+};
+
SEC("lsm.s/file_open")
int BPF_PROG(test_file_open, struct file *f)
{
struct bpf_dynptr value_ptr;
__u32 pid;
- int ret;
+ int ret, i;
pid = bpf_get_current_pid_tgid() >> 32;
if (pid != monitored_pid)
@@ -30,7 +42,11 @@ int BPF_PROG(test_file_open, struct file *f)
bpf_dynptr_from_mem(value1, sizeof(value1), 0, &value_ptr);
- ret = bpf_get_file_xattr(f, "user.kfuncs", &value_ptr);
+ for (i = 0; i < ARRAY_SIZE(xattr_names); i++) {
+ ret = bpf_get_file_xattr(f, xattr_names[i], &value_ptr);
+ if (ret == sizeof(expected_value))
+ break;
+ }
if (ret != sizeof(expected_value))
return 0;
if (bpf_strncmp(value1, ret, expected_value))
@@ -44,7 +60,7 @@ int BPF_PROG(test_inode_getxattr, struct dentry *dentry, char *name)
{
struct bpf_dynptr value_ptr;
__u32 pid;
- int ret;
+ int ret, i;
pid = bpf_get_current_pid_tgid() >> 32;
if (pid != monitored_pid)
@@ -52,7 +68,11 @@ int BPF_PROG(test_inode_getxattr, struct dentry *dentry, char *name)
bpf_dynptr_from_mem(value2, sizeof(value2), 0, &value_ptr);
- ret = bpf_get_dentry_xattr(dentry, "user.kfuncs", &value_ptr);
+ for (i = 0; i < ARRAY_SIZE(xattr_names); i++) {
+ ret = bpf_get_dentry_xattr(dentry, xattr_names[i], &value_ptr);
+ if (ret == sizeof(expected_value))
+ break;
+ }
if (ret != sizeof(expected_value))
return 0;
if (bpf_strncmp(value2, ret, expected_value))
diff --git a/tools/testing/selftests/bpf/progs/test_kernel_flag.c b/tools/testing/selftests/bpf/progs/test_kernel_flag.c
new file mode 100644
index 000000000000..b45fab3be352
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_kernel_flag.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2025 Microsoft Corporation
+ *
+ * Author: Blaise Boscaccy <bboscaccy@linux.microsoft.com>
+ */
+
+#include "vmlinux.h"
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u32 monitored_tid;
+
+SEC("lsm.s/bpf")
+int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
+{
+ __u32 tid;
+
+ tid = bpf_get_current_pid_tgid() & 0xFFFFFFFF;
+ if (!kernel || tid != monitored_tid)
+ return 0;
+ else
+ return -EINVAL;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
index cd4d752bd089..061befb004c2 100644
--- a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
+++ b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
@@ -36,7 +36,7 @@ char _license[] SEC("license") = "GPL";
SEC("?lsm.s/bpf")
__failure __msg("cannot pass in dynptr at an offset=-8")
-int BPF_PROG(not_valid_dynptr, int cmd, union bpf_attr *attr, unsigned int size)
+int BPF_PROG(not_valid_dynptr, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
{
unsigned long val;
@@ -46,7 +46,7 @@ int BPF_PROG(not_valid_dynptr, int cmd, union bpf_attr *attr, unsigned int size)
SEC("?lsm.s/bpf")
__failure __msg("arg#0 expected pointer to stack or const struct bpf_dynptr")
-int BPF_PROG(not_ptr_to_stack, int cmd, union bpf_attr *attr, unsigned int size)
+int BPF_PROG(not_ptr_to_stack, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
{
unsigned long val = 0;
@@ -55,7 +55,7 @@ int BPF_PROG(not_ptr_to_stack, int cmd, union bpf_attr *attr, unsigned int size)
}
SEC("lsm.s/bpf")
-int BPF_PROG(dynptr_data_null, int cmd, union bpf_attr *attr, unsigned int size)
+int BPF_PROG(dynptr_data_null, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
{
struct bpf_key *trusted_keyring;
struct bpf_dynptr ptr;
diff --git a/tools/testing/selftests/bpf/progs/test_lookup_key.c b/tools/testing/selftests/bpf/progs/test_lookup_key.c
index c73776990ae3..cdbbb12f1491 100644
--- a/tools/testing/selftests/bpf/progs/test_lookup_key.c
+++ b/tools/testing/selftests/bpf/progs/test_lookup_key.c
@@ -23,7 +23,7 @@ extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym;
extern void bpf_key_put(struct bpf_key *key) __ksym;
SEC("lsm.s/bpf")
-int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size)
+int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
{
struct bpf_key *bkey;
__u32 pid;
diff --git a/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c b/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c
index 2fdc44e76624..89b0cd5a3e06 100644
--- a/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c
+++ b/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c
@@ -7,7 +7,7 @@
char tp_name[128];
SEC("lsm.s/bpf")
-int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size)
+int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
{
switch (cmd) {
case BPF_RAW_TRACEPOINT_OPEN:
diff --git a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
index 5eb25c6ad75b..a5be3267dbb0 100644
--- a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
@@ -1,7 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018 Facebook */
-#include <stdlib.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
diff --git a/tools/testing/selftests/bpf/progs/test_set_remove_xattr.c b/tools/testing/selftests/bpf/progs/test_set_remove_xattr.c
new file mode 100644
index 000000000000..6a612cf168d3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_set_remove_xattr.c
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <errno.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_kfuncs.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+__u32 monitored_pid;
+
+const char xattr_foo[] = "security.bpf.foo";
+const char xattr_bar[] = "security.bpf.bar";
+static const char xattr_selinux[] = "security.selinux";
+char value_bar[] = "world";
+char read_value[32];
+
+bool set_security_bpf_bar_success;
+bool remove_security_bpf_bar_success;
+bool set_security_selinux_fail;
+bool remove_security_selinux_fail;
+
+char name_buf[32];
+
+static inline bool name_match_foo(const char *name)
+{
+ bpf_probe_read_kernel(name_buf, sizeof(name_buf), name);
+
+ return !bpf_strncmp(name_buf, sizeof(xattr_foo), xattr_foo);
+}
+
+/* Test bpf_set_dentry_xattr and bpf_remove_dentry_xattr */
+SEC("lsm.s/inode_getxattr")
+int BPF_PROG(test_inode_getxattr, struct dentry *dentry, char *name)
+{
+ struct bpf_dynptr value_ptr;
+ __u32 pid;
+ int ret;
+
+ pid = bpf_get_current_pid_tgid() >> 32;
+ if (pid != monitored_pid)
+ return 0;
+
+ /* Only do the following for security.bpf.foo */
+ if (!name_match_foo(name))
+ return 0;
+
+ bpf_dynptr_from_mem(read_value, sizeof(read_value), 0, &value_ptr);
+
+ /* read security.bpf.bar */
+ ret = bpf_get_dentry_xattr(dentry, xattr_bar, &value_ptr);
+
+ if (ret < 0) {
+ /* If security.bpf.bar doesn't exist, set it */
+ bpf_dynptr_from_mem(value_bar, sizeof(value_bar), 0, &value_ptr);
+
+ ret = bpf_set_dentry_xattr(dentry, xattr_bar, &value_ptr, 0);
+ if (!ret)
+ set_security_bpf_bar_success = true;
+ ret = bpf_set_dentry_xattr(dentry, xattr_selinux, &value_ptr, 0);
+ if (ret)
+ set_security_selinux_fail = true;
+ } else {
+ /* If security.bpf.bar exists, remove it */
+ ret = bpf_remove_dentry_xattr(dentry, xattr_bar);
+ if (!ret)
+ remove_security_bpf_bar_success = true;
+
+ ret = bpf_remove_dentry_xattr(dentry, xattr_selinux);
+ if (ret)
+ remove_security_selinux_fail = true;
+ }
+
+ return 0;
+}
+
+bool locked_set_security_bpf_bar_success;
+bool locked_remove_security_bpf_bar_success;
+bool locked_set_security_selinux_fail;
+bool locked_remove_security_selinux_fail;
+
+/* Test bpf_set_dentry_xattr_locked and bpf_remove_dentry_xattr_locked.
+ * It not necessary to differentiate the _locked version and the
+ * not-_locked version in the BPF program. The verifier will fix them up
+ * properly.
+ */
+SEC("lsm.s/inode_setxattr")
+int BPF_PROG(test_inode_setxattr, struct mnt_idmap *idmap,
+ struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags)
+{
+ struct bpf_dynptr value_ptr;
+ __u32 pid;
+ int ret;
+
+ pid = bpf_get_current_pid_tgid() >> 32;
+ if (pid != monitored_pid)
+ return 0;
+
+ /* Only do the following for security.bpf.foo */
+ if (!name_match_foo(name))
+ return 0;
+
+ bpf_dynptr_from_mem(read_value, sizeof(read_value), 0, &value_ptr);
+
+ /* read security.bpf.bar */
+ ret = bpf_get_dentry_xattr(dentry, xattr_bar, &value_ptr);
+
+ if (ret < 0) {
+ /* If security.bpf.bar doesn't exist, set it */
+ bpf_dynptr_from_mem(value_bar, sizeof(value_bar), 0, &value_ptr);
+
+ ret = bpf_set_dentry_xattr(dentry, xattr_bar, &value_ptr, 0);
+ if (!ret)
+ locked_set_security_bpf_bar_success = true;
+ ret = bpf_set_dentry_xattr(dentry, xattr_selinux, &value_ptr, 0);
+ if (ret)
+ locked_set_security_selinux_fail = true;
+ } else {
+ /* If security.bpf.bar exists, remove it */
+ ret = bpf_remove_dentry_xattr(dentry, xattr_bar);
+ if (!ret)
+ locked_remove_security_bpf_bar_success = true;
+
+ ret = bpf_remove_dentry_xattr(dentry, xattr_selinux);
+ if (ret)
+ locked_remove_security_selinux_fail = true;
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_strp.c b/tools/testing/selftests/bpf/progs/test_sockmap_strp.c
new file mode 100644
index 000000000000..dde3d5bec515
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_strp.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+int verdict_max_size = 10000;
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map SEC(".maps");
+
+SEC("sk_skb/stream_verdict")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+ __u32 one = 1;
+
+ if (skb->len > verdict_max_size)
+ return SK_PASS;
+
+ return bpf_sk_redirect_map(skb, &sock_map, one, 0);
+}
+
+SEC("sk_skb/stream_verdict")
+int prog_skb_verdict_pass(struct __sk_buff *skb)
+{
+ return SK_PASS;
+}
+
+SEC("sk_skb/stream_parser")
+int prog_skb_parser(struct __sk_buff *skb)
+{
+ return skb->len;
+}
+
+SEC("sk_skb/stream_parser")
+int prog_skb_parser_partial(struct __sk_buff *skb)
+{
+ /* agreement with the test program on a 4-byte size header
+ * and 6-byte body.
+ */
+ if (skb->len < 4) {
+ /* need more header to determine full length */
+ return 0;
+ }
+ /* return full length decoded from header.
+ * the return value may be larger than skb->len which
+ * means framework must wait body coming.
+ */
+ return 10;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c b/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c
index 1c8b678e2e9a..f678ee6bd7ea 100644
--- a/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c
+++ b/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c
@@ -245,4 +245,73 @@ int lock_global_subprog_call2(struct __sk_buff *ctx)
return ret;
}
+int __noinline
+global_subprog_int(int i)
+{
+ if (i)
+ bpf_printk("%p", &i);
+ return i;
+}
+
+int __noinline
+global_sleepable_helper_subprog(int i)
+{
+ if (i)
+ bpf_copy_from_user(&i, sizeof(i), NULL);
+ return i;
+}
+
+int __noinline
+global_sleepable_kfunc_subprog(int i)
+{
+ if (i)
+ bpf_copy_from_user_str(&i, sizeof(i), NULL, 0);
+ global_subprog_int(i);
+ return i;
+}
+
+int __noinline
+global_subprog_calling_sleepable_global(int i)
+{
+ if (!i)
+ global_sleepable_kfunc_subprog(i);
+ return i;
+}
+
+SEC("?syscall")
+int lock_global_sleepable_helper_subprog(struct __sk_buff *ctx)
+{
+ int ret = 0;
+
+ bpf_spin_lock(&lockA);
+ if (ctx->mark == 42)
+ ret = global_sleepable_helper_subprog(ctx->mark);
+ bpf_spin_unlock(&lockA);
+ return ret;
+}
+
+SEC("?syscall")
+int lock_global_sleepable_kfunc_subprog(struct __sk_buff *ctx)
+{
+ int ret = 0;
+
+ bpf_spin_lock(&lockA);
+ if (ctx->mark == 42)
+ ret = global_sleepable_kfunc_subprog(ctx->mark);
+ bpf_spin_unlock(&lockA);
+ return ret;
+}
+
+SEC("?syscall")
+int lock_global_sleepable_subprog_indirect(struct __sk_buff *ctx)
+{
+ int ret = 0;
+
+ bpf_spin_lock(&lockA);
+ if (ctx->mark == 42)
+ ret = global_subprog_calling_sleepable_global(ctx->mark);
+ bpf_spin_unlock(&lockA);
+ return ret;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c b/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
index 7e750309ce27..0b74b8bd22e8 100644
--- a/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
+++ b/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
@@ -49,7 +49,7 @@ out:
}
SEC("lsm.s/bpf")
-int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size)
+int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
{
struct cgroup *cgrp = NULL;
struct task_struct *task;
diff --git a/tools/testing/selftests/bpf/progs/test_usdt.c b/tools/testing/selftests/bpf/progs/test_usdt.c
index 505aab9a5234..096488f47fbc 100644
--- a/tools/testing/selftests/bpf/progs/test_usdt.c
+++ b/tools/testing/selftests/bpf/progs/test_usdt.c
@@ -11,6 +11,7 @@ int usdt0_called;
u64 usdt0_cookie;
int usdt0_arg_cnt;
int usdt0_arg_ret;
+int usdt0_arg_size;
SEC("usdt")
int usdt0(struct pt_regs *ctx)
@@ -26,6 +27,7 @@ int usdt0(struct pt_regs *ctx)
usdt0_arg_cnt = bpf_usdt_arg_cnt(ctx);
/* should return -ENOENT for any arg_num */
usdt0_arg_ret = bpf_usdt_arg(ctx, bpf_get_prandom_u32(), &tmp);
+ usdt0_arg_size = bpf_usdt_arg_size(ctx, bpf_get_prandom_u32());
return 0;
}
@@ -34,6 +36,7 @@ u64 usdt3_cookie;
int usdt3_arg_cnt;
int usdt3_arg_rets[3];
u64 usdt3_args[3];
+int usdt3_arg_sizes[3];
SEC("usdt//proc/self/exe:test:usdt3")
int usdt3(struct pt_regs *ctx)
@@ -50,12 +53,15 @@ int usdt3(struct pt_regs *ctx)
usdt3_arg_rets[0] = bpf_usdt_arg(ctx, 0, &tmp);
usdt3_args[0] = (int)tmp;
+ usdt3_arg_sizes[0] = bpf_usdt_arg_size(ctx, 0);
usdt3_arg_rets[1] = bpf_usdt_arg(ctx, 1, &tmp);
usdt3_args[1] = (long)tmp;
+ usdt3_arg_sizes[1] = bpf_usdt_arg_size(ctx, 1);
usdt3_arg_rets[2] = bpf_usdt_arg(ctx, 2, &tmp);
usdt3_args[2] = (uintptr_t)tmp;
+ usdt3_arg_sizes[2] = bpf_usdt_arg_size(ctx, 2);
return 0;
}
@@ -64,12 +70,15 @@ int usdt12_called;
u64 usdt12_cookie;
int usdt12_arg_cnt;
u64 usdt12_args[12];
+int usdt12_arg_sizes[12];
SEC("usdt//proc/self/exe:test:usdt12")
int BPF_USDT(usdt12, int a1, int a2, long a3, long a4, unsigned a5,
long a6, __u64 a7, uintptr_t a8, int a9, short a10,
short a11, signed char a12)
{
+ int i;
+
if (my_pid != (bpf_get_current_pid_tgid() >> 32))
return 0;
@@ -90,6 +99,11 @@ int BPF_USDT(usdt12, int a1, int a2, long a3, long a4, unsigned a5,
usdt12_args[9] = a10;
usdt12_args[10] = a11;
usdt12_args[11] = a12;
+
+ bpf_for(i, 0, 12) {
+ usdt12_arg_sizes[i] = bpf_usdt_arg_size(ctx, i);
+ }
+
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c
index 12034a73ee2d..e96d09e11115 100644
--- a/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c
+++ b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c
@@ -37,7 +37,7 @@ struct {
char _license[] SEC("license") = "GPL";
SEC("lsm.s/bpf")
-int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size)
+int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
{
struct bpf_dynptr data_ptr, sig_ptr;
struct data *data_val;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c
index fe2d71ae0e71..fcf6ca14f2ea 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c
@@ -4,37 +4,50 @@
#include <bpf/bpf_helpers.h>
-#define __round_mask(x, y) ((__typeof__(x))((y) - 1))
-#define round_up(x, y) ((((x) - 1) | __round_mask(x, y)) + 1)
+#define META_SIZE 32
+
#define ctx_ptr(ctx, mem) (void *)(unsigned long)ctx->mem
+/* Demonstrates how metadata can be passed from an XDP program to a TC program
+ * using bpf_xdp_adjust_meta.
+ * For the sake of testing the metadata support in drivers, the XDP program uses
+ * a fixed-size payload after the Ethernet header as metadata. The TC program
+ * copies the metadata it receives into a map so it can be checked from
+ * userspace.
+ */
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __uint(value_size, META_SIZE);
+} test_result SEC(".maps");
+
SEC("tc")
int ing_cls(struct __sk_buff *ctx)
{
- __u8 *data, *data_meta, *data_end;
- __u32 diff = 0;
+ __u8 *data, *data_meta;
+ __u32 key = 0;
data_meta = ctx_ptr(ctx, data_meta);
- data_end = ctx_ptr(ctx, data_end);
data = ctx_ptr(ctx, data);
- if (data + ETH_ALEN > data_end ||
- data_meta + round_up(ETH_ALEN, 4) > data)
+ if (data_meta + META_SIZE > data)
return TC_ACT_SHOT;
- diff |= ((__u32 *)data_meta)[0] ^ ((__u32 *)data)[0];
- diff |= ((__u16 *)data_meta)[2] ^ ((__u16 *)data)[2];
+ bpf_map_update_elem(&test_result, &key, data_meta, BPF_ANY);
- return diff ? TC_ACT_SHOT : TC_ACT_OK;
+ return TC_ACT_SHOT;
}
SEC("xdp")
int ing_xdp(struct xdp_md *ctx)
{
- __u8 *data, *data_meta, *data_end;
+ __u8 *data, *data_meta, *data_end, *payload;
+ struct ethhdr *eth;
int ret;
- ret = bpf_xdp_adjust_meta(ctx, -round_up(ETH_ALEN, 4));
+ ret = bpf_xdp_adjust_meta(ctx, -META_SIZE);
if (ret < 0)
return XDP_DROP;
@@ -42,11 +55,21 @@ int ing_xdp(struct xdp_md *ctx)
data_end = ctx_ptr(ctx, data_end);
data = ctx_ptr(ctx, data);
- if (data + ETH_ALEN > data_end ||
- data_meta + round_up(ETH_ALEN, 4) > data)
+ eth = (struct ethhdr *)data;
+ payload = data + sizeof(struct ethhdr);
+
+ if (payload + META_SIZE > data_end ||
+ data_meta + META_SIZE > data)
+ return XDP_DROP;
+
+ /* The Linux networking stack may send other packets on the test
+ * interface that interfere with the test. Just drop them.
+ * The test packets can be recognized by their ethertype of zero.
+ */
+ if (eth->h_proto != 0)
return XDP_DROP;
- __builtin_memcpy(data_meta, data, ETH_ALEN);
+ __builtin_memcpy(data_meta, payload, META_SIZE);
return XDP_PASS;
}
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_vlan.c b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c
index a7588302268d..a80cc5f2f4f2 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_vlan.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c
@@ -102,8 +102,8 @@ bool parse_eth_frame(struct ethhdr *eth, void *data_end, struct parse_pkt *pkt)
#define TESTVLAN 4011 /* 0xFAB */
// #define TO_VLAN 4000 /* 0xFA0 (hint 0xOA0 = 160) */
-SEC("xdp_drop_vlan_4011")
-int xdp_prognum0(struct xdp_md *ctx)
+SEC("xdp")
+int xdp_drop_vlan_4011(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
@@ -144,8 +144,8 @@ Load prog with ip tool:
/* Changing VLAN to zero, have same practical effect as removing the VLAN. */
#define TO_VLAN 0
-SEC("xdp_vlan_change")
-int xdp_prognum1(struct xdp_md *ctx)
+SEC("xdp")
+int xdp_vlan_change(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
@@ -178,8 +178,8 @@ int xdp_prognum1(struct xdp_md *ctx)
#endif
#define VLAN_HDR_SZ 4 /* bytes */
-SEC("xdp_vlan_remove_outer")
-int xdp_prognum2(struct xdp_md *ctx)
+SEC("xdp")
+int xdp_vlan_remove_outer(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
@@ -224,8 +224,8 @@ void shift_mac_4bytes_32bit(void *data)
p[1] = p[0];
}
-SEC("xdp_vlan_remove_outer2")
-int xdp_prognum3(struct xdp_md *ctx)
+SEC("xdp")
+int xdp_vlan_remove_outer2(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
@@ -254,8 +254,8 @@ int xdp_prognum3(struct xdp_md *ctx)
* The TC-clsact eBPF programs (currently) need to be attach via TC commands
*/
-SEC("tc_vlan_push")
-int _tc_progA(struct __sk_buff *ctx)
+SEC("tc")
+int tc_vlan_push(struct __sk_buff *ctx)
{
bpf_skb_vlan_push(ctx, bpf_htons(ETH_P_8021Q), TESTVLAN);
diff --git a/tools/testing/selftests/bpf/progs/verifier_array_access.c b/tools/testing/selftests/bpf/progs/verifier_array_access.c
index 29eb9568633f..0a187ff725cc 100644
--- a/tools/testing/selftests/bpf/progs/verifier_array_access.c
+++ b/tools/testing/selftests/bpf/progs/verifier_array_access.c
@@ -713,4 +713,19 @@ unsigned int non_stack_key_lookup(void)
return val->index;
}
+SEC("socket")
+__description("doesn't reject UINT64_MAX as s64 for irrelevant maps")
+__success __retval(42)
+unsigned int doesnt_reject_irrelevant_maps(void)
+{
+ __u64 key = 0xFFFFFFFFFFFFFFFF;
+ struct test_val *val;
+
+ val = bpf_map_lookup_elem(&map_hash_48b, &key);
+ if (val)
+ return val->index;
+
+ return 42;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c b/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c
index 5094c288cfd7..a9be6ae49454 100644
--- a/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c
+++ b/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c
@@ -620,23 +620,61 @@ __naked void helper_call_does_not_prevent_bpf_fastcall(void)
SEC("raw_tp")
__arch_x86_64
+__log_level(4) __msg("stack depth 24")
+/* may_goto counter at -24 */
+__xlated("0: *(u64 *)(r10 -24) =")
+/* may_goto timestamp at -16 */
+__xlated("1: *(u64 *)(r10 -16) =")
+__xlated("2: r1 = 1")
+__xlated("...")
+__xlated("4: r0 = &(void __percpu *)(r0)")
+__xlated("...")
+/* may_goto expansion starts */
+__xlated("6: r11 = *(u64 *)(r10 -24)")
+__xlated("7: if r11 == 0x0 goto pc+6")
+__xlated("8: r11 -= 1")
+__xlated("9: if r11 != 0x0 goto pc+2")
+__xlated("10: r11 = -24")
+__xlated("11: call unknown")
+__xlated("12: *(u64 *)(r10 -24) = r11")
+/* may_goto expansion ends */
+__xlated("13: *(u64 *)(r10 -8) = r1")
+__xlated("14: exit")
+__success
+__naked void may_goto_interaction_x86_64(void)
+{
+ asm volatile (
+ "r1 = 1;"
+ "*(u64 *)(r10 - 16) = r1;"
+ "call %[bpf_get_smp_processor_id];"
+ "r1 = *(u64 *)(r10 - 16);"
+ ".8byte %[may_goto];"
+ /* just touch some stack at -8 */
+ "*(u64 *)(r10 - 8) = r1;"
+ "exit;"
+ :
+ : __imm(bpf_get_smp_processor_id),
+ __imm_insn(may_goto, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, +1 /* offset */, 0))
+ : __clobber_all);
+}
+
+SEC("raw_tp")
+__arch_arm64
__log_level(4) __msg("stack depth 16")
/* may_goto counter at -16 */
__xlated("0: *(u64 *)(r10 -16) =")
__xlated("1: r1 = 1")
-__xlated("...")
-__xlated("3: r0 = &(void __percpu *)(r0)")
-__xlated("...")
+__xlated("2: call bpf_get_smp_processor_id")
/* may_goto expansion starts */
-__xlated("5: r11 = *(u64 *)(r10 -16)")
-__xlated("6: if r11 == 0x0 goto pc+3")
-__xlated("7: r11 -= 1")
-__xlated("8: *(u64 *)(r10 -16) = r11")
+__xlated("3: r11 = *(u64 *)(r10 -16)")
+__xlated("4: if r11 == 0x0 goto pc+3")
+__xlated("5: r11 -= 1")
+__xlated("6: *(u64 *)(r10 -16) = r11")
/* may_goto expansion ends */
-__xlated("9: *(u64 *)(r10 -8) = r1")
-__xlated("10: exit")
+__xlated("7: *(u64 *)(r10 -8) = r1")
+__xlated("8: exit")
__success
-__naked void may_goto_interaction(void)
+__naked void may_goto_interaction_arm64(void)
{
asm volatile (
"r1 = 1;"
diff --git a/tools/testing/selftests/bpf/progs/verifier_gotol.c b/tools/testing/selftests/bpf/progs/verifier_gotol.c
index 05a329ee45ee..d5d8f24df394 100644
--- a/tools/testing/selftests/bpf/progs/verifier_gotol.c
+++ b/tools/testing/selftests/bpf/progs/verifier_gotol.c
@@ -4,11 +4,7 @@
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
-#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
- (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
- defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
- defined(__TARGET_ARCH_loongarch)) && \
- __clang_major__ >= 18
+#ifdef CAN_USE_GOTOL
SEC("socket")
__description("gotol, small_imm")
diff --git a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
index e54bb5385bc1..75dd922e4e9f 100644
--- a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
+++ b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
@@ -407,11 +407,7 @@ l0_%=: call %[bpf_jiffies64]; \
: __clobber_all);
}
-#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
- (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
- defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
- defined(__TARGET_ARCH_loongarch)) && \
- __clang_major__ >= 18
+#ifdef CAN_USE_GOTOL
SEC("socket")
__success __retval(0)
__naked void gotol_and_may_goto(void)
diff --git a/tools/testing/selftests/bpf/progs/verifier_load_acquire.c b/tools/testing/selftests/bpf/progs/verifier_load_acquire.c
new file mode 100644
index 000000000000..77698d5a19e4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_load_acquire.c
@@ -0,0 +1,218 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Google LLC. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
+#include "bpf_misc.h"
+
+#ifdef CAN_USE_LOAD_ACQ_STORE_REL
+
+SEC("socket")
+__description("load-acquire, 8-bit")
+__success __success_unpriv __retval(0x12)
+__naked void load_acquire_8(void)
+{
+ asm volatile (
+ "w1 = 0x12;"
+ "*(u8 *)(r10 - 1) = w1;"
+ ".8byte %[load_acquire_insn];" // w0 = load_acquire((u8 *)(r10 - 1));
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -1))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("load-acquire, 16-bit")
+__success __success_unpriv __retval(0x1234)
+__naked void load_acquire_16(void)
+{
+ asm volatile (
+ "w1 = 0x1234;"
+ "*(u16 *)(r10 - 2) = w1;"
+ ".8byte %[load_acquire_insn];" // w0 = load_acquire((u16 *)(r10 - 2));
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_H, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -2))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("load-acquire, 32-bit")
+__success __success_unpriv __retval(0x12345678)
+__naked void load_acquire_32(void)
+{
+ asm volatile (
+ "w1 = 0x12345678;"
+ "*(u32 *)(r10 - 4) = w1;"
+ ".8byte %[load_acquire_insn];" // w0 = load_acquire((u32 *)(r10 - 4));
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_W, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -4))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("load-acquire, 64-bit")
+__success __success_unpriv __retval(0x1234567890abcdef)
+__naked void load_acquire_64(void)
+{
+ asm volatile (
+ "r1 = 0x1234567890abcdef ll;"
+ "*(u64 *)(r10 - 8) = r1;"
+ ".8byte %[load_acquire_insn];" // r0 = load_acquire((u64 *)(r10 - 8));
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -8))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("load-acquire with uninitialized src_reg")
+__failure __failure_unpriv __msg("R2 !read_ok")
+__naked void load_acquire_with_uninitialized_src_reg(void)
+{
+ asm volatile (
+ ".8byte %[load_acquire_insn];" // r0 = load_acquire((u64 *)(r2 + 0));
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_2, 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("load-acquire with non-pointer src_reg")
+__failure __failure_unpriv __msg("R1 invalid mem access 'scalar'")
+__naked void load_acquire_with_non_pointer_src_reg(void)
+{
+ asm volatile (
+ "r1 = 0;"
+ ".8byte %[load_acquire_insn];" // r0 = load_acquire((u64 *)(r1 + 0));
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_1, 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("misaligned load-acquire")
+__failure __failure_unpriv __msg("misaligned stack access off")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void load_acquire_misaligned(void)
+{
+ asm volatile (
+ "r1 = 0;"
+ "*(u64 *)(r10 - 8) = r1;"
+ ".8byte %[load_acquire_insn];" // w0 = load_acquire((u32 *)(r10 - 5));
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_W, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -5))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("load-acquire from ctx pointer")
+__failure __failure_unpriv __msg("BPF_ATOMIC loads from R1 ctx is not allowed")
+__naked void load_acquire_from_ctx_pointer(void)
+{
+ asm volatile (
+ ".8byte %[load_acquire_insn];" // w0 = load_acquire((u8 *)(r1 + 0));
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_1, 0))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("load-acquire from pkt pointer")
+__failure __msg("BPF_ATOMIC loads from R2 pkt is not allowed")
+__naked void load_acquire_from_pkt_pointer(void)
+{
+ asm volatile (
+ "r2 = *(u32 *)(r1 + %[xdp_md_data]);"
+ "r3 = *(u32 *)(r1 + %[xdp_md_data_end]);"
+ "r1 = r2;"
+ "r1 += 8;"
+ "if r1 >= r3 goto l0_%=;"
+ ".8byte %[load_acquire_insn];" // w0 = load_acquire((u8 *)(r2 + 0));
+"l0_%=: r0 = 0;"
+ "exit;"
+ :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)),
+ __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_2, 0))
+ : __clobber_all);
+}
+
+SEC("flow_dissector")
+__description("load-acquire from flow_keys pointer")
+__failure __msg("BPF_ATOMIC loads from R2 flow_keys is not allowed")
+__naked void load_acquire_from_flow_keys_pointer(void)
+{
+ asm volatile (
+ "r2 = *(u64 *)(r1 + %[__sk_buff_flow_keys]);"
+ ".8byte %[load_acquire_insn];" // w0 = load_acquire((u8 *)(r2 + 0));
+ "exit;"
+ :
+ : __imm_const(__sk_buff_flow_keys,
+ offsetof(struct __sk_buff, flow_keys)),
+ __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_2, 0))
+ : __clobber_all);
+}
+
+SEC("sk_reuseport")
+__description("load-acquire from sock pointer")
+__failure __msg("BPF_ATOMIC loads from R2 sock is not allowed")
+__naked void load_acquire_from_sock_pointer(void)
+{
+ asm volatile (
+ "r2 = *(u64 *)(r1 + %[sk_reuseport_md_sk]);"
+ // w0 = load_acquire((u8 *)(r2 + offsetof(struct bpf_sock, family)));
+ ".8byte %[load_acquire_insn];"
+ "exit;"
+ :
+ : __imm_const(sk_reuseport_md_sk, offsetof(struct sk_reuseport_md, sk)),
+ __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_2,
+ offsetof(struct bpf_sock, family)))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("load-acquire with invalid register R15")
+__failure __failure_unpriv __msg("R15 is invalid")
+__naked void load_acquire_with_invalid_reg(void)
+{
+ asm volatile (
+ ".8byte %[load_acquire_insn];" // r0 = load_acquire((u64 *)(r15 + 0));
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_0, 15 /* invalid reg */, 0))
+ : __clobber_all);
+}
+
+#else /* CAN_USE_LOAD_ACQ_STORE_REL */
+
+SEC("socket")
+__description("Clang version < 18, ENABLE_ATOMICS_TESTS not defined, and/or JIT doesn't support load-acquire, use a dummy test")
+__success
+int dummy_test(void)
+{
+ return 0;
+}
+
+#endif /* CAN_USE_LOAD_ACQ_STORE_REL */
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c b/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c
index e81097c96fe2..3966d827f288 100644
--- a/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c
+++ b/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c
@@ -69,8 +69,38 @@ __naked void may_goto_batch_1(void)
}
SEC("raw_tp")
-__description("may_goto batch with offsets 2/0")
+__description("may_goto batch with offsets 2/0 - x86_64")
__arch_x86_64
+__xlated("0: *(u64 *)(r10 -16) = 65535")
+__xlated("1: *(u64 *)(r10 -8) = 0")
+__xlated("2: r11 = *(u64 *)(r10 -16)")
+__xlated("3: if r11 == 0x0 goto pc+6")
+__xlated("4: r11 -= 1")
+__xlated("5: if r11 != 0x0 goto pc+2")
+__xlated("6: r11 = -16")
+__xlated("7: call unknown")
+__xlated("8: *(u64 *)(r10 -16) = r11")
+__xlated("9: r0 = 1")
+__xlated("10: r0 = 2")
+__xlated("11: exit")
+__success
+__naked void may_goto_batch_2_x86_64(void)
+{
+ asm volatile (
+ ".8byte %[may_goto1];"
+ ".8byte %[may_goto3];"
+ "r0 = 1;"
+ "r0 = 2;"
+ "exit;"
+ :
+ : __imm_insn(may_goto1, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 2 /* offset */, 0)),
+ __imm_insn(may_goto3, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 0 /* offset */, 0))
+ : __clobber_all);
+}
+
+SEC("raw_tp")
+__description("may_goto batch with offsets 2/0 - arm64")
+__arch_arm64
__xlated("0: *(u64 *)(r10 -8) = 8388608")
__xlated("1: r11 = *(u64 *)(r10 -8)")
__xlated("2: if r11 == 0x0 goto pc+3")
@@ -80,7 +110,7 @@ __xlated("5: r0 = 1")
__xlated("6: r0 = 2")
__xlated("7: exit")
__success
-__naked void may_goto_batch_2(void)
+__naked void may_goto_batch_2_arm64(void)
{
asm volatile (
".8byte %[may_goto1];"
diff --git a/tools/testing/selftests/bpf/progs/verifier_precision.c b/tools/testing/selftests/bpf/progs/verifier_precision.c
index 6b564d4c0986..6662d4b39969 100644
--- a/tools/testing/selftests/bpf/progs/verifier_precision.c
+++ b/tools/testing/selftests/bpf/progs/verifier_precision.c
@@ -2,6 +2,7 @@
/* Copyright (C) 2023 SUSE LLC */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
#include "bpf_misc.h"
SEC("?raw_tp")
@@ -90,6 +91,54 @@ __naked int bpf_end_bswap(void)
::: __clobber_all);
}
+#if defined(ENABLE_ATOMICS_TESTS) && \
+ (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86))
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r3 = r10")
+__msg("mark_precise: frame0: regs=r2 stack= before 2: (db) r2 = load_acquire((u64 *)(r10 -8))")
+__msg("mark_precise: frame0: regs= stack=-8 before 1: (7b) *(u64 *)(r10 -8) = r1")
+__msg("mark_precise: frame0: regs=r1 stack= before 0: (b7) r1 = 8")
+__naked int bpf_load_acquire(void)
+{
+ asm volatile (
+ "r1 = 8;"
+ "*(u64 *)(r10 - 8) = r1;"
+ ".8byte %[load_acquire_insn];" /* r2 = load_acquire((u64 *)(r10 - 8)); */
+ "r3 = r10;"
+ "r3 += r2;" /* mark_precise */
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -8))
+ : __clobber_all);
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r1 stack= before 3: (bf) r2 = r10")
+__msg("mark_precise: frame0: regs=r1 stack= before 2: (79) r1 = *(u64 *)(r10 -8)")
+__msg("mark_precise: frame0: regs= stack=-8 before 1: (db) store_release((u64 *)(r10 -8), r1)")
+__msg("mark_precise: frame0: regs=r1 stack= before 0: (b7) r1 = 8")
+__naked int bpf_store_release(void)
+{
+ asm volatile (
+ "r1 = 8;"
+ ".8byte %[store_release_insn];" /* store_release((u64 *)(r10 - 8), r1); */
+ "r1 = *(u64 *)(r10 - 8);"
+ "r2 = r10;"
+ "r2 += r1;" /* mark_precise */
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -8))
+ : __clobber_all);
+}
+
+#endif /* load-acquire, store-release */
#endif /* v4 instruction */
SEC("?raw_tp")
diff --git a/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c b/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c
index 417c61cd4b19..24aabc6083fd 100644
--- a/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c
+++ b/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c
@@ -481,4 +481,56 @@ l1_%=: r0 = 42; \
: __clobber_all);
}
+SEC("socket")
+__description("PTR_TO_STACK stack size > 512")
+__failure __msg("invalid write to stack R1 off=-520 size=8")
+__naked void stack_check_size_gt_512(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -520; \
+ r0 = 42; \
+ *(u64*)(r1 + 0) = r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+#ifdef __BPF_FEATURE_MAY_GOTO
+SEC("socket")
+__description("PTR_TO_STACK stack size 512 with may_goto with jit")
+__load_if_JITed()
+__success __retval(42)
+__naked void stack_check_size_512_with_may_goto_jit(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -512; \
+ r0 = 42; \
+ *(u32*)(r1 + 0) = r0; \
+ may_goto l0_%=; \
+ r2 = 100; \
+ l0_%=: \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK stack size 512 with may_goto without jit")
+__load_if_no_JITed()
+__failure __msg("stack size 520(extra 8) is too large")
+__naked void stack_check_size_512_with_may_goto(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -512; \
+ r0 = 42; \
+ *(u32*)(r1 + 0) = r0; \
+ may_goto l0_%=; \
+ r2 = 100; \
+ l0_%=: \
+ exit; \
+" ::: __clobber_all);
+}
+#endif
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_store_release.c b/tools/testing/selftests/bpf/progs/verifier_store_release.c
new file mode 100644
index 000000000000..c0442d5bb049
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_store_release.c
@@ -0,0 +1,286 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Google LLC. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
+#include "bpf_misc.h"
+
+#if __clang_major__ >= 18 && defined(ENABLE_ATOMICS_TESTS) && \
+ (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86))
+
+SEC("socket")
+__description("store-release, 8-bit")
+__success __success_unpriv __retval(0x12)
+__naked void store_release_8(void)
+{
+ asm volatile (
+ "w1 = 0x12;"
+ ".8byte %[store_release_insn];" // store_release((u8 *)(r10 - 1), w1);
+ "w0 = *(u8 *)(r10 - 1);"
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -1))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release, 16-bit")
+__success __success_unpriv __retval(0x1234)
+__naked void store_release_16(void)
+{
+ asm volatile (
+ "w1 = 0x1234;"
+ ".8byte %[store_release_insn];" // store_release((u16 *)(r10 - 2), w1);
+ "w0 = *(u16 *)(r10 - 2);"
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_H, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -2))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release, 32-bit")
+__success __success_unpriv __retval(0x12345678)
+__naked void store_release_32(void)
+{
+ asm volatile (
+ "w1 = 0x12345678;"
+ ".8byte %[store_release_insn];" // store_release((u32 *)(r10 - 4), w1);
+ "w0 = *(u32 *)(r10 - 4);"
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_W, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -4))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release, 64-bit")
+__success __success_unpriv __retval(0x1234567890abcdef)
+__naked void store_release_64(void)
+{
+ asm volatile (
+ "r1 = 0x1234567890abcdef ll;"
+ ".8byte %[store_release_insn];" // store_release((u64 *)(r10 - 8), r1);
+ "r0 = *(u64 *)(r10 - 8);"
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -8))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release with uninitialized src_reg")
+__failure __failure_unpriv __msg("R2 !read_ok")
+__naked void store_release_with_uninitialized_src_reg(void)
+{
+ asm volatile (
+ ".8byte %[store_release_insn];" // store_release((u64 *)(r10 - 8), r2);
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_10, BPF_REG_2, -8))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release with uninitialized dst_reg")
+__failure __failure_unpriv __msg("R2 !read_ok")
+__naked void store_release_with_uninitialized_dst_reg(void)
+{
+ asm volatile (
+ "r1 = 0;"
+ ".8byte %[store_release_insn];" // store_release((u64 *)(r2 - 8), r1);
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_2, BPF_REG_1, -8))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release with non-pointer dst_reg")
+__failure __failure_unpriv __msg("R1 invalid mem access 'scalar'")
+__naked void store_release_with_non_pointer_dst_reg(void)
+{
+ asm volatile (
+ "r1 = 0;"
+ ".8byte %[store_release_insn];" // store_release((u64 *)(r1 + 0), r1);
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_1, BPF_REG_1, 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("misaligned store-release")
+__failure __failure_unpriv __msg("misaligned stack access off")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void store_release_misaligned(void)
+{
+ asm volatile (
+ "w0 = 0;"
+ ".8byte %[store_release_insn];" // store_release((u32 *)(r10 - 5), w0);
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_W, BPF_STORE_REL, BPF_REG_10, BPF_REG_0, -5))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release to ctx pointer")
+__failure __failure_unpriv __msg("BPF_ATOMIC stores into R1 ctx is not allowed")
+__naked void store_release_to_ctx_pointer(void)
+{
+ asm volatile (
+ "w0 = 0;"
+ // store_release((u8 *)(r1 + offsetof(struct __sk_buff, cb[0])), w0);
+ ".8byte %[store_release_insn];"
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_STORE_REL, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0])))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("store-release to pkt pointer")
+__failure __msg("BPF_ATOMIC stores into R2 pkt is not allowed")
+__naked void store_release_to_pkt_pointer(void)
+{
+ asm volatile (
+ "w0 = 0;"
+ "r2 = *(u32 *)(r1 + %[xdp_md_data]);"
+ "r3 = *(u32 *)(r1 + %[xdp_md_data_end]);"
+ "r1 = r2;"
+ "r1 += 8;"
+ "if r1 >= r3 goto l0_%=;"
+ ".8byte %[store_release_insn];" // store_release((u8 *)(r2 + 0), w0);
+"l0_%=: r0 = 0;"
+ "exit;"
+ :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)),
+ __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_STORE_REL, BPF_REG_2, BPF_REG_0, 0))
+ : __clobber_all);
+}
+
+SEC("flow_dissector")
+__description("store-release to flow_keys pointer")
+__failure __msg("BPF_ATOMIC stores into R2 flow_keys is not allowed")
+__naked void store_release_to_flow_keys_pointer(void)
+{
+ asm volatile (
+ "w0 = 0;"
+ "r2 = *(u64 *)(r1 + %[__sk_buff_flow_keys]);"
+ ".8byte %[store_release_insn];" // store_release((u8 *)(r2 + 0), w0);
+ "exit;"
+ :
+ : __imm_const(__sk_buff_flow_keys,
+ offsetof(struct __sk_buff, flow_keys)),
+ __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_STORE_REL, BPF_REG_2, BPF_REG_0, 0))
+ : __clobber_all);
+}
+
+SEC("sk_reuseport")
+__description("store-release to sock pointer")
+__failure __msg("R2 cannot write into sock")
+__naked void store_release_to_sock_pointer(void)
+{
+ asm volatile (
+ "w0 = 0;"
+ "r2 = *(u64 *)(r1 + %[sk_reuseport_md_sk]);"
+ ".8byte %[store_release_insn];" // store_release((u8 *)(r2 + 0), w0);
+ "exit;"
+ :
+ : __imm_const(sk_reuseport_md_sk, offsetof(struct sk_reuseport_md, sk)),
+ __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_STORE_REL, BPF_REG_2, BPF_REG_0, 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release, leak pointer to stack")
+__success __success_unpriv __retval(0)
+__naked void store_release_leak_pointer_to_stack(void)
+{
+ asm volatile (
+ ".8byte %[store_release_insn];" // store_release((u64 *)(r10 - 8), r1);
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -8))
+ : __clobber_all);
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+SEC("socket")
+__description("store-release, leak pointer to map")
+__success __retval(0)
+__failure_unpriv __msg_unpriv("R6 leaks addr into map")
+__naked void store_release_leak_pointer_to_map(void)
+{
+ asm volatile (
+ "r6 = r1;"
+ "r1 = %[map_hash_8b] ll;"
+ "r2 = 0;"
+ "*(u64 *)(r10 - 8) = r2;"
+ "r2 = r10;"
+ "r2 += -8;"
+ "call %[bpf_map_lookup_elem];"
+ "if r0 == 0 goto l0_%=;"
+ ".8byte %[store_release_insn];" // store_release((u64 *)(r0 + 0), r6);
+"l0_%=:"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_addr(map_hash_8b),
+ __imm(bpf_map_lookup_elem),
+ __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_0, BPF_REG_6, 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release with invalid register R15")
+__failure __failure_unpriv __msg("R15 is invalid")
+__naked void store_release_with_invalid_reg(void)
+{
+ asm volatile (
+ ".8byte %[store_release_insn];" // store_release((u64 *)(r15 + 0), r1);
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, 15 /* invalid reg */, BPF_REG_1, 0))
+ : __clobber_all);
+}
+
+#else
+
+SEC("socket")
+__description("Clang version < 18, ENABLE_ATOMICS_TESTS not defined, and/or JIT doesn't support store-release, use a dummy test")
+__success
+int dummy_test(void)
+{
+ return 0;
+}
+
+#endif
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/xdp_redirect_map.c b/tools/testing/selftests/bpf/progs/xdp_redirect_map.c
index 682dda8dabbc..50c8958f94e5 100644
--- a/tools/testing/selftests/bpf/progs/xdp_redirect_map.c
+++ b/tools/testing/selftests/bpf/progs/xdp_redirect_map.c
@@ -1,7 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
+#include <linux/if_ether.h>
+
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
struct {
__uint(type, BPF_MAP_TYPE_DEVMAP);
@@ -28,4 +31,89 @@ int xdp_redirect_map_2(struct xdp_md *xdp)
return bpf_redirect_map(&tx_port, 2, 0);
}
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 3);
+ __type(key, __u32);
+ __type(value, __u64);
+} rxcnt SEC(".maps");
+
+static int xdp_count(struct xdp_md *xdp, __u32 key)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+ struct ethhdr *eth = data;
+ __u64 *count;
+
+ if (data + sizeof(*eth) > data_end)
+ return XDP_DROP;
+
+ if (bpf_htons(eth->h_proto) == ETH_P_IP) {
+ /* We only count IPv4 packets */
+ count = bpf_map_lookup_elem(&rxcnt, &key);
+ if (count)
+ *count += 1;
+ }
+
+ return XDP_PASS;
+}
+
+SEC("xdp")
+int xdp_count_0(struct xdp_md *xdp)
+{
+ return xdp_count(xdp, 0);
+}
+
+SEC("xdp")
+int xdp_count_1(struct xdp_md *xdp)
+{
+ return xdp_count(xdp, 1);
+}
+
+SEC("xdp")
+int xdp_count_2(struct xdp_md *xdp)
+{
+ return xdp_count(xdp, 2);
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 2);
+ __type(key, __u32);
+ __type(value, __be64);
+} rx_mac SEC(".maps");
+
+static int store_mac(struct xdp_md *xdp, __u32 id)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+ struct ethhdr *eth = data;
+ __u32 key = id;
+ __be64 mac = 0;
+
+ if (data + sizeof(*eth) > data_end)
+ return XDP_DROP;
+
+ /* Only store IPv4 MAC to avoid being polluted by IPv6 packets */
+ if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+ __builtin_memcpy(&mac, eth->h_source, ETH_ALEN);
+ bpf_map_update_elem(&rx_mac, &key, &mac, 0);
+ bpf_printk("%s - %x", __func__, mac);
+ }
+
+ return XDP_PASS;
+}
+
+SEC("xdp")
+int store_mac_1(struct xdp_md *xdp)
+{
+ return store_mac(xdp, 0);
+}
+
+SEC("xdp")
+int store_mac_2(struct xdp_md *xdp)
+{
+ return store_mac(xdp, 1);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
index 97b26a30b59a..bc2945ed8a80 100644
--- a/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
+++ b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
@@ -34,6 +34,14 @@ struct {
__uint(max_entries, 128);
} mac_map SEC(".maps");
+/* map to store redirect flags for each protocol*/
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u16);
+ __type(value, __u64);
+ __uint(max_entries, 16);
+} redirect_flags SEC(".maps");
+
SEC("xdp")
int xdp_redirect_map_multi_prog(struct xdp_md *ctx)
{
@@ -41,25 +49,34 @@ int xdp_redirect_map_multi_prog(struct xdp_md *ctx)
void *data = (void *)(long)ctx->data;
int if_index = ctx->ingress_ifindex;
struct ethhdr *eth = data;
+ __u64 *flags_from_map;
__u16 h_proto;
__u64 nh_off;
+ __u64 flags;
nh_off = sizeof(*eth);
if (data + nh_off > data_end)
return XDP_DROP;
- h_proto = eth->h_proto;
-
- /* Using IPv4 for (BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS) testing */
- if (h_proto == bpf_htons(ETH_P_IP))
- return bpf_redirect_map(&map_all, 0,
- BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
- /* Using IPv6 for none flag testing */
- else if (h_proto == bpf_htons(ETH_P_IPV6))
- return bpf_redirect_map(&map_all, if_index, 0);
- /* All others for BPF_F_BROADCAST testing */
- else
- return bpf_redirect_map(&map_all, 0, BPF_F_BROADCAST);
+ h_proto = bpf_htons(eth->h_proto);
+
+ flags_from_map = bpf_map_lookup_elem(&redirect_flags, &h_proto);
+
+ /* Default flags for IPv4 : (BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS) */
+ if (h_proto == ETH_P_IP) {
+ flags = flags_from_map ? *flags_from_map : BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS;
+ return bpf_redirect_map(&map_all, 0, flags);
+ }
+ /* Default flags for IPv6 : 0 */
+ if (h_proto == ETH_P_IPV6) {
+ flags = flags_from_map ? *flags_from_map : 0;
+ return bpf_redirect_map(&map_all, if_index, flags);
+ }
+ /* Default flags for others BPF_F_BROADCAST : 0 */
+ else {
+ flags = flags_from_map ? *flags_from_map : BPF_F_BROADCAST;
+ return bpf_redirect_map(&map_all, 0, flags);
+ }
}
/* The following 2 progs are for 2nd devmap prog testing */
diff --git a/tools/testing/selftests/bpf/test_btf.h b/tools/testing/selftests/bpf/test_btf.h
index fb4f4714eeb4..e65889ab4adf 100644
--- a/tools/testing/selftests/bpf/test_btf.h
+++ b/tools/testing/selftests/bpf/test_btf.h
@@ -72,9 +72,15 @@
#define BTF_TYPE_FLOAT_ENC(name, sz) \
BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 0), sz)
+#define BTF_DECL_ATTR_ENC(value, type, component_idx) \
+ BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 1, 0), type), (component_idx)
+
#define BTF_DECL_TAG_ENC(value, type, component_idx) \
BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 0), type), (component_idx)
+#define BTF_TYPE_ATTR_ENC(value, type) \
+ BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_TYPE_TAG, 1, 0), type)
+
#define BTF_TYPE_TAG_ENC(value, type) \
BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_TYPE_TAG, 0, 0), type)
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
index cc9dde507aba..3220f1d28697 100644
--- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
@@ -1130,6 +1130,7 @@ static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = {
};
static const struct bpf_verifier_ops bpf_testmod_verifier_ops = {
+ .get_func_proto = bpf_base_func_proto,
.is_valid_access = bpf_testmod_ops_is_valid_access,
};
@@ -1176,10 +1177,25 @@ static int bpf_testmod_ops__test_maybe_null(int dummy,
return 0;
}
+static int bpf_testmod_ops__test_refcounted(int dummy,
+ struct task_struct *task__ref)
+{
+ return 0;
+}
+
+static struct task_struct *
+bpf_testmod_ops__test_return_ref_kptr(int dummy, struct task_struct *task__ref,
+ struct cgroup *cgrp)
+{
+ return NULL;
+}
+
static struct bpf_testmod_ops __bpf_testmod_ops = {
.test_1 = bpf_testmod_test_1,
.test_2 = bpf_testmod_test_2,
.test_maybe_null = bpf_testmod_ops__test_maybe_null,
+ .test_refcounted = bpf_testmod_ops__test_refcounted,
+ .test_return_ref_kptr = bpf_testmod_ops__test_return_ref_kptr,
};
struct bpf_struct_ops bpf_bpf_testmod_ops = {
@@ -1293,6 +1309,85 @@ static int bpf_test_mod_st_ops__test_pro_epilogue(struct st_ops_args *args)
return 0;
}
+static int bpf_cgroup_from_id_id;
+static int bpf_cgroup_release_id;
+
+static int st_ops_gen_prologue_with_kfunc(struct bpf_insn *insn_buf, bool direct_write,
+ const struct bpf_prog *prog)
+{
+ struct bpf_insn *insn = insn_buf;
+
+ /* r8 = r1; // r8 will be "u64 *ctx".
+ * r1 = 0;
+ * r0 = bpf_cgroup_from_id(r1);
+ * if r0 != 0 goto pc+5;
+ * r6 = r8[0]; // r6 will be "struct st_ops *args".
+ * r7 = r6->a;
+ * r7 += 1000;
+ * r6->a = r7;
+ * goto pc+2;
+ * r1 = r0;
+ * bpf_cgroup_release(r1);
+ * r1 = r8;
+ */
+ *insn++ = BPF_MOV64_REG(BPF_REG_8, BPF_REG_1);
+ *insn++ = BPF_MOV64_IMM(BPF_REG_1, 0);
+ *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_from_id_id);
+ *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 5);
+ *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_8, 0);
+ *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_6, offsetof(struct st_ops_args, a));
+ *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 1000);
+ *insn++ = BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_7, offsetof(struct st_ops_args, a));
+ *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 2);
+ *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_0);
+ *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_release_id),
+ *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_8);
+ *insn++ = prog->insnsi[0];
+
+ return insn - insn_buf;
+}
+
+static int st_ops_gen_epilogue_with_kfunc(struct bpf_insn *insn_buf, const struct bpf_prog *prog,
+ s16 ctx_stack_off)
+{
+ struct bpf_insn *insn = insn_buf;
+
+ /* r1 = 0;
+ * r6 = 0;
+ * r0 = bpf_cgroup_from_id(r1);
+ * if r0 != 0 goto pc+6;
+ * r1 = stack[ctx_stack_off]; // r1 will be "u64 *ctx"
+ * r1 = r1[0]; // r1 will be "struct st_ops *args"
+ * r6 = r1->a;
+ * r6 += 10000;
+ * r1->a = r6;
+ * goto pc+2
+ * r1 = r0;
+ * bpf_cgroup_release(r1);
+ * r0 = r6;
+ * r0 *= 2;
+ * BPF_EXIT;
+ */
+ *insn++ = BPF_MOV64_IMM(BPF_REG_1, 0);
+ *insn++ = BPF_MOV64_IMM(BPF_REG_6, 0);
+ *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_from_id_id);
+ *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 6);
+ *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_FP, ctx_stack_off);
+ *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0);
+ *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, offsetof(struct st_ops_args, a));
+ *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 10000);
+ *insn++ = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(struct st_ops_args, a));
+ *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 2);
+ *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_0);
+ *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_release_id),
+ *insn++ = BPF_MOV64_REG(BPF_REG_0, BPF_REG_6);
+ *insn++ = BPF_ALU64_IMM(BPF_MUL, BPF_REG_0, 2);
+ *insn++ = BPF_EXIT_INSN();
+
+ return insn - insn_buf;
+}
+
+#define KFUNC_PRO_EPI_PREFIX "test_kfunc_"
static int st_ops_gen_prologue(struct bpf_insn *insn_buf, bool direct_write,
const struct bpf_prog *prog)
{
@@ -1302,6 +1397,9 @@ static int st_ops_gen_prologue(struct bpf_insn *insn_buf, bool direct_write,
strcmp(prog->aux->attach_func_name, "test_pro_epilogue"))
return 0;
+ if (!strncmp(prog->aux->name, KFUNC_PRO_EPI_PREFIX, strlen(KFUNC_PRO_EPI_PREFIX)))
+ return st_ops_gen_prologue_with_kfunc(insn_buf, direct_write, prog);
+
/* r6 = r1[0]; // r6 will be "struct st_ops *args". r1 is "u64 *ctx".
* r7 = r6->a;
* r7 += 1000;
@@ -1325,6 +1423,9 @@ static int st_ops_gen_epilogue(struct bpf_insn *insn_buf, const struct bpf_prog
strcmp(prog->aux->attach_func_name, "test_pro_epilogue"))
return 0;
+ if (!strncmp(prog->aux->name, KFUNC_PRO_EPI_PREFIX, strlen(KFUNC_PRO_EPI_PREFIX)))
+ return st_ops_gen_epilogue_with_kfunc(insn_buf, prog, ctx_stack_off);
+
/* r1 = stack[ctx_stack_off]; // r1 will be "u64 *ctx"
* r1 = r1[0]; // r1 will be "struct st_ops *args"
* r6 = r1->a;
@@ -1395,6 +1496,13 @@ static void st_ops_unreg(void *kdata, struct bpf_link *link)
static int st_ops_init(struct btf *btf)
{
+ struct btf *kfunc_btf;
+
+ bpf_cgroup_from_id_id = bpf_find_btf_id("bpf_cgroup_from_id", BTF_KIND_FUNC, &kfunc_btf);
+ bpf_cgroup_release_id = bpf_find_btf_id("bpf_cgroup_release", BTF_KIND_FUNC, &kfunc_btf);
+ if (bpf_cgroup_from_id_id < 0 || bpf_cgroup_release_id < 0)
+ return -EINVAL;
+
return 0;
}
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h
index 356803d1c10e..c9fab51f16e2 100644
--- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h
@@ -6,6 +6,7 @@
#include <linux/types.h>
struct task_struct;
+struct cgroup;
struct bpf_testmod_test_read_ctx {
char *buf;
@@ -36,6 +37,11 @@ struct bpf_testmod_ops {
/* Used to test nullable arguments. */
int (*test_maybe_null)(int dummy, struct task_struct *task);
int (*unsupported_ops)(void);
+ /* Used to test ref_acquired arguments. */
+ int (*test_refcounted)(int dummy, struct task_struct *task);
+ /* Used to test returning referenced kptr. */
+ struct task_struct *(*test_return_ref_kptr)(int dummy, struct task_struct *task,
+ struct cgroup *cgrp);
/* The following fields are used to test shadow copies. */
char onebyte;
diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c
index 53b06647cf57..49f2fc61061f 100644
--- a/tools/testing/selftests/bpf/test_loader.c
+++ b/tools/testing/selftests/bpf/test_loader.c
@@ -37,6 +37,7 @@
#define TEST_TAG_JITED_PFX "comment:test_jited="
#define TEST_TAG_JITED_PFX_UNPRIV "comment:test_jited_unpriv="
#define TEST_TAG_CAPS_UNPRIV "comment:test_caps_unpriv="
+#define TEST_TAG_LOAD_MODE_PFX "comment:load_mode="
/* Warning: duplicated in bpf_misc.h */
#define POINTER_VALUE 0xcafe4all
@@ -55,6 +56,11 @@ enum mode {
UNPRIV = 2
};
+enum load_mode {
+ JITED = 1 << 0,
+ NO_JITED = 1 << 1,
+};
+
struct expect_msg {
const char *substr; /* substring match */
regex_t regex;
@@ -87,6 +93,7 @@ struct test_spec {
int prog_flags;
int mode_mask;
int arch_mask;
+ int load_mask;
bool auxiliary;
bool valid;
};
@@ -406,6 +413,7 @@ static int parse_test_spec(struct test_loader *tester,
bool collect_jit = false;
int func_id, i, err = 0;
u32 arch_mask = 0;
+ u32 load_mask = 0;
struct btf *btf;
enum arch arch;
@@ -580,10 +588,22 @@ static int parse_test_spec(struct test_loader *tester,
if (err)
goto cleanup;
spec->mode_mask |= UNPRIV;
+ } else if (str_has_pfx(s, TEST_TAG_LOAD_MODE_PFX)) {
+ val = s + sizeof(TEST_TAG_LOAD_MODE_PFX) - 1;
+ if (strcmp(val, "jited") == 0) {
+ load_mask = JITED;
+ } else if (strcmp(val, "no_jited") == 0) {
+ load_mask = NO_JITED;
+ } else {
+ PRINT_FAIL("bad load spec: '%s'", val);
+ err = -EINVAL;
+ goto cleanup;
+ }
}
}
spec->arch_mask = arch_mask ?: -1;
+ spec->load_mask = load_mask ?: (JITED | NO_JITED);
if (spec->mode_mask == 0)
spec->mode_mask = PRIV;
@@ -773,7 +793,7 @@ static int drop_capabilities(struct cap_state *caps)
err = cap_disable_effective(caps_to_drop, &caps->old_caps);
if (err) {
- PRINT_FAIL("failed to drop capabilities: %i, %s\n", err, strerror(err));
+ PRINT_FAIL("failed to drop capabilities: %i, %s\n", err, strerror(-err));
return err;
}
@@ -790,7 +810,7 @@ static int restore_capabilities(struct cap_state *caps)
err = cap_enable_effective(caps->old_caps, NULL);
if (err)
- PRINT_FAIL("failed to restore capabilities: %i, %s\n", err, strerror(err));
+ PRINT_FAIL("failed to restore capabilities: %i, %s\n", err, strerror(-err));
caps->initialized = false;
return err;
}
@@ -928,6 +948,7 @@ void run_subtest(struct test_loader *tester,
bool unpriv)
{
struct test_subspec *subspec = unpriv ? &spec->unpriv : &spec->priv;
+ int current_runtime = is_jit_enabled() ? JITED : NO_JITED;
struct bpf_program *tprog = NULL, *tprog_iter;
struct bpf_link *link, *links[32] = {};
struct test_spec *spec_iter;
@@ -946,6 +967,11 @@ void run_subtest(struct test_loader *tester,
return;
}
+ if ((current_runtime & spec->load_mask) == 0) {
+ test__skip();
+ return;
+ }
+
if (unpriv) {
if (!can_execute_unpriv(tester, spec)) {
test__skip();
@@ -959,7 +985,7 @@ void run_subtest(struct test_loader *tester,
if (subspec->caps) {
err = cap_enable_effective(subspec->caps, NULL);
if (err) {
- PRINT_FAIL("failed to set capabilities: %i, %s\n", err, strerror(err));
+ PRINT_FAIL("failed to set capabilities: %i, %s\n", err, strerror(-err));
goto subtest_cleanup;
}
}
diff --git a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
deleted file mode 100755
index 1e565f47aca9..000000000000
--- a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
+++ /dev/null
@@ -1,476 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# Setup/topology:
-#
-# NS1 NS2 NS3
-# veth1 <---> veth2 veth3 <---> veth4 (the top route)
-# veth5 <---> veth6 veth7 <---> veth8 (the bottom route)
-#
-# each vethN gets IPv[4|6]_N address
-#
-# IPv*_SRC = IPv*_1
-# IPv*_DST = IPv*_4
-#
-# all tests test pings from IPv*_SRC to IPv*_DST
-#
-# by default, routes are configured to allow packets to go
-# IP*_1 <=> IP*_2 <=> IP*_3 <=> IP*_4 (the top route)
-#
-# a GRE device is installed in NS3 with IPv*_GRE, and
-# NS1/NS2 are configured to route packets to IPv*_GRE via IP*_8
-# (the bottom route)
-#
-# Tests:
-#
-# 1. routes NS2->IPv*_DST are brought down, so the only way a ping
-# from IP*_SRC to IP*_DST can work is via IPv*_GRE
-#
-# 2a. in an egress test, a bpf LWT_XMIT program is installed on veth1
-# that encaps the packets with an IP/GRE header to route to IPv*_GRE
-#
-# ping: SRC->[encap at veth1:egress]->GRE:decap->DST
-# ping replies go DST->SRC directly
-#
-# 2b. in an ingress test, a bpf LWT_IN program is installed on veth2
-# that encaps the packets with an IP/GRE header to route to IPv*_GRE
-#
-# ping: SRC->[encap at veth2:ingress]->GRE:decap->DST
-# ping replies go DST->SRC directly
-
-BPF_FILE="test_lwt_ip_encap.bpf.o"
-if [[ $EUID -ne 0 ]]; then
- echo "This script must be run as root"
- echo "FAIL"
- exit 1
-fi
-
-readonly NS1="ns1-$(mktemp -u XXXXXX)"
-readonly NS2="ns2-$(mktemp -u XXXXXX)"
-readonly NS3="ns3-$(mktemp -u XXXXXX)"
-
-readonly IPv4_1="172.16.1.100"
-readonly IPv4_2="172.16.2.100"
-readonly IPv4_3="172.16.3.100"
-readonly IPv4_4="172.16.4.100"
-readonly IPv4_5="172.16.5.100"
-readonly IPv4_6="172.16.6.100"
-readonly IPv4_7="172.16.7.100"
-readonly IPv4_8="172.16.8.100"
-readonly IPv4_GRE="172.16.16.100"
-
-readonly IPv4_SRC=$IPv4_1
-readonly IPv4_DST=$IPv4_4
-
-readonly IPv6_1="fb01::1"
-readonly IPv6_2="fb02::1"
-readonly IPv6_3="fb03::1"
-readonly IPv6_4="fb04::1"
-readonly IPv6_5="fb05::1"
-readonly IPv6_6="fb06::1"
-readonly IPv6_7="fb07::1"
-readonly IPv6_8="fb08::1"
-readonly IPv6_GRE="fb10::1"
-
-readonly IPv6_SRC=$IPv6_1
-readonly IPv6_DST=$IPv6_4
-
-TEST_STATUS=0
-TESTS_SUCCEEDED=0
-TESTS_FAILED=0
-
-TMPFILE=""
-
-process_test_results()
-{
- if [[ "${TEST_STATUS}" -eq 0 ]] ; then
- echo "PASS"
- TESTS_SUCCEEDED=$((TESTS_SUCCEEDED+1))
- else
- echo "FAIL"
- TESTS_FAILED=$((TESTS_FAILED+1))
- fi
-}
-
-print_test_summary_and_exit()
-{
- echo "passed tests: ${TESTS_SUCCEEDED}"
- echo "failed tests: ${TESTS_FAILED}"
- if [ "${TESTS_FAILED}" -eq "0" ] ; then
- exit 0
- else
- exit 1
- fi
-}
-
-setup()
-{
- set -e # exit on error
- TEST_STATUS=0
-
- # create devices and namespaces
- ip netns add "${NS1}"
- ip netns add "${NS2}"
- ip netns add "${NS3}"
-
- # rp_filter gets confused by what these tests are doing, so disable it
- ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec ${NS2} sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec ${NS3} sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec ${NS1} sysctl -wq net.ipv4.conf.default.rp_filter=0
- ip netns exec ${NS2} sysctl -wq net.ipv4.conf.default.rp_filter=0
- ip netns exec ${NS3} sysctl -wq net.ipv4.conf.default.rp_filter=0
-
- # disable IPv6 DAD because it sometimes takes too long and fails tests
- ip netns exec ${NS1} sysctl -wq net.ipv6.conf.all.accept_dad=0
- ip netns exec ${NS2} sysctl -wq net.ipv6.conf.all.accept_dad=0
- ip netns exec ${NS3} sysctl -wq net.ipv6.conf.all.accept_dad=0
- ip netns exec ${NS1} sysctl -wq net.ipv6.conf.default.accept_dad=0
- ip netns exec ${NS2} sysctl -wq net.ipv6.conf.default.accept_dad=0
- ip netns exec ${NS3} sysctl -wq net.ipv6.conf.default.accept_dad=0
-
- ip link add veth1 type veth peer name veth2
- ip link add veth3 type veth peer name veth4
- ip link add veth5 type veth peer name veth6
- ip link add veth7 type veth peer name veth8
-
- ip netns exec ${NS2} sysctl -wq net.ipv4.ip_forward=1
- ip netns exec ${NS2} sysctl -wq net.ipv6.conf.all.forwarding=1
-
- ip link set veth1 netns ${NS1}
- ip link set veth2 netns ${NS2}
- ip link set veth3 netns ${NS2}
- ip link set veth4 netns ${NS3}
- ip link set veth5 netns ${NS1}
- ip link set veth6 netns ${NS2}
- ip link set veth7 netns ${NS2}
- ip link set veth8 netns ${NS3}
-
- if [ ! -z "${VRF}" ] ; then
- ip -netns ${NS1} link add red type vrf table 1001
- ip -netns ${NS1} link set red up
- ip -netns ${NS1} route add table 1001 unreachable default metric 8192
- ip -netns ${NS1} -6 route add table 1001 unreachable default metric 8192
- ip -netns ${NS1} link set veth1 vrf red
- ip -netns ${NS1} link set veth5 vrf red
-
- ip -netns ${NS2} link add red type vrf table 1001
- ip -netns ${NS2} link set red up
- ip -netns ${NS2} route add table 1001 unreachable default metric 8192
- ip -netns ${NS2} -6 route add table 1001 unreachable default metric 8192
- ip -netns ${NS2} link set veth2 vrf red
- ip -netns ${NS2} link set veth3 vrf red
- ip -netns ${NS2} link set veth6 vrf red
- ip -netns ${NS2} link set veth7 vrf red
- fi
-
- # configure addesses: the top route (1-2-3-4)
- ip -netns ${NS1} addr add ${IPv4_1}/24 dev veth1
- ip -netns ${NS2} addr add ${IPv4_2}/24 dev veth2
- ip -netns ${NS2} addr add ${IPv4_3}/24 dev veth3
- ip -netns ${NS3} addr add ${IPv4_4}/24 dev veth4
- ip -netns ${NS1} -6 addr add ${IPv6_1}/128 nodad dev veth1
- ip -netns ${NS2} -6 addr add ${IPv6_2}/128 nodad dev veth2
- ip -netns ${NS2} -6 addr add ${IPv6_3}/128 nodad dev veth3
- ip -netns ${NS3} -6 addr add ${IPv6_4}/128 nodad dev veth4
-
- # configure addresses: the bottom route (5-6-7-8)
- ip -netns ${NS1} addr add ${IPv4_5}/24 dev veth5
- ip -netns ${NS2} addr add ${IPv4_6}/24 dev veth6
- ip -netns ${NS2} addr add ${IPv4_7}/24 dev veth7
- ip -netns ${NS3} addr add ${IPv4_8}/24 dev veth8
- ip -netns ${NS1} -6 addr add ${IPv6_5}/128 nodad dev veth5
- ip -netns ${NS2} -6 addr add ${IPv6_6}/128 nodad dev veth6
- ip -netns ${NS2} -6 addr add ${IPv6_7}/128 nodad dev veth7
- ip -netns ${NS3} -6 addr add ${IPv6_8}/128 nodad dev veth8
-
- ip -netns ${NS1} link set dev veth1 up
- ip -netns ${NS2} link set dev veth2 up
- ip -netns ${NS2} link set dev veth3 up
- ip -netns ${NS3} link set dev veth4 up
- ip -netns ${NS1} link set dev veth5 up
- ip -netns ${NS2} link set dev veth6 up
- ip -netns ${NS2} link set dev veth7 up
- ip -netns ${NS3} link set dev veth8 up
-
- # configure routes: IP*_SRC -> veth1/IP*_2 (= top route) default;
- # the bottom route to specific bottom addresses
-
- # NS1
- # top route
- ip -netns ${NS1} route add ${IPv4_2}/32 dev veth1 ${VRF}
- ip -netns ${NS1} route add default dev veth1 via ${IPv4_2} ${VRF} # go top by default
- ip -netns ${NS1} -6 route add ${IPv6_2}/128 dev veth1 ${VRF}
- ip -netns ${NS1} -6 route add default dev veth1 via ${IPv6_2} ${VRF} # go top by default
- # bottom route
- ip -netns ${NS1} route add ${IPv4_6}/32 dev veth5 ${VRF}
- ip -netns ${NS1} route add ${IPv4_7}/32 dev veth5 via ${IPv4_6} ${VRF}
- ip -netns ${NS1} route add ${IPv4_8}/32 dev veth5 via ${IPv4_6} ${VRF}
- ip -netns ${NS1} -6 route add ${IPv6_6}/128 dev veth5 ${VRF}
- ip -netns ${NS1} -6 route add ${IPv6_7}/128 dev veth5 via ${IPv6_6} ${VRF}
- ip -netns ${NS1} -6 route add ${IPv6_8}/128 dev veth5 via ${IPv6_6} ${VRF}
-
- # NS2
- # top route
- ip -netns ${NS2} route add ${IPv4_1}/32 dev veth2 ${VRF}
- ip -netns ${NS2} route add ${IPv4_4}/32 dev veth3 ${VRF}
- ip -netns ${NS2} -6 route add ${IPv6_1}/128 dev veth2 ${VRF}
- ip -netns ${NS2} -6 route add ${IPv6_4}/128 dev veth3 ${VRF}
- # bottom route
- ip -netns ${NS2} route add ${IPv4_5}/32 dev veth6 ${VRF}
- ip -netns ${NS2} route add ${IPv4_8}/32 dev veth7 ${VRF}
- ip -netns ${NS2} -6 route add ${IPv6_5}/128 dev veth6 ${VRF}
- ip -netns ${NS2} -6 route add ${IPv6_8}/128 dev veth7 ${VRF}
-
- # NS3
- # top route
- ip -netns ${NS3} route add ${IPv4_3}/32 dev veth4
- ip -netns ${NS3} route add ${IPv4_1}/32 dev veth4 via ${IPv4_3}
- ip -netns ${NS3} route add ${IPv4_2}/32 dev veth4 via ${IPv4_3}
- ip -netns ${NS3} -6 route add ${IPv6_3}/128 dev veth4
- ip -netns ${NS3} -6 route add ${IPv6_1}/128 dev veth4 via ${IPv6_3}
- ip -netns ${NS3} -6 route add ${IPv6_2}/128 dev veth4 via ${IPv6_3}
- # bottom route
- ip -netns ${NS3} route add ${IPv4_7}/32 dev veth8
- ip -netns ${NS3} route add ${IPv4_5}/32 dev veth8 via ${IPv4_7}
- ip -netns ${NS3} route add ${IPv4_6}/32 dev veth8 via ${IPv4_7}
- ip -netns ${NS3} -6 route add ${IPv6_7}/128 dev veth8
- ip -netns ${NS3} -6 route add ${IPv6_5}/128 dev veth8 via ${IPv6_7}
- ip -netns ${NS3} -6 route add ${IPv6_6}/128 dev veth8 via ${IPv6_7}
-
- # configure IPv4 GRE device in NS3, and a route to it via the "bottom" route
- ip -netns ${NS3} tunnel add gre_dev mode gre remote ${IPv4_1} local ${IPv4_GRE} ttl 255
- ip -netns ${NS3} link set gre_dev up
- ip -netns ${NS3} addr add ${IPv4_GRE} dev gre_dev
- ip -netns ${NS1} route add ${IPv4_GRE}/32 dev veth5 via ${IPv4_6} ${VRF}
- ip -netns ${NS2} route add ${IPv4_GRE}/32 dev veth7 via ${IPv4_8} ${VRF}
-
-
- # configure IPv6 GRE device in NS3, and a route to it via the "bottom" route
- ip -netns ${NS3} -6 tunnel add name gre6_dev mode ip6gre remote ${IPv6_1} local ${IPv6_GRE} ttl 255
- ip -netns ${NS3} link set gre6_dev up
- ip -netns ${NS3} -6 addr add ${IPv6_GRE} nodad dev gre6_dev
- ip -netns ${NS1} -6 route add ${IPv6_GRE}/128 dev veth5 via ${IPv6_6} ${VRF}
- ip -netns ${NS2} -6 route add ${IPv6_GRE}/128 dev veth7 via ${IPv6_8} ${VRF}
-
- TMPFILE=$(mktemp /tmp/test_lwt_ip_encap.XXXXXX)
-
- sleep 1 # reduce flakiness
- set +e
-}
-
-cleanup()
-{
- if [ -f ${TMPFILE} ] ; then
- rm ${TMPFILE}
- fi
-
- ip netns del ${NS1} 2> /dev/null
- ip netns del ${NS2} 2> /dev/null
- ip netns del ${NS3} 2> /dev/null
-}
-
-trap cleanup EXIT
-
-remove_routes_to_gredev()
-{
- ip -netns ${NS1} route del ${IPv4_GRE} dev veth5 ${VRF}
- ip -netns ${NS2} route del ${IPv4_GRE} dev veth7 ${VRF}
- ip -netns ${NS1} -6 route del ${IPv6_GRE}/128 dev veth5 ${VRF}
- ip -netns ${NS2} -6 route del ${IPv6_GRE}/128 dev veth7 ${VRF}
-}
-
-add_unreachable_routes_to_gredev()
-{
- ip -netns ${NS1} route add unreachable ${IPv4_GRE}/32 ${VRF}
- ip -netns ${NS2} route add unreachable ${IPv4_GRE}/32 ${VRF}
- ip -netns ${NS1} -6 route add unreachable ${IPv6_GRE}/128 ${VRF}
- ip -netns ${NS2} -6 route add unreachable ${IPv6_GRE}/128 ${VRF}
-}
-
-test_ping()
-{
- local readonly PROTO=$1
- local readonly EXPECTED=$2
- local RET=0
-
- if [ "${PROTO}" == "IPv4" ] ; then
- ip netns exec ${NS1} ping -c 1 -W 1 -I veth1 ${IPv4_DST} 2>&1 > /dev/null
- RET=$?
- elif [ "${PROTO}" == "IPv6" ] ; then
- ip netns exec ${NS1} ping6 -c 1 -W 1 -I veth1 ${IPv6_DST} 2>&1 > /dev/null
- RET=$?
- else
- echo " test_ping: unknown PROTO: ${PROTO}"
- TEST_STATUS=1
- fi
-
- if [ "0" != "${RET}" ]; then
- RET=1
- fi
-
- if [ "${EXPECTED}" != "${RET}" ] ; then
- echo " test_ping failed: expected: ${EXPECTED}; got ${RET}"
- TEST_STATUS=1
- fi
-}
-
-test_gso()
-{
- local readonly PROTO=$1
- local readonly PKT_SZ=5000
- local IP_DST=""
- : > ${TMPFILE} # trim the capture file
-
- # check that nc is present
- command -v nc >/dev/null 2>&1 || \
- { echo >&2 "nc is not available: skipping TSO tests"; return; }
-
- # listen on port 9000, capture TCP into $TMPFILE
- if [ "${PROTO}" == "IPv4" ] ; then
- IP_DST=${IPv4_DST}
- ip netns exec ${NS3} bash -c \
- "nc -4 -l -p 9000 > ${TMPFILE} &"
- elif [ "${PROTO}" == "IPv6" ] ; then
- IP_DST=${IPv6_DST}
- ip netns exec ${NS3} bash -c \
- "nc -6 -l -p 9000 > ${TMPFILE} &"
- RET=$?
- else
- echo " test_gso: unknown PROTO: ${PROTO}"
- TEST_STATUS=1
- fi
- sleep 1 # let nc start listening
-
- # send a packet larger than MTU
- ip netns exec ${NS1} bash -c \
- "dd if=/dev/zero bs=$PKT_SZ count=1 > /dev/tcp/${IP_DST}/9000 2>/dev/null"
- sleep 2 # let the packet get delivered
-
- # verify we received all expected bytes
- SZ=$(stat -c %s ${TMPFILE})
- if [ "$SZ" != "$PKT_SZ" ] ; then
- echo " test_gso failed: ${PROTO}"
- TEST_STATUS=1
- fi
-}
-
-test_egress()
-{
- local readonly ENCAP=$1
- echo "starting egress ${ENCAP} encap test ${VRF}"
- setup
-
- # by default, pings work
- test_ping IPv4 0
- test_ping IPv6 0
-
- # remove NS2->DST routes, ping fails
- ip -netns ${NS2} route del ${IPv4_DST}/32 dev veth3 ${VRF}
- ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3 ${VRF}
- test_ping IPv4 1
- test_ping IPv6 1
-
- # install replacement routes (LWT/eBPF), pings succeed
- if [ "${ENCAP}" == "IPv4" ] ; then
- ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj \
- ${BPF_FILE} sec encap_gre dev veth1 ${VRF}
- ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj \
- ${BPF_FILE} sec encap_gre dev veth1 ${VRF}
- elif [ "${ENCAP}" == "IPv6" ] ; then
- ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj \
- ${BPF_FILE} sec encap_gre6 dev veth1 ${VRF}
- ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj \
- ${BPF_FILE} sec encap_gre6 dev veth1 ${VRF}
- else
- echo " unknown encap ${ENCAP}"
- TEST_STATUS=1
- fi
- test_ping IPv4 0
- test_ping IPv6 0
-
- # skip GSO tests with VRF: VRF routing needs properly assigned
- # source IP/device, which is easy to do with ping and hard with dd/nc.
- if [ -z "${VRF}" ] ; then
- test_gso IPv4
- test_gso IPv6
- fi
-
- # a negative test: remove routes to GRE devices: ping fails
- remove_routes_to_gredev
- test_ping IPv4 1
- test_ping IPv6 1
-
- # another negative test
- add_unreachable_routes_to_gredev
- test_ping IPv4 1
- test_ping IPv6 1
-
- cleanup
- process_test_results
-}
-
-test_ingress()
-{
- local readonly ENCAP=$1
- echo "starting ingress ${ENCAP} encap test ${VRF}"
- setup
-
- # need to wait a bit for IPv6 to autoconf, otherwise
- # ping6 sometimes fails with "unable to bind to address"
-
- # by default, pings work
- test_ping IPv4 0
- test_ping IPv6 0
-
- # remove NS2->DST routes, pings fail
- ip -netns ${NS2} route del ${IPv4_DST}/32 dev veth3 ${VRF}
- ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3 ${VRF}
- test_ping IPv4 1
- test_ping IPv6 1
-
- # install replacement routes (LWT/eBPF), pings succeed
- if [ "${ENCAP}" == "IPv4" ] ; then
- ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj \
- ${BPF_FILE} sec encap_gre dev veth2 ${VRF}
- ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj \
- ${BPF_FILE} sec encap_gre dev veth2 ${VRF}
- elif [ "${ENCAP}" == "IPv6" ] ; then
- ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj \
- ${BPF_FILE} sec encap_gre6 dev veth2 ${VRF}
- ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj \
- ${BPF_FILE} sec encap_gre6 dev veth2 ${VRF}
- else
- echo "FAIL: unknown encap ${ENCAP}"
- TEST_STATUS=1
- fi
- test_ping IPv4 0
- test_ping IPv6 0
-
- # a negative test: remove routes to GRE devices: ping fails
- remove_routes_to_gredev
- test_ping IPv4 1
- test_ping IPv6 1
-
- # another negative test
- add_unreachable_routes_to_gredev
- test_ping IPv4 1
- test_ping IPv6 1
-
- cleanup
- process_test_results
-}
-
-VRF=""
-test_egress IPv4
-test_egress IPv6
-test_ingress IPv4
-test_ingress IPv6
-
-VRF="vrf red"
-test_egress IPv4
-test_egress IPv6
-test_ingress IPv4
-test_ingress IPv6
-
-print_test_summary_and_exit
diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.sh b/tools/testing/selftests/bpf/test_lwt_seg6local.sh
deleted file mode 100755
index 0efea2292d6a..000000000000
--- a/tools/testing/selftests/bpf/test_lwt_seg6local.sh
+++ /dev/null
@@ -1,156 +0,0 @@
-#!/bin/bash
-# Connects 6 network namespaces through veths.
-# Each NS may have different IPv6 global scope addresses :
-# NS1 ---- NS2 ---- NS3 ---- NS4 ---- NS5 ---- NS6
-# fb00::1 fd00::1 fd00::2 fd00::3 fb00::6
-# fc42::1 fd00::4
-#
-# All IPv6 packets going to fb00::/16 through NS2 will be encapsulated in a
-# IPv6 header with a Segment Routing Header, with segments :
-# fd00::1 -> fd00::2 -> fd00::3 -> fd00::4
-#
-# 3 fd00::/16 IPv6 addresses are binded to seg6local End.BPF actions :
-# - fd00::1 : add a TLV, change the flags and apply a End.X action to fc42::1
-# - fd00::2 : remove the TLV, change the flags, add a tag
-# - fd00::3 : apply an End.T action to fd00::4, through routing table 117
-#
-# fd00::4 is a simple Segment Routing node decapsulating the inner IPv6 packet.
-# Each End.BPF action will validate the operations applied on the SRH by the
-# previous BPF program in the chain, otherwise the packet is dropped.
-#
-# An UDP datagram is sent from fb00::1 to fb00::6. The test succeeds if this
-# datagram can be read on NS6 when binding to fb00::6.
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-BPF_FILE="test_lwt_seg6local.bpf.o"
-readonly NS1="ns1-$(mktemp -u XXXXXX)"
-readonly NS2="ns2-$(mktemp -u XXXXXX)"
-readonly NS3="ns3-$(mktemp -u XXXXXX)"
-readonly NS4="ns4-$(mktemp -u XXXXXX)"
-readonly NS5="ns5-$(mktemp -u XXXXXX)"
-readonly NS6="ns6-$(mktemp -u XXXXXX)"
-
-msg="skip all tests:"
-if [ $UID != 0 ]; then
- echo $msg please run this as root >&2
- exit $ksft_skip
-fi
-
-TMP_FILE="/tmp/selftest_lwt_seg6local.txt"
-
-cleanup()
-{
- if [ "$?" = "0" ]; then
- echo "selftests: test_lwt_seg6local [PASS]";
- else
- echo "selftests: test_lwt_seg6local [FAILED]";
- fi
-
- set +e
- ip netns del ${NS1} 2> /dev/null
- ip netns del ${NS2} 2> /dev/null
- ip netns del ${NS3} 2> /dev/null
- ip netns del ${NS4} 2> /dev/null
- ip netns del ${NS5} 2> /dev/null
- ip netns del ${NS6} 2> /dev/null
- rm -f $TMP_FILE
-}
-
-set -e
-
-ip netns add ${NS1}
-ip netns add ${NS2}
-ip netns add ${NS3}
-ip netns add ${NS4}
-ip netns add ${NS5}
-ip netns add ${NS6}
-
-trap cleanup 0 2 3 6 9
-
-ip link add veth1 type veth peer name veth2
-ip link add veth3 type veth peer name veth4
-ip link add veth5 type veth peer name veth6
-ip link add veth7 type veth peer name veth8
-ip link add veth9 type veth peer name veth10
-
-ip link set veth1 netns ${NS1}
-ip link set veth2 netns ${NS2}
-ip link set veth3 netns ${NS2}
-ip link set veth4 netns ${NS3}
-ip link set veth5 netns ${NS3}
-ip link set veth6 netns ${NS4}
-ip link set veth7 netns ${NS4}
-ip link set veth8 netns ${NS5}
-ip link set veth9 netns ${NS5}
-ip link set veth10 netns ${NS6}
-
-ip netns exec ${NS1} ip link set dev veth1 up
-ip netns exec ${NS2} ip link set dev veth2 up
-ip netns exec ${NS2} ip link set dev veth3 up
-ip netns exec ${NS3} ip link set dev veth4 up
-ip netns exec ${NS3} ip link set dev veth5 up
-ip netns exec ${NS4} ip link set dev veth6 up
-ip netns exec ${NS4} ip link set dev veth7 up
-ip netns exec ${NS5} ip link set dev veth8 up
-ip netns exec ${NS5} ip link set dev veth9 up
-ip netns exec ${NS6} ip link set dev veth10 up
-ip netns exec ${NS6} ip link set dev lo up
-
-# All link scope addresses and routes required between veths
-ip netns exec ${NS1} ip -6 addr add fb00::12/16 dev veth1 scope link
-ip netns exec ${NS1} ip -6 route add fb00::21 dev veth1 scope link
-ip netns exec ${NS2} ip -6 addr add fb00::21/16 dev veth2 scope link
-ip netns exec ${NS2} ip -6 addr add fb00::34/16 dev veth3 scope link
-ip netns exec ${NS2} ip -6 route add fb00::43 dev veth3 scope link
-ip netns exec ${NS3} ip -6 route add fb00::65 dev veth5 scope link
-ip netns exec ${NS3} ip -6 addr add fb00::43/16 dev veth4 scope link
-ip netns exec ${NS3} ip -6 addr add fb00::56/16 dev veth5 scope link
-ip netns exec ${NS4} ip -6 addr add fb00::65/16 dev veth6 scope link
-ip netns exec ${NS4} ip -6 addr add fb00::78/16 dev veth7 scope link
-ip netns exec ${NS4} ip -6 route add fb00::87 dev veth7 scope link
-ip netns exec ${NS5} ip -6 addr add fb00::87/16 dev veth8 scope link
-ip netns exec ${NS5} ip -6 addr add fb00::910/16 dev veth9 scope link
-ip netns exec ${NS5} ip -6 route add fb00::109 dev veth9 scope link
-ip netns exec ${NS5} ip -6 route add fb00::109 table 117 dev veth9 scope link
-ip netns exec ${NS6} ip -6 addr add fb00::109/16 dev veth10 scope link
-
-ip netns exec ${NS1} ip -6 addr add fb00::1/16 dev lo
-ip netns exec ${NS1} ip -6 route add fb00::6 dev veth1 via fb00::21
-
-ip netns exec ${NS2} ip -6 route add fb00::6 encap bpf in obj ${BPF_FILE} sec encap_srh dev veth2
-ip netns exec ${NS2} ip -6 route add fd00::1 dev veth3 via fb00::43 scope link
-
-ip netns exec ${NS3} ip -6 route add fc42::1 dev veth5 via fb00::65
-ip netns exec ${NS3} ip -6 route add fd00::1 encap seg6local action End.BPF endpoint obj ${BPF_FILE} sec add_egr_x dev veth4
-
-ip netns exec ${NS4} ip -6 route add fd00::2 encap seg6local action End.BPF endpoint obj ${BPF_FILE} sec pop_egr dev veth6
-ip netns exec ${NS4} ip -6 addr add fc42::1 dev lo
-ip netns exec ${NS4} ip -6 route add fd00::3 dev veth7 via fb00::87
-
-ip netns exec ${NS5} ip -6 route add fd00::4 table 117 dev veth9 via fb00::109
-ip netns exec ${NS5} ip -6 route add fd00::3 encap seg6local action End.BPF endpoint obj ${BPF_FILE} sec inspect_t dev veth8
-
-ip netns exec ${NS6} ip -6 addr add fb00::6/16 dev lo
-ip netns exec ${NS6} ip -6 addr add fd00::4/16 dev lo
-
-ip netns exec ${NS1} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-ip netns exec ${NS2} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-ip netns exec ${NS3} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-ip netns exec ${NS4} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-ip netns exec ${NS5} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-
-ip netns exec ${NS6} sysctl net.ipv6.conf.all.seg6_enabled=1 > /dev/null
-ip netns exec ${NS6} sysctl net.ipv6.conf.lo.seg6_enabled=1 > /dev/null
-ip netns exec ${NS6} sysctl net.ipv6.conf.veth10.seg6_enabled=1 > /dev/null
-
-ip netns exec ${NS6} nc -l -6 -u -d 7330 > $TMP_FILE &
-ip netns exec ${NS1} bash -c "echo 'foobar' | nc -w0 -6 -u -p 2121 -s fb00::1 fb00::6 7330"
-sleep 5 # wait enough time to ensure the UDP datagram arrived to the last segment
-kill -TERM $!
-
-if [[ $(< $TMP_FILE) != "foobar" ]]; then
- exit 1
-fi
-
-exit 0
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 8b40e9496af1..986ce32b113a 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -1396,9 +1396,10 @@ static void test_map_stress(void)
#define MAX_DELAY_US 50000
#define MIN_DELAY_RANGE_US 5000
-static bool retry_for_again_or_busy(int err)
+static bool can_retry(int err)
{
- return (err == EAGAIN || err == EBUSY);
+ return (err == EAGAIN || err == EBUSY ||
+ (err == ENOMEM && map_opts.map_flags == BPF_F_NO_PREALLOC));
}
int map_update_retriable(int map_fd, const void *key, const void *value, int flags, int attempts,
@@ -1451,12 +1452,12 @@ static void test_update_delete(unsigned int fn, void *data)
if (do_update) {
err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES,
- retry_for_again_or_busy);
+ can_retry);
if (err)
printf("error %d %d\n", err, errno);
assert(err == 0);
err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES,
- retry_for_again_or_busy);
+ can_retry);
if (err)
printf("error %d %d\n", err, errno);
assert(err == 0);
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index c9e745d49493..309d9d4a8ace 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -88,7 +88,9 @@ static void stdio_hijack(char **log_buf, size_t *log_cnt)
#endif
}
-static void stdio_restore_cleanup(void)
+static pthread_mutex_t stdout_lock = PTHREAD_MUTEX_INITIALIZER;
+
+static void stdio_restore(void)
{
#ifdef __GLIBC__
if (verbose() && env.worker_id == -1) {
@@ -98,34 +100,33 @@ static void stdio_restore_cleanup(void)
fflush(stdout);
+ pthread_mutex_lock(&stdout_lock);
+
if (env.subtest_state) {
- fclose(env.subtest_state->stdout_saved);
+ if (env.subtest_state->stdout_saved)
+ fclose(env.subtest_state->stdout_saved);
env.subtest_state->stdout_saved = NULL;
stdout = env.test_state->stdout_saved;
stderr = env.test_state->stdout_saved;
} else {
- fclose(env.test_state->stdout_saved);
+ if (env.test_state->stdout_saved)
+ fclose(env.test_state->stdout_saved);
env.test_state->stdout_saved = NULL;
+ stdout = env.stdout_saved;
+ stderr = env.stderr_saved;
}
+
+ pthread_mutex_unlock(&stdout_lock);
#endif
}
-static void stdio_restore(void)
+static int traffic_monitor_print_fn(const char *format, va_list args)
{
-#ifdef __GLIBC__
- if (verbose() && env.worker_id == -1) {
- /* nothing to do, output to stdout by default */
- return;
- }
-
- if (stdout == env.stdout_saved)
- return;
-
- stdio_restore_cleanup();
+ pthread_mutex_lock(&stdout_lock);
+ vfprintf(stdout, format, args);
+ pthread_mutex_unlock(&stdout_lock);
- stdout = env.stdout_saved;
- stderr = env.stderr_saved;
-#endif
+ return 0;
}
/* Adapted from perf/util/string.c */
@@ -474,8 +475,6 @@ static void dump_test_log(const struct prog_test_def *test,
print_test_result(test, test_state);
}
-static void stdio_restore(void);
-
/* A bunch of tests set custom affinity per-thread and/or per-process. Reset
* it after each test/sub-test.
*/
@@ -490,13 +489,11 @@ static void reset_affinity(void)
err = sched_setaffinity(0, sizeof(cpuset), &cpuset);
if (err < 0) {
- stdio_restore();
fprintf(stderr, "Failed to reset process affinity: %d!\n", err);
exit(EXIT_ERR_SETUP_INFRA);
}
err = pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset);
if (err < 0) {
- stdio_restore();
fprintf(stderr, "Failed to reset thread affinity: %d!\n", err);
exit(EXIT_ERR_SETUP_INFRA);
}
@@ -514,7 +511,6 @@ static void save_netns(void)
static void restore_netns(void)
{
if (setns(env.saved_netns_fd, CLONE_NEWNET) == -1) {
- stdio_restore();
perror("setns(CLONE_NEWNS)");
exit(EXIT_ERR_SETUP_INFRA);
}
@@ -541,7 +537,8 @@ void test__end_subtest(void)
test_result(subtest_state->error_cnt,
subtest_state->skipped));
- stdio_restore_cleanup();
+ stdio_restore();
+
env.subtest_state = NULL;
}
@@ -1270,8 +1267,10 @@ void crash_handler(int signum)
sz = backtrace(bt, ARRAY_SIZE(bt));
- if (env.stdout_saved)
- stdio_restore();
+ fflush(stdout);
+ stdout = env.stdout_saved;
+ stderr = env.stderr_saved;
+
if (env.test) {
env.test_state->error_cnt++;
dump_test_log(env.test, env.test_state, true, false, NULL);
@@ -1365,10 +1364,19 @@ static int recv_message(int sock, struct msg *msg)
return ret;
}
+static bool ns_is_needed(const char *test_name)
+{
+ if (strlen(test_name) < 3)
+ return false;
+
+ return !strncmp(test_name, "ns_", 3);
+}
+
static void run_one_test(int test_num)
{
struct prog_test_def *test = &prog_test_defs[test_num];
struct test_state *state = &test_states[test_num];
+ struct netns_obj *ns = NULL;
env.test = test;
env.test_state = state;
@@ -1376,10 +1384,13 @@ static void run_one_test(int test_num)
stdio_hijack(&state->log_buf, &state->log_cnt);
watchdog_start();
+ if (ns_is_needed(test->test_name))
+ ns = netns_new(test->test_name, true);
if (test->run_test)
test->run_test();
else if (test->run_serial_test)
test->run_serial_test();
+ netns_free(ns);
watchdog_stop();
/* ensure last sub-test is finalized properly */
@@ -1388,6 +1399,8 @@ static void run_one_test(int test_num)
state->tested = true;
+ stdio_restore();
+
if (verbose() && env.worker_id == -1)
print_test_result(test, state);
@@ -1396,7 +1409,6 @@ static void run_one_test(int test_num)
if (test->need_cgroup_cleanup)
cleanup_cgroup_environment();
- stdio_restore();
free(stop_libbpf_log_capture());
dump_test_log(test, state, false, false, NULL);
@@ -1931,6 +1943,9 @@ int main(int argc, char **argv)
sigaction(SIGSEGV, &sigact, NULL);
+ env.stdout_saved = stdout;
+ env.stderr_saved = stderr;
+
env.secs_till_notify = 10;
env.secs_till_kill = 120;
err = argp_parse(&argp, argc, argv, 0, NULL, &env);
@@ -1947,6 +1962,8 @@ int main(int argc, char **argv)
libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
libbpf_set_print(libbpf_print_fn);
+ traffic_monitor_set_print(traffic_monitor_print_fn);
+
srand(time(NULL));
env.jit_enabled = is_jit_enabled();
@@ -1957,9 +1974,6 @@ int main(int argc, char **argv)
return -1;
}
- env.stdout_saved = stdout;
- env.stderr_saved = stderr;
-
env.has_testmod = true;
if (!env.list_test_names) {
/* ensure previous instance of the module is unloaded */
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 404d0d4915d5..870694f2a359 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -427,6 +427,14 @@ void hexdump(const char *prefix, const void *buf, size_t len);
goto goto_label; \
})
+#define SYS_FAIL(goto_label, fmt, ...) \
+ ({ \
+ char cmd[1024]; \
+ snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
+ if (!ASSERT_NEQ(0, system(cmd), cmd)) \
+ goto goto_label; \
+ })
+
#define ALL_TO_DEV_NULL " >/dev/null 2>&1"
#define SYS_NOFAIL(fmt, ...) \
diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh
deleted file mode 100755
index d9661b9988ba..000000000000
--- a/tools/testing/selftests/bpf/test_tunnel.sh
+++ /dev/null
@@ -1,645 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-# End-to-end eBPF tunnel test suite
-# The script tests BPF network tunnel implementation.
-#
-# Topology:
-# ---------
-# root namespace | at_ns0 namespace
-# |
-# ----------- | -----------
-# | tnl dev | | | tnl dev | (overlay network)
-# ----------- | -----------
-# metadata-mode | native-mode
-# with bpf |
-# |
-# ---------- | ----------
-# | veth1 | --------- | veth0 | (underlay network)
-# ---------- peer ----------
-#
-#
-# Device Configuration
-# --------------------
-# Root namespace with metadata-mode tunnel + BPF
-# Device names and addresses:
-# veth1 IP: 172.16.1.200, IPv6: 00::22 (underlay)
-# tunnel dev <type>11, ex: gre11, IPv4: 10.1.1.200, IPv6: 1::22 (overlay)
-#
-# Namespace at_ns0 with native tunnel
-# Device names and addresses:
-# veth0 IPv4: 172.16.1.100, IPv6: 00::11 (underlay)
-# tunnel dev <type>00, ex: gre00, IPv4: 10.1.1.100, IPv6: 1::11 (overlay)
-#
-#
-# End-to-end ping packet flow
-# ---------------------------
-# Most of the tests start by namespace creation, device configuration,
-# then ping the underlay and overlay network. When doing 'ping 10.1.1.100'
-# from root namespace, the following operations happen:
-# 1) Route lookup shows 10.1.1.100/24 belongs to tnl dev, fwd to tnl dev.
-# 2) Tnl device's egress BPF program is triggered and set the tunnel metadata,
-# with remote_ip=172.16.1.100 and others.
-# 3) Outer tunnel header is prepended and route the packet to veth1's egress
-# 4) veth0's ingress queue receive the tunneled packet at namespace at_ns0
-# 5) Tunnel protocol handler, ex: vxlan_rcv, decap the packet
-# 6) Forward the packet to the overlay tnl dev
-
-BPF_FILE="test_tunnel_kern.bpf.o"
-BPF_PIN_TUNNEL_DIR="/sys/fs/bpf/tc/tunnel"
-PING_ARG="-c 3 -w 10 -q"
-ret=0
-GREEN='\033[0;92m'
-RED='\033[0;31m'
-NC='\033[0m' # No Color
-
-config_device()
-{
- ip netns add at_ns0
- ip link add veth0 type veth peer name veth1
- ip link set veth0 netns at_ns0
- ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
- ip netns exec at_ns0 ip link set dev veth0 up
- ip link set dev veth1 up mtu 1500
- ip addr add dev veth1 172.16.1.200/24
-}
-
-add_gre_tunnel()
-{
- tun_key=
- if [ -n "$1" ]; then
- tun_key="key $1"
- fi
-
- # at_ns0 namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE seq $tun_key \
- local 172.16.1.100 remote 172.16.1.200
- ip netns exec at_ns0 ip link set dev $DEV_NS up
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
-
- # root namespace
- ip link add dev $DEV type $TYPE $tun_key external
- ip link set dev $DEV up
- ip addr add dev $DEV 10.1.1.200/24
-}
-
-add_ip6gretap_tunnel()
-{
-
- # assign ipv6 address
- ip netns exec at_ns0 ip addr add ::11/96 dev veth0
- ip netns exec at_ns0 ip link set dev veth0 up
- ip addr add dev veth1 ::22/96
- ip link set dev veth1 up
-
- # at_ns0 namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE seq flowlabel 0xbcdef key 2 \
- local ::11 remote ::22
-
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
- ip netns exec at_ns0 ip addr add dev $DEV_NS fc80::100/96
- ip netns exec at_ns0 ip link set dev $DEV_NS up
-
- # root namespace
- ip link add dev $DEV type $TYPE external
- ip addr add dev $DEV 10.1.1.200/24
- ip addr add dev $DEV fc80::200/24
- ip link set dev $DEV up
-}
-
-add_erspan_tunnel()
-{
- # at_ns0 namespace
- if [ "$1" == "v1" ]; then
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE seq key 2 \
- local 172.16.1.100 remote 172.16.1.200 \
- erspan_ver 1 erspan 123
- else
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE seq key 2 \
- local 172.16.1.100 remote 172.16.1.200 \
- erspan_ver 2 erspan_dir egress erspan_hwid 3
- fi
- ip netns exec at_ns0 ip link set dev $DEV_NS up
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
-
- # root namespace
- ip link add dev $DEV type $TYPE external
- ip link set dev $DEV up
- ip addr add dev $DEV 10.1.1.200/24
-}
-
-add_ip6erspan_tunnel()
-{
-
- # assign ipv6 address
- ip netns exec at_ns0 ip addr add ::11/96 dev veth0
- ip netns exec at_ns0 ip link set dev veth0 up
- ip addr add dev veth1 ::22/96
- ip link set dev veth1 up
-
- # at_ns0 namespace
- if [ "$1" == "v1" ]; then
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE seq key 2 \
- local ::11 remote ::22 \
- erspan_ver 1 erspan 123
- else
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE seq key 2 \
- local ::11 remote ::22 \
- erspan_ver 2 erspan_dir egress erspan_hwid 7
- fi
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
- ip netns exec at_ns0 ip link set dev $DEV_NS up
-
- # root namespace
- ip link add dev $DEV type $TYPE external
- ip addr add dev $DEV 10.1.1.200/24
- ip link set dev $DEV up
-}
-
-add_geneve_tunnel()
-{
- # at_ns0 namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE \
- id 2 dstport 6081 remote 172.16.1.200
- ip netns exec at_ns0 ip link set dev $DEV_NS up
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
-
- # root namespace
- ip link add dev $DEV type $TYPE dstport 6081 external
- ip link set dev $DEV up
- ip addr add dev $DEV 10.1.1.200/24
-}
-
-add_ip6geneve_tunnel()
-{
- ip netns exec at_ns0 ip addr add ::11/96 dev veth0
- ip netns exec at_ns0 ip link set dev veth0 up
- ip addr add dev veth1 ::22/96
- ip link set dev veth1 up
-
- # at_ns0 namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE id 22 \
- remote ::22 # geneve has no local option
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
- ip netns exec at_ns0 ip link set dev $DEV_NS up
-
- # root namespace
- ip link add dev $DEV type $TYPE external
- ip addr add dev $DEV 10.1.1.200/24
- ip link set dev $DEV up
-}
-
-add_ipip_tunnel()
-{
- # at_ns0 namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE \
- local 172.16.1.100 remote 172.16.1.200
- ip netns exec at_ns0 ip link set dev $DEV_NS up
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
-
- # root namespace
- ip link add dev $DEV type $TYPE external
- ip link set dev $DEV up
- ip addr add dev $DEV 10.1.1.200/24
-}
-
-add_ip6tnl_tunnel()
-{
- ip netns exec at_ns0 ip addr add ::11/96 dev veth0
- ip netns exec at_ns0 ip link set dev veth0 up
- ip addr add dev veth1 ::22/96
- ip link set dev veth1 up
-
- # at_ns0 namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE \
- local ::11 remote ::22
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
- ip netns exec at_ns0 ip addr add dev $DEV_NS 1::11/96
- ip netns exec at_ns0 ip link set dev $DEV_NS up
-
- # root namespace
- ip link add dev $DEV type $TYPE external
- ip addr add dev $DEV 10.1.1.200/24
- ip addr add dev $DEV 1::22/96
- ip link set dev $DEV up
-}
-
-test_gre()
-{
- TYPE=gretap
- DEV_NS=gretap00
- DEV=gretap11
- ret=0
-
- check $TYPE
- config_device
- add_gre_tunnel 2
- attach_bpf $DEV gre_set_tunnel gre_get_tunnel
- ping $PING_ARG 10.1.1.100
- check_err $?
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_gre_no_tunnel_key()
-{
- TYPE=gre
- DEV_NS=gre00
- DEV=gre11
- ret=0
-
- check $TYPE
- config_device
- add_gre_tunnel
- attach_bpf $DEV gre_set_tunnel_no_key gre_get_tunnel
- ping $PING_ARG 10.1.1.100
- check_err $?
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_ip6gre()
-{
- TYPE=ip6gre
- DEV_NS=ip6gre00
- DEV=ip6gre11
- ret=0
-
- check $TYPE
- config_device
- # reuse the ip6gretap function
- add_ip6gretap_tunnel
- attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel
- # underlay
- ping6 $PING_ARG ::11
- # overlay: ipv4 over ipv6
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- ping $PING_ARG 10.1.1.100
- check_err $?
- # overlay: ipv6 over ipv6
- ip netns exec at_ns0 ping6 $PING_ARG fc80::200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_ip6gretap()
-{
- TYPE=ip6gretap
- DEV_NS=ip6gretap00
- DEV=ip6gretap11
- ret=0
-
- check $TYPE
- config_device
- add_ip6gretap_tunnel
- attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel
- # underlay
- ping6 $PING_ARG ::11
- # overlay: ipv4 over ipv6
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- ping $PING_ARG 10.1.1.100
- check_err $?
- # overlay: ipv6 over ipv6
- ip netns exec at_ns0 ping6 $PING_ARG fc80::200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_erspan()
-{
- TYPE=erspan
- DEV_NS=erspan00
- DEV=erspan11
- ret=0
-
- check $TYPE
- config_device
- add_erspan_tunnel $1
- attach_bpf $DEV erspan_set_tunnel erspan_get_tunnel
- ping $PING_ARG 10.1.1.100
- check_err $?
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_ip6erspan()
-{
- TYPE=ip6erspan
- DEV_NS=ip6erspan00
- DEV=ip6erspan11
- ret=0
-
- check $TYPE
- config_device
- add_ip6erspan_tunnel $1
- attach_bpf $DEV ip4ip6erspan_set_tunnel ip4ip6erspan_get_tunnel
- ping6 $PING_ARG ::11
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_geneve()
-{
- TYPE=geneve
- DEV_NS=geneve00
- DEV=geneve11
- ret=0
-
- check $TYPE
- config_device
- add_geneve_tunnel
- attach_bpf $DEV geneve_set_tunnel geneve_get_tunnel
- ping $PING_ARG 10.1.1.100
- check_err $?
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_ip6geneve()
-{
- TYPE=geneve
- DEV_NS=ip6geneve00
- DEV=ip6geneve11
- ret=0
-
- check $TYPE
- config_device
- add_ip6geneve_tunnel
- attach_bpf $DEV ip6geneve_set_tunnel ip6geneve_get_tunnel
- ping $PING_ARG 10.1.1.100
- check_err $?
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: ip6$TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: ip6$TYPE"${NC}
-}
-
-test_ipip()
-{
- TYPE=ipip
- DEV_NS=ipip00
- DEV=ipip11
- ret=0
-
- check $TYPE
- config_device
- add_ipip_tunnel
- ip link set dev veth1 mtu 1500
- attach_bpf $DEV ipip_set_tunnel ipip_get_tunnel
- ping $PING_ARG 10.1.1.100
- check_err $?
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_ipip6()
-{
- TYPE=ip6tnl
- DEV_NS=ipip6tnl00
- DEV=ipip6tnl11
- ret=0
-
- check $TYPE
- config_device
- add_ip6tnl_tunnel
- ip link set dev veth1 mtu 1500
- attach_bpf $DEV ipip6_set_tunnel ipip6_get_tunnel
- # underlay
- ping6 $PING_ARG ::11
- # ip4 over ip6
- ping $PING_ARG 10.1.1.100
- check_err $?
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_ip6ip6()
-{
- TYPE=ip6tnl
- DEV_NS=ip6ip6tnl00
- DEV=ip6ip6tnl11
- ret=0
-
- check $TYPE
- config_device
- add_ip6tnl_tunnel
- ip link set dev veth1 mtu 1500
- attach_bpf $DEV ip6ip6_set_tunnel ip6ip6_get_tunnel
- # underlay
- ping6 $PING_ARG ::11
- # ip6 over ip6
- ping6 $PING_ARG 1::11
- check_err $?
- ip netns exec at_ns0 ping6 $PING_ARG 1::22
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: ip6$TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: ip6$TYPE"${NC}
-}
-
-attach_bpf()
-{
- DEV=$1
- SET=$2
- GET=$3
- mkdir -p ${BPF_PIN_TUNNEL_DIR}
- bpftool prog loadall ${BPF_FILE} ${BPF_PIN_TUNNEL_DIR}/
- tc qdisc add dev $DEV clsact
- tc filter add dev $DEV egress bpf da object-pinned ${BPF_PIN_TUNNEL_DIR}/$SET
- tc filter add dev $DEV ingress bpf da object-pinned ${BPF_PIN_TUNNEL_DIR}/$GET
-}
-
-cleanup()
-{
- rm -rf ${BPF_PIN_TUNNEL_DIR}
-
- ip netns delete at_ns0 2> /dev/null
- ip link del veth1 2> /dev/null
- ip link del ipip11 2> /dev/null
- ip link del ipip6tnl11 2> /dev/null
- ip link del ip6ip6tnl11 2> /dev/null
- ip link del gretap11 2> /dev/null
- ip link del gre11 2> /dev/null
- ip link del ip6gre11 2> /dev/null
- ip link del ip6gretap11 2> /dev/null
- ip link del geneve11 2> /dev/null
- ip link del ip6geneve11 2> /dev/null
- ip link del erspan11 2> /dev/null
- ip link del ip6erspan11 2> /dev/null
-}
-
-cleanup_exit()
-{
- echo "CATCH SIGKILL or SIGINT, cleanup and exit"
- cleanup
- exit 0
-}
-
-check()
-{
- ip link help 2>&1 | grep -q "\s$1\s"
- if [ $? -ne 0 ];then
- echo "SKIP $1: iproute2 not support"
- cleanup
- return 1
- fi
-}
-
-enable_debug()
-{
- echo 'file ip_gre.c +p' > /sys/kernel/debug/dynamic_debug/control
- echo 'file ip6_gre.c +p' > /sys/kernel/debug/dynamic_debug/control
- echo 'file geneve.c +p' > /sys/kernel/debug/dynamic_debug/control
- echo 'file ipip.c +p' > /sys/kernel/debug/dynamic_debug/control
-}
-
-check_err()
-{
- if [ $ret -eq 0 ]; then
- ret=$1
- fi
-}
-
-bpf_tunnel_test()
-{
- local errors=0
-
- echo "Testing GRE tunnel..."
- test_gre
- errors=$(( $errors + $? ))
-
- echo "Testing GRE tunnel (without tunnel keys)..."
- test_gre_no_tunnel_key
- errors=$(( $errors + $? ))
-
- echo "Testing IP6GRE tunnel..."
- test_ip6gre
- errors=$(( $errors + $? ))
-
- echo "Testing IP6GRETAP tunnel..."
- test_ip6gretap
- errors=$(( $errors + $? ))
-
- echo "Testing ERSPAN tunnel..."
- test_erspan v2
- errors=$(( $errors + $? ))
-
- echo "Testing IP6ERSPAN tunnel..."
- test_ip6erspan v2
- errors=$(( $errors + $? ))
-
- echo "Testing GENEVE tunnel..."
- test_geneve
- errors=$(( $errors + $? ))
-
- echo "Testing IP6GENEVE tunnel..."
- test_ip6geneve
- errors=$(( $errors + $? ))
-
- echo "Testing IPIP tunnel..."
- test_ipip
- errors=$(( $errors + $? ))
-
- echo "Testing IPIP6 tunnel..."
- test_ipip6
- errors=$(( $errors + $? ))
-
- echo "Testing IP6IP6 tunnel..."
- test_ip6ip6
- errors=$(( $errors + $? ))
-
- return $errors
-}
-
-trap cleanup 0 3 6
-trap cleanup_exit 2 9
-
-cleanup
-bpf_tunnel_test
-
-if [ $? -ne 0 ]; then
- echo -e "$(basename $0): ${RED}FAIL${NC}"
- exit 1
-fi
-echo -e "$(basename $0): ${GREEN}PASS${NC}"
-exit 0
diff --git a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
deleted file mode 100755
index 4c3c3fdd2d73..000000000000
--- a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
+++ /dev/null
@@ -1,214 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# Test topology:
-# - - - - - - - - - - - - - - - - - - -
-# | veth1 veth2 veth3 | ns0
-# - -| - - - - - - | - - - - - - | - -
-# --------- --------- ---------
-# | veth0 | | veth0 | | veth0 |
-# --------- --------- ---------
-# ns1 ns2 ns3
-#
-# Test modules:
-# XDP modes: generic, native, native + egress_prog
-#
-# Test cases:
-# ARP: Testing BPF_F_BROADCAST, the ingress interface also should receive
-# the redirects.
-# ns1 -> gw: ns1, ns2, ns3, should receive the arp request
-# IPv4: Testing BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS, the ingress
-# interface should not receive the redirects.
-# ns1 -> gw: ns1 should not receive, ns2, ns3 should receive redirects.
-# IPv6: Testing none flag, all the pkts should be redirected back
-# ping test: ns1 -> ns2 (block), echo requests will be redirect back
-# egress_prog:
-# all src mac should be egress interface's mac
-
-# netns numbers
-NUM=3
-IFACES=""
-DRV_MODE="xdpgeneric xdpdrv xdpegress"
-PASS=0
-FAIL=0
-LOG_DIR=$(mktemp -d)
-declare -a NS
-NS[0]="ns0-$(mktemp -u XXXXXX)"
-NS[1]="ns1-$(mktemp -u XXXXXX)"
-NS[2]="ns2-$(mktemp -u XXXXXX)"
-NS[3]="ns3-$(mktemp -u XXXXXX)"
-
-test_pass()
-{
- echo "Pass: $@"
- PASS=$((PASS + 1))
-}
-
-test_fail()
-{
- echo "fail: $@"
- FAIL=$((FAIL + 1))
-}
-
-clean_up()
-{
- for i in $(seq 0 $NUM); do
- ip netns del ${NS[$i]} 2> /dev/null
- done
-}
-
-# Kselftest framework requirement - SKIP code is 4.
-check_env()
-{
- ip link set dev lo xdpgeneric off &>/dev/null
- if [ $? -ne 0 ];then
- echo "selftests: [SKIP] Could not run test without the ip xdpgeneric support"
- exit 4
- fi
-
- which tcpdump &>/dev/null
- if [ $? -ne 0 ];then
- echo "selftests: [SKIP] Could not run test without tcpdump"
- exit 4
- fi
-}
-
-setup_ns()
-{
- local mode=$1
- IFACES=""
-
- if [ "$mode" = "xdpegress" ]; then
- mode="xdpdrv"
- fi
-
- ip netns add ${NS[0]}
- for i in $(seq $NUM); do
- ip netns add ${NS[$i]}
- ip -n ${NS[$i]} link add veth0 type veth peer name veth$i netns ${NS[0]}
- ip -n ${NS[$i]} link set veth0 up
- ip -n ${NS[0]} link set veth$i up
-
- ip -n ${NS[$i]} addr add 192.0.2.$i/24 dev veth0
- ip -n ${NS[$i]} addr add 2001:db8::$i/64 dev veth0
- # Add a neigh entry for IPv4 ping test
- ip -n ${NS[$i]} neigh add 192.0.2.253 lladdr 00:00:00:00:00:01 dev veth0
- ip -n ${NS[$i]} link set veth0 $mode obj \
- xdp_dummy.bpf.o sec xdp &> /dev/null || \
- { test_fail "Unable to load dummy xdp" && exit 1; }
- IFACES="$IFACES veth$i"
- veth_mac[$i]=$(ip -n ${NS[0]} link show veth$i | awk '/link\/ether/ {print $2}')
- done
-}
-
-do_egress_tests()
-{
- local mode=$1
-
- # mac test
- ip netns exec ${NS[2]} tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-2_${mode}.log &
- ip netns exec ${NS[3]} tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-3_${mode}.log &
- sleep 0.5
- ip netns exec ${NS[1]} ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null
- sleep 0.5
- pkill tcpdump
-
- # mac check
- grep -q "${veth_mac[2]} > ff:ff:ff:ff:ff:ff" ${LOG_DIR}/mac_ns1-2_${mode}.log && \
- test_pass "$mode mac ns1-2" || test_fail "$mode mac ns1-2"
- grep -q "${veth_mac[3]} > ff:ff:ff:ff:ff:ff" ${LOG_DIR}/mac_ns1-3_${mode}.log && \
- test_pass "$mode mac ns1-3" || test_fail "$mode mac ns1-3"
-}
-
-do_ping_tests()
-{
- local mode=$1
-
- # ping6 test: echo request should be redirect back to itself, not others
- ip netns exec ${NS[1]} ip neigh add 2001:db8::2 dev veth0 lladdr 00:00:00:00:00:02
-
- ip netns exec ${NS[1]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-1_${mode}.log &
- ip netns exec ${NS[2]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-2_${mode}.log &
- ip netns exec ${NS[3]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-3_${mode}.log &
- sleep 0.5
- # ARP test
- ip netns exec ${NS[1]} arping -q -c 2 -I veth0 192.0.2.254
- # IPv4 test
- ip netns exec ${NS[1]} ping 192.0.2.253 -i 0.1 -c 4 &> /dev/null
- # IPv6 test
- ip netns exec ${NS[1]} ping6 2001:db8::2 -i 0.1 -c 2 &> /dev/null
- sleep 0.5
- pkill tcpdump
-
- # All netns should receive the redirect arp requests
- [ $(grep -cF "who-has 192.0.2.254" ${LOG_DIR}/ns1-1_${mode}.log) -eq 4 ] && \
- test_pass "$mode arp(F_BROADCAST) ns1-1" || \
- test_fail "$mode arp(F_BROADCAST) ns1-1"
- [ $(grep -cF "who-has 192.0.2.254" ${LOG_DIR}/ns1-2_${mode}.log) -eq 2 ] && \
- test_pass "$mode arp(F_BROADCAST) ns1-2" || \
- test_fail "$mode arp(F_BROADCAST) ns1-2"
- [ $(grep -cF "who-has 192.0.2.254" ${LOG_DIR}/ns1-3_${mode}.log) -eq 2 ] && \
- test_pass "$mode arp(F_BROADCAST) ns1-3" || \
- test_fail "$mode arp(F_BROADCAST) ns1-3"
-
- # ns1 should not receive the redirect echo request, others should
- [ $(grep -c "ICMP echo request" ${LOG_DIR}/ns1-1_${mode}.log) -eq 4 ] && \
- test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-1" || \
- test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-1"
- [ $(grep -c "ICMP echo request" ${LOG_DIR}/ns1-2_${mode}.log) -eq 4 ] && \
- test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-2" || \
- test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-2"
- [ $(grep -c "ICMP echo request" ${LOG_DIR}/ns1-3_${mode}.log) -eq 4 ] && \
- test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-3" || \
- test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-3"
-
- # ns1 should receive the echo request, ns2 should not
- [ $(grep -c "ICMP6, echo request" ${LOG_DIR}/ns1-1_${mode}.log) -eq 4 ] && \
- test_pass "$mode IPv6 (no flags) ns1-1" || \
- test_fail "$mode IPv6 (no flags) ns1-1"
- [ $(grep -c "ICMP6, echo request" ${LOG_DIR}/ns1-2_${mode}.log) -eq 0 ] && \
- test_pass "$mode IPv6 (no flags) ns1-2" || \
- test_fail "$mode IPv6 (no flags) ns1-2"
-}
-
-do_tests()
-{
- local mode=$1
- local drv_p
-
- case ${mode} in
- xdpdrv) drv_p="-N";;
- xdpegress) drv_p="-X";;
- xdpgeneric) drv_p="-S";;
- esac
-
- ip netns exec ${NS[0]} ./xdp_redirect_multi $drv_p $IFACES &> ${LOG_DIR}/xdp_redirect_${mode}.log &
- xdp_pid=$!
- sleep 1
- if ! ps -p $xdp_pid > /dev/null; then
- test_fail "$mode xdp_redirect_multi start failed"
- return 1
- fi
-
- if [ "$mode" = "xdpegress" ]; then
- do_egress_tests $mode
- else
- do_ping_tests $mode
- fi
-
- kill $xdp_pid
-}
-
-check_env
-
-trap clean_up EXIT
-
-for mode in ${DRV_MODE}; do
- setup_ns $mode
- do_tests $mode
- clean_up
-done
-rm -rf ${LOG_DIR}
-
-echo "Summary: PASS $PASS, FAIL $FAIL"
-[ $FAIL -eq 0 ] && exit 0 || exit 1
diff --git a/tools/testing/selftests/bpf/test_xdp_vlan.sh b/tools/testing/selftests/bpf/test_xdp_vlan.sh
deleted file mode 100755
index fbcaa9f0120b..000000000000
--- a/tools/testing/selftests/bpf/test_xdp_vlan.sh
+++ /dev/null
@@ -1,233 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-# Author: Jesper Dangaard Brouer <hawk@kernel.org>
-
-# Kselftest framework requirement - SKIP code is 4.
-readonly KSFT_SKIP=4
-readonly NS1="ns1-$(mktemp -u XXXXXX)"
-readonly NS2="ns2-$(mktemp -u XXXXXX)"
-
-# Allow wrapper scripts to name test
-if [ -z "$TESTNAME" ]; then
- TESTNAME=xdp_vlan
-fi
-
-# Default XDP mode
-XDP_MODE=xdpgeneric
-
-usage() {
- echo "Testing XDP + TC eBPF VLAN manipulations: $TESTNAME"
- echo ""
- echo "Usage: $0 [-vfh]"
- echo " -v | --verbose : Verbose"
- echo " --flush : Flush before starting (e.g. after --interactive)"
- echo " --interactive : Keep netns setup running after test-run"
- echo " --mode=XXX : Choose XDP mode (xdp | xdpgeneric | xdpdrv)"
- echo ""
-}
-
-valid_xdp_mode()
-{
- local mode=$1
-
- case "$mode" in
- xdpgeneric | xdpdrv | xdp)
- return 0
- ;;
- *)
- return 1
- esac
-}
-
-cleanup()
-{
- local status=$?
-
- if [ "$status" = "0" ]; then
- echo "selftests: $TESTNAME [PASS]";
- else
- echo "selftests: $TESTNAME [FAILED]";
- fi
-
- if [ -n "$INTERACTIVE" ]; then
- echo "Namespace setup still active explore with:"
- echo " ip netns exec ${NS1} bash"
- echo " ip netns exec ${NS2} bash"
- exit $status
- fi
-
- set +e
- ip link del veth1 2> /dev/null
- ip netns del ${NS1} 2> /dev/null
- ip netns del ${NS2} 2> /dev/null
-}
-
-# Using external program "getopt" to get --long-options
-OPTIONS=$(getopt -o hvfi: \
- --long verbose,flush,help,interactive,debug,mode: -- "$@")
-if (( $? != 0 )); then
- usage
- echo "selftests: $TESTNAME [FAILED] Error calling getopt, unknown option?"
- exit 2
-fi
-eval set -- "$OPTIONS"
-
-## --- Parse command line arguments / parameters ---
-while true; do
- case "$1" in
- -v | --verbose)
- export VERBOSE=yes
- shift
- ;;
- -i | --interactive | --debug )
- INTERACTIVE=yes
- shift
- ;;
- -f | --flush )
- cleanup
- shift
- ;;
- --mode )
- shift
- XDP_MODE=$1
- shift
- ;;
- -- )
- shift
- break
- ;;
- -h | --help )
- usage;
- echo "selftests: $TESTNAME [SKIP] usage help info requested"
- exit $KSFT_SKIP
- ;;
- * )
- shift
- break
- ;;
- esac
-done
-
-if [ "$EUID" -ne 0 ]; then
- echo "selftests: $TESTNAME [FAILED] need root privileges"
- exit 1
-fi
-
-valid_xdp_mode $XDP_MODE
-if [ $? -ne 0 ]; then
- echo "selftests: $TESTNAME [FAILED] unknown XDP mode ($XDP_MODE)"
- exit 1
-fi
-
-ip link set dev lo xdpgeneric off 2>/dev/null > /dev/null
-if [ $? -ne 0 ]; then
- echo "selftests: $TESTNAME [SKIP] need ip xdp support"
- exit $KSFT_SKIP
-fi
-
-# Interactive mode likely require us to cleanup netns
-if [ -n "$INTERACTIVE" ]; then
- ip link del veth1 2> /dev/null
- ip netns del ${NS1} 2> /dev/null
- ip netns del ${NS2} 2> /dev/null
-fi
-
-# Exit on failure
-set -e
-
-# Some shell-tools dependencies
-which ip > /dev/null
-which tc > /dev/null
-which ethtool > /dev/null
-
-# Make rest of shell verbose, showing comments as doc/info
-if [ -n "$VERBOSE" ]; then
- set -v
-fi
-
-# Create two namespaces
-ip netns add ${NS1}
-ip netns add ${NS2}
-
-# Run cleanup if failing or on kill
-trap cleanup 0 2 3 6 9
-
-# Create veth pair
-ip link add veth1 type veth peer name veth2
-
-# Move veth1 and veth2 into the respective namespaces
-ip link set veth1 netns ${NS1}
-ip link set veth2 netns ${NS2}
-
-# NOTICE: XDP require VLAN header inside packet payload
-# - Thus, disable VLAN offloading driver features
-# - For veth REMEMBER TX side VLAN-offload
-#
-# Disable rx-vlan-offload (mostly needed on ns1)
-ip netns exec ${NS1} ethtool -K veth1 rxvlan off
-ip netns exec ${NS2} ethtool -K veth2 rxvlan off
-#
-# Disable tx-vlan-offload (mostly needed on ns2)
-ip netns exec ${NS2} ethtool -K veth2 txvlan off
-ip netns exec ${NS1} ethtool -K veth1 txvlan off
-
-export IPADDR1=100.64.41.1
-export IPADDR2=100.64.41.2
-
-# In ns1/veth1 add IP-addr on plain net_device
-ip netns exec ${NS1} ip addr add ${IPADDR1}/24 dev veth1
-ip netns exec ${NS1} ip link set veth1 up
-
-# In ns2/veth2 create VLAN device
-export VLAN=4011
-export DEVNS2=veth2
-ip netns exec ${NS2} ip link add link $DEVNS2 name $DEVNS2.$VLAN type vlan id $VLAN
-ip netns exec ${NS2} ip addr add ${IPADDR2}/24 dev $DEVNS2.$VLAN
-ip netns exec ${NS2} ip link set $DEVNS2 up
-ip netns exec ${NS2} ip link set $DEVNS2.$VLAN up
-
-# Bringup lo in netns (to avoids confusing people using --interactive)
-ip netns exec ${NS1} ip link set lo up
-ip netns exec ${NS2} ip link set lo up
-
-# At this point, the hosts cannot reach each-other,
-# because ns2 are using VLAN tags on the packets.
-
-ip netns exec ${NS2} sh -c 'ping -W 1 -c 1 100.64.41.1 || echo "Success: First ping must fail"'
-
-
-# Now we can use the test_xdp_vlan.c program to pop/push these VLAN tags
-# ----------------------------------------------------------------------
-# In ns1: ingress use XDP to remove VLAN tags
-export DEVNS1=veth1
-export BPF_FILE=test_xdp_vlan.bpf.o
-
-# First test: Remove VLAN by setting VLAN ID 0, using "xdp_vlan_change"
-export XDP_PROG=xdp_vlan_change
-ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE object $BPF_FILE section $XDP_PROG
-
-# In ns1: egress use TC to add back VLAN tag 4011
-# (del cmd)
-# tc qdisc del dev $DEVNS1 clsact 2> /dev/null
-#
-ip netns exec ${NS1} tc qdisc add dev $DEVNS1 clsact
-ip netns exec ${NS1} tc filter add dev $DEVNS1 egress \
- prio 1 handle 1 bpf da obj $BPF_FILE sec tc_vlan_push
-
-# Now the namespaces can reach each-other, test with ping:
-ip netns exec ${NS2} ping -i 0.2 -W 2 -c 2 $IPADDR1
-ip netns exec ${NS1} ping -i 0.2 -W 2 -c 2 $IPADDR2
-
-# Second test: Replace xdp prog, that fully remove vlan header
-#
-# Catch kernel bug for generic-XDP, that does didn't allow us to
-# remove a VLAN header, because skb->protocol still contain VLAN
-# ETH_P_8021Q indication, and this cause overwriting of our changes.
-#
-export XDP_PROG=xdp_vlan_remove_outer2
-ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE off
-ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE object $BPF_FILE section $XDP_PROG
-
-# Now the namespaces should still be able reach each-other, test with ping:
-ip netns exec ${NS2} ping -i 0.2 -W 2 -c 2 $IPADDR1
-ip netns exec ${NS1} ping -i 0.2 -W 2 -c 2 $IPADDR2
diff --git a/tools/testing/selftests/bpf/test_xdp_vlan_mode_generic.sh b/tools/testing/selftests/bpf/test_xdp_vlan_mode_generic.sh
deleted file mode 100755
index c515326d6d59..000000000000
--- a/tools/testing/selftests/bpf/test_xdp_vlan_mode_generic.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-# Exit on failure
-set -e
-
-# Wrapper script to test generic-XDP
-export TESTNAME=xdp_vlan_mode_generic
-./test_xdp_vlan.sh --mode=xdpgeneric
diff --git a/tools/testing/selftests/bpf/test_xdp_vlan_mode_native.sh b/tools/testing/selftests/bpf/test_xdp_vlan_mode_native.sh
deleted file mode 100755
index 5cf7ce1f16c1..000000000000
--- a/tools/testing/selftests/bpf/test_xdp_vlan_mode_native.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-# Exit on failure
-set -e
-
-# Wrapper script to test native-XDP
-export TESTNAME=xdp_vlan_mode_native
-./test_xdp_vlan.sh --mode=xdpdrv
diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
index 06af5029885b..a18972ffdeb6 100644
--- a/tools/testing/selftests/bpf/veristat.c
+++ b/tools/testing/selftests/bpf/veristat.c
@@ -3,6 +3,7 @@
#define _GNU_SOURCE
#include <argp.h>
#include <libgen.h>
+#include <ctype.h>
#include <string.h>
#include <stdlib.h>
#include <sched.h>
@@ -154,6 +155,16 @@ struct filter {
bool abs;
};
+struct var_preset {
+ char *name;
+ enum { INTEGRAL, ENUMERATOR } type;
+ union {
+ long long ivalue;
+ char *svalue;
+ };
+ bool applied;
+};
+
static struct env {
char **filenames;
int filename_cnt;
@@ -195,6 +206,8 @@ static struct env {
int progs_processed;
int progs_skipped;
int top_src_lines;
+ struct var_preset *presets;
+ int npresets;
} env;
static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
@@ -246,16 +259,20 @@ static const struct argp_option opts[] = {
{ "test-reg-invariants", 'r', NULL, 0,
"Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" },
{ "top-src-lines", 'S', "N", 0, "Emit N most frequent source code lines" },
+ { "set-global-vars", 'G', "GLOBAL", 0, "Set global variables provided in the expression, for example \"var1 = 1\"" },
{},
};
static int parse_stats(const char *stats_str, struct stat_specs *specs);
static int append_filter(struct filter **filters, int *cnt, const char *str);
static int append_filter_file(const char *path);
+static int append_var_preset(struct var_preset **presets, int *cnt, const char *expr);
+static int append_var_preset_file(const char *filename);
+static int append_file(const char *path);
+static int append_file_from_file(const char *path);
static error_t parse_arg(int key, char *arg, struct argp_state *state)
{
- void *tmp;
int err;
switch (key) {
@@ -353,15 +370,26 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
argp_usage(state);
}
break;
+ case 'G': {
+ if (arg[0] == '@')
+ err = append_var_preset_file(arg + 1);
+ else
+ err = append_var_preset(&env.presets, &env.npresets, arg);
+ if (err) {
+ fprintf(stderr, "Failed to parse global variable presets: %s\n", arg);
+ return err;
+ }
+ break;
+ }
case ARGP_KEY_ARG:
- tmp = realloc(env.filenames, (env.filename_cnt + 1) * sizeof(*env.filenames));
- if (!tmp)
- return -ENOMEM;
- env.filenames = tmp;
- env.filenames[env.filename_cnt] = strdup(arg);
- if (!env.filenames[env.filename_cnt])
- return -ENOMEM;
- env.filename_cnt++;
+ if (arg[0] == '@')
+ err = append_file_from_file(arg + 1);
+ else
+ err = append_file(arg);
+ if (err) {
+ fprintf(stderr, "Failed to collect BPF object files: %d\n", err);
+ return err;
+ }
break;
default:
return ARGP_ERR_UNKNOWN;
@@ -632,7 +660,7 @@ static int append_filter_file(const char *path)
f = fopen(path, "r");
if (!f) {
err = -errno;
- fprintf(stderr, "Failed to open filters in '%s': %d\n", path, err);
+ fprintf(stderr, "Failed to open filters in '%s': %s\n", path, strerror(-err));
return err;
}
@@ -662,6 +690,49 @@ static const struct stat_specs default_output_spec = {
},
};
+static int append_file(const char *path)
+{
+ void *tmp;
+
+ tmp = realloc(env.filenames, (env.filename_cnt + 1) * sizeof(*env.filenames));
+ if (!tmp)
+ return -ENOMEM;
+ env.filenames = tmp;
+ env.filenames[env.filename_cnt] = strdup(path);
+ if (!env.filenames[env.filename_cnt])
+ return -ENOMEM;
+ env.filename_cnt++;
+ return 0;
+}
+
+static int append_file_from_file(const char *path)
+{
+ char buf[1024];
+ int err = 0;
+ FILE *f;
+
+ f = fopen(path, "r");
+ if (!f) {
+ err = -errno;
+ fprintf(stderr, "Failed to open object files list in '%s': %s\n",
+ path, strerror(errno));
+ return err;
+ }
+
+ while (fscanf(f, " %1023[^\n]\n", buf) == 1) {
+ /* lines starting with # are comments, skip them */
+ if (buf[0] == '\0' || buf[0] == '#')
+ continue;
+ err = append_file(buf);
+ if (err)
+ goto cleanup;
+ }
+
+cleanup:
+ fclose(f);
+ return err;
+}
+
static const struct stat_specs default_csv_output_spec = {
.spec_cnt = 14,
.ids = {
@@ -1163,13 +1234,13 @@ static void fixup_obj(struct bpf_object *obj, struct bpf_program *prog, const ch
bpf_program__set_expected_attach_type(prog, attach_type);
if (!env.quiet) {
- printf("Using guessed program type '%s' for %s/%s...\n",
+ fprintf(stderr, "Using guessed program type '%s' for %s/%s...\n",
libbpf_bpf_prog_type_str(prog_type),
filename, prog_name);
}
} else {
if (!env.quiet) {
- printf("Failed to guess program type for freplace program with context type name '%s' for %s/%s. Consider using canonical type names to help veristat...\n",
+ fprintf(stderr, "Failed to guess program type for freplace program with context type name '%s' for %s/%s. Consider using canonical type names to help veristat...\n",
ctx_name, filename, prog_name);
}
}
@@ -1292,6 +1363,261 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
return 0;
};
+static int append_var_preset(struct var_preset **presets, int *cnt, const char *expr)
+{
+ void *tmp;
+ struct var_preset *cur;
+ char var[256], val[256], *val_end;
+ long long value;
+ int n;
+
+ tmp = realloc(*presets, (*cnt + 1) * sizeof(**presets));
+ if (!tmp)
+ return -ENOMEM;
+ *presets = tmp;
+ cur = &(*presets)[*cnt];
+ memset(cur, 0, sizeof(*cur));
+ (*cnt)++;
+
+ if (sscanf(expr, "%s = %s %n", var, val, &n) != 2 || n != strlen(expr)) {
+ fprintf(stderr, "Failed to parse expression '%s'\n", expr);
+ return -EINVAL;
+ }
+
+ if (val[0] == '-' || isdigit(val[0])) {
+ /* must be a number */
+ errno = 0;
+ value = strtoll(val, &val_end, 0);
+ if (errno == ERANGE) {
+ errno = 0;
+ value = strtoull(val, &val_end, 0);
+ }
+ if (errno || *val_end != '\0') {
+ fprintf(stderr, "Failed to parse value '%s'\n", val);
+ return -EINVAL;
+ }
+ cur->ivalue = value;
+ cur->type = INTEGRAL;
+ } else {
+ /* if not a number, consider it enum value */
+ cur->svalue = strdup(val);
+ if (!cur->svalue)
+ return -ENOMEM;
+ cur->type = ENUMERATOR;
+ }
+
+ cur->name = strdup(var);
+ if (!cur->name)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static int append_var_preset_file(const char *filename)
+{
+ char buf[1024];
+ FILE *f;
+ int err = 0;
+
+ f = fopen(filename, "rt");
+ if (!f) {
+ err = -errno;
+ fprintf(stderr, "Failed to open presets in '%s': %s\n", filename, strerror(-err));
+ return -EINVAL;
+ }
+
+ while (fscanf(f, " %1023[^\n]\n", buf) == 1) {
+ if (buf[0] == '\0' || buf[0] == '#')
+ continue;
+
+ err = append_var_preset(&env.presets, &env.npresets, buf);
+ if (err)
+ goto cleanup;
+ }
+
+cleanup:
+ fclose(f);
+ return err;
+}
+
+static bool is_signed_type(const struct btf_type *t)
+{
+ if (btf_is_int(t))
+ return btf_int_encoding(t) & BTF_INT_SIGNED;
+ if (btf_is_any_enum(t))
+ return btf_kflag(t);
+ return true;
+}
+
+static int enum_value_from_name(const struct btf *btf, const struct btf_type *t,
+ const char *evalue, long long *retval)
+{
+ if (btf_is_enum(t)) {
+ struct btf_enum *e = btf_enum(t);
+ int i, n = btf_vlen(t);
+
+ for (i = 0; i < n; ++i, ++e) {
+ const char *cur_name = btf__name_by_offset(btf, e->name_off);
+
+ if (strcmp(cur_name, evalue) == 0) {
+ *retval = e->val;
+ return 0;
+ }
+ }
+ } else if (btf_is_enum64(t)) {
+ struct btf_enum64 *e = btf_enum64(t);
+ int i, n = btf_vlen(t);
+
+ for (i = 0; i < n; ++i, ++e) {
+ const char *cur_name = btf__name_by_offset(btf, e->name_off);
+ __u64 value = btf_enum64_value(e);
+
+ if (strcmp(cur_name, evalue) == 0) {
+ *retval = value;
+ return 0;
+ }
+ }
+ }
+ return -EINVAL;
+}
+
+static bool is_preset_supported(const struct btf_type *t)
+{
+ return btf_is_int(t) || btf_is_enum(t) || btf_is_enum64(t);
+}
+
+static int set_global_var(struct bpf_object *obj, struct btf *btf, const struct btf_type *t,
+ struct bpf_map *map, struct btf_var_secinfo *sinfo,
+ struct var_preset *preset)
+{
+ const struct btf_type *base_type;
+ void *ptr;
+ long long value = preset->ivalue;
+ size_t size;
+
+ base_type = btf__type_by_id(btf, btf__resolve_type(btf, t->type));
+ if (!base_type) {
+ fprintf(stderr, "Failed to resolve type %d\n", t->type);
+ return -EINVAL;
+ }
+ if (!is_preset_supported(base_type)) {
+ fprintf(stderr, "Setting value for type %s is not supported\n",
+ btf__name_by_offset(btf, base_type->name_off));
+ return -EINVAL;
+ }
+
+ if (preset->type == ENUMERATOR) {
+ if (btf_is_any_enum(base_type)) {
+ if (enum_value_from_name(btf, base_type, preset->svalue, &value)) {
+ fprintf(stderr,
+ "Failed to find integer value for enum element %s\n",
+ preset->svalue);
+ return -EINVAL;
+ }
+ } else {
+ fprintf(stderr, "Value %s is not supported for type %s\n",
+ preset->svalue, btf__name_by_offset(btf, base_type->name_off));
+ return -EINVAL;
+ }
+ }
+
+ /* Check if value fits into the target variable size */
+ if (sinfo->size < sizeof(value)) {
+ bool is_signed = is_signed_type(base_type);
+ __u32 unsigned_bits = sinfo->size * 8 - (is_signed ? 1 : 0);
+ long long max_val = 1ll << unsigned_bits;
+
+ if (value >= max_val || value < -max_val) {
+ fprintf(stderr,
+ "Variable %s value %lld is out of range [%lld; %lld]\n",
+ btf__name_by_offset(btf, t->name_off), value,
+ is_signed ? -max_val : 0, max_val - 1);
+ return -EINVAL;
+ }
+ }
+
+ ptr = bpf_map__initial_value(map, &size);
+ if (!ptr || sinfo->offset + sinfo->size > size)
+ return -EINVAL;
+
+ if (__BYTE_ORDER == __LITTLE_ENDIAN) {
+ memcpy(ptr + sinfo->offset, &value, sinfo->size);
+ } else { /* __BYTE_ORDER == __BIG_ENDIAN */
+ __u8 src_offset = sizeof(value) - sinfo->size;
+
+ memcpy(ptr + sinfo->offset, (void *)&value + src_offset, sinfo->size);
+ }
+ return 0;
+}
+
+static int set_global_vars(struct bpf_object *obj, struct var_preset *presets, int npresets)
+{
+ struct btf_var_secinfo *sinfo;
+ const char *sec_name;
+ const struct btf_type *t;
+ struct bpf_map *map;
+ struct btf *btf;
+ int i, j, k, n, cnt, err = 0;
+
+ if (npresets == 0)
+ return 0;
+
+ btf = bpf_object__btf(obj);
+ if (!btf)
+ return -EINVAL;
+
+ cnt = btf__type_cnt(btf);
+ for (i = 1; i != cnt; ++i) {
+ t = btf__type_by_id(btf, i);
+
+ if (!btf_is_datasec(t))
+ continue;
+
+ sinfo = btf_var_secinfos(t);
+ sec_name = btf__name_by_offset(btf, t->name_off);
+ map = bpf_object__find_map_by_name(obj, sec_name);
+ if (!map)
+ continue;
+
+ n = btf_vlen(t);
+ for (j = 0; j < n; ++j, ++sinfo) {
+ const struct btf_type *var_type = btf__type_by_id(btf, sinfo->type);
+ const char *var_name;
+
+ if (!btf_is_var(var_type))
+ continue;
+
+ var_name = btf__name_by_offset(btf, var_type->name_off);
+
+ for (k = 0; k < npresets; ++k) {
+ if (strcmp(var_name, presets[k].name) != 0)
+ continue;
+
+ if (presets[k].applied) {
+ fprintf(stderr, "Variable %s is set more than once",
+ var_name);
+ return -EINVAL;
+ }
+
+ err = set_global_var(obj, btf, var_type, map, sinfo, presets + k);
+ if (err)
+ return err;
+
+ presets[k].applied = true;
+ break;
+ }
+ }
+ }
+ for (i = 0; i < npresets; ++i) {
+ if (!presets[i].applied) {
+ fprintf(stderr, "Global variable preset %s has not been applied\n",
+ presets[i].name);
+ }
+ presets[i].applied = false;
+ }
+ return err;
+}
+
static int process_obj(const char *filename)
{
const char *base_filename = basename(strdupa(filename));
@@ -1341,6 +1667,11 @@ static int process_obj(const char *filename)
if (prog_cnt == 1) {
prog = bpf_object__next_program(obj, NULL);
bpf_program__set_autoload(prog, true);
+ err = set_global_vars(obj, env.presets, env.npresets);
+ if (err) {
+ fprintf(stderr, "Failed to set global variables %d\n", err);
+ goto cleanup;
+ }
process_prog(filename, obj, prog);
goto cleanup;
}
@@ -1355,6 +1686,12 @@ static int process_obj(const char *filename)
goto cleanup;
}
+ err = set_global_vars(tobj, env.presets, env.npresets);
+ if (err) {
+ fprintf(stderr, "Failed to set global variables %d\n", err);
+ goto cleanup;
+ }
+
lprog = NULL;
bpf_object__for_each_program(tprog, tobj) {
const char *tprog_name = bpf_program__name(tprog);
@@ -2460,5 +2797,11 @@ int main(int argc, char **argv)
free(env.deny_filters[i].prog_glob);
}
free(env.deny_filters);
+ for (i = 0; i < env.npresets; ++i) {
+ free(env.presets[i].name);
+ if (env.presets[i].type == ENUMERATOR)
+ free(env.presets[i].svalue);
+ }
+ free(env.presets);
return -err;
}
diff --git a/tools/testing/selftests/bpf/with_addr.sh b/tools/testing/selftests/bpf/with_addr.sh
deleted file mode 100755
index ffcd3953f94c..000000000000
--- a/tools/testing/selftests/bpf/with_addr.sh
+++ /dev/null
@@ -1,54 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# add private ipv4 and ipv6 addresses to loopback
-
-readonly V6_INNER='100::a/128'
-readonly V4_INNER='192.168.0.1/32'
-
-if getopts ":s" opt; then
- readonly SIT_DEV_NAME='sixtofourtest0'
- readonly V6_SIT='2::/64'
- readonly V4_SIT='172.17.0.1/32'
- shift
-fi
-
-fail() {
- echo "error: $*" 1>&2
- exit 1
-}
-
-setup() {
- ip -6 addr add "${V6_INNER}" dev lo || fail 'failed to setup v6 address'
- ip -4 addr add "${V4_INNER}" dev lo || fail 'failed to setup v4 address'
-
- if [[ -n "${V6_SIT}" ]]; then
- ip link add "${SIT_DEV_NAME}" type sit remote any local any \
- || fail 'failed to add sit'
- ip link set dev "${SIT_DEV_NAME}" up \
- || fail 'failed to bring sit device up'
- ip -6 addr add "${V6_SIT}" dev "${SIT_DEV_NAME}" \
- || fail 'failed to setup v6 SIT address'
- ip -4 addr add "${V4_SIT}" dev "${SIT_DEV_NAME}" \
- || fail 'failed to setup v4 SIT address'
- fi
-
- sleep 2 # avoid race causing bind to fail
-}
-
-cleanup() {
- if [[ -n "${V6_SIT}" ]]; then
- ip -4 addr del "${V4_SIT}" dev "${SIT_DEV_NAME}"
- ip -6 addr del "${V6_SIT}" dev "${SIT_DEV_NAME}"
- ip link del "${SIT_DEV_NAME}"
- fi
-
- ip -4 addr del "${V4_INNER}" dev lo
- ip -6 addr del "${V6_INNER}" dev lo
-}
-
-trap cleanup EXIT
-
-setup
-"$@"
-exit "$?"
diff --git a/tools/testing/selftests/bpf/with_tunnels.sh b/tools/testing/selftests/bpf/with_tunnels.sh
deleted file mode 100755
index e24949ed3a20..000000000000
--- a/tools/testing/selftests/bpf/with_tunnels.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# setup tunnels for flow dissection test
-
-readonly SUFFIX="test_$(mktemp -u XXXX)"
-CONFIG="remote 127.0.0.2 local 127.0.0.1 dev lo"
-
-setup() {
- ip link add "ipip_${SUFFIX}" type ipip ${CONFIG}
- ip link add "gre_${SUFFIX}" type gre ${CONFIG}
- ip link add "sit_${SUFFIX}" type sit ${CONFIG}
-
- echo "tunnels before test:"
- ip tunnel show
-
- ip link set "ipip_${SUFFIX}" up
- ip link set "gre_${SUFFIX}" up
- ip link set "sit_${SUFFIX}" up
-}
-
-
-cleanup() {
- ip tunnel del "ipip_${SUFFIX}"
- ip tunnel del "gre_${SUFFIX}"
- ip tunnel del "sit_${SUFFIX}"
-
- echo "tunnels after test:"
- ip tunnel show
-}
-
-trap cleanup EXIT
-
-setup
-"$@"
-exit "$?"
diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
index 6f7b15d6c6ed..3d8de0d4c96a 100644
--- a/tools/testing/selftests/bpf/xdp_hw_metadata.c
+++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
@@ -13,6 +13,7 @@
* - UDP 9091 packets trigger TX reply
* - TX HW timestamp is requested and reported back upon completion
* - TX checksum is requested
+ * - TX launch time HW offload is requested for transmission
*/
#include <test_progs.h>
@@ -37,6 +38,15 @@
#include <time.h>
#include <unistd.h>
#include <libgen.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <linux/pkt_sched.h>
+#include <linux/pkt_cls.h>
+#include <linux/ethtool.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
#include "xdp_metadata.h"
@@ -64,6 +74,18 @@ int rxq;
bool skip_tx;
__u64 last_hw_rx_timestamp;
__u64 last_xdp_rx_timestamp;
+__u64 last_launch_time;
+__u64 launch_time_delta_to_hw_rx_timestamp;
+int launch_time_queue;
+
+#define run_command(cmd, ...) \
+({ \
+ char command[1024]; \
+ memset(command, 0, sizeof(command)); \
+ snprintf(command, sizeof(command), cmd, ##__VA_ARGS__); \
+ fprintf(stderr, "Running: %s\n", command); \
+ system(command); \
+})
void test__fail(void) { /* for network_helpers.c */ }
@@ -298,6 +320,12 @@ static bool complete_tx(struct xsk *xsk, clockid_t clock_id)
if (meta->completion.tx_timestamp) {
__u64 ref_tstamp = gettime(clock_id);
+ if (launch_time_delta_to_hw_rx_timestamp) {
+ print_tstamp_delta("HW Launch-time",
+ "HW TX-complete-time",
+ last_launch_time,
+ meta->completion.tx_timestamp);
+ }
print_tstamp_delta("HW TX-complete-time", "User TX-complete-time",
meta->completion.tx_timestamp, ref_tstamp);
print_tstamp_delta("XDP RX-time", "User TX-complete-time",
@@ -395,6 +423,17 @@ static void ping_pong(struct xsk *xsk, void *rx_packet, clockid_t clock_id)
xsk, ntohs(udph->check), ntohs(want_csum),
meta->request.csum_start, meta->request.csum_offset);
+ /* Set the value of launch time */
+ if (launch_time_delta_to_hw_rx_timestamp) {
+ meta->flags |= XDP_TXMD_FLAGS_LAUNCH_TIME;
+ meta->request.launch_time = last_hw_rx_timestamp +
+ launch_time_delta_to_hw_rx_timestamp;
+ last_launch_time = meta->request.launch_time;
+ print_tstamp_delta("HW RX-time", "HW Launch-time",
+ last_hw_rx_timestamp,
+ meta->request.launch_time);
+ }
+
memcpy(data, rx_packet, len); /* don't share umem chunk for simplicity */
tx_desc->options |= XDP_TX_METADATA;
tx_desc->len = len;
@@ -407,6 +446,7 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t
const struct xdp_desc *rx_desc;
struct pollfd fds[rxq + 1];
__u64 comp_addr;
+ __u64 deadline;
__u64 addr;
__u32 idx = 0;
int ret;
@@ -477,9 +517,15 @@ peek:
if (ret)
printf("kick_tx ret=%d\n", ret);
- for (int j = 0; j < 500; j++) {
+ /* wait 1 second + cover launch time */
+ deadline = gettime(clock_id) +
+ NANOSEC_PER_SEC +
+ launch_time_delta_to_hw_rx_timestamp;
+ while (true) {
if (complete_tx(xsk, clock_id))
break;
+ if (gettime(clock_id) >= deadline)
+ break;
usleep(10);
}
}
@@ -608,6 +654,10 @@ static void print_usage(void)
" -h Display this help and exit\n\n"
" -m Enable multi-buffer XDP for larger MTU\n"
" -r Don't generate AF_XDP reply (rx metadata only)\n"
+ " -l Delta of launch time relative to HW RX-time in ns\n"
+ " default: 0 ns (launch time request is disabled)\n"
+ " -L Tx Queue to be enabled with launch time offload\n"
+ " default: 0 (Tx Queue 0)\n"
"Generate test packets on the other machine with:\n"
" echo -n xdp | nc -u -q1 <dst_ip> 9091\n";
@@ -618,7 +668,7 @@ static void read_args(int argc, char *argv[])
{
int opt;
- while ((opt = getopt(argc, argv, "chmr")) != -1) {
+ while ((opt = getopt(argc, argv, "chmrl:L:")) != -1) {
switch (opt) {
case 'c':
bind_flags &= ~XDP_USE_NEED_WAKEUP;
@@ -634,6 +684,12 @@ static void read_args(int argc, char *argv[])
case 'r':
skip_tx = true;
break;
+ case 'l':
+ launch_time_delta_to_hw_rx_timestamp = atoll(optarg);
+ break;
+ case 'L':
+ launch_time_queue = atoll(optarg);
+ break;
case '?':
if (isprint(optopt))
fprintf(stderr, "Unknown option: -%c\n", optopt);
@@ -657,23 +713,118 @@ static void read_args(int argc, char *argv[])
error(-1, errno, "Invalid interface name");
}
+void clean_existing_configurations(void)
+{
+ /* Check and delete root qdisc if exists */
+ if (run_command("sudo tc qdisc show dev %s | grep -q 'qdisc mqprio 8001:'", ifname) == 0)
+ run_command("sudo tc qdisc del dev %s root", ifname);
+
+ /* Check and delete ingress qdisc if exists */
+ if (run_command("sudo tc qdisc show dev %s | grep -q 'qdisc ingress ffff:'", ifname) == 0)
+ run_command("sudo tc qdisc del dev %s ingress", ifname);
+
+ /* Check and delete ethtool filters if any exist */
+ if (run_command("sudo ethtool -n %s | grep -q 'Filter:'", ifname) == 0) {
+ run_command("sudo ethtool -n %s | grep 'Filter:' | awk '{print $2}' | xargs -n1 sudo ethtool -N %s delete >&2",
+ ifname, ifname);
+ }
+}
+
+#define MAX_TC 16
+
int main(int argc, char *argv[])
{
clockid_t clock_id = CLOCK_TAI;
+ struct bpf_program *prog;
int server_fd = -1;
+ size_t map_len = 0;
+ size_t que_len = 0;
+ char *buf = NULL;
+ char *map = NULL;
+ char *que = NULL;
+ char *tmp = NULL;
+ int tc = 0;
int ret;
int i;
- struct bpf_program *prog;
-
read_args(argc, argv);
rxq = rxq_num(ifname);
-
printf("rxq: %d\n", rxq);
+ if (launch_time_queue >= rxq || launch_time_queue < 0)
+ error(1, 0, "Invalid launch_time_queue.");
+
+ clean_existing_configurations();
+ sleep(1);
+
+ /* Enable tx and rx hardware timestamping */
hwtstamp_enable(ifname);
+ /* Prepare priority to traffic class map for tc-mqprio */
+ for (i = 0; i < MAX_TC; i++) {
+ if (i < rxq)
+ tc = i;
+
+ if (asprintf(&buf, "%d ", tc) == -1) {
+ printf("Failed to malloc buf for tc map.\n");
+ goto free_mem;
+ }
+
+ map_len += strlen(buf);
+ tmp = realloc(map, map_len + 1);
+ if (!tmp) {
+ printf("Failed to realloc tc map.\n");
+ goto free_mem;
+ }
+ map = tmp;
+ strcat(map, buf);
+ free(buf);
+ buf = NULL;
+ }
+
+ /* Prepare traffic class to hardware queue map for tc-mqprio */
+ for (i = 0; i <= tc; i++) {
+ if (asprintf(&buf, "1@%d ", i) == -1) {
+ printf("Failed to malloc buf for tc queues.\n");
+ goto free_mem;
+ }
+
+ que_len += strlen(buf);
+ tmp = realloc(que, que_len + 1);
+ if (!tmp) {
+ printf("Failed to realloc tc queues.\n");
+ goto free_mem;
+ }
+ que = tmp;
+ strcat(que, buf);
+ free(buf);
+ buf = NULL;
+ }
+
+ /* Add mqprio qdisc */
+ run_command("sudo tc qdisc add dev %s handle 8001: parent root mqprio num_tc %d map %squeues %shw 0",
+ ifname, tc + 1, map, que);
+
+ /* To test launch time, send UDP packet with VLAN priority 1 to port 9091 */
+ if (launch_time_delta_to_hw_rx_timestamp) {
+ /* Enable launch time hardware offload on launch_time_queue */
+ run_command("sudo tc qdisc replace dev %s parent 8001:%d etf offload clockid CLOCK_TAI delta 500000",
+ ifname, launch_time_queue + 1);
+ sleep(1);
+
+ /* Route incoming packet with VLAN priority 1 into launch_time_queue */
+ if (run_command("sudo ethtool -N %s flow-type ether vlan 0x2000 vlan-mask 0x1FFF action %d",
+ ifname, launch_time_queue)) {
+ run_command("sudo tc qdisc add dev %s ingress", ifname);
+ run_command("sudo tc filter add dev %s parent ffff: protocol 802.1Q flower vlan_prio 1 hw_tc %d",
+ ifname, launch_time_queue);
+ }
+
+ /* Enable VLAN tag stripping offload */
+ run_command("sudo ethtool -K %s rxvlan on", ifname);
+ }
+
rx_xsk = malloc(sizeof(struct xsk) * rxq);
if (!rx_xsk)
error(1, ENOMEM, "malloc");
@@ -733,4 +884,11 @@ int main(int argc, char *argv[])
cleanup();
if (ret)
error(1, -ret, "verify_metadata");
+
+ clean_existing_configurations();
+
+free_mem:
+ free(buf);
+ free(map);
+ free(que);
}
diff --git a/tools/testing/selftests/bpf/xdp_redirect_multi.c b/tools/testing/selftests/bpf/xdp_redirect_multi.c
deleted file mode 100644
index c1fc44c87c30..000000000000
--- a/tools/testing/selftests/bpf/xdp_redirect_multi.c
+++ /dev/null
@@ -1,226 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/bpf.h>
-#include <linux/if_link.h>
-#include <assert.h>
-#include <errno.h>
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <net/if.h>
-#include <unistd.h>
-#include <libgen.h>
-#include <sys/ioctl.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <netinet/in.h>
-
-#include "bpf_util.h"
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#define MAX_IFACE_NUM 32
-#define MAX_INDEX_NUM 1024
-
-static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
-static int ifaces[MAX_IFACE_NUM] = {};
-
-static void int_exit(int sig)
-{
- __u32 prog_id = 0;
- int i;
-
- for (i = 0; ifaces[i] > 0; i++) {
- if (bpf_xdp_query_id(ifaces[i], xdp_flags, &prog_id)) {
- printf("bpf_xdp_query_id failed\n");
- exit(1);
- }
- if (prog_id)
- bpf_xdp_detach(ifaces[i], xdp_flags, NULL);
- }
-
- exit(0);
-}
-
-static int get_mac_addr(unsigned int ifindex, void *mac_addr)
-{
- char ifname[IF_NAMESIZE];
- struct ifreq ifr;
- int fd, ret = -1;
-
- fd = socket(AF_INET, SOCK_DGRAM, 0);
- if (fd < 0)
- return ret;
-
- if (!if_indextoname(ifindex, ifname))
- goto err_out;
-
- strcpy(ifr.ifr_name, ifname);
-
- if (ioctl(fd, SIOCGIFHWADDR, &ifr) != 0)
- goto err_out;
-
- memcpy(mac_addr, ifr.ifr_hwaddr.sa_data, 6 * sizeof(char));
- ret = 0;
-
-err_out:
- close(fd);
- return ret;
-}
-
-static void usage(const char *prog)
-{
- fprintf(stderr,
- "usage: %s [OPTS] <IFNAME|IFINDEX> <IFNAME|IFINDEX> ...\n"
- "OPTS:\n"
- " -S use skb-mode\n"
- " -N enforce native mode\n"
- " -F force loading prog\n"
- " -X load xdp program on egress\n",
- prog);
-}
-
-int main(int argc, char **argv)
-{
- int prog_fd, group_all, mac_map;
- struct bpf_program *ingress_prog, *egress_prog;
- int i, err, ret, opt, egress_prog_fd = 0;
- struct bpf_devmap_val devmap_val;
- bool attach_egress_prog = false;
- unsigned char mac_addr[6];
- char ifname[IF_NAMESIZE];
- struct bpf_object *obj;
- unsigned int ifindex;
- char filename[256];
-
- while ((opt = getopt(argc, argv, "SNFX")) != -1) {
- switch (opt) {
- case 'S':
- xdp_flags |= XDP_FLAGS_SKB_MODE;
- break;
- case 'N':
- /* default, set below */
- break;
- case 'F':
- xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
- break;
- case 'X':
- attach_egress_prog = true;
- break;
- default:
- usage(basename(argv[0]));
- return 1;
- }
- }
-
- if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) {
- xdp_flags |= XDP_FLAGS_DRV_MODE;
- } else if (attach_egress_prog) {
- printf("Load xdp program on egress with SKB mode not supported yet\n");
- goto err_out;
- }
-
- if (optind == argc) {
- printf("usage: %s <IFNAME|IFINDEX> <IFNAME|IFINDEX> ...\n", argv[0]);
- goto err_out;
- }
-
- printf("Get interfaces:");
- for (i = 0; i < MAX_IFACE_NUM && argv[optind + i]; i++) {
- ifaces[i] = if_nametoindex(argv[optind + i]);
- if (!ifaces[i])
- ifaces[i] = strtoul(argv[optind + i], NULL, 0);
- if (!if_indextoname(ifaces[i], ifname)) {
- perror("Invalid interface name or i");
- goto err_out;
- }
- if (ifaces[i] > MAX_INDEX_NUM) {
- printf(" interface index too large\n");
- goto err_out;
- }
- printf(" %d", ifaces[i]);
- }
- printf("\n");
-
- snprintf(filename, sizeof(filename), "%s_kern.bpf.o", argv[0]);
- obj = bpf_object__open_file(filename, NULL);
- err = libbpf_get_error(obj);
- if (err)
- goto err_out;
- err = bpf_object__load(obj);
- if (err)
- goto err_out;
- prog_fd = bpf_program__fd(bpf_object__next_program(obj, NULL));
-
- if (attach_egress_prog)
- group_all = bpf_object__find_map_fd_by_name(obj, "map_egress");
- else
- group_all = bpf_object__find_map_fd_by_name(obj, "map_all");
- mac_map = bpf_object__find_map_fd_by_name(obj, "mac_map");
-
- if (group_all < 0 || mac_map < 0) {
- printf("bpf_object__find_map_fd_by_name failed\n");
- goto err_out;
- }
-
- if (attach_egress_prog) {
- /* Find ingress/egress prog for 2nd xdp prog */
- ingress_prog = bpf_object__find_program_by_name(obj, "xdp_redirect_map_all_prog");
- egress_prog = bpf_object__find_program_by_name(obj, "xdp_devmap_prog");
- if (!ingress_prog || !egress_prog) {
- printf("finding ingress/egress_prog in obj file failed\n");
- goto err_out;
- }
- prog_fd = bpf_program__fd(ingress_prog);
- egress_prog_fd = bpf_program__fd(egress_prog);
- if (prog_fd < 0 || egress_prog_fd < 0) {
- printf("find egress_prog fd failed\n");
- goto err_out;
- }
- }
-
- signal(SIGINT, int_exit);
- signal(SIGTERM, int_exit);
-
- /* Init forward multicast groups and exclude group */
- for (i = 0; ifaces[i] > 0; i++) {
- ifindex = ifaces[i];
-
- if (attach_egress_prog) {
- ret = get_mac_addr(ifindex, mac_addr);
- if (ret < 0) {
- printf("get interface %d mac failed\n", ifindex);
- goto err_out;
- }
- ret = bpf_map_update_elem(mac_map, &ifindex, mac_addr, 0);
- if (ret) {
- perror("bpf_update_elem mac_map failed\n");
- goto err_out;
- }
- }
-
- /* Add all the interfaces to group all */
- devmap_val.ifindex = ifindex;
- devmap_val.bpf_prog.fd = egress_prog_fd;
- ret = bpf_map_update_elem(group_all, &ifindex, &devmap_val, 0);
- if (ret) {
- perror("bpf_map_update_elem");
- goto err_out;
- }
-
- /* bind prog_fd to each interface */
- ret = bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL);
- if (ret) {
- printf("Set xdp fd failed on %d\n", ifindex);
- goto err_out;
- }
- }
-
- /* sleep some time for testing */
- sleep(999);
-
- return 0;
-
-err_out:
- return 1;
-}
diff --git a/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh b/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh
index 3f45512fb512..7406c24be1ac 100755
--- a/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh
+++ b/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Test the special cpuset v1 hotplug case where a cpuset become empty of
diff --git a/tools/testing/selftests/damon/.gitignore b/tools/testing/selftests/damon/.gitignore
index 2ab675fecb6b..2f0297657c81 100644
--- a/tools/testing/selftests/damon/.gitignore
+++ b/tools/testing/selftests/damon/.gitignore
@@ -1,6 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
-huge_count_read_write
-debugfs_target_ids_read_before_terminate_race
-debugfs_target_ids_pid_leak
access_memory
access_memory_even
diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile
index 812f656260fb..ecbf07afc6dd 100644
--- a/tools/testing/selftests/damon/Makefile
+++ b/tools/testing/selftests/damon/Makefile
@@ -1,15 +1,11 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for damon selftests
-TEST_GEN_FILES += huge_count_read_write
-TEST_GEN_FILES += debugfs_target_ids_read_before_terminate_race
-TEST_GEN_FILES += debugfs_target_ids_pid_leak
TEST_GEN_FILES += access_memory access_memory_even
-TEST_FILES = _chk_dependency.sh _debugfs_common.sh _damon_sysfs.py
+TEST_FILES = _chk_dependency.sh _damon_sysfs.py
# functionality tests
-TEST_PROGS = debugfs_attrs.sh debugfs_schemes.sh debugfs_target_ids.sh
TEST_PROGS += sysfs.sh
TEST_PROGS += sysfs_update_schemes_tried_regions_wss_estimation.py
TEST_PROGS += damos_quota.py damos_quota_goal.py damos_apply_interval.py
@@ -17,11 +13,6 @@ TEST_PROGS += damos_tried_regions.py damon_nr_regions.py
TEST_PROGS += reclaim.sh lru_sort.sh
# regression tests (reproducers of previously found bugs)
-TEST_PROGS += debugfs_empty_targets.sh debugfs_huge_count_read_write.sh
-TEST_PROGS += debugfs_duplicate_context_creation.sh
-TEST_PROGS += debugfs_rm_non_contexts.sh
-TEST_PROGS += debugfs_target_ids_read_before_terminate_race.sh
-TEST_PROGS += debugfs_target_ids_pid_leak.sh
TEST_PROGS += sysfs_update_removed_scheme_dir.sh
TEST_PROGS += sysfs_update_schemes_tried_regions_hang.py
diff --git a/tools/testing/selftests/damon/config b/tools/testing/selftests/damon/config
index 0daf38974eb0..a68a9fead5dc 100644
--- a/tools/testing/selftests/damon/config
+++ b/tools/testing/selftests/damon/config
@@ -1,6 +1,5 @@
CONFIG_DAMON=y
CONFIG_DAMON_SYSFS=y
-CONFIG_DAMON_DBGFS=y
CONFIG_DAMON_PADDR=y
CONFIG_DAMON_VADDR=y
CONFIG_DAMON_RECLAIM=y
diff --git a/tools/testing/selftests/damon/damon_nr_regions.py b/tools/testing/selftests/damon/damon_nr_regions.py
index 2e8a74aff543..58f3291fed12 100755
--- a/tools/testing/selftests/damon/damon_nr_regions.py
+++ b/tools/testing/selftests/damon/damon_nr_regions.py
@@ -65,6 +65,7 @@ def test_nr_regions(real_nr_regions, min_nr_regions, max_nr_regions):
test_name = 'nr_regions test with %d/%d/%d real/min/max nr_regions' % (
real_nr_regions, min_nr_regions, max_nr_regions)
+ collected_nr_regions.sort()
if (collected_nr_regions[0] < min_nr_regions or
collected_nr_regions[-1] > max_nr_regions):
print('fail %s' % test_name)
@@ -109,6 +110,7 @@ def main():
attrs = kdamonds.kdamonds[0].contexts[0].monitoring_attrs
attrs.min_nr_regions = 3
attrs.max_nr_regions = 7
+ attrs.update_us = 100000
err = kdamonds.kdamonds[0].commit()
if err is not None:
proc.terminate()
diff --git a/tools/testing/selftests/damon/damos_quota.py b/tools/testing/selftests/damon/damos_quota.py
index 7d4c6bb2e3cd..57c4937aaed2 100755
--- a/tools/testing/selftests/damon/damos_quota.py
+++ b/tools/testing/selftests/damon/damos_quota.py
@@ -51,16 +51,19 @@ def main():
nr_quota_exceeds = scheme.stats.qt_exceeds
wss_collected.sort()
+ nr_expected_quota_exceeds = 0
for wss in wss_collected:
if wss > sz_quota:
print('quota is not kept: %s > %s' % (wss, sz_quota))
print('collected samples are as below')
print('\n'.join(['%d' % wss for wss in wss_collected]))
exit(1)
+ if wss == sz_quota:
+ nr_expected_quota_exceeds += 1
- if nr_quota_exceeds < len(wss_collected):
- print('quota is not always exceeded: %d > %d' %
- (len(wss_collected), nr_quota_exceeds))
+ if nr_quota_exceeds < nr_expected_quota_exceeds:
+ print('quota is exceeded less than expected: %d < %d' %
+ (nr_quota_exceeds, nr_expected_quota_exceeds))
exit(1)
if __name__ == '__main__':
diff --git a/tools/testing/selftests/damon/damos_quota_goal.py b/tools/testing/selftests/damon/damos_quota_goal.py
index 18246f3b62f7..f76e0412b564 100755
--- a/tools/testing/selftests/damon/damos_quota_goal.py
+++ b/tools/testing/selftests/damon/damos_quota_goal.py
@@ -63,6 +63,9 @@ def main():
if last_effective_bytes != 0 else -1.0))
if last_effective_bytes == goal.effective_bytes:
+ # effective quota was already minimum that cannot be more reduced
+ if expect_increase is False and last_effective_bytes == 1:
+ continue
print('efective bytes not changed: %d' % goal.effective_bytes)
exit(1)
diff --git a/tools/testing/selftests/damon/debugfs_attrs.sh b/tools/testing/selftests/damon/debugfs_attrs.sh
deleted file mode 100755
index 902e312bca89..000000000000
--- a/tools/testing/selftests/damon/debugfs_attrs.sh
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-source _debugfs_common.sh
-
-# Test attrs file
-# ===============
-
-file="$DBGFS/attrs"
-orig_content=$(cat "$file")
-
-test_write_succ "$file" "1 2 3 4 5" "$orig_content" "valid input"
-test_write_fail "$file" "1 2 3 4" "$orig_content" "no enough fields"
-test_write_fail "$file" "1 2 3 5 4" "$orig_content" \
- "min_nr_regions > max_nr_regions"
-test_content "$file" "$orig_content" "1 2 3 4 5" "successfully written"
-echo "$orig_content" > "$file"
diff --git a/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh b/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh
deleted file mode 100755
index bd6c22d96ead..000000000000
--- a/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-source _debugfs_common.sh
-
-# Test duplicated context creation
-# ================================
-
-if ! echo foo > "$DBGFS/mk_contexts"
-then
- echo "context creation failed"
- exit 1
-fi
-
-if echo foo > "$DBGFS/mk_contexts" 2> /dev/null
-then
- echo "duplicate context creation success"
- exit 1
-fi
-
-if ! echo foo > "$DBGFS/rm_contexts"
-then
- echo "context deletion failed"
- exit 1
-fi
-
-exit 0
diff --git a/tools/testing/selftests/damon/debugfs_empty_targets.sh b/tools/testing/selftests/damon/debugfs_empty_targets.sh
deleted file mode 100755
index effbea33dc16..000000000000
--- a/tools/testing/selftests/damon/debugfs_empty_targets.sh
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-source _debugfs_common.sh
-
-# Test empty targets case
-# =======================
-
-orig_target_ids=$(cat "$DBGFS/target_ids")
-echo "" > "$DBGFS/target_ids"
-
-if [ -f "$DBGFS/monitor_on_DEPRECATED" ]
-then
- monitor_on_file="$DBGFS/monitor_on_DEPRECATED"
-else
- monitor_on_file="$DBGFS/monitor_on"
-fi
-
-orig_monitor_on=$(cat "$monitor_on_file")
-test_write_fail "$monitor_on_file" "on" "orig_monitor_on" "empty target ids"
-echo "$orig_target_ids" > "$DBGFS/target_ids"
diff --git a/tools/testing/selftests/damon/debugfs_huge_count_read_write.sh b/tools/testing/selftests/damon/debugfs_huge_count_read_write.sh
deleted file mode 100755
index 922cadac2950..000000000000
--- a/tools/testing/selftests/damon/debugfs_huge_count_read_write.sh
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-source _debugfs_common.sh
-
-# Test huge count read write
-# ==========================
-
-dmesg -C
-
-for file in "$DBGFS/"*
-do
- ./huge_count_read_write "$file"
-done
-
-if dmesg | grep -q WARNING
-then
- dmesg
- exit 1
-else
- exit 0
-fi
diff --git a/tools/testing/selftests/damon/debugfs_rm_non_contexts.sh b/tools/testing/selftests/damon/debugfs_rm_non_contexts.sh
deleted file mode 100755
index f3ffeb1343cf..000000000000
--- a/tools/testing/selftests/damon/debugfs_rm_non_contexts.sh
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-source _debugfs_common.sh
-
-# Test putting non-ctx files/dirs to rm_contexts file
-# ===================================================
-
-dmesg -C
-
-for file in "$DBGFS/"*
-do
- (echo "$(basename "$f")" > "$DBGFS/rm_contexts") &> /dev/null
- if dmesg | grep -q BUG
- then
- dmesg
- exit 1
- fi
-done
diff --git a/tools/testing/selftests/damon/debugfs_schemes.sh b/tools/testing/selftests/damon/debugfs_schemes.sh
deleted file mode 100755
index 5b39ab44731c..000000000000
--- a/tools/testing/selftests/damon/debugfs_schemes.sh
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-source _debugfs_common.sh
-
-# Test schemes file
-# =================
-
-file="$DBGFS/schemes"
-orig_content=$(cat "$file")
-
-test_write_succ "$file" "1 2 3 4 5 6 4 0 0 0 1 2 3 1 100 3 2 1" \
- "$orig_content" "valid input"
-test_write_fail "$file" "1 2
-3 4 5 6 3 0 0 0 1 2 3 1 100 3 2 1" "$orig_content" "multi lines"
-test_write_succ "$file" "" "$orig_content" "disabling"
-test_write_fail "$file" "2 1 2 1 10 1 3 10 1 1 1 1 1 1 1 1 2 3" \
- "$orig_content" "wrong condition ranges"
-echo "$orig_content" > "$file"
diff --git a/tools/testing/selftests/damon/debugfs_target_ids.sh b/tools/testing/selftests/damon/debugfs_target_ids.sh
deleted file mode 100755
index 49aeabdb0aae..000000000000
--- a/tools/testing/selftests/damon/debugfs_target_ids.sh
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-source _debugfs_common.sh
-
-# Test target_ids file
-# ====================
-
-file="$DBGFS/target_ids"
-orig_content=$(cat "$file")
-
-test_write_succ "$file" "1 2 3 4" "$orig_content" "valid input"
-test_write_succ "$file" "1 2 abc 4" "$orig_content" "still valid input"
-test_content "$file" "$orig_content" "1 2" "non-integer was there"
-test_write_succ "$file" "abc 2 3" "$orig_content" "the file allows wrong input"
-test_content "$file" "$orig_content" "" "wrong input written"
-test_write_succ "$file" "" "$orig_content" "empty input"
-test_content "$file" "$orig_content" "" "empty input written"
-echo "$orig_content" > "$file"
diff --git a/tools/testing/selftests/damon/debugfs_target_ids_pid_leak.c b/tools/testing/selftests/damon/debugfs_target_ids_pid_leak.c
deleted file mode 100644
index 0cc2eef7d142..000000000000
--- a/tools/testing/selftests/damon/debugfs_target_ids_pid_leak.c
+++ /dev/null
@@ -1,68 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Author: SeongJae Park <sj@kernel.org>
- */
-
-#define _GNU_SOURCE
-
-#include <fcntl.h>
-#include <stdbool.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <sys/time.h>
-#include <unistd.h>
-
-#define DBGFS_TARGET_IDS "/sys/kernel/debug/damon/target_ids"
-
-static void write_targetid_exit(void)
-{
- int target_ids_fd = open(DBGFS_TARGET_IDS, O_RDWR);
- char pid_str[128];
-
- snprintf(pid_str, sizeof(pid_str), "%d", getpid());
- write(target_ids_fd, pid_str, sizeof(pid_str));
- close(target_ids_fd);
- exit(0);
-}
-
-unsigned long msec_timestamp(void)
-{
- struct timeval tv;
-
- gettimeofday(&tv, NULL);
- return tv.tv_sec * 1000UL + tv.tv_usec / 1000;
-}
-
-int main(int argc, char *argv[])
-{
- unsigned long start_ms;
- int time_to_run, nr_forks = 0;
-
- if (argc != 2) {
- fprintf(stderr, "Usage: %s <msecs to run>\n", argv[0]);
- exit(1);
- }
- time_to_run = atoi(argv[1]);
-
- start_ms = msec_timestamp();
- while (true) {
- int pid = fork();
-
- if (pid < 0) {
- fprintf(stderr, "fork() failed\n");
- exit(1);
- }
- if (pid == 0)
- write_targetid_exit();
- wait(NULL);
- nr_forks++;
-
- if (msec_timestamp() - start_ms > time_to_run)
- break;
- }
- printf("%d\n", nr_forks);
- return 0;
-}
diff --git a/tools/testing/selftests/damon/debugfs_target_ids_pid_leak.sh b/tools/testing/selftests/damon/debugfs_target_ids_pid_leak.sh
deleted file mode 100755
index 31fe33c2b032..000000000000
--- a/tools/testing/selftests/damon/debugfs_target_ids_pid_leak.sh
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-before=$(grep "^pid " /proc/slabinfo | awk '{print $2}')
-
-nr_leaks=$(./debugfs_target_ids_pid_leak 1000)
-expected_after_max=$((before + nr_leaks / 2))
-
-after=$(grep "^pid " /proc/slabinfo | awk '{print $2}')
-
-echo > /sys/kernel/debug/damon/target_ids
-
-echo "tried $nr_leaks pid leak"
-echo "number of active pid slabs: $before -> $after"
-echo "(up to $expected_after_max expected)"
-if [ $after -gt $expected_after_max ]
-then
- echo "maybe pids are leaking"
- exit 1
-else
- exit 0
-fi
diff --git a/tools/testing/selftests/damon/debugfs_target_ids_read_before_terminate_race.c b/tools/testing/selftests/damon/debugfs_target_ids_read_before_terminate_race.c
deleted file mode 100644
index b06f52a8ce2d..000000000000
--- a/tools/testing/selftests/damon/debugfs_target_ids_read_before_terminate_race.c
+++ /dev/null
@@ -1,80 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Author: SeongJae Park <sj@kernel.org>
- */
-#define _GNU_SOURCE
-
-#include <fcntl.h>
-#include <stdbool.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <time.h>
-#include <unistd.h>
-
-#define DBGFS_MONITOR_ON "/sys/kernel/debug/damon/monitor_on_DEPRECATED"
-#define DBGFS_TARGET_IDS "/sys/kernel/debug/damon/target_ids"
-
-static void turn_damon_on_exit(void)
-{
- int target_ids_fd = open(DBGFS_TARGET_IDS, O_RDWR);
- int monitor_on_fd = open(DBGFS_MONITOR_ON, O_RDWR);
- char pid_str[128];
-
- snprintf(pid_str, sizeof(pid_str), "%d", getpid());
- write(target_ids_fd, pid_str, sizeof(pid_str));
- write(monitor_on_fd, "on\n", 3);
- close(target_ids_fd);
- close(monitor_on_fd);
- usleep(1000);
- exit(0);
-}
-
-static void try_race(void)
-{
- int target_ids_fd = open(DBGFS_TARGET_IDS, O_RDWR);
- int pid = fork();
- int buf[256];
-
- if (pid < 0) {
- fprintf(stderr, "fork() failed\n");
- exit(1);
- }
- if (pid == 0)
- turn_damon_on_exit();
- while (true) {
- int status;
-
- read(target_ids_fd, buf, sizeof(buf));
- if (waitpid(-1, &status, WNOHANG) == pid)
- break;
- }
- close(target_ids_fd);
-}
-
-static inline uint64_t ts_to_ms(struct timespec *ts)
-{
- return (uint64_t)ts->tv_sec * 1000 + (uint64_t)ts->tv_nsec / 1000000;
-}
-
-int main(int argc, char *argv[])
-{
- struct timespec start_time, now;
- int runtime_ms;
-
- if (argc != 2) {
- fprintf(stderr, "Usage: %s <runtime in ms>\n", argv[0]);
- exit(1);
- }
- runtime_ms = atoi(argv[1]);
- clock_gettime(CLOCK_MONOTONIC, &start_time);
- while (true) {
- try_race();
- clock_gettime(CLOCK_MONOTONIC, &now);
- if (ts_to_ms(&now) - ts_to_ms(&start_time) > runtime_ms)
- break;
- }
- return 0;
-}
diff --git a/tools/testing/selftests/damon/debugfs_target_ids_read_before_terminate_race.sh b/tools/testing/selftests/damon/debugfs_target_ids_read_before_terminate_race.sh
deleted file mode 100755
index fc793c4c9aea..000000000000
--- a/tools/testing/selftests/damon/debugfs_target_ids_read_before_terminate_race.sh
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-dmesg -C
-
-./debugfs_target_ids_read_before_terminate_race 5000
-
-if dmesg | grep -q dbgfs_target_ids_read
-then
- dmesg
- exit 1
-else
- exit 0
-fi
diff --git a/tools/testing/selftests/damon/huge_count_read_write.c b/tools/testing/selftests/damon/huge_count_read_write.c
deleted file mode 100644
index 53e69a669668..000000000000
--- a/tools/testing/selftests/damon/huge_count_read_write.c
+++ /dev/null
@@ -1,46 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Author: SeongJae Park <sj@kernel.org>
- */
-
-#include <fcntl.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <stdio.h>
-
-#pragma GCC diagnostic push
-#if __GNUC__ >= 11 && __GNUC_MINOR__ >= 1
-/* Ignore read(2) overflow and write(2) overread compile warnings */
-#pragma GCC diagnostic ignored "-Wstringop-overread"
-#pragma GCC diagnostic ignored "-Wstringop-overflow"
-#endif
-
-void write_read_with_huge_count(char *file)
-{
- int filedesc = open(file, O_RDWR);
- char buf[256];
- int ret;
-
- printf("%s %s\n", __func__, file);
- if (filedesc < 0) {
- fprintf(stderr, "failed opening %s\n", file);
- exit(1);
- }
-
- write(filedesc, "", 0xfffffffful);
- ret = read(filedesc, buf, 0xfffffffful);
- close(filedesc);
-}
-
-#pragma GCC diagnostic pop
-
-int main(int argc, char *argv[])
-{
- if (argc != 2) {
- fprintf(stderr, "Usage: %s <file>\n", argv[0]);
- exit(1);
- }
- write_read_with_huge_count(argv[1]);
-
- return 0;
-}
diff --git a/tools/testing/selftests/drivers/net/.gitignore b/tools/testing/selftests/drivers/net/.gitignore
new file mode 100644
index 000000000000..ec746f374e85
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+xdp_helper
diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile
index 137470bdee0c..0c95bd944d56 100644
--- a/tools/testing/selftests/drivers/net/Makefile
+++ b/tools/testing/selftests/drivers/net/Makefile
@@ -1,13 +1,18 @@
# SPDX-License-Identifier: GPL-2.0
+CFLAGS += $(KHDR_INCLUDES)
TEST_INCLUDES := $(wildcard lib/py/*.py) \
$(wildcard lib/sh/*.sh) \
../../net/net_helper.sh \
../../net/lib.sh \
+TEST_GEN_FILES := xdp_helper
+
TEST_PROGS := \
netcons_basic.sh \
+ netcons_fragmented_msg.sh \
netcons_overflow.sh \
+ netcons_sysdata.sh \
ping.py \
queues.py \
stats.py \
diff --git a/tools/testing/selftests/drivers/net/README.rst b/tools/testing/selftests/drivers/net/README.rst
index 3b6a29e6564b..eb838ae94844 100644
--- a/tools/testing/selftests/drivers/net/README.rst
+++ b/tools/testing/selftests/drivers/net/README.rst
@@ -107,7 +107,7 @@ On the target machine, running the tests will use netdevsim by default::
1..1
# timeout set to 45
# selftests: drivers/net: ping.py
- # KTAP version 1
+ # TAP version 13
# 1..3
# ok 1 ping.test_v4
# ok 2 ping.test_v6
@@ -128,7 +128,7 @@ Create a config with remote info::
Run the test::
[/root] # ./ksft-net-drv/drivers/net/ping.py
- KTAP version 1
+ TAP version 13
1..3
ok 1 ping.test_v4
ok 2 ping.test_v6 # SKIP Test requires IPv6 connectivity
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
index edc56e2cc606..7bc148889ca7 100755
--- a/tools/testing/selftests/drivers/net/bonding/bond_options.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
@@ -11,8 +11,8 @@ ALL_TESTS="
lib_dir=$(dirname "$0")
source ${lib_dir}/bond_topo_3d1c.sh
-c_maddr="33:33:00:00:00:10"
-g_maddr="33:33:00:00:02:54"
+c_maddr="33:33:ff:00:00:10"
+g_maddr="33:33:ff:00:02:54"
skip_prio()
{
diff --git a/tools/testing/selftests/drivers/net/config b/tools/testing/selftests/drivers/net/config
index a2d8af60876d..f27172ddee0a 100644
--- a/tools/testing/selftests/drivers/net/config
+++ b/tools/testing/selftests/drivers/net/config
@@ -4,3 +4,4 @@ CONFIG_CONFIGFS_FS=y
CONFIG_NETCONSOLE=m
CONFIG_NETCONSOLE_DYNAMIC=y
CONFIG_NETCONSOLE_EXTENDED_LOG=y
+CONFIG_XDP_SOCKETS=y
diff --git a/tools/testing/selftests/drivers/net/hds.py b/tools/testing/selftests/drivers/net/hds.py
index 394971b25c0b..7cc74faed743 100755
--- a/tools/testing/selftests/drivers/net/hds.py
+++ b/tools/testing/selftests/drivers/net/hds.py
@@ -2,17 +2,53 @@
# SPDX-License-Identifier: GPL-2.0
import errno
+import os
from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_raises, KsftSkipEx
-from lib.py import EthtoolFamily, NlError
+from lib.py import CmdExitFailure, EthtoolFamily, NlError
from lib.py import NetDrvEnv
+from lib.py import defer, ethtool, ip
-def get_hds(cfg, netnl) -> None:
+
+def _get_hds_mode(cfg, netnl) -> str:
try:
rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
except NlError as e:
raise KsftSkipEx('ring-get not supported by device')
if 'tcp-data-split' not in rings:
raise KsftSkipEx('tcp-data-split not supported by device')
+ return rings['tcp-data-split']
+
+
+def _xdp_onoff(cfg):
+ prog = cfg.rpath("../../net/lib/xdp_dummy.bpf.o")
+ ip("link set dev %s xdp obj %s sec xdp" %
+ (cfg.ifname, prog))
+ ip("link set dev %s xdp off" % cfg.ifname)
+
+
+def _ioctl_ringparam_modify(cfg, netnl) -> None:
+ """
+ Helper for performing a hopefully unimportant IOCTL SET.
+ IOCTL does not support HDS, so it should not affect the HDS config.
+ """
+ try:
+ rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ except NlError as e:
+ raise KsftSkipEx('ring-get not supported by device')
+
+ if 'tx' not in rings:
+ raise KsftSkipEx('setting Tx ring size not supported')
+
+ try:
+ ethtool(f"--disable-netlink -G {cfg.ifname} tx {rings['tx'] // 2}")
+ except CmdExitFailure as e:
+ ethtool(f"--disable-netlink -G {cfg.ifname} tx {rings['tx'] * 2}")
+ defer(ethtool, f"-G {cfg.ifname} tx {rings['tx']}")
+
+
+def get_hds(cfg, netnl) -> None:
+ _get_hds_mode(cfg, netnl)
+
def get_hds_thresh(cfg, netnl) -> None:
try:
@@ -104,6 +140,103 @@ def set_hds_thresh_gt(cfg, netnl) -> None:
netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': hds_gt})
ksft_eq(e.exception.nl_msg.error, -errno.EINVAL)
+
+def set_xdp(cfg, netnl) -> None:
+ """
+ Enable single-buffer XDP on the device.
+ When HDS is in "auto" / UNKNOWN mode, XDP installation should work.
+ """
+ mode = _get_hds_mode(cfg, netnl)
+ if mode == 'enabled':
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'unknown'})
+
+ _xdp_onoff(cfg)
+
+
+def enabled_set_xdp(cfg, netnl) -> None:
+ """
+ Enable single-buffer XDP on the device.
+ When HDS is in "enabled" mode, XDP installation should not work.
+ """
+ _get_hds_mode(cfg, netnl)
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'enabled'})
+
+ defer(netnl.rings_set, {'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'unknown'})
+
+ with ksft_raises(CmdExitFailure) as e:
+ _xdp_onoff(cfg)
+
+
+def set_xdp(cfg, netnl) -> None:
+ """
+ Enable single-buffer XDP on the device.
+ When HDS is in "auto" / UNKNOWN mode, XDP installation should work.
+ """
+ mode = _get_hds_mode(cfg, netnl)
+ if mode == 'enabled':
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'unknown'})
+
+ _xdp_onoff(cfg)
+
+
+def enabled_set_xdp(cfg, netnl) -> None:
+ """
+ Enable single-buffer XDP on the device.
+ When HDS is in "enabled" mode, XDP installation should not work.
+ """
+ _get_hds_mode(cfg, netnl) # Trigger skip if not supported
+
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'enabled'})
+ defer(netnl.rings_set, {'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'unknown'})
+
+ with ksft_raises(CmdExitFailure) as e:
+ _xdp_onoff(cfg)
+
+
+def ioctl(cfg, netnl) -> None:
+ mode1 = _get_hds_mode(cfg, netnl)
+ _ioctl_ringparam_modify(cfg, netnl)
+ mode2 = _get_hds_mode(cfg, netnl)
+
+ ksft_eq(mode1, mode2)
+
+
+def ioctl_set_xdp(cfg, netnl) -> None:
+ """
+ Like set_xdp(), but we perturb the settings via the legacy ioctl.
+ """
+ mode = _get_hds_mode(cfg, netnl)
+ if mode == 'enabled':
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'unknown'})
+
+ _ioctl_ringparam_modify(cfg, netnl)
+
+ _xdp_onoff(cfg)
+
+
+def ioctl_enabled_set_xdp(cfg, netnl) -> None:
+ """
+ Enable single-buffer XDP on the device.
+ When HDS is in "enabled" mode, XDP installation should not work.
+ """
+ _get_hds_mode(cfg, netnl) # Trigger skip if not supported
+
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'enabled'})
+ defer(netnl.rings_set, {'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'unknown'})
+
+ with ksft_raises(CmdExitFailure) as e:
+ _xdp_onoff(cfg)
+
+
def main() -> None:
with NetDrvEnv(__file__, queue_count=3) as cfg:
ksft_run([get_hds,
@@ -112,7 +245,12 @@ def main() -> None:
set_hds_enable,
set_hds_thresh_zero,
set_hds_thresh_max,
- set_hds_thresh_gt],
+ set_hds_thresh_gt,
+ set_xdp,
+ enabled_set_xdp,
+ ioctl,
+ ioctl_set_xdp,
+ ioctl_enabled_set_xdp],
args=(cfg, EthtoolFamily()))
ksft_exit()
diff --git a/tools/testing/selftests/drivers/net/hw/.gitignore b/tools/testing/selftests/drivers/net/hw/.gitignore
index e9fe6ede681a..6942bf575497 100644
--- a/tools/testing/selftests/drivers/net/hw/.gitignore
+++ b/tools/testing/selftests/drivers/net/hw/.gitignore
@@ -1 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+iou-zcrx
ncdevmem
diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile
index 21ba64ce1e34..07cddb19ba35 100644
--- a/tools/testing/selftests/drivers/net/hw/Makefile
+++ b/tools/testing/selftests/drivers/net/hw/Makefile
@@ -1,5 +1,7 @@
# SPDX-License-Identifier: GPL-2.0+ OR MIT
+TEST_GEN_FILES = iou-zcrx
+
TEST_PROGS = \
csum.py \
devlink_port_split.py \
@@ -10,11 +12,15 @@ TEST_PROGS = \
ethtool_rmon.sh \
hw_stats_l3.sh \
hw_stats_l3_gre.sh \
+ iou-zcrx.py \
+ irq.py \
loopback.sh \
nic_link_layer.py \
nic_performance.py \
pp_alloc_fail.py \
rss_ctx.py \
+ rss_input_xfrm.py \
+ tso.py \
#
TEST_FILES := \
@@ -32,9 +38,14 @@ TEST_INCLUDES := \
# YNL files, must be before "include ..lib.mk"
YNL_GEN_FILES := ncdevmem
TEST_GEN_FILES += $(YNL_GEN_FILES)
+TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c))
include ../../../lib.mk
# YNL build
YNL_GENS := ethtool netdev
include ../../../net/ynl.mk
+
+include ../../../net/bpf.mk
+
+$(OUTPUT)/iou-zcrx: LDLIBS += -luring
diff --git a/tools/testing/selftests/drivers/net/hw/csum.py b/tools/testing/selftests/drivers/net/hw/csum.py
index cb40497faee4..701aca1361e0 100755
--- a/tools/testing/selftests/drivers/net/hw/csum.py
+++ b/tools/testing/selftests/drivers/net/hw/csum.py
@@ -9,15 +9,12 @@ from lib.py import ksft_run, ksft_exit, KsftSkipEx
from lib.py import EthtoolFamily, NetDrvEpEnv
from lib.py import bkg, cmd, wait_port_listen
-def test_receive(cfg, ipv4=False, extra_args=None):
+def test_receive(cfg, ipver="6", extra_args=None):
"""Test local nic checksum receive. Remote host sends crafted packets."""
if not cfg.have_rx_csum:
raise KsftSkipEx(f"Test requires rx checksum offload on {cfg.ifname}")
- if ipv4:
- ip_args = f"-4 -S {cfg.remote_v4} -D {cfg.v4}"
- else:
- ip_args = f"-6 -S {cfg.remote_v6} -D {cfg.v6}"
+ ip_args = f"-{ipver} -S {cfg.remote_addr_v[ipver]} -D {cfg.addr_v[ipver]}"
rx_cmd = f"{cfg.bin_local} -i {cfg.ifname} -n 100 {ip_args} -r 1 -R {extra_args}"
tx_cmd = f"{cfg.bin_remote} -i {cfg.ifname} -n 100 {ip_args} -r 1 -T {extra_args}"
@@ -27,17 +24,14 @@ def test_receive(cfg, ipv4=False, extra_args=None):
cmd(tx_cmd, host=cfg.remote)
-def test_transmit(cfg, ipv4=False, extra_args=None):
+def test_transmit(cfg, ipver="6", extra_args=None):
"""Test local nic checksum transmit. Remote host verifies packets."""
if (not cfg.have_tx_csum_generic and
- not (cfg.have_tx_csum_ipv4 and ipv4) and
- not (cfg.have_tx_csum_ipv6 and not ipv4)):
+ not (cfg.have_tx_csum_ipv4 and ipver == "4") and
+ not (cfg.have_tx_csum_ipv6 and ipver == "6")):
raise KsftSkipEx(f"Test requires tx checksum offload on {cfg.ifname}")
- if ipv4:
- ip_args = f"-4 -S {cfg.v4} -D {cfg.remote_v4}"
- else:
- ip_args = f"-6 -S {cfg.v6} -D {cfg.remote_v6}"
+ ip_args = f"-{ipver} -S {cfg.addr_v[ipver]} -D {cfg.remote_addr_v[ipver]}"
# Cannot randomize input when calculating zero checksum
if extra_args != "-U -Z":
@@ -51,26 +45,20 @@ def test_transmit(cfg, ipv4=False, extra_args=None):
cmd(tx_cmd)
-def test_builder(name, cfg, ipv4=False, tx=False, extra_args=""):
+def test_builder(name, cfg, ipver="6", tx=False, extra_args=""):
"""Construct specific tests from the common template.
Most tests follow the same basic pattern, differing only in
Direction of the test and optional flags passed to csum."""
def f(cfg):
- if ipv4:
- cfg.require_v4()
- else:
- cfg.require_v6()
+ cfg.require_ipver(ipver)
if tx:
- test_transmit(cfg, ipv4, extra_args)
+ test_transmit(cfg, ipver, extra_args)
else:
- test_receive(cfg, ipv4, extra_args)
+ test_receive(cfg, ipver, extra_args)
- if ipv4:
- f.__name__ = "ipv4_" + name
- else:
- f.__name__ = "ipv6_" + name
+ f.__name__ = f"ipv{ipver}_" + name
return f
@@ -100,19 +88,19 @@ def main() -> None:
with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
check_nic_features(cfg)
- cfg.bin_local = path.abspath(path.dirname(__file__) + "/../../../net/lib/csum")
+ cfg.bin_local = cfg.rpath("../../../net/lib/csum")
cfg.bin_remote = cfg.remote.deploy(cfg.bin_local)
cases = []
- for ipv4 in [True, False]:
- cases.append(test_builder("rx_tcp", cfg, ipv4, False, "-t"))
- cases.append(test_builder("rx_tcp_invalid", cfg, ipv4, False, "-t -E"))
+ for ipver in ["4", "6"]:
+ cases.append(test_builder("rx_tcp", cfg, ipver, False, "-t"))
+ cases.append(test_builder("rx_tcp_invalid", cfg, ipver, False, "-t -E"))
- cases.append(test_builder("rx_udp", cfg, ipv4, False, ""))
- cases.append(test_builder("rx_udp_invalid", cfg, ipv4, False, "-E"))
+ cases.append(test_builder("rx_udp", cfg, ipver, False, ""))
+ cases.append(test_builder("rx_udp_invalid", cfg, ipver, False, "-E"))
- cases.append(test_builder("tx_udp_csum_offload", cfg, ipv4, True, "-U"))
- cases.append(test_builder("tx_udp_zero_checksum", cfg, ipv4, True, "-U -Z"))
+ cases.append(test_builder("tx_udp_csum_offload", cfg, ipver, True, "-U"))
+ cases.append(test_builder("tx_udp_zero_checksum", cfg, ipver, True, "-U -Z"))
ksft_run(cases=cases, args=(cfg, ))
ksft_exit()
diff --git a/tools/testing/selftests/drivers/net/hw/devmem.py b/tools/testing/selftests/drivers/net/hw/devmem.py
index 1223f0f5c10c..3947e9157115 100755
--- a/tools/testing/selftests/drivers/net/hw/devmem.py
+++ b/tools/testing/selftests/drivers/net/hw/devmem.py
@@ -21,15 +21,15 @@ def require_devmem(cfg):
@ksft_disruptive
def check_rx(cfg) -> None:
- cfg.require_v6()
+ cfg.require_ipver("6")
require_devmem(cfg)
port = rand_port()
- listen_cmd = f"./ncdevmem -l -f {cfg.ifname} -s {cfg.v6} -p {port}"
+ listen_cmd = f"./ncdevmem -l -f {cfg.ifname} -s {cfg.addr_v['6']} -p {port}"
with bkg(listen_cmd) as socat:
wait_port_listen(port)
- cmd(f"echo -e \"hello\\nworld\"| socat -u - TCP6:[{cfg.v6}]:{port}", host=cfg.remote, shell=True)
+ cmd(f"echo -e \"hello\\nworld\"| socat -u - TCP6:[{cfg.addr_v['6']}]:{port}", host=cfg.remote, shell=True)
ksft_eq(socat.stdout.strip(), "hello\nworld")
diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.c b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c
new file mode 100644
index 000000000000..c26b4180eddd
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c
@@ -0,0 +1,457 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <assert.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <arpa/inet.h>
+#include <linux/errqueue.h>
+#include <linux/if_packet.h>
+#include <linux/ipv6.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <sys/epoll.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <sys/wait.h>
+
+#include <liburing.h>
+
+#define PAGE_SIZE (4096)
+#define AREA_SIZE (8192 * PAGE_SIZE)
+#define SEND_SIZE (512 * 4096)
+#define min(a, b) \
+ ({ \
+ typeof(a) _a = (a); \
+ typeof(b) _b = (b); \
+ _a < _b ? _a : _b; \
+ })
+#define min_t(t, a, b) \
+ ({ \
+ t _ta = (a); \
+ t _tb = (b); \
+ min(_ta, _tb); \
+ })
+
+#define ALIGN_UP(v, align) (((v) + (align) - 1) & ~((align) - 1))
+
+static int cfg_server;
+static int cfg_client;
+static int cfg_port = 8000;
+static int cfg_payload_len;
+static const char *cfg_ifname;
+static int cfg_queue_id = -1;
+static bool cfg_oneshot;
+static int cfg_oneshot_recvs;
+static int cfg_send_size = SEND_SIZE;
+static struct sockaddr_in6 cfg_addr;
+
+static char payload[SEND_SIZE] __attribute__((aligned(PAGE_SIZE)));
+static void *area_ptr;
+static void *ring_ptr;
+static size_t ring_size;
+static struct io_uring_zcrx_rq rq_ring;
+static unsigned long area_token;
+static int connfd;
+static bool stop;
+static size_t received;
+
+static unsigned long gettimeofday_ms(void)
+{
+ struct timeval tv;
+
+ gettimeofday(&tv, NULL);
+ return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
+static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6)
+{
+ int ret;
+
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_port = htons(port);
+
+ ret = inet_pton(sin6->sin6_family, str, &sin6->sin6_addr);
+ if (ret != 1) {
+ /* fallback to plain IPv4 */
+ ret = inet_pton(AF_INET, str, &sin6->sin6_addr.s6_addr32[3]);
+ if (ret != 1)
+ return -1;
+
+ /* add ::ffff prefix */
+ sin6->sin6_addr.s6_addr32[0] = 0;
+ sin6->sin6_addr.s6_addr32[1] = 0;
+ sin6->sin6_addr.s6_addr16[4] = 0;
+ sin6->sin6_addr.s6_addr16[5] = 0xffff;
+ }
+
+ return 0;
+}
+
+static inline size_t get_refill_ring_size(unsigned int rq_entries)
+{
+ size_t size;
+
+ ring_size = rq_entries * sizeof(struct io_uring_zcrx_rqe);
+ /* add space for the header (head/tail/etc.) */
+ ring_size += PAGE_SIZE;
+ return ALIGN_UP(ring_size, 4096);
+}
+
+static void setup_zcrx(struct io_uring *ring)
+{
+ unsigned int ifindex;
+ unsigned int rq_entries = 4096;
+ int ret;
+
+ ifindex = if_nametoindex(cfg_ifname);
+ if (!ifindex)
+ error(1, 0, "bad interface name: %s", cfg_ifname);
+
+ area_ptr = mmap(NULL,
+ AREA_SIZE,
+ PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE,
+ 0,
+ 0);
+ if (area_ptr == MAP_FAILED)
+ error(1, 0, "mmap(): zero copy area");
+
+ ring_size = get_refill_ring_size(rq_entries);
+ ring_ptr = mmap(NULL,
+ ring_size,
+ PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE,
+ 0,
+ 0);
+
+ struct io_uring_region_desc region_reg = {
+ .size = ring_size,
+ .user_addr = (__u64)(unsigned long)ring_ptr,
+ .flags = IORING_MEM_REGION_TYPE_USER,
+ };
+
+ struct io_uring_zcrx_area_reg area_reg = {
+ .addr = (__u64)(unsigned long)area_ptr,
+ .len = AREA_SIZE,
+ .flags = 0,
+ };
+
+ struct io_uring_zcrx_ifq_reg reg = {
+ .if_idx = ifindex,
+ .if_rxq = cfg_queue_id,
+ .rq_entries = rq_entries,
+ .area_ptr = (__u64)(unsigned long)&area_reg,
+ .region_ptr = (__u64)(unsigned long)&region_reg,
+ };
+
+ ret = io_uring_register_ifq(ring, &reg);
+ if (ret)
+ error(1, 0, "io_uring_register_ifq(): %d", ret);
+
+ rq_ring.khead = (unsigned int *)((char *)ring_ptr + reg.offsets.head);
+ rq_ring.ktail = (unsigned int *)((char *)ring_ptr + reg.offsets.tail);
+ rq_ring.rqes = (struct io_uring_zcrx_rqe *)((char *)ring_ptr + reg.offsets.rqes);
+ rq_ring.rq_tail = 0;
+ rq_ring.ring_entries = reg.rq_entries;
+
+ area_token = area_reg.rq_area_token;
+}
+
+static void add_accept(struct io_uring *ring, int sockfd)
+{
+ struct io_uring_sqe *sqe;
+
+ sqe = io_uring_get_sqe(ring);
+
+ io_uring_prep_accept(sqe, sockfd, NULL, NULL, 0);
+ sqe->user_data = 1;
+}
+
+static void add_recvzc(struct io_uring *ring, int sockfd)
+{
+ struct io_uring_sqe *sqe;
+
+ sqe = io_uring_get_sqe(ring);
+
+ io_uring_prep_rw(IORING_OP_RECV_ZC, sqe, sockfd, NULL, 0, 0);
+ sqe->ioprio |= IORING_RECV_MULTISHOT;
+ sqe->user_data = 2;
+}
+
+static void add_recvzc_oneshot(struct io_uring *ring, int sockfd, size_t len)
+{
+ struct io_uring_sqe *sqe;
+
+ sqe = io_uring_get_sqe(ring);
+
+ io_uring_prep_rw(IORING_OP_RECV_ZC, sqe, sockfd, NULL, len, 0);
+ sqe->ioprio |= IORING_RECV_MULTISHOT;
+ sqe->user_data = 2;
+}
+
+static void process_accept(struct io_uring *ring, struct io_uring_cqe *cqe)
+{
+ if (cqe->res < 0)
+ error(1, 0, "accept()");
+ if (connfd)
+ error(1, 0, "Unexpected second connection");
+
+ connfd = cqe->res;
+ if (cfg_oneshot)
+ add_recvzc_oneshot(ring, connfd, PAGE_SIZE);
+ else
+ add_recvzc(ring, connfd);
+}
+
+static void process_recvzc(struct io_uring *ring, struct io_uring_cqe *cqe)
+{
+ unsigned rq_mask = rq_ring.ring_entries - 1;
+ struct io_uring_zcrx_cqe *rcqe;
+ struct io_uring_zcrx_rqe *rqe;
+ struct io_uring_sqe *sqe;
+ uint64_t mask;
+ char *data;
+ ssize_t n;
+ int i;
+
+ if (cqe->res == 0 && cqe->flags == 0 && cfg_oneshot_recvs == 0) {
+ stop = true;
+ return;
+ }
+
+ if (cqe->res < 0)
+ error(1, 0, "recvzc(): %d", cqe->res);
+
+ if (cfg_oneshot) {
+ if (cqe->res == 0 && cqe->flags == 0 && cfg_oneshot_recvs) {
+ add_recvzc_oneshot(ring, connfd, PAGE_SIZE);
+ cfg_oneshot_recvs--;
+ }
+ } else if (!(cqe->flags & IORING_CQE_F_MORE)) {
+ add_recvzc(ring, connfd);
+ }
+
+ rcqe = (struct io_uring_zcrx_cqe *)(cqe + 1);
+
+ n = cqe->res;
+ mask = (1ULL << IORING_ZCRX_AREA_SHIFT) - 1;
+ data = (char *)area_ptr + (rcqe->off & mask);
+
+ for (i = 0; i < n; i++) {
+ if (*(data + i) != payload[(received + i)])
+ error(1, 0, "payload mismatch at ", i);
+ }
+ received += n;
+
+ rqe = &rq_ring.rqes[(rq_ring.rq_tail & rq_mask)];
+ rqe->off = (rcqe->off & ~IORING_ZCRX_AREA_MASK) | area_token;
+ rqe->len = cqe->res;
+ io_uring_smp_store_release(rq_ring.ktail, ++rq_ring.rq_tail);
+}
+
+static void server_loop(struct io_uring *ring)
+{
+ struct io_uring_cqe *cqe;
+ unsigned int count = 0;
+ unsigned int head;
+ int i, ret;
+
+ io_uring_submit_and_wait(ring, 1);
+
+ io_uring_for_each_cqe(ring, head, cqe) {
+ if (cqe->user_data == 1)
+ process_accept(ring, cqe);
+ else if (cqe->user_data == 2)
+ process_recvzc(ring, cqe);
+ else
+ error(1, 0, "unknown cqe");
+ count++;
+ }
+ io_uring_cq_advance(ring, count);
+}
+
+static void run_server(void)
+{
+ unsigned int flags = 0;
+ struct io_uring ring;
+ int fd, enable, ret;
+ uint64_t tstop;
+
+ fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (fd == -1)
+ error(1, 0, "socket()");
+
+ enable = 1;
+ ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(int));
+ if (ret < 0)
+ error(1, 0, "setsockopt(SO_REUSEADDR)");
+
+ ret = bind(fd, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr));
+ if (ret < 0)
+ error(1, 0, "bind()");
+
+ if (listen(fd, 1024) < 0)
+ error(1, 0, "listen()");
+
+ flags |= IORING_SETUP_COOP_TASKRUN;
+ flags |= IORING_SETUP_SINGLE_ISSUER;
+ flags |= IORING_SETUP_DEFER_TASKRUN;
+ flags |= IORING_SETUP_SUBMIT_ALL;
+ flags |= IORING_SETUP_CQE32;
+
+ io_uring_queue_init(512, &ring, flags);
+
+ setup_zcrx(&ring);
+
+ add_accept(&ring, fd);
+
+ tstop = gettimeofday_ms() + 5000;
+ while (!stop && gettimeofday_ms() < tstop)
+ server_loop(&ring);
+
+ if (!stop)
+ error(1, 0, "test failed\n");
+}
+
+static void run_client(void)
+{
+ ssize_t to_send = cfg_send_size;
+ ssize_t sent = 0;
+ ssize_t chunk, res;
+ int fd;
+
+ fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (fd == -1)
+ error(1, 0, "socket()");
+
+ if (connect(fd, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr)))
+ error(1, 0, "connect()");
+
+ while (to_send) {
+ void *src = &payload[sent];
+
+ chunk = min_t(ssize_t, cfg_payload_len, to_send);
+ res = send(fd, src, chunk, 0);
+ if (res < 0)
+ error(1, 0, "send(): %d", sent);
+ sent += res;
+ to_send -= res;
+ }
+
+ close(fd);
+}
+
+static void usage(const char *filepath)
+{
+ error(1, 0, "Usage: %s (-4|-6) (-s|-c) -h<server_ip> -p<port> "
+ "-l<payload_size> -i<ifname> -q<rxq_id>", filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ const int max_payload_len = sizeof(payload) -
+ sizeof(struct ipv6hdr) -
+ sizeof(struct tcphdr) -
+ 40 /* max tcp options */;
+ struct sockaddr_in6 *addr6 = (void *) &cfg_addr;
+ char *addr = NULL;
+ int ret;
+ int c;
+
+ if (argc <= 1)
+ usage(argv[0]);
+ cfg_payload_len = max_payload_len;
+
+ while ((c = getopt(argc, argv, "sch:p:l:i:q:o:z:")) != -1) {
+ switch (c) {
+ case 's':
+ if (cfg_client)
+ error(1, 0, "Pass one of -s or -c");
+ cfg_server = 1;
+ break;
+ case 'c':
+ if (cfg_server)
+ error(1, 0, "Pass one of -s or -c");
+ cfg_client = 1;
+ break;
+ case 'h':
+ addr = optarg;
+ break;
+ case 'p':
+ cfg_port = strtoul(optarg, NULL, 0);
+ break;
+ case 'l':
+ cfg_payload_len = strtoul(optarg, NULL, 0);
+ break;
+ case 'i':
+ cfg_ifname = optarg;
+ break;
+ case 'q':
+ cfg_queue_id = strtoul(optarg, NULL, 0);
+ break;
+ case 'o': {
+ cfg_oneshot = true;
+ cfg_oneshot_recvs = strtoul(optarg, NULL, 0);
+ break;
+ }
+ case 'z':
+ cfg_send_size = strtoul(optarg, NULL, 0);
+ break;
+ }
+ }
+
+ if (cfg_server && addr)
+ error(1, 0, "Receiver cannot have -h specified");
+
+ memset(addr6, 0, sizeof(*addr6));
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_port = htons(cfg_port);
+ addr6->sin6_addr = in6addr_any;
+ if (addr) {
+ ret = parse_address(addr, cfg_port, addr6);
+ if (ret)
+ error(1, 0, "receiver address parse error: %s", addr);
+ }
+
+ if (cfg_payload_len > max_payload_len)
+ error(1, 0, "-l: payload exceeds max (%d)", max_payload_len);
+}
+
+int main(int argc, char **argv)
+{
+ const char *cfg_test = argv[argc - 1];
+ int i;
+
+ parse_opts(argc, argv);
+
+ for (i = 0; i < SEND_SIZE; i++)
+ payload[i] = 'a' + (i % 26);
+
+ if (cfg_server)
+ run_server();
+ else if (cfg_client)
+ run_client();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
new file mode 100755
index 000000000000..d301d9b356f7
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
@@ -0,0 +1,87 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import re
+from os import path
+from lib.py import ksft_run, ksft_exit
+from lib.py import NetDrvEpEnv
+from lib.py import bkg, cmd, ethtool, wait_port_listen
+
+
+def _get_rx_ring_entries(cfg):
+ output = ethtool(f"-g {cfg.ifname}", host=cfg.remote).stdout
+ values = re.findall(r'RX:\s+(\d+)', output)
+ return int(values[1])
+
+
+def _get_combined_channels(cfg):
+ output = ethtool(f"-l {cfg.ifname}", host=cfg.remote).stdout
+ values = re.findall(r'Combined:\s+(\d+)', output)
+ return int(values[1])
+
+
+def _set_flow_rule(cfg, chan):
+ output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port 9999 action {chan}", host=cfg.remote).stdout
+ values = re.search(r'ID (\d+)', output).group(1)
+ return int(values)
+
+
+def test_zcrx(cfg) -> None:
+ cfg.require_v6()
+
+ combined_chans = _get_combined_channels(cfg)
+ if combined_chans < 2:
+ raise KsftSkipEx('at least 2 combined channels required')
+ rx_ring = _get_rx_ring_entries(cfg)
+
+ try:
+ ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote)
+ ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote)
+ flow_rule_id = _set_flow_rule(cfg, combined_chans - 1)
+
+ rx_cmd = f"{cfg.bin_remote} -s -p 9999 -i {cfg.ifname} -q {combined_chans - 1}"
+ tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_v6} -p 9999 -l 12840"
+ with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
+ wait_port_listen(9999, proto="tcp", host=cfg.remote)
+ cmd(tx_cmd)
+ finally:
+ ethtool(f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote)
+ ethtool(f"-X {cfg.ifname} default", host=cfg.remote)
+ ethtool(f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote)
+
+
+def test_zcrx_oneshot(cfg) -> None:
+ cfg.require_v6()
+
+ combined_chans = _get_combined_channels(cfg)
+ if combined_chans < 2:
+ raise KsftSkipEx('at least 2 combined channels required')
+ rx_ring = _get_rx_ring_entries(cfg)
+
+ try:
+ ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote)
+ ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote)
+ flow_rule_id = _set_flow_rule(cfg, combined_chans - 1)
+
+ rx_cmd = f"{cfg.bin_remote} -s -p 9999 -i {cfg.ifname} -q {combined_chans - 1} -o 4"
+ tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_v6} -p 9999 -l 4096 -z 16384"
+ with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
+ wait_port_listen(9999, proto="tcp", host=cfg.remote)
+ cmd(tx_cmd)
+ finally:
+ ethtool(f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote)
+ ethtool(f"-X {cfg.ifname} default", host=cfg.remote)
+ ethtool(f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote)
+
+
+def main() -> None:
+ with NetDrvEpEnv(__file__) as cfg:
+ cfg.bin_local = path.abspath(path.dirname(__file__) + "/../../../drivers/net/hw/iou-zcrx")
+ cfg.bin_remote = cfg.remote.deploy(cfg.bin_local)
+
+ ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/irq.py b/tools/testing/selftests/drivers/net/hw/irq.py
new file mode 100755
index 000000000000..42ab98370245
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/irq.py
@@ -0,0 +1,99 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+from lib.py import ksft_run, ksft_exit
+from lib.py import ksft_ge, ksft_eq
+from lib.py import KsftSkipEx
+from lib.py import ksft_disruptive
+from lib.py import EthtoolFamily, NetdevFamily
+from lib.py import NetDrvEnv
+from lib.py import cmd, ip, defer
+
+
+def read_affinity(irq) -> str:
+ with open(f'/proc/irq/{irq}/smp_affinity', 'r') as fp:
+ return fp.read().lstrip("0,").strip()
+
+
+def write_affinity(irq, what) -> str:
+ if what != read_affinity(irq):
+ with open(f'/proc/irq/{irq}/smp_affinity', 'w') as fp:
+ fp.write(what)
+
+
+def check_irqs_reported(cfg) -> None:
+ """ Check that device reports IRQs for NAPI instances """
+ napis = cfg.netnl.napi_get({"ifindex": cfg.ifindex}, dump=True)
+ irqs = sum(['irq' in x for x in napis])
+
+ ksft_ge(irqs, 1)
+ ksft_eq(irqs, len(napis))
+
+
+def _check_reconfig(cfg, reconfig_cb) -> None:
+ napis = cfg.netnl.napi_get({"ifindex": cfg.ifindex}, dump=True)
+ for n in reversed(napis):
+ if 'irq' in n:
+ break
+ else:
+ raise KsftSkipEx(f"Device has no NAPI with IRQ attribute (#napis: {len(napis)}")
+
+ old = read_affinity(n['irq'])
+ # pick an affinity that's not the current one
+ new = "3" if old != "3" else "5"
+ write_affinity(n['irq'], new)
+ defer(write_affinity, n['irq'], old)
+
+ reconfig_cb(cfg)
+
+ ksft_eq(read_affinity(n['irq']), new, comment="IRQ affinity changed after reconfig")
+
+
+def check_reconfig_queues(cfg) -> None:
+ def reconfig(cfg) -> None:
+ channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}})
+ if channels['combined-count'] == 0:
+ rx_type = 'rx'
+ else:
+ rx_type = 'combined'
+ cur_queue_cnt = channels[f'{rx_type}-count']
+ max_queue_cnt = channels[f'{rx_type}-max']
+
+ cmd(f"ethtool -L {cfg.ifname} {rx_type} 1")
+ cmd(f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}")
+ cmd(f"ethtool -L {cfg.ifname} {rx_type} {cur_queue_cnt}")
+
+ _check_reconfig(cfg, reconfig)
+
+
+def check_reconfig_xdp(cfg) -> None:
+ def reconfig(cfg) -> None:
+ ip(f"link set dev %s xdp obj %s sec xdp" %
+ (cfg.ifname, cfg.rpath("xdp_dummy.bpf.o")))
+ ip(f"link set dev %s xdp off" % cfg.ifname)
+
+ _check_reconfig(cfg, reconfig)
+
+
+@ksft_disruptive
+def check_down(cfg) -> None:
+ def reconfig(cfg) -> None:
+ ip("link set dev %s down" % cfg.ifname)
+ ip("link set dev %s up" % cfg.ifname)
+
+ _check_reconfig(cfg, reconfig)
+
+
+def main() -> None:
+ with NetDrvEnv(__file__, nsim_test=False) as cfg:
+ cfg.ethnl = EthtoolFamily()
+ cfg.netnl = NetdevFamily()
+
+ ksft_run([check_irqs_reported, check_reconfig_queues,
+ check_reconfig_xdp, check_down],
+ args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
index 19a6969643f4..2bf14ac2b8c6 100644
--- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c
+++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
@@ -50,7 +50,6 @@
#include <linux/memfd.h>
#include <linux/dma-buf.h>
#include <linux/udmabuf.h>
-#include <libmnl/libmnl.h>
#include <linux/types.h>
#include <linux/netlink.h>
#include <linux/genetlink.h>
diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py
index ca8a7edff3dd..ca60ae325c22 100755
--- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py
+++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py
@@ -4,7 +4,8 @@
import datetime
import random
import re
-from lib.py import ksft_run, ksft_pr, ksft_exit, ksft_eq, ksft_ne, ksft_ge, ksft_lt, ksft_true
+from lib.py import ksft_run, ksft_pr, ksft_exit
+from lib.py import ksft_eq, ksft_ne, ksft_ge, ksft_in, ksft_lt, ksft_true, ksft_raises
from lib.py import NetDrvEpEnv
from lib.py import EthtoolFamily, NetdevFamily
from lib.py import KsftSkipEx, KsftFailEx
@@ -58,6 +59,14 @@ def require_ntuple(cfg):
raise KsftSkipEx("Ntuple filters not enabled on the device: " + str(features["ntuple-filters"]))
+def require_context_cnt(cfg, need_cnt):
+ # There's no good API to get the context count, so the tests
+ # which try to add a lot opportunisitically set the count they
+ # discovered. Careful with test ordering!
+ if need_cnt and cfg.context_cnt and cfg.context_cnt < need_cnt:
+ raise KsftSkipEx(f"Test requires at least {need_cnt} contexts, but device only has {cfg.context_cnt}")
+
+
# Get Rx packet counts for all queues, as a simple list of integers
# if @prev is specified the prev counts will be subtracted
def _get_rx_cnts(cfg, prev=None):
@@ -252,6 +261,7 @@ def test_rss_queue_reconfigure(cfg, main_ctx=True):
try:
# this targets queue 4, which doesn't exist
ntuple2 = ethtool_create(cfg, "-N", flow)
+ defer(ethtool, f"-N {cfg.ifname} delete {ntuple2}")
except CmdExitFailure:
pass
else:
@@ -259,7 +269,13 @@ def test_rss_queue_reconfigure(cfg, main_ctx=True):
# change the table to target queues 0 and 2
ethtool(f"-X {cfg.ifname} {ctx_ref} weight 1 0 1 0")
# ntuple rule therefore targets queues 1 and 3
- ntuple2 = ethtool_create(cfg, "-N", flow)
+ try:
+ ntuple2 = ethtool_create(cfg, "-N", flow)
+ except CmdExitFailure:
+ ksft_pr("Driver does not support rss + queue offset")
+ return
+
+ defer(ethtool, f"-N {cfg.ifname} delete {ntuple2}")
# should replace existing filter
ksft_eq(ntuple, ntuple2)
_send_traffic_check(cfg, port, ctx_ref, { 'target': (1, 3),
@@ -376,7 +392,7 @@ def test_rss_context_dump(cfg):
# Sanity-check the results
for data in ctxs:
- ksft_ne(set(data['indir']), {0}, "indir table is all zero")
+ ksft_ne(set(data.get('indir', [1])), {0}, "indir table is all zero")
ksft_ne(set(data.get('hkey', [1])), {0}, "key is all zero")
# More specific checks
@@ -449,6 +465,8 @@ def test_rss_context(cfg, ctx_cnt=1, create_with_cfg=None):
raise
ksft_pr(f"Failed to create context {i + 1}, trying to test what we got")
ctx_cnt = i
+ if cfg.context_cnt is None:
+ cfg.context_cnt = ctx_cnt
break
_rss_key_check(cfg, context=ctx_id)
@@ -504,8 +522,7 @@ def test_rss_context_out_of_order(cfg, ctx_cnt=4):
"""
require_ntuple(cfg)
-
- requested_ctx_cnt = ctx_cnt
+ require_context_cnt(cfg, 4)
# Try to allocate more queues when necessary
qcnt = len(_get_rx_cnts(cfg))
@@ -570,9 +587,6 @@ def test_rss_context_out_of_order(cfg, ctx_cnt=4):
remove_ctx(-1)
check_traffic()
- if requested_ctx_cnt != ctx_cnt:
- raise KsftSkipEx(f"Tested only {ctx_cnt} contexts, wanted {requested_ctx_cnt}")
-
def test_rss_context_overlap(cfg, other_ctx=0):
"""
@@ -581,6 +595,8 @@ def test_rss_context_overlap(cfg, other_ctx=0):
"""
require_ntuple(cfg)
+ if other_ctx:
+ require_context_cnt(cfg, 2)
queue_cnt = len(_get_rx_cnts(cfg))
if queue_cnt < 4:
@@ -642,6 +658,29 @@ def test_rss_context_overlap2(cfg):
test_rss_context_overlap(cfg, True)
+def test_flow_add_context_missing(cfg):
+ """
+ Test that we are not allowed to add a rule pointing to an RSS context
+ which was never created.
+ """
+
+ require_ntuple(cfg)
+
+ # Find a context which doesn't exist
+ for ctx_id in range(1, 100):
+ try:
+ get_rss(cfg, context=ctx_id)
+ except CmdExitFailure:
+ break
+
+ with ksft_raises(CmdExitFailure) as cm:
+ flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port 1234 context {ctx_id}"
+ ntuple_id = ethtool_create(cfg, "-N", flow)
+ ethtool(f"-N {cfg.ifname} delete {ntuple_id}")
+ if cm.exception:
+ ksft_in('Invalid argument', cm.exception.cmd.stderr)
+
+
def test_delete_rss_context_busy(cfg):
"""
Test that deletion returns -EBUSY when an rss context is being used
@@ -710,6 +749,7 @@ def test_rss_ntuple_addition(cfg):
def main() -> None:
with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+ cfg.context_cnt = None
cfg.ethnl = EthtoolFamily()
cfg.netdevnl = NetdevFamily()
@@ -719,6 +759,7 @@ def main() -> None:
test_rss_context_dump, test_rss_context_queue_reconfigure,
test_rss_context_overlap, test_rss_context_overlap2,
test_rss_context_out_of_order, test_rss_context4_create_with_cfg,
+ test_flow_add_context_missing,
test_delete_rss_context_busy, test_rss_ntuple_addition],
args=(cfg, ))
ksft_exit()
diff --git a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
new file mode 100755
index 000000000000..53bb08cc29ec
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
@@ -0,0 +1,87 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import multiprocessing
+import socket
+from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, cmd, fd_read_timeout
+from lib.py import NetDrvEpEnv
+from lib.py import EthtoolFamily, NetdevFamily
+from lib.py import KsftSkipEx, KsftFailEx
+from lib.py import rand_port
+
+
+def traffic(cfg, local_port, remote_port, ipver):
+ af_inet = socket.AF_INET if ipver == "4" else socket.AF_INET6
+ sock = socket.socket(af_inet, socket.SOCK_DGRAM)
+ sock.bind(("", local_port))
+ sock.connect((cfg.remote_addr_v[ipver], remote_port))
+ tgt = f"{ipver}:[{cfg.addr_v[ipver]}]:{local_port},sourceport={remote_port}"
+ cmd("echo a | socat - UDP" + tgt, host=cfg.remote)
+ fd_read_timeout(sock.fileno(), 5)
+ return sock.getsockopt(socket.SOL_SOCKET, socket.SO_INCOMING_CPU)
+
+
+def test_rss_input_xfrm(cfg, ipver):
+ """
+ Test symmetric input_xfrm.
+ If symmetric RSS hash is configured, send traffic twice, swapping the
+ src/dst UDP ports, and verify that the same queue is receiving the traffic
+ in both cases (IPs are constant).
+ """
+
+ if multiprocessing.cpu_count() < 2:
+ raise KsftSkipEx("Need at least two CPUs to test symmetric RSS hash")
+
+ input_xfrm = cfg.ethnl.rss_get(
+ {'header': {'dev-name': cfg.ifname}}).get('input_xfrm')
+
+ # Check for symmetric xor/or-xor
+ if not input_xfrm or (input_xfrm != 1 and input_xfrm != 2):
+ raise KsftSkipEx("Symmetric RSS hash not requested")
+
+ cpus = set()
+ successful = 0
+ for _ in range(100):
+ try:
+ port1 = rand_port(socket.SOCK_DGRAM)
+ port2 = rand_port(socket.SOCK_DGRAM)
+ cpu1 = traffic(cfg, port1, port2, ipver)
+ cpu2 = traffic(cfg, port2, port1, ipver)
+ cpus.update([cpu1, cpu2])
+ ksft_eq(
+ cpu1, cpu2, comment=f"Received traffic on different cpus with ports ({port1 = }, {port2 = }) while symmetric hash is configured")
+
+ successful += 1
+ if successful == 10:
+ break
+ except:
+ continue
+ else:
+ raise KsftFailEx("Failed to run traffic")
+
+ ksft_ge(len(cpus), 2,
+ comment=f"Received traffic on less than two cpus {cpus = }")
+
+
+def test_rss_input_xfrm_ipv4(cfg):
+ cfg.require_ipver("4")
+ test_rss_input_xfrm(cfg, "4")
+
+
+def test_rss_input_xfrm_ipv6(cfg):
+ cfg.require_ipver("6")
+ test_rss_input_xfrm(cfg, "6")
+
+
+def main() -> None:
+ with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+ cfg.ethnl = EthtoolFamily()
+ cfg.netdevnl = NetdevFamily()
+
+ ksft_run([test_rss_input_xfrm_ipv4, test_rss_input_xfrm_ipv6],
+ args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/tso.py b/tools/testing/selftests/drivers/net/hw/tso.py
new file mode 100755
index 000000000000..e1ecb92f79d9
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/tso.py
@@ -0,0 +1,241 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""Run the tools/testing/selftests/net/csum testsuite."""
+
+import fcntl
+import socket
+import struct
+import termios
+import time
+
+from lib.py import ksft_pr, ksft_run, ksft_exit, KsftSkipEx, KsftXfailEx
+from lib.py import ksft_eq, ksft_ge, ksft_lt
+from lib.py import EthtoolFamily, NetdevFamily, NetDrvEpEnv
+from lib.py import bkg, cmd, defer, ethtool, ip, rand_port, wait_port_listen
+
+
+def sock_wait_drain(sock, max_wait=1000):
+ """Wait for all pending write data on the socket to get ACKed."""
+ for _ in range(max_wait):
+ one = b'\0' * 4
+ outq = fcntl.ioctl(sock.fileno(), termios.TIOCOUTQ, one)
+ outq = struct.unpack("I", outq)[0]
+ if outq == 0:
+ break
+ time.sleep(0.01)
+ ksft_eq(outq, 0)
+
+
+def tcp_sock_get_retrans(sock):
+ """Get the number of retransmissions for the TCP socket."""
+ info = sock.getsockopt(socket.SOL_TCP, socket.TCP_INFO, 512)
+ return struct.unpack("I", info[100:104])[0]
+
+
+def run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso):
+ cfg.require_cmd("socat", remote=True)
+
+ port = rand_port()
+ listen_cmd = f"socat -{ipver} -t 2 -u TCP-LISTEN:{port},reuseport /dev/null,ignoreeof"
+
+ with bkg(listen_cmd, host=cfg.remote) as nc:
+ wait_port_listen(port, host=cfg.remote)
+
+ if ipver == "4":
+ sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ sock.connect((remote_v4, port))
+ else:
+ sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
+ sock.connect((remote_v6, port))
+
+ # Small send to make sure the connection is working.
+ sock.send("ping".encode())
+ sock_wait_drain(sock)
+
+ # Send 4MB of data, record the LSO packet count.
+ qstat_old = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+ buf = b"0" * 1024 * 1024 * 4
+ sock.send(buf)
+ sock_wait_drain(sock)
+ qstat_new = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+
+ # No math behind the 10 here, but try to catch cases where
+ # TCP falls back to non-LSO.
+ ksft_lt(tcp_sock_get_retrans(sock), 10)
+ sock.close()
+
+ # Check that at least 90% of the data was sent as LSO packets.
+ # System noise may cause false negatives. Also header overheads
+ # will add up to 5% of extra packes... The check is best effort.
+ total_lso_wire = len(buf) * 0.90 // cfg.dev["mtu"]
+ total_lso_super = len(buf) * 0.90 // cfg.dev["tso_max_size"]
+ if should_lso:
+ if cfg.have_stat_super_count:
+ ksft_ge(qstat_new['tx-hw-gso-packets'] -
+ qstat_old['tx-hw-gso-packets'],
+ total_lso_super,
+ comment="Number of LSO super-packets with LSO enabled")
+ if cfg.have_stat_wire_count:
+ ksft_ge(qstat_new['tx-hw-gso-wire-packets'] -
+ qstat_old['tx-hw-gso-wire-packets'],
+ total_lso_wire,
+ comment="Number of LSO wire-packets with LSO enabled")
+ else:
+ if cfg.have_stat_super_count:
+ ksft_lt(qstat_new['tx-hw-gso-packets'] -
+ qstat_old['tx-hw-gso-packets'],
+ 15, comment="Number of LSO super-packets with LSO disabled")
+ if cfg.have_stat_wire_count:
+ ksft_lt(qstat_new['tx-hw-gso-wire-packets'] -
+ qstat_old['tx-hw-gso-wire-packets'],
+ 500, comment="Number of LSO wire-packets with LSO disabled")
+
+
+def build_tunnel(cfg, outer_ipver, tun_info):
+ local_v4 = NetDrvEpEnv.nsim_v4_pfx + "1"
+ local_v6 = NetDrvEpEnv.nsim_v6_pfx + "1"
+ remote_v4 = NetDrvEpEnv.nsim_v4_pfx + "2"
+ remote_v6 = NetDrvEpEnv.nsim_v6_pfx + "2"
+
+ local_addr = cfg.addr_v[outer_ipver]
+ remote_addr = cfg.remote_addr_v[outer_ipver]
+
+ tun_type = tun_info[0]
+ tun_arg = tun_info[2]
+ ip(f"link add {tun_type}-ksft type {tun_type} {tun_arg} local {local_addr} remote {remote_addr} dev {cfg.ifname}")
+ defer(ip, f"link del {tun_type}-ksft")
+ ip(f"link set dev {tun_type}-ksft up")
+ ip(f"addr add {local_v4}/24 dev {tun_type}-ksft")
+ ip(f"addr add {local_v6}/64 dev {tun_type}-ksft")
+
+ ip(f"link add {tun_type}-ksft type {tun_type} {tun_arg} local {remote_addr} remote {local_addr} dev {cfg.remote_ifname}",
+ host=cfg.remote)
+ defer(ip, f"link del {tun_type}-ksft", host=cfg.remote)
+ ip(f"link set dev {tun_type}-ksft up", host=cfg.remote)
+ ip(f"addr add {remote_v4}/24 dev {tun_type}-ksft", host=cfg.remote)
+ ip(f"addr add {remote_v6}/64 dev {tun_type}-ksft", host=cfg.remote)
+
+ return remote_v4, remote_v6
+
+
+def test_builder(name, cfg, outer_ipver, feature, tun=None, inner_ipver=None):
+ """Construct specific tests from the common template."""
+ def f(cfg):
+ cfg.require_ipver(outer_ipver)
+
+ if not cfg.have_stat_super_count and \
+ not cfg.have_stat_wire_count:
+ raise KsftSkipEx(f"Device does not support LSO queue stats")
+
+ ipver = outer_ipver
+ if tun:
+ remote_v4, remote_v6 = build_tunnel(cfg, ipver, tun)
+ ipver = inner_ipver
+ else:
+ remote_v4 = cfg.remote_addr_v["4"]
+ remote_v6 = cfg.remote_addr_v["6"]
+
+ tun_partial = tun and tun[1]
+ # Tunnel which can silently fall back to gso-partial
+ has_gso_partial = tun and 'tx-gso-partial' in cfg.features
+
+ # For TSO4 via partial we need mangleid
+ if ipver == "4" and feature in cfg.partial_features:
+ ksft_pr("Testing with mangleid enabled")
+ if 'tx-tcp-mangleid-segmentation' not in cfg.features:
+ ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation on")
+ defer(ethtool, f"-K {cfg.ifname} tx-tcp-mangleid-segmentation off")
+
+ # First test without the feature enabled.
+ ethtool(f"-K {cfg.ifname} {feature} off")
+ if has_gso_partial:
+ ethtool(f"-K {cfg.ifname} tx-gso-partial off")
+ run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=False)
+
+ # Now test with the feature enabled.
+ # For compatible tunnels only - just GSO partial, not specific feature.
+ if has_gso_partial:
+ ethtool(f"-K {cfg.ifname} tx-gso-partial on")
+ run_one_stream(cfg, ipver, remote_v4, remote_v6,
+ should_lso=tun_partial)
+
+ # Full feature enabled.
+ if feature in cfg.features:
+ ethtool(f"-K {cfg.ifname} {feature} on")
+ run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=True)
+ else:
+ raise KsftXfailEx(f"Device does not support {feature}")
+
+ f.__name__ = name + ((outer_ipver + "_") if tun else "") + "ipv" + inner_ipver
+ return f
+
+
+def query_nic_features(cfg) -> None:
+ """Query and cache the NIC features."""
+ cfg.have_stat_super_count = False
+ cfg.have_stat_wire_count = False
+
+ cfg.features = set()
+ features = cfg.ethnl.features_get({"header": {"dev-index": cfg.ifindex}})
+ for f in features["active"]["bits"]["bit"]:
+ cfg.features.add(f["name"])
+
+ # Check which features are supported via GSO partial
+ cfg.partial_features = set()
+ if 'tx-gso-partial' in cfg.features:
+ ethtool(f"-K {cfg.ifname} tx-gso-partial off")
+
+ no_partial = set()
+ features = cfg.ethnl.features_get({"header": {"dev-index": cfg.ifindex}})
+ for f in features["active"]["bits"]["bit"]:
+ no_partial.add(f["name"])
+ cfg.partial_features = cfg.features - no_partial
+ ethtool(f"-K {cfg.ifname} tx-gso-partial on")
+
+ stats = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)
+ if stats:
+ if 'tx-hw-gso-packets' in stats[0]:
+ ksft_pr("Detected qstat for LSO super-packets")
+ cfg.have_stat_super_count = True
+ if 'tx-hw-gso-wire-packets' in stats[0]:
+ ksft_pr("Detected qstat for LSO wire-packets")
+ cfg.have_stat_wire_count = True
+
+
+def main() -> None:
+ with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+ cfg.ethnl = EthtoolFamily()
+ cfg.netnl = NetdevFamily()
+
+ query_nic_features(cfg)
+
+ test_info = (
+ # name, v4/v6 ethtool_feature tun:(type, partial, args)
+ ("", "4", "tx-tcp-segmentation", None),
+ ("", "6", "tx-tcp6-segmentation", None),
+ ("vxlan", "", "tx-udp_tnl-segmentation", ("vxlan", True, "id 100 dstport 4789 noudpcsum")),
+ ("vxlan_csum", "", "tx-udp_tnl-csum-segmentation", ("vxlan", False, "id 100 dstport 4789 udpcsum")),
+ ("gre", "4", "tx-gre-segmentation", ("ipgre", False, "")),
+ ("gre", "6", "tx-gre-segmentation", ("ip6gre", False, "")),
+ )
+
+ cases = []
+ for outer_ipver in ["4", "6"]:
+ for info in test_info:
+ # Skip if test which only works for a specific IP version
+ if info[1] and outer_ipver != info[1]:
+ continue
+
+ cases.append(test_builder(info[0], cfg, outer_ipver, info[2],
+ tun=info[3], inner_ipver="4"))
+ if info[3]:
+ cases.append(test_builder(info[0], cfg, outer_ipver, info[2],
+ tun=info[3], inner_ipver="6"))
+
+ ksft_run(cases=cases, args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/xdp_dummy.bpf.c b/tools/testing/selftests/drivers/net/hw/xdp_dummy.bpf.c
new file mode 100644
index 000000000000..d988b2e0cee8
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/xdp_dummy.bpf.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define KBUILD_MODNAME "xdp_dummy"
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+SEC("xdp")
+int xdp_dummy_prog(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py
index 987e452d3a45..fd4d674e6c72 100644
--- a/tools/testing/selftests/drivers/net/lib/py/env.py
+++ b/tools/testing/selftests/drivers/net/lib/py/env.py
@@ -10,42 +10,68 @@ from lib.py import NetNS, NetdevSimDev
from .remote import Remote
-def _load_env_file(src_path):
- env = os.environ.copy()
+class NetDrvEnvBase:
+ """
+ Base class for a NIC / host envirnoments
+ """
+ def __init__(self, src_path):
+ self.src_path = src_path
+ self.env = self._load_env_file()
- src_dir = Path(src_path).parent.resolve()
- if not (src_dir / "net.config").exists():
+ def rpath(self, path):
+ """
+ Get an absolute path to a file based on a path relative to the directory
+ containing the test which constructed env.
+
+ For example, if the test.py is in the same directory as
+ a binary (built from helper.c), the test can use env.rpath("helper")
+ to get the absolute path to the binary
+ """
+ src_dir = Path(self.src_path).parent.resolve()
+ return (src_dir / path).as_posix()
+
+ def _load_env_file(self):
+ env = os.environ.copy()
+
+ src_dir = Path(self.src_path).parent.resolve()
+ if not (src_dir / "net.config").exists():
+ return ksft_setup(env)
+
+ with open((src_dir / "net.config").as_posix(), 'r') as fp:
+ for line in fp.readlines():
+ full_file = line
+ # Strip comments
+ pos = line.find("#")
+ if pos >= 0:
+ line = line[:pos]
+ line = line.strip()
+ if not line:
+ continue
+ pair = line.split('=', maxsplit=1)
+ if len(pair) != 2:
+ raise Exception("Can't parse configuration line:", full_file)
+ env[pair[0]] = pair[1]
return ksft_setup(env)
- with open((src_dir / "net.config").as_posix(), 'r') as fp:
- for line in fp.readlines():
- full_file = line
- # Strip comments
- pos = line.find("#")
- if pos >= 0:
- line = line[:pos]
- line = line.strip()
- if not line:
- continue
- pair = line.split('=', maxsplit=1)
- if len(pair) != 2:
- raise Exception("Can't parse configuration line:", full_file)
- env[pair[0]] = pair[1]
- return ksft_setup(env)
-
-
-class NetDrvEnv:
+
+class NetDrvEnv(NetDrvEnvBase):
"""
Class for a single NIC / host env, with no remote end
"""
- def __init__(self, src_path, **kwargs):
- self._ns = None
+ def __init__(self, src_path, nsim_test=None, **kwargs):
+ super().__init__(src_path)
- self.env = _load_env_file(src_path)
+ self._ns = None
if 'NETIF' in self.env:
- self.dev = ip("link show dev " + self.env['NETIF'], json=True)[0]
+ if nsim_test is True:
+ raise KsftXfailEx("Test only works on netdevsim")
+
+ self.dev = ip("-d link show dev " + self.env['NETIF'], json=True)[0]
else:
+ if nsim_test is False:
+ raise KsftXfailEx("Test does not work on netdevsim")
+
self._ns = NetdevSimDev(**kwargs)
self.dev = self._ns.nsims[0].dev
self.ifname = self.dev['ifname']
@@ -68,7 +94,7 @@ class NetDrvEnv:
self._ns = None
-class NetDrvEpEnv:
+class NetDrvEpEnv(NetDrvEnvBase):
"""
Class for an environment with a local device and "remote endpoint"
which can be used to send traffic in.
@@ -82,8 +108,7 @@ class NetDrvEpEnv:
nsim_v6_pfx = "2001:db8::"
def __init__(self, src_path, nsim_test=None):
-
- self.env = _load_env_file(src_path)
+ super().__init__(src_path)
self._stats_settle_time = None
@@ -94,17 +119,20 @@ class NetDrvEpEnv:
self._ns = None
self._ns_peer = None
+ self.addr_v = { "4": None, "6": None }
+ self.remote_addr_v = { "4": None, "6": None }
+
if "NETIF" in self.env:
if nsim_test is True:
raise KsftXfailEx("Test only works on netdevsim")
self._check_env()
- self.dev = ip("link show dev " + self.env['NETIF'], json=True)[0]
+ self.dev = ip("-d link show dev " + self.env['NETIF'], json=True)[0]
- self.v4 = self.env.get("LOCAL_V4")
- self.v6 = self.env.get("LOCAL_V6")
- self.remote_v4 = self.env.get("REMOTE_V4")
- self.remote_v6 = self.env.get("REMOTE_V6")
+ self.addr_v["4"] = self.env.get("LOCAL_V4")
+ self.addr_v["6"] = self.env.get("LOCAL_V6")
+ self.remote_addr_v["4"] = self.env.get("REMOTE_V4")
+ self.remote_addr_v["6"] = self.env.get("REMOTE_V6")
kind = self.env["REMOTE_TYPE"]
args = self.env["REMOTE_ARGS"]
else:
@@ -115,26 +143,29 @@ class NetDrvEpEnv:
self.dev = self._ns.nsims[0].dev
- self.v4 = self.nsim_v4_pfx + "1"
- self.v6 = self.nsim_v6_pfx + "1"
- self.remote_v4 = self.nsim_v4_pfx + "2"
- self.remote_v6 = self.nsim_v6_pfx + "2"
+ self.addr_v["4"] = self.nsim_v4_pfx + "1"
+ self.addr_v["6"] = self.nsim_v6_pfx + "1"
+ self.remote_addr_v["4"] = self.nsim_v4_pfx + "2"
+ self.remote_addr_v["6"] = self.nsim_v6_pfx + "2"
kind = "netns"
args = self._netns.name
self.remote = Remote(kind, args, src_path)
- self.addr = self.v6 if self.v6 else self.v4
- self.remote_addr = self.remote_v6 if self.remote_v6 else self.remote_v4
+ self.addr_ipver = "6" if self.addr_v["6"] else "4"
+ self.addr = self.addr_v[self.addr_ipver]
+ self.remote_addr = self.remote_addr_v[self.addr_ipver]
- self.addr_ipver = "6" if self.v6 else "4"
# Bracketed addresses, some commands need IPv6 to be inside []
- self.baddr = f"[{self.v6}]" if self.v6 else self.v4
- self.remote_baddr = f"[{self.remote_v6}]" if self.remote_v6 else self.remote_v4
+ self.baddr = f"[{self.addr_v['6']}]" if self.addr_v["6"] else self.addr_v["4"]
+ self.remote_baddr = f"[{self.remote_addr_v['6']}]" if self.remote_addr_v["6"] else self.remote_addr_v["4"]
self.ifname = self.dev['ifname']
self.ifindex = self.dev['ifindex']
+ # resolve remote interface name
+ self.remote_ifname = self.resolve_remote_ifc()
+
self._required_cmd = {}
def create_local(self):
@@ -181,6 +212,18 @@ class NetDrvEpEnv:
raise Exception("Invalid environment, missing configuration:", missing,
"Please see tools/testing/selftests/drivers/net/README.rst")
+ def resolve_remote_ifc(self):
+ v4 = v6 = None
+ if self.remote_addr_v["4"]:
+ v4 = ip("addr show to " + self.remote_addr_v["4"], json=True, host=self.remote)
+ if self.remote_addr_v["6"]:
+ v6 = ip("addr show to " + self.remote_addr_v["6"], json=True, host=self.remote)
+ if v4 and v6 and v4[0]["ifname"] != v6[0]["ifname"]:
+ raise Exception("Can't resolve remote interface name, v4 and v6 don't match")
+ if (v4 and len(v4) > 1) or (v6 and len(v6) > 1):
+ raise Exception("Can't resolve remote interface name, multiple interfaces match")
+ return v6[0]["ifname"] if v6 else v4[0]["ifname"]
+
def __enter__(self):
return self
@@ -204,13 +247,9 @@ class NetDrvEpEnv:
del self.remote
self.remote = None
- def require_v4(self):
- if not self.v4 or not self.remote_v4:
- raise KsftSkipEx("Test requires IPv4 connectivity")
-
- def require_v6(self):
- if not self.v6 or not self.remote_v6:
- raise KsftSkipEx("Test requires IPv6 connectivity")
+ def require_ipver(self, ipver):
+ if not self.addr_v[ipver] or not self.remote_addr_v[ipver]:
+ raise KsftSkipEx(f"Test requires IPv{ipver} connectivity")
def _require_cmd(self, comm, key, host=None):
cached = self._required_cmd.get(comm, {})
diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
index 3acaba41ac7b..3c96b022954d 100644
--- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
+++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
@@ -110,6 +110,13 @@ function create_dynamic_target() {
echo 1 > "${NETCONS_PATH}"/enabled
}
+# Do not append the release to the header of the message
+function disable_release_append() {
+ echo 0 > "${NETCONS_PATH}"/enabled
+ echo 0 > "${NETCONS_PATH}"/release
+ echo 1 > "${NETCONS_PATH}"/enabled
+}
+
function cleanup() {
local NSIM_DEV_SYS_DEL="/sys/bus/netdevsim/del_device"
@@ -223,3 +230,20 @@ function check_for_dependencies() {
exit "${ksft_skip}"
fi
}
+
+function check_for_taskset() {
+ if ! which taskset > /dev/null ; then
+ echo "SKIP: taskset(1) is not available" >&2
+ exit "${ksft_skip}"
+ fi
+}
+
+# This is necessary if running multiple tests in a row
+function pkill_socat() {
+ PROCESS_NAME="socat UDP-LISTEN:6666,fork ${OUTPUT_FILE}"
+ # socat runs under timeout(1), kill it if it is still alive
+ # do not fail if socat doesn't exist anymore
+ set +e
+ pkill -f "${PROCESS_NAME}"
+ set -e
+}
diff --git a/tools/testing/selftests/drivers/net/netcons_fragmented_msg.sh b/tools/testing/selftests/drivers/net/netcons_fragmented_msg.sh
new file mode 100755
index 000000000000..4a71e01a230c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netcons_fragmented_msg.sh
@@ -0,0 +1,122 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test netconsole's message fragmentation functionality.
+#
+# When a message exceeds the maximum packet size, netconsole splits it into
+# multiple fragments for transmission. This test verifies:
+# - Correct fragmentation of large messages
+# - Proper reassembly of fragments at the receiver
+# - Preservation of userdata across fragments
+# - Behavior with and without kernel release version appending
+#
+# Author: Breno Leitao <leitao@debian.org>
+
+set -euo pipefail
+
+SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
+
+modprobe netdevsim 2> /dev/null || true
+modprobe netconsole 2> /dev/null || true
+
+# The content of kmsg will be save to the following file
+OUTPUT_FILE="/tmp/${TARGET}"
+
+# set userdata to a long value. In this case, it is "1-2-3-4...50-"
+USERDATA_VALUE=$(printf -- '%.2s-' {1..60})
+
+# Convert the header string in a regexp, so, we can remove
+# the second header as well.
+# A header looks like "13,468,514729715,-,ncfrag=0/1135;". If
+# release is appended, you might find something like:L
+# "6.13.0-04048-g4f561a87745a,13,468,514729715,-,ncfrag=0/1135;"
+function header_to_regex() {
+ # header is everything before ;
+ local HEADER="${1}"
+ REGEX=$(echo "${HEADER}" | cut -d'=' -f1)
+ echo "${REGEX}=[0-9]*\/[0-9]*;"
+}
+
+# We have two headers in the message. Remove both to get the full message,
+# and extract the full message.
+function extract_msg() {
+ local MSGFILE="${1}"
+ # Extract the header, which is the very first thing that arrives in the
+ # first list.
+ HEADER=$(sed -n '1p' "${MSGFILE}" | cut -d';' -f1)
+ HEADER_REGEX=$(header_to_regex "${HEADER}")
+
+ # Remove the two headers from the received message
+ # This will return the message without any header, similarly to what
+ # was sent.
+ sed "s/""${HEADER_REGEX}""//g" "${MSGFILE}"
+}
+
+# Validate the message, which has two messages glued together.
+# unwrap them to make sure all the characters were transmitted.
+# File will look like the following:
+# 13,468,514729715,-,ncfrag=0/1135;<message>
+# key=<part of key>-13,468,514729715,-,ncfrag=967/1135;<rest of the key>
+function validate_fragmented_result() {
+ # Discard the netconsole headers, and assemble the full message
+ RCVMSG=$(extract_msg "${1}")
+
+ # check for the main message
+ if ! echo "${RCVMSG}" | grep -q "${MSG}"; then
+ echo "Message body doesn't match." >&2
+ echo "msg received=" "${RCVMSG}" >&2
+ exit "${ksft_fail}"
+ fi
+
+ # check userdata
+ if ! echo "${RCVMSG}" | grep -q "${USERDATA_VALUE}"; then
+ echo "message userdata doesn't match" >&2
+ echo "msg received=" "${RCVMSG}" >&2
+ exit "${ksft_fail}"
+ fi
+ # test passed. hooray
+}
+
+# Check for basic system dependency and exit if not found
+check_for_dependencies
+# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5)
+echo "6 5" > /proc/sys/kernel/printk
+# Remove the namespace, interfaces and netconsole target on exit
+trap cleanup EXIT
+# Create one namespace and two interfaces
+set_network
+# Create a dynamic target for netconsole
+create_dynamic_target
+# Set userdata "key" with the "value" value
+set_user_data
+
+
+# TEST 1: Send message and userdata. They will fragment
+# =======
+MSG=$(printf -- 'MSG%.3s=' {1..150})
+
+# Listen for netconsole port inside the namespace and destination interface
+listen_port_and_save_to "${OUTPUT_FILE}" &
+# Wait for socat to start and listen to the port.
+wait_local_port_listen "${NAMESPACE}" "${PORT}" udp
+# Send the message
+echo "${MSG}: ${TARGET}" > /dev/kmsg
+# Wait until socat saves the file to disk
+busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
+# Check if the message was not corrupted
+validate_fragmented_result "${OUTPUT_FILE}"
+
+# TEST 2: Test with smaller message, and without release appended
+# =======
+MSG=$(printf -- 'FOOBAR%.3s=' {1..100})
+# Let's disable release and test again.
+disable_release_append
+
+listen_port_and_save_to "${OUTPUT_FILE}" &
+wait_local_port_listen "${NAMESPACE}" "${PORT}" udp
+echo "${MSG}: ${TARGET}" > /dev/kmsg
+busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
+validate_fragmented_result "${OUTPUT_FILE}"
+exit "${ksft_pass}"
diff --git a/tools/testing/selftests/drivers/net/netcons_sysdata.sh b/tools/testing/selftests/drivers/net/netcons_sysdata.sh
new file mode 100755
index 000000000000..a737e377bf08
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netcons_sysdata.sh
@@ -0,0 +1,242 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+
+# A test that makes sure that sysdata runtime CPU data is properly set
+# when a message is sent.
+#
+# There are 3 different tests, every time sent using a random CPU.
+# - Test #1
+# * Only enable cpu_nr sysdata feature.
+# - Test #2
+# * Keep cpu_nr sysdata feature enable and enable userdata.
+# - Test #3
+# * keep userdata enabled, and disable sysdata cpu_nr feature.
+#
+# Author: Breno Leitao <leitao@debian.org>
+
+set -euo pipefail
+
+SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
+
+# Enable the sysdata cpu_nr feature
+function set_cpu_nr() {
+ if [[ ! -f "${NETCONS_PATH}/userdata/cpu_nr_enabled" ]]
+ then
+ echo "Populate CPU configfs path not available in ${NETCONS_PATH}/userdata/cpu_nr_enabled" >&2
+ exit "${ksft_skip}"
+ fi
+
+ echo 1 > "${NETCONS_PATH}/userdata/cpu_nr_enabled"
+}
+
+# Enable the taskname to be appended to sysdata
+function set_taskname() {
+ if [[ ! -f "${NETCONS_PATH}/userdata/taskname_enabled" ]]
+ then
+ echo "Not able to enable taskname sysdata append. Configfs not available in ${NETCONS_PATH}/userdata/taskname_enabled" >&2
+ exit "${ksft_skip}"
+ fi
+
+ echo 1 > "${NETCONS_PATH}/userdata/taskname_enabled"
+}
+
+# Enable the release to be appended to sysdata
+function set_release() {
+ if [[ ! -f "${NETCONS_PATH}/userdata/release_enabled" ]]
+ then
+ echo "Not able to enable release sysdata append. Configfs not available in ${NETCONS_PATH}/userdata/release_enabled" >&2
+ exit "${ksft_skip}"
+ fi
+
+ echo 1 > "${NETCONS_PATH}/userdata/release_enabled"
+}
+
+# Disable the sysdata cpu_nr feature
+function unset_cpu_nr() {
+ echo 0 > "${NETCONS_PATH}/userdata/cpu_nr_enabled"
+}
+
+# Once called, taskname=<..> will not be appended anymore
+function unset_taskname() {
+ echo 0 > "${NETCONS_PATH}/userdata/taskname_enabled"
+}
+
+function unset_release() {
+ echo 0 > "${NETCONS_PATH}/userdata/release_enabled"
+}
+
+# Test if MSG contains sysdata
+function validate_sysdata() {
+ # OUTPUT_FILE will contain something like:
+ # 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM
+ # userdatakey=userdatavalue
+ # cpu=X
+ # taskname=<taskname>
+
+ # Echo is what this test uses to create the message. See runtest()
+ # function
+ SENDER="echo"
+
+ if [ ! -f "$OUTPUT_FILE" ]; then
+ echo "FAIL: File was not generated." >&2
+ exit "${ksft_fail}"
+ fi
+
+ if ! grep -q "${MSG}" "${OUTPUT_FILE}"; then
+ echo "FAIL: ${MSG} not found in ${OUTPUT_FILE}" >&2
+ cat "${OUTPUT_FILE}" >&2
+ exit "${ksft_fail}"
+ fi
+
+ # Check if cpu=XX exists in the file and matches the one used
+ # in taskset(1)
+ if ! grep -q "cpu=${CPU}\+" "${OUTPUT_FILE}"; then
+ echo "FAIL: 'cpu=${CPU}' not found in ${OUTPUT_FILE}" >&2
+ cat "${OUTPUT_FILE}" >&2
+ exit "${ksft_fail}"
+ fi
+
+ if ! grep -q "taskname=${SENDER}" "${OUTPUT_FILE}"; then
+ echo "FAIL: 'taskname=echo' not found in ${OUTPUT_FILE}" >&2
+ cat "${OUTPUT_FILE}" >&2
+ exit "${ksft_fail}"
+ fi
+
+ rm "${OUTPUT_FILE}"
+ pkill_socat
+}
+
+function validate_release() {
+ RELEASE=$(uname -r)
+
+ if [ ! -f "$OUTPUT_FILE" ]; then
+ echo "FAIL: File was not generated." >&2
+ exit "${ksft_fail}"
+ fi
+
+ if ! grep -q "release=${RELEASE}" "${OUTPUT_FILE}"; then
+ echo "FAIL: 'release=${RELEASE}' not found in ${OUTPUT_FILE}" >&2
+ cat "${OUTPUT_FILE}" >&2
+ exit "${ksft_fail}"
+ fi
+}
+
+# Test if MSG content exists in OUTPUT_FILE but no `cpu=` and `taskname=`
+# strings
+function validate_no_sysdata() {
+ if [ ! -f "$OUTPUT_FILE" ]; then
+ echo "FAIL: File was not generated." >&2
+ exit "${ksft_fail}"
+ fi
+
+ if ! grep -q "${MSG}" "${OUTPUT_FILE}"; then
+ echo "FAIL: ${MSG} not found in ${OUTPUT_FILE}" >&2
+ cat "${OUTPUT_FILE}" >&2
+ exit "${ksft_fail}"
+ fi
+
+ if grep -q "cpu=" "${OUTPUT_FILE}"; then
+ echo "FAIL: 'cpu= found in ${OUTPUT_FILE}" >&2
+ cat "${OUTPUT_FILE}" >&2
+ exit "${ksft_fail}"
+ fi
+
+ if grep -q "taskname=" "${OUTPUT_FILE}"; then
+ echo "FAIL: 'taskname= found in ${OUTPUT_FILE}" >&2
+ cat "${OUTPUT_FILE}" >&2
+ exit "${ksft_fail}"
+ fi
+
+ if grep -q "release=" "${OUTPUT_FILE}"; then
+ echo "FAIL: 'release= found in ${OUTPUT_FILE}" >&2
+ cat "${OUTPUT_FILE}" >&2
+ exit "${ksft_fail}"
+ fi
+
+ rm "${OUTPUT_FILE}"
+}
+
+# Start socat, send the message and wait for the file to show up in the file
+# system
+function runtest {
+ # Listen for netconsole port inside the namespace and destination
+ # interface
+ listen_port_and_save_to "${OUTPUT_FILE}" &
+ # Wait for socat to start and listen to the port.
+ wait_local_port_listen "${NAMESPACE}" "${PORT}" udp
+ # Send the message
+ taskset -c "${CPU}" echo "${MSG}: ${TARGET}" > /dev/kmsg
+ # Wait until socat saves the file to disk
+ busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
+}
+
+# ========== #
+# Start here #
+# ========== #
+
+modprobe netdevsim 2> /dev/null || true
+modprobe netconsole 2> /dev/null || true
+
+# Check for basic system dependency and exit if not found
+check_for_dependencies
+# This test also depends on taskset(1). Check for it before starting the test
+check_for_taskset
+
+# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5)
+echo "6 5" > /proc/sys/kernel/printk
+# Remove the namespace, interfaces and netconsole target on exit
+trap cleanup EXIT
+# Create one namespace and two interfaces
+set_network
+# Create a dynamic target for netconsole
+create_dynamic_target
+
+#====================================================
+# TEST #1
+# Send message from a random CPU
+#====================================================
+# Random CPU in the system
+CPU=$((RANDOM % $(nproc)))
+OUTPUT_FILE="/tmp/${TARGET}_1"
+MSG="Test #1 from CPU${CPU}"
+# Enable the auto population of cpu_nr
+set_cpu_nr
+# Enable taskname to be appended to sysdata
+set_taskname
+set_release
+runtest
+# Make sure the message was received in the dst part
+# and exit
+validate_release
+validate_sysdata
+
+#====================================================
+# TEST #2
+# This test now adds userdata together with sysdata
+# ===================================================
+# Get a new random CPU
+CPU=$((RANDOM % $(nproc)))
+OUTPUT_FILE="/tmp/${TARGET}_2"
+MSG="Test #2 from CPU${CPU}"
+set_user_data
+runtest
+validate_release
+validate_sysdata
+
+# ===================================================
+# TEST #3
+# Unset all sysdata, fail if any userdata is set
+# ===================================================
+CPU=$((RANDOM % $(nproc)))
+OUTPUT_FILE="/tmp/${TARGET}_3"
+MSG="Test #3 from CPU${CPU}"
+unset_cpu_nr
+unset_taskname
+unset_release
+runtest
+# At this time, cpu= shouldn't be present in the msg
+validate_no_sysdata
+
+exit "${ksft_pass}"
diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
index 384cfa3d38a6..92c2f0376c08 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
@@ -142,7 +142,7 @@ function pre_ethtool {
}
function check_table {
- local path=$NSIM_DEV_DFS/ports/$port/udp_ports_table$1
+ local path=$NSIM_DEV_DFS/ports/$port/udp_ports/table$1
local -n expected=$2
local last=$3
@@ -212,7 +212,7 @@ function check_tables {
}
function print_table {
- local path=$NSIM_DEV_DFS/ports/$port/udp_ports_table$1
+ local path=$NSIM_DEV_DFS/ports/$port/udp_ports/table$1
read -a have < $path
tree $NSIM_DEV_DFS/
@@ -641,7 +641,7 @@ for port in 0 1; do
NSIM_NETDEV=`get_netdev_name old_netdevs`
ip link set dev $NSIM_NETDEV up
- echo 110 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error
+ echo 110 > $NSIM_DEV_DFS/ports/$port/udp_ports/inject_error
msg="1 - create VxLANs v6"
exp0=( 0 0 0 0 )
@@ -663,7 +663,7 @@ for port in 0 1; do
new_geneve gnv0 20000
msg="2 - destroy GENEVE"
- echo 2 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error
+ echo 2 > $NSIM_DEV_DFS/ports/$port/udp_ports/inject_error
exp1=( `mke 20000 2` 0 0 0 )
del_dev gnv0
@@ -764,7 +764,7 @@ for port in 0 1; do
msg="create VxLANs v4"
new_vxlan vxlan0 10000 $NSIM_NETDEV
- echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset
+ echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset
check_tables
msg="NIC device goes down"
@@ -775,7 +775,7 @@ for port in 0 1; do
fi
check_tables
- echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset
+ echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset
check_tables
msg="NIC device goes up again"
@@ -789,7 +789,7 @@ for port in 0 1; do
del_dev vxlan0
check_tables
- echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset
+ echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset
check_tables
msg="destroy NIC"
@@ -896,7 +896,7 @@ msg="vacate VxLAN in overflow table"
exp0=( `mke 10000 1` `mke 10004 1` 0 `mke 10003 1` )
del_dev vxlan2
-echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset
+echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset
check_tables
msg="tunnels destroyed 2"
diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py
index eb83e7b48797..93120e86e102 100755
--- a/tools/testing/selftests/drivers/net/ping.py
+++ b/tools/testing/selftests/drivers/net/ping.py
@@ -1,49 +1,219 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
+import os
+import random, string, time
from lib.py import ksft_run, ksft_exit
-from lib.py import ksft_eq
-from lib.py import NetDrvEpEnv
+from lib.py import ksft_eq, KsftSkipEx, KsftFailEx
+from lib.py import EthtoolFamily, NetDrvEpEnv
from lib.py import bkg, cmd, wait_port_listen, rand_port
+from lib.py import defer, ethtool, ip
+remote_ifname=""
+no_sleep=False
-def test_v4(cfg) -> None:
- cfg.require_v4()
+def _test_v4(cfg) -> None:
+ cfg.require_ipver("4")
- cmd(f"ping -c 1 -W0.5 {cfg.remote_v4}")
- cmd(f"ping -c 1 -W0.5 {cfg.v4}", host=cfg.remote)
+ cmd("ping -c 1 -W0.5 " + cfg.remote_addr_v["4"])
+ cmd("ping -c 1 -W0.5 " + cfg.addr_v["4"], host=cfg.remote)
+ cmd("ping -s 65000 -c 1 -W0.5 " + cfg.remote_addr_v["4"])
+ cmd("ping -s 65000 -c 1 -W0.5 " + cfg.addr_v["4"], host=cfg.remote)
+def _test_v6(cfg) -> None:
+ cfg.require_ipver("6")
-def test_v6(cfg) -> None:
- cfg.require_v6()
+ cmd("ping -c 1 -W5 " + cfg.remote_addr_v["6"])
+ cmd("ping -c 1 -W5 " + cfg.addr_v["6"], host=cfg.remote)
+ cmd("ping -s 65000 -c 1 -W0.5 " + cfg.remote_addr_v["6"])
+ cmd("ping -s 65000 -c 1 -W0.5 " + cfg.addr_v["6"], host=cfg.remote)
- cmd(f"ping -c 1 -W0.5 {cfg.remote_v6}")
- cmd(f"ping -c 1 -W0.5 {cfg.v6}", host=cfg.remote)
-
-
-def test_tcp(cfg) -> None:
+def _test_tcp(cfg) -> None:
cfg.require_cmd("socat", remote=True)
port = rand_port()
listen_cmd = f"socat -{cfg.addr_ipver} -t 2 -u TCP-LISTEN:{port},reuseport STDOUT"
+ test_string = ''.join(random.choice(string.ascii_lowercase) for _ in range(65536))
with bkg(listen_cmd, exit_wait=True) as nc:
wait_port_listen(port)
- cmd(f"echo ping | socat -t 2 -u STDIN TCP:{cfg.baddr}:{port}",
+ cmd(f"echo {test_string} | socat -t 2 -u STDIN TCP:{cfg.baddr}:{port}",
shell=True, host=cfg.remote)
- ksft_eq(nc.stdout.strip(), "ping")
+ ksft_eq(nc.stdout.strip(), test_string)
+ test_string = ''.join(random.choice(string.ascii_lowercase) for _ in range(65536))
with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as nc:
wait_port_listen(port, host=cfg.remote)
- cmd(f"echo ping | socat -t 2 -u STDIN TCP:{cfg.remote_baddr}:{port}", shell=True)
- ksft_eq(nc.stdout.strip(), "ping")
-
+ cmd(f"echo {test_string} | socat -t 2 -u STDIN TCP:{cfg.remote_baddr}:{port}", shell=True)
+ ksft_eq(nc.stdout.strip(), test_string)
+
+def _set_offload_checksum(cfg, netnl, on) -> None:
+ try:
+ ethtool(f" -K {cfg.ifname} rx {on} tx {on} ")
+ except:
+ return
+
+def _set_xdp_generic_sb_on(cfg) -> None:
+ test_dir = os.path.dirname(os.path.realpath(__file__))
+ prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o"
+ cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote)
+ cmd(f"ip link set dev {cfg.ifname} mtu 1500 xdpgeneric obj {prog} sec xdp", shell=True)
+ defer(cmd, f"ip link set dev {cfg.ifname} xdpgeneric off")
+
+ if no_sleep != True:
+ time.sleep(10)
+
+def _set_xdp_generic_mb_on(cfg) -> None:
+ test_dir = os.path.dirname(os.path.realpath(__file__))
+ prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o"
+ cmd(f"ip link set dev {remote_ifname} mtu 9000", shell=True, host=cfg.remote)
+ defer(ip, f"link set dev {remote_ifname} mtu 1500", host=cfg.remote)
+ ip("link set dev %s mtu 9000 xdpgeneric obj %s sec xdp.frags" % (cfg.ifname, prog))
+ defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdpgeneric off")
+
+ if no_sleep != True:
+ time.sleep(10)
+
+def _set_xdp_native_sb_on(cfg) -> None:
+ test_dir = os.path.dirname(os.path.realpath(__file__))
+ prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o"
+ cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote)
+ cmd(f"ip -j link set dev {cfg.ifname} mtu 1500 xdp obj {prog} sec xdp", shell=True)
+ defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off")
+ xdp_info = ip("-d link show %s" % (cfg.ifname), json=True)[0]
+ if xdp_info['xdp']['mode'] != 1:
+ """
+ If the interface doesn't support native-mode, it falls back to generic mode.
+ The mode value 1 is native and 2 is generic.
+ So it raises an exception if mode is not 1(native mode).
+ """
+ raise KsftSkipEx('device does not support native-XDP')
+
+ if no_sleep != True:
+ time.sleep(10)
+
+def _set_xdp_native_mb_on(cfg) -> None:
+ test_dir = os.path.dirname(os.path.realpath(__file__))
+ prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o"
+ cmd(f"ip link set dev {remote_ifname} mtu 9000", shell=True, host=cfg.remote)
+ defer(ip, f"link set dev {remote_ifname} mtu 1500", host=cfg.remote)
+ try:
+ cmd(f"ip link set dev {cfg.ifname} mtu 9000 xdp obj {prog} sec xdp.frags", shell=True)
+ defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off")
+ except Exception as e:
+ raise KsftSkipEx('device does not support native-multi-buffer XDP')
+
+ if no_sleep != True:
+ time.sleep(10)
+
+def _set_xdp_offload_on(cfg) -> None:
+ test_dir = os.path.dirname(os.path.realpath(__file__))
+ prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o"
+ cmd(f"ip link set dev {cfg.ifname} mtu 1500", shell=True)
+ try:
+ cmd(f"ip link set dev {cfg.ifname} xdpoffload obj {prog} sec xdp", shell=True)
+ except Exception as e:
+ raise KsftSkipEx('device does not support offloaded XDP')
+ defer(ip, f"link set dev {cfg.ifname} xdpoffload off")
+ cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote)
+
+ if no_sleep != True:
+ time.sleep(10)
+
+def get_interface_info(cfg) -> None:
+ global remote_ifname
+ global no_sleep
+
+ remote_info = cmd(f"ip -4 -o addr show to {cfg.remote_addr_v['4']} | awk '{{print $2}}'", shell=True, host=cfg.remote).stdout
+ remote_ifname = remote_info.rstrip('\n')
+ if remote_ifname == "":
+ raise KsftFailEx('Can not get remote interface')
+ local_info = ip("-d link show %s" % (cfg.ifname), json=True)[0]
+ if 'parentbus' in local_info and local_info['parentbus'] == "netdevsim":
+ no_sleep=True
+ if 'linkinfo' in local_info and local_info['linkinfo']['info_kind'] == "veth":
+ no_sleep=True
+
+def set_interface_init(cfg) -> None:
+ cmd(f"ip link set dev {cfg.ifname} mtu 1500", shell=True)
+ cmd(f"ip link set dev {cfg.ifname} xdp off ", shell=True)
+ cmd(f"ip link set dev {cfg.ifname} xdpgeneric off ", shell=True)
+ cmd(f"ip link set dev {cfg.ifname} xdpoffload off", shell=True)
+ cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote)
+
+def test_default(cfg, netnl) -> None:
+ _set_offload_checksum(cfg, netnl, "off")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+ _set_offload_checksum(cfg, netnl, "on")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+
+def test_xdp_generic_sb(cfg, netnl) -> None:
+ _set_xdp_generic_sb_on(cfg)
+ _set_offload_checksum(cfg, netnl, "off")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+ _set_offload_checksum(cfg, netnl, "on")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+
+def test_xdp_generic_mb(cfg, netnl) -> None:
+ _set_xdp_generic_mb_on(cfg)
+ _set_offload_checksum(cfg, netnl, "off")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+ _set_offload_checksum(cfg, netnl, "on")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+
+def test_xdp_native_sb(cfg, netnl) -> None:
+ _set_xdp_native_sb_on(cfg)
+ _set_offload_checksum(cfg, netnl, "off")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+ _set_offload_checksum(cfg, netnl, "on")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+
+def test_xdp_native_mb(cfg, netnl) -> None:
+ _set_xdp_native_mb_on(cfg)
+ _set_offload_checksum(cfg, netnl, "off")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+ _set_offload_checksum(cfg, netnl, "on")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+
+def test_xdp_offload(cfg, netnl) -> None:
+ _set_xdp_offload_on(cfg)
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
def main() -> None:
with NetDrvEpEnv(__file__) as cfg:
- ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, ))
+ get_interface_info(cfg)
+ set_interface_init(cfg)
+ ksft_run([test_default,
+ test_xdp_generic_sb,
+ test_xdp_generic_mb,
+ test_xdp_native_sb,
+ test_xdp_native_mb,
+ test_xdp_offload],
+ args=(cfg, EthtoolFamily()))
ksft_exit()
diff --git a/tools/testing/selftests/drivers/net/queues.py b/tools/testing/selftests/drivers/net/queues.py
index 38303da957ee..cae923f84f69 100755
--- a/tools/testing/selftests/drivers/net/queues.py
+++ b/tools/testing/selftests/drivers/net/queues.py
@@ -2,13 +2,15 @@
# SPDX-License-Identifier: GPL-2.0
from lib.py import ksft_disruptive, ksft_exit, ksft_run
-from lib.py import ksft_eq, ksft_raises, KsftSkipEx
+from lib.py import ksft_eq, ksft_not_in, ksft_raises, KsftSkipEx, KsftFailEx
from lib.py import EthtoolFamily, NetdevFamily, NlError
from lib.py import NetDrvEnv
-from lib.py import cmd, defer, ip
+from lib.py import bkg, cmd, defer, ip
import errno
import glob
-
+import os
+import socket
+import struct
def sys_get_queues(ifname, qtype='rx') -> int:
folders = glob.glob(f'/sys/class/net/{ifname}/queues/{qtype}-*')
@@ -22,6 +24,40 @@ def nl_get_queues(cfg, nl, qtype='rx'):
return None
+def check_xsk(cfg, nl, xdp_queue_id=0) -> None:
+ # Probe for support
+ xdp = cmd(cfg.rpath("xdp_helper") + ' - -', fail=False)
+ if xdp.ret == 255:
+ raise KsftSkipEx('AF_XDP unsupported')
+ elif xdp.ret > 0:
+ raise KsftFailEx('unable to create AF_XDP socket')
+
+ with bkg(f'{cfg.rpath("xdp_helper")} {cfg.ifindex} {xdp_queue_id}',
+ ksft_wait=3):
+
+ rx = tx = False
+
+ queues = nl.queue_get({'ifindex': cfg.ifindex}, dump=True)
+ if not queues:
+ raise KsftSkipEx("Netlink reports no queues")
+
+ for q in queues:
+ if q['id'] == 0:
+ if q['type'] == 'rx':
+ rx = True
+ if q['type'] == 'tx':
+ tx = True
+
+ ksft_eq(q.get('xsk', None), {},
+ comment="xsk attr on queue we configured")
+ else:
+ ksft_not_in('xsk', q,
+ comment="xsk attr on queue we didn't configure")
+
+ ksft_eq(rx, True)
+ ksft_eq(tx, True)
+
+
def get_queues(cfg, nl) -> None:
snl = NetdevFamily(recv_size=4096)
@@ -45,10 +81,9 @@ def addremove_queues(cfg, nl) -> None:
netnl = EthtoolFamily()
channels = netnl.channels_get({'header': {'dev-index': cfg.ifindex}})
- if channels['combined-count'] == 0:
- rx_type = 'rx'
- else:
- rx_type = 'combined'
+ rx_type = 'rx'
+ if channels.get('combined-count', 0) > 0:
+ rx_type = 'combined'
expected = curr_queues - 1
cmd(f"ethtool -L {cfg.dev['ifname']} {rx_type} {expected}", timeout=10)
@@ -81,7 +116,8 @@ def check_down(cfg, nl) -> None:
def main() -> None:
with NetDrvEnv(__file__, queue_count=100) as cfg:
- ksft_run([get_queues, addremove_queues, check_down], args=(cfg, NetdevFamily()))
+ ksft_run([get_queues, addremove_queues, check_down, check_xsk],
+ args=(cfg, NetdevFamily()))
ksft_exit()
diff --git a/tools/testing/selftests/drivers/net/xdp_helper.c b/tools/testing/selftests/drivers/net/xdp_helper.c
new file mode 100644
index 000000000000..aeed25914104
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/xdp_helper.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <linux/if_xdp.h>
+#include <linux/if_link.h>
+#include <net/if.h>
+#include <inttypes.h>
+
+#define UMEM_SZ (1U << 16)
+#define NUM_DESC (UMEM_SZ / 2048)
+
+/* Move this to a common header when reused! */
+static void ksft_ready(void)
+{
+ const char msg[7] = "ready\n";
+ char *env_str;
+ int fd;
+
+ env_str = getenv("KSFT_READY_FD");
+ if (env_str) {
+ fd = atoi(env_str);
+ if (!fd) {
+ fprintf(stderr, "invalid KSFT_READY_FD = '%s'\n",
+ env_str);
+ return;
+ }
+ } else {
+ fd = STDOUT_FILENO;
+ }
+
+ write(fd, msg, sizeof(msg));
+ if (fd != STDOUT_FILENO)
+ close(fd);
+}
+
+static void ksft_wait(void)
+{
+ char *env_str;
+ char byte;
+ int fd;
+
+ env_str = getenv("KSFT_WAIT_FD");
+ if (env_str) {
+ fd = atoi(env_str);
+ if (!fd) {
+ fprintf(stderr, "invalid KSFT_WAIT_FD = '%s'\n",
+ env_str);
+ return;
+ }
+ } else {
+ /* Not running in KSFT env, wait for input from STDIN instead */
+ fd = STDIN_FILENO;
+ }
+
+ read(fd, &byte, sizeof(byte));
+ if (fd != STDIN_FILENO)
+ close(fd);
+}
+
+/* this is a simple helper program that creates an XDP socket and does the
+ * minimum necessary to get bind() to succeed.
+ *
+ * this test program is not intended to actually process packets, but could be
+ * extended in the future if that is actually needed.
+ *
+ * it is used by queues.py to ensure the xsk netlinux attribute is set
+ * correctly.
+ */
+int main(int argc, char **argv)
+{
+ struct xdp_umem_reg umem_reg = { 0 };
+ struct sockaddr_xdp sxdp = { 0 };
+ int num_desc = NUM_DESC;
+ void *umem_area;
+ int ifindex;
+ int sock_fd;
+ int queue;
+
+ if (argc != 3) {
+ fprintf(stderr, "Usage: %s ifindex queue_id\n", argv[0]);
+ return 1;
+ }
+
+ sock_fd = socket(AF_XDP, SOCK_RAW, 0);
+ if (sock_fd < 0) {
+ perror("socket creation failed");
+ /* if the kernel doesn't support AF_XDP, let the test program
+ * know with -1. All other error paths return 1.
+ */
+ if (errno == EAFNOSUPPORT)
+ return -1;
+ return 1;
+ }
+
+ /* "Probing mode", just checking if AF_XDP sockets are supported */
+ if (!strcmp(argv[1], "-") && !strcmp(argv[2], "-")) {
+ printf("AF_XDP support detected\n");
+ close(sock_fd);
+ return 0;
+ }
+
+ ifindex = atoi(argv[1]);
+ queue = atoi(argv[2]);
+
+ umem_area = mmap(NULL, UMEM_SZ, PROT_READ | PROT_WRITE, MAP_PRIVATE |
+ MAP_ANONYMOUS, -1, 0);
+ if (umem_area == MAP_FAILED) {
+ perror("mmap failed");
+ return 1;
+ }
+
+ umem_reg.addr = (uintptr_t)umem_area;
+ umem_reg.len = UMEM_SZ;
+ umem_reg.chunk_size = 2048;
+ umem_reg.headroom = 0;
+
+ setsockopt(sock_fd, SOL_XDP, XDP_UMEM_REG, &umem_reg,
+ sizeof(umem_reg));
+ setsockopt(sock_fd, SOL_XDP, XDP_UMEM_FILL_RING, &num_desc,
+ sizeof(num_desc));
+ setsockopt(sock_fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &num_desc,
+ sizeof(num_desc));
+ setsockopt(sock_fd, SOL_XDP, XDP_RX_RING, &num_desc, sizeof(num_desc));
+
+ sxdp.sxdp_family = AF_XDP;
+ sxdp.sxdp_ifindex = ifindex;
+ sxdp.sxdp_queue_id = queue;
+ sxdp.sxdp_flags = 0;
+
+ if (bind(sock_fd, (struct sockaddr *)&sxdp, sizeof(sxdp)) != 0) {
+ munmap(umem_area, UMEM_SZ);
+ perror("bind failed");
+ close(sock_fd);
+ return 1;
+ }
+
+ ksft_ready();
+ ksft_wait();
+
+ /* parent program will write a byte to stdin when its ready for this
+ * helper to exit
+ */
+
+ close(sock_fd);
+ return 0;
+}
diff --git a/tools/testing/selftests/drivers/ntsync/.gitignore b/tools/testing/selftests/drivers/ntsync/.gitignore
new file mode 100644
index 000000000000..848573a3d3ea
--- /dev/null
+++ b/tools/testing/selftests/drivers/ntsync/.gitignore
@@ -0,0 +1 @@
+ntsync
diff --git a/tools/testing/selftests/drivers/ntsync/Makefile b/tools/testing/selftests/drivers/ntsync/Makefile
new file mode 100644
index 000000000000..dbf2b055c0b2
--- /dev/null
+++ b/tools/testing/selftests/drivers/ntsync/Makefile
@@ -0,0 +1,7 @@
+# SPDX-LICENSE-IDENTIFIER: GPL-2.0-only
+TEST_GEN_PROGS := ntsync
+
+CFLAGS += $(KHDR_INCLUDES)
+LDLIBS += -lpthread
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/drivers/ntsync/config b/tools/testing/selftests/drivers/ntsync/config
new file mode 100644
index 000000000000..60539c826d06
--- /dev/null
+++ b/tools/testing/selftests/drivers/ntsync/config
@@ -0,0 +1 @@
+CONFIG_WINESYNC=y
diff --git a/tools/testing/selftests/drivers/ntsync/ntsync.c b/tools/testing/selftests/drivers/ntsync/ntsync.c
new file mode 100644
index 000000000000..3aad311574c4
--- /dev/null
+++ b/tools/testing/selftests/drivers/ntsync/ntsync.c
@@ -0,0 +1,1343 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Various unit tests for the "ntsync" synchronization primitive driver.
+ *
+ * Copyright (C) 2021-2022 Elizabeth Figura <zfigura@codeweavers.com>
+ */
+
+#define _GNU_SOURCE
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <time.h>
+#include <pthread.h>
+#include <linux/ntsync.h>
+#include "../../kselftest_harness.h"
+
+static int read_sem_state(int sem, __u32 *count, __u32 *max)
+{
+ struct ntsync_sem_args args;
+ int ret;
+
+ memset(&args, 0xcc, sizeof(args));
+ ret = ioctl(sem, NTSYNC_IOC_SEM_READ, &args);
+ *count = args.count;
+ *max = args.max;
+ return ret;
+}
+
+#define check_sem_state(sem, count, max) \
+ ({ \
+ __u32 __count, __max; \
+ int ret = read_sem_state((sem), &__count, &__max); \
+ EXPECT_EQ(0, ret); \
+ EXPECT_EQ((count), __count); \
+ EXPECT_EQ((max), __max); \
+ })
+
+static int release_sem(int sem, __u32 *count)
+{
+ return ioctl(sem, NTSYNC_IOC_SEM_RELEASE, count);
+}
+
+static int read_mutex_state(int mutex, __u32 *count, __u32 *owner)
+{
+ struct ntsync_mutex_args args;
+ int ret;
+
+ memset(&args, 0xcc, sizeof(args));
+ ret = ioctl(mutex, NTSYNC_IOC_MUTEX_READ, &args);
+ *count = args.count;
+ *owner = args.owner;
+ return ret;
+}
+
+#define check_mutex_state(mutex, count, owner) \
+ ({ \
+ __u32 __count, __owner; \
+ int ret = read_mutex_state((mutex), &__count, &__owner); \
+ EXPECT_EQ(0, ret); \
+ EXPECT_EQ((count), __count); \
+ EXPECT_EQ((owner), __owner); \
+ })
+
+static int unlock_mutex(int mutex, __u32 owner, __u32 *count)
+{
+ struct ntsync_mutex_args args;
+ int ret;
+
+ args.owner = owner;
+ args.count = 0xdeadbeef;
+ ret = ioctl(mutex, NTSYNC_IOC_MUTEX_UNLOCK, &args);
+ *count = args.count;
+ return ret;
+}
+
+static int read_event_state(int event, __u32 *signaled, __u32 *manual)
+{
+ struct ntsync_event_args args;
+ int ret;
+
+ memset(&args, 0xcc, sizeof(args));
+ ret = ioctl(event, NTSYNC_IOC_EVENT_READ, &args);
+ *signaled = args.signaled;
+ *manual = args.manual;
+ return ret;
+}
+
+#define check_event_state(event, signaled, manual) \
+ ({ \
+ __u32 __signaled, __manual; \
+ int ret = read_event_state((event), &__signaled, &__manual); \
+ EXPECT_EQ(0, ret); \
+ EXPECT_EQ((signaled), __signaled); \
+ EXPECT_EQ((manual), __manual); \
+ })
+
+static int wait_objs(int fd, unsigned long request, __u32 count,
+ const int *objs, __u32 owner, int alert, __u32 *index)
+{
+ struct ntsync_wait_args args = {0};
+ struct timespec timeout;
+ int ret;
+
+ clock_gettime(CLOCK_MONOTONIC, &timeout);
+
+ args.timeout = timeout.tv_sec * 1000000000 + timeout.tv_nsec;
+ args.count = count;
+ args.objs = (uintptr_t)objs;
+ args.owner = owner;
+ args.index = 0xdeadbeef;
+ args.alert = alert;
+ ret = ioctl(fd, request, &args);
+ *index = args.index;
+ return ret;
+}
+
+static int wait_any(int fd, __u32 count, const int *objs, __u32 owner, __u32 *index)
+{
+ return wait_objs(fd, NTSYNC_IOC_WAIT_ANY, count, objs, owner, 0, index);
+}
+
+static int wait_all(int fd, __u32 count, const int *objs, __u32 owner, __u32 *index)
+{
+ return wait_objs(fd, NTSYNC_IOC_WAIT_ALL, count, objs, owner, 0, index);
+}
+
+static int wait_any_alert(int fd, __u32 count, const int *objs,
+ __u32 owner, int alert, __u32 *index)
+{
+ return wait_objs(fd, NTSYNC_IOC_WAIT_ANY,
+ count, objs, owner, alert, index);
+}
+
+static int wait_all_alert(int fd, __u32 count, const int *objs,
+ __u32 owner, int alert, __u32 *index)
+{
+ return wait_objs(fd, NTSYNC_IOC_WAIT_ALL,
+ count, objs, owner, alert, index);
+}
+
+TEST(semaphore_state)
+{
+ struct ntsync_sem_args sem_args;
+ struct timespec timeout;
+ __u32 count, index;
+ int fd, ret, sem;
+
+ clock_gettime(CLOCK_MONOTONIC, &timeout);
+
+ fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
+ ASSERT_LE(0, fd);
+
+ sem_args.count = 3;
+ sem_args.max = 2;
+ sem = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
+ EXPECT_EQ(-1, sem);
+ EXPECT_EQ(EINVAL, errno);
+
+ sem_args.count = 2;
+ sem_args.max = 2;
+ sem = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
+ EXPECT_LE(0, sem);
+ check_sem_state(sem, 2, 2);
+
+ count = 0;
+ ret = release_sem(sem, &count);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(2, count);
+ check_sem_state(sem, 2, 2);
+
+ count = 1;
+ ret = release_sem(sem, &count);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EOVERFLOW, errno);
+ check_sem_state(sem, 2, 2);
+
+ ret = wait_any(fd, 1, &sem, 123, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+ check_sem_state(sem, 1, 2);
+
+ ret = wait_any(fd, 1, &sem, 123, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+ check_sem_state(sem, 0, 2);
+
+ ret = wait_any(fd, 1, &sem, 123, &index);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(ETIMEDOUT, errno);
+
+ count = 3;
+ ret = release_sem(sem, &count);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EOVERFLOW, errno);
+ check_sem_state(sem, 0, 2);
+
+ count = 2;
+ ret = release_sem(sem, &count);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, count);
+ check_sem_state(sem, 2, 2);
+
+ ret = wait_any(fd, 1, &sem, 123, &index);
+ EXPECT_EQ(0, ret);
+ ret = wait_any(fd, 1, &sem, 123, &index);
+ EXPECT_EQ(0, ret);
+
+ count = 1;
+ ret = release_sem(sem, &count);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, count);
+ check_sem_state(sem, 1, 2);
+
+ count = ~0u;
+ ret = release_sem(sem, &count);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EOVERFLOW, errno);
+ check_sem_state(sem, 1, 2);
+
+ close(sem);
+
+ close(fd);
+}
+
+TEST(mutex_state)
+{
+ struct ntsync_mutex_args mutex_args;
+ __u32 owner, count, index;
+ struct timespec timeout;
+ int fd, ret, mutex;
+
+ clock_gettime(CLOCK_MONOTONIC, &timeout);
+
+ fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
+ ASSERT_LE(0, fd);
+
+ mutex_args.owner = 123;
+ mutex_args.count = 0;
+ mutex = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
+ EXPECT_EQ(-1, mutex);
+ EXPECT_EQ(EINVAL, errno);
+
+ mutex_args.owner = 0;
+ mutex_args.count = 2;
+ mutex = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
+ EXPECT_EQ(-1, mutex);
+ EXPECT_EQ(EINVAL, errno);
+
+ mutex_args.owner = 123;
+ mutex_args.count = 2;
+ mutex = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
+ EXPECT_LE(0, mutex);
+ check_mutex_state(mutex, 2, 123);
+
+ ret = unlock_mutex(mutex, 0, &count);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EINVAL, errno);
+
+ ret = unlock_mutex(mutex, 456, &count);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EPERM, errno);
+ check_mutex_state(mutex, 2, 123);
+
+ ret = unlock_mutex(mutex, 123, &count);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(2, count);
+ check_mutex_state(mutex, 1, 123);
+
+ ret = unlock_mutex(mutex, 123, &count);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(1, count);
+ check_mutex_state(mutex, 0, 0);
+
+ ret = unlock_mutex(mutex, 123, &count);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EPERM, errno);
+
+ ret = wait_any(fd, 1, &mutex, 456, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+ check_mutex_state(mutex, 1, 456);
+
+ ret = wait_any(fd, 1, &mutex, 456, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+ check_mutex_state(mutex, 2, 456);
+
+ ret = unlock_mutex(mutex, 456, &count);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(2, count);
+ check_mutex_state(mutex, 1, 456);
+
+ ret = wait_any(fd, 1, &mutex, 123, &index);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(ETIMEDOUT, errno);
+
+ owner = 0;
+ ret = ioctl(mutex, NTSYNC_IOC_MUTEX_KILL, &owner);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EINVAL, errno);
+
+ owner = 123;
+ ret = ioctl(mutex, NTSYNC_IOC_MUTEX_KILL, &owner);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EPERM, errno);
+ check_mutex_state(mutex, 1, 456);
+
+ owner = 456;
+ ret = ioctl(mutex, NTSYNC_IOC_MUTEX_KILL, &owner);
+ EXPECT_EQ(0, ret);
+
+ memset(&mutex_args, 0xcc, sizeof(mutex_args));
+ ret = ioctl(mutex, NTSYNC_IOC_MUTEX_READ, &mutex_args);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EOWNERDEAD, errno);
+ EXPECT_EQ(0, mutex_args.count);
+ EXPECT_EQ(0, mutex_args.owner);
+
+ memset(&mutex_args, 0xcc, sizeof(mutex_args));
+ ret = ioctl(mutex, NTSYNC_IOC_MUTEX_READ, &mutex_args);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EOWNERDEAD, errno);
+ EXPECT_EQ(0, mutex_args.count);
+ EXPECT_EQ(0, mutex_args.owner);
+
+ ret = wait_any(fd, 1, &mutex, 123, &index);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EOWNERDEAD, errno);
+ EXPECT_EQ(0, index);
+ check_mutex_state(mutex, 1, 123);
+
+ owner = 123;
+ ret = ioctl(mutex, NTSYNC_IOC_MUTEX_KILL, &owner);
+ EXPECT_EQ(0, ret);
+
+ memset(&mutex_args, 0xcc, sizeof(mutex_args));
+ ret = ioctl(mutex, NTSYNC_IOC_MUTEX_READ, &mutex_args);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EOWNERDEAD, errno);
+ EXPECT_EQ(0, mutex_args.count);
+ EXPECT_EQ(0, mutex_args.owner);
+
+ ret = wait_any(fd, 1, &mutex, 123, &index);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EOWNERDEAD, errno);
+ EXPECT_EQ(0, index);
+ check_mutex_state(mutex, 1, 123);
+
+ close(mutex);
+
+ mutex_args.owner = 0;
+ mutex_args.count = 0;
+ mutex = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
+ EXPECT_LE(0, mutex);
+ check_mutex_state(mutex, 0, 0);
+
+ ret = wait_any(fd, 1, &mutex, 123, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+ check_mutex_state(mutex, 1, 123);
+
+ close(mutex);
+
+ mutex_args.owner = 123;
+ mutex_args.count = ~0u;
+ mutex = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
+ EXPECT_LE(0, mutex);
+ check_mutex_state(mutex, ~0u, 123);
+
+ ret = wait_any(fd, 1, &mutex, 123, &index);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(ETIMEDOUT, errno);
+
+ close(mutex);
+
+ close(fd);
+}
+
+TEST(manual_event_state)
+{
+ struct ntsync_event_args event_args;
+ __u32 index, signaled;
+ int fd, event, ret;
+
+ fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
+ ASSERT_LE(0, fd);
+
+ event_args.manual = 1;
+ event_args.signaled = 0;
+ event = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
+ EXPECT_LE(0, event);
+ check_event_state(event, 0, 1);
+
+ signaled = 0xdeadbeef;
+ ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, signaled);
+ check_event_state(event, 1, 1);
+
+ ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(1, signaled);
+ check_event_state(event, 1, 1);
+
+ ret = wait_any(fd, 1, &event, 123, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+ check_event_state(event, 1, 1);
+
+ signaled = 0xdeadbeef;
+ ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(1, signaled);
+ check_event_state(event, 0, 1);
+
+ ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, signaled);
+ check_event_state(event, 0, 1);
+
+ ret = wait_any(fd, 1, &event, 123, &index);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(ETIMEDOUT, errno);
+
+ ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, signaled);
+
+ ret = ioctl(event, NTSYNC_IOC_EVENT_PULSE, &signaled);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(1, signaled);
+ check_event_state(event, 0, 1);
+
+ ret = ioctl(event, NTSYNC_IOC_EVENT_PULSE, &signaled);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, signaled);
+ check_event_state(event, 0, 1);
+
+ close(event);
+
+ close(fd);
+}
+
+TEST(auto_event_state)
+{
+ struct ntsync_event_args event_args;
+ __u32 index, signaled;
+ int fd, event, ret;
+
+ fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
+ ASSERT_LE(0, fd);
+
+ event_args.manual = 0;
+ event_args.signaled = 1;
+ event = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
+ EXPECT_LE(0, event);
+
+ check_event_state(event, 1, 0);
+
+ signaled = 0xdeadbeef;
+ ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(1, signaled);
+ check_event_state(event, 1, 0);
+
+ ret = wait_any(fd, 1, &event, 123, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+ check_event_state(event, 0, 0);
+
+ signaled = 0xdeadbeef;
+ ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, signaled);
+ check_event_state(event, 0, 0);
+
+ ret = wait_any(fd, 1, &event, 123, &index);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(ETIMEDOUT, errno);
+
+ ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, signaled);
+
+ ret = ioctl(event, NTSYNC_IOC_EVENT_PULSE, &signaled);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(1, signaled);
+ check_event_state(event, 0, 0);
+
+ ret = ioctl(event, NTSYNC_IOC_EVENT_PULSE, &signaled);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, signaled);
+ check_event_state(event, 0, 0);
+
+ close(event);
+
+ close(fd);
+}
+
+TEST(test_wait_any)
+{
+ int objs[NTSYNC_MAX_WAIT_COUNT + 1], fd, ret;
+ struct ntsync_mutex_args mutex_args = {0};
+ struct ntsync_sem_args sem_args = {0};
+ __u32 owner, index, count, i;
+ struct timespec timeout;
+
+ clock_gettime(CLOCK_MONOTONIC, &timeout);
+
+ fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
+ ASSERT_LE(0, fd);
+
+ sem_args.count = 2;
+ sem_args.max = 3;
+ objs[0] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
+ EXPECT_LE(0, objs[0]);
+
+ mutex_args.owner = 0;
+ mutex_args.count = 0;
+ objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
+ EXPECT_LE(0, objs[1]);
+
+ ret = wait_any(fd, 2, objs, 123, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+ check_sem_state(objs[0], 1, 3);
+ check_mutex_state(objs[1], 0, 0);
+
+ ret = wait_any(fd, 2, objs, 123, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+ check_sem_state(objs[0], 0, 3);
+ check_mutex_state(objs[1], 0, 0);
+
+ ret = wait_any(fd, 2, objs, 123, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(1, index);
+ check_sem_state(objs[0], 0, 3);
+ check_mutex_state(objs[1], 1, 123);
+
+ count = 1;
+ ret = release_sem(objs[0], &count);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, count);
+
+ ret = wait_any(fd, 2, objs, 123, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+ check_sem_state(objs[0], 0, 3);
+ check_mutex_state(objs[1], 1, 123);
+
+ ret = wait_any(fd, 2, objs, 123, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(1, index);
+ check_sem_state(objs[0], 0, 3);
+ check_mutex_state(objs[1], 2, 123);
+
+ ret = wait_any(fd, 2, objs, 456, &index);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(ETIMEDOUT, errno);
+
+ owner = 123;
+ ret = ioctl(objs[1], NTSYNC_IOC_MUTEX_KILL, &owner);
+ EXPECT_EQ(0, ret);
+
+ ret = wait_any(fd, 2, objs, 456, &index);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EOWNERDEAD, errno);
+ EXPECT_EQ(1, index);
+
+ ret = wait_any(fd, 2, objs, 456, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(1, index);
+
+ close(objs[1]);
+
+ /* test waiting on the same object twice */
+
+ count = 2;
+ ret = release_sem(objs[0], &count);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, count);
+
+ objs[1] = objs[0];
+ ret = wait_any(fd, 2, objs, 456, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+ check_sem_state(objs[0], 1, 3);
+
+ ret = wait_any(fd, 0, NULL, 456, &index);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(ETIMEDOUT, errno);
+
+ for (i = 1; i < NTSYNC_MAX_WAIT_COUNT + 1; ++i)
+ objs[i] = objs[0];
+
+ ret = wait_any(fd, NTSYNC_MAX_WAIT_COUNT, objs, 123, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+
+ ret = wait_any(fd, NTSYNC_MAX_WAIT_COUNT + 1, objs, 123, &index);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EINVAL, errno);
+
+ ret = wait_any(fd, -1, objs, 123, &index);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EINVAL, errno);
+
+ close(objs[0]);
+
+ close(fd);
+}
+
+TEST(test_wait_all)
+{
+ struct ntsync_event_args event_args = {0};
+ struct ntsync_mutex_args mutex_args = {0};
+ struct ntsync_sem_args sem_args = {0};
+ __u32 owner, index, count;
+ int objs[2], fd, ret;
+
+ fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
+ ASSERT_LE(0, fd);
+
+ sem_args.count = 2;
+ sem_args.max = 3;
+ objs[0] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
+ EXPECT_LE(0, objs[0]);
+
+ mutex_args.owner = 0;
+ mutex_args.count = 0;
+ objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
+ EXPECT_LE(0, objs[1]);
+
+ ret = wait_all(fd, 2, objs, 123, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+ check_sem_state(objs[0], 1, 3);
+ check_mutex_state(objs[1], 1, 123);
+
+ ret = wait_all(fd, 2, objs, 456, &index);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(ETIMEDOUT, errno);
+ check_sem_state(objs[0], 1, 3);
+ check_mutex_state(objs[1], 1, 123);
+
+ ret = wait_all(fd, 2, objs, 123, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+ check_sem_state(objs[0], 0, 3);
+ check_mutex_state(objs[1], 2, 123);
+
+ ret = wait_all(fd, 2, objs, 123, &index);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(ETIMEDOUT, errno);
+ check_sem_state(objs[0], 0, 3);
+ check_mutex_state(objs[1], 2, 123);
+
+ count = 3;
+ ret = release_sem(objs[0], &count);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, count);
+
+ ret = wait_all(fd, 2, objs, 123, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+ check_sem_state(objs[0], 2, 3);
+ check_mutex_state(objs[1], 3, 123);
+
+ owner = 123;
+ ret = ioctl(objs[1], NTSYNC_IOC_MUTEX_KILL, &owner);
+ EXPECT_EQ(0, ret);
+
+ ret = wait_all(fd, 2, objs, 123, &index);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EOWNERDEAD, errno);
+ check_sem_state(objs[0], 1, 3);
+ check_mutex_state(objs[1], 1, 123);
+
+ close(objs[1]);
+
+ event_args.manual = true;
+ event_args.signaled = true;
+ objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
+ EXPECT_LE(0, objs[1]);
+
+ ret = wait_all(fd, 2, objs, 123, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+ check_sem_state(objs[0], 0, 3);
+ check_event_state(objs[1], 1, 1);
+
+ close(objs[1]);
+
+ /* test waiting on the same object twice */
+ objs[1] = objs[0];
+ ret = wait_all(fd, 2, objs, 123, &index);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EINVAL, errno);
+
+ close(objs[0]);
+
+ close(fd);
+}
+
+struct wake_args {
+ int fd;
+ int obj;
+};
+
+struct wait_args {
+ int fd;
+ unsigned long request;
+ struct ntsync_wait_args *args;
+ int ret;
+ int err;
+};
+
+static void *wait_thread(void *arg)
+{
+ struct wait_args *args = arg;
+
+ args->ret = ioctl(args->fd, args->request, args->args);
+ args->err = errno;
+ return NULL;
+}
+
+static __u64 get_abs_timeout(unsigned int ms)
+{
+ struct timespec timeout;
+ clock_gettime(CLOCK_MONOTONIC, &timeout);
+ return (timeout.tv_sec * 1000000000) + timeout.tv_nsec + (ms * 1000000);
+}
+
+static int wait_for_thread(pthread_t thread, unsigned int ms)
+{
+ struct timespec timeout;
+
+ clock_gettime(CLOCK_REALTIME, &timeout);
+ timeout.tv_nsec += ms * 1000000;
+ timeout.tv_sec += (timeout.tv_nsec / 1000000000);
+ timeout.tv_nsec %= 1000000000;
+ return pthread_timedjoin_np(thread, NULL, &timeout);
+}
+
+TEST(wake_any)
+{
+ struct ntsync_event_args event_args = {0};
+ struct ntsync_mutex_args mutex_args = {0};
+ struct ntsync_wait_args wait_args = {0};
+ struct ntsync_sem_args sem_args = {0};
+ struct wait_args thread_args;
+ __u32 count, index, signaled;
+ int objs[2], fd, ret;
+ pthread_t thread;
+
+ fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
+ ASSERT_LE(0, fd);
+
+ sem_args.count = 0;
+ sem_args.max = 3;
+ objs[0] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
+ EXPECT_LE(0, objs[0]);
+
+ mutex_args.owner = 123;
+ mutex_args.count = 1;
+ objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
+ EXPECT_LE(0, objs[1]);
+
+ /* test waking the semaphore */
+
+ wait_args.timeout = get_abs_timeout(1000);
+ wait_args.objs = (uintptr_t)objs;
+ wait_args.count = 2;
+ wait_args.owner = 456;
+ wait_args.index = 0xdeadbeef;
+ thread_args.fd = fd;
+ thread_args.args = &wait_args;
+ thread_args.request = NTSYNC_IOC_WAIT_ANY;
+ ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
+ EXPECT_EQ(0, ret);
+
+ ret = wait_for_thread(thread, 100);
+ EXPECT_EQ(ETIMEDOUT, ret);
+
+ count = 1;
+ ret = release_sem(objs[0], &count);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, count);
+ check_sem_state(objs[0], 0, 3);
+
+ ret = wait_for_thread(thread, 100);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, thread_args.ret);
+ EXPECT_EQ(0, wait_args.index);
+
+ /* test waking the mutex */
+
+ /* first grab it again for owner 123 */
+ ret = wait_any(fd, 1, &objs[1], 123, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+
+ wait_args.timeout = get_abs_timeout(1000);
+ wait_args.owner = 456;
+ ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
+ EXPECT_EQ(0, ret);
+
+ ret = wait_for_thread(thread, 100);
+ EXPECT_EQ(ETIMEDOUT, ret);
+
+ ret = unlock_mutex(objs[1], 123, &count);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(2, count);
+
+ ret = pthread_tryjoin_np(thread, NULL);
+ EXPECT_EQ(EBUSY, ret);
+
+ ret = unlock_mutex(objs[1], 123, &count);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(1, mutex_args.count);
+ check_mutex_state(objs[1], 1, 456);
+
+ ret = wait_for_thread(thread, 100);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, thread_args.ret);
+ EXPECT_EQ(1, wait_args.index);
+
+ close(objs[1]);
+
+ /* test waking events */
+
+ event_args.manual = false;
+ event_args.signaled = false;
+ objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
+ EXPECT_LE(0, objs[1]);
+
+ wait_args.timeout = get_abs_timeout(1000);
+ ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
+ EXPECT_EQ(0, ret);
+
+ ret = wait_for_thread(thread, 100);
+ EXPECT_EQ(ETIMEDOUT, ret);
+
+ ret = ioctl(objs[1], NTSYNC_IOC_EVENT_SET, &signaled);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, signaled);
+ check_event_state(objs[1], 0, 0);
+
+ ret = wait_for_thread(thread, 100);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, thread_args.ret);
+ EXPECT_EQ(1, wait_args.index);
+
+ wait_args.timeout = get_abs_timeout(1000);
+ ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
+ EXPECT_EQ(0, ret);
+
+ ret = wait_for_thread(thread, 100);
+ EXPECT_EQ(ETIMEDOUT, ret);
+
+ ret = ioctl(objs[1], NTSYNC_IOC_EVENT_PULSE, &signaled);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, signaled);
+ check_event_state(objs[1], 0, 0);
+
+ ret = wait_for_thread(thread, 100);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, thread_args.ret);
+ EXPECT_EQ(1, wait_args.index);
+
+ close(objs[1]);
+
+ event_args.manual = true;
+ event_args.signaled = false;
+ objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
+ EXPECT_LE(0, objs[1]);
+
+ wait_args.timeout = get_abs_timeout(1000);
+ ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
+ EXPECT_EQ(0, ret);
+
+ ret = wait_for_thread(thread, 100);
+ EXPECT_EQ(ETIMEDOUT, ret);
+
+ ret = ioctl(objs[1], NTSYNC_IOC_EVENT_SET, &signaled);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, signaled);
+ check_event_state(objs[1], 1, 1);
+
+ ret = wait_for_thread(thread, 100);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, thread_args.ret);
+ EXPECT_EQ(1, wait_args.index);
+
+ ret = ioctl(objs[1], NTSYNC_IOC_EVENT_RESET, &signaled);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(1, signaled);
+
+ wait_args.timeout = get_abs_timeout(1000);
+ ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
+ EXPECT_EQ(0, ret);
+
+ ret = wait_for_thread(thread, 100);
+ EXPECT_EQ(ETIMEDOUT, ret);
+
+ ret = ioctl(objs[1], NTSYNC_IOC_EVENT_PULSE, &signaled);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, signaled);
+ check_event_state(objs[1], 0, 1);
+
+ ret = wait_for_thread(thread, 100);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, thread_args.ret);
+ EXPECT_EQ(1, wait_args.index);
+
+ /* delete an object while it's being waited on */
+
+ wait_args.timeout = get_abs_timeout(200);
+ wait_args.owner = 123;
+ ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
+ EXPECT_EQ(0, ret);
+
+ ret = wait_for_thread(thread, 100);
+ EXPECT_EQ(ETIMEDOUT, ret);
+
+ close(objs[0]);
+ close(objs[1]);
+
+ ret = wait_for_thread(thread, 200);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(-1, thread_args.ret);
+ EXPECT_EQ(ETIMEDOUT, thread_args.err);
+
+ close(fd);
+}
+
+TEST(wake_all)
+{
+ struct ntsync_event_args manual_event_args = {0};
+ struct ntsync_event_args auto_event_args = {0};
+ struct ntsync_mutex_args mutex_args = {0};
+ struct ntsync_wait_args wait_args = {0};
+ struct ntsync_sem_args sem_args = {0};
+ struct wait_args thread_args;
+ __u32 count, index, signaled;
+ int objs[4], fd, ret;
+ pthread_t thread;
+
+ fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
+ ASSERT_LE(0, fd);
+
+ sem_args.count = 0;
+ sem_args.max = 3;
+ objs[0] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
+ EXPECT_LE(0, objs[0]);
+
+ mutex_args.owner = 123;
+ mutex_args.count = 1;
+ objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
+ EXPECT_LE(0, objs[1]);
+
+ manual_event_args.manual = true;
+ manual_event_args.signaled = true;
+ objs[2] = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &manual_event_args);
+ EXPECT_LE(0, objs[2]);
+
+ auto_event_args.manual = false;
+ auto_event_args.signaled = true;
+ objs[3] = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &auto_event_args);
+ EXPECT_EQ(0, objs[3]);
+
+ wait_args.timeout = get_abs_timeout(1000);
+ wait_args.objs = (uintptr_t)objs;
+ wait_args.count = 4;
+ wait_args.owner = 456;
+ thread_args.fd = fd;
+ thread_args.args = &wait_args;
+ thread_args.request = NTSYNC_IOC_WAIT_ALL;
+ ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
+ EXPECT_EQ(0, ret);
+
+ ret = wait_for_thread(thread, 100);
+ EXPECT_EQ(ETIMEDOUT, ret);
+
+ count = 1;
+ ret = release_sem(objs[0], &count);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, count);
+
+ ret = pthread_tryjoin_np(thread, NULL);
+ EXPECT_EQ(EBUSY, ret);
+
+ check_sem_state(objs[0], 1, 3);
+
+ ret = wait_any(fd, 1, &objs[0], 123, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+
+ ret = unlock_mutex(objs[1], 123, &count);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(1, count);
+
+ ret = pthread_tryjoin_np(thread, NULL);
+ EXPECT_EQ(EBUSY, ret);
+
+ check_mutex_state(objs[1], 0, 0);
+
+ ret = ioctl(objs[2], NTSYNC_IOC_EVENT_RESET, &signaled);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(1, signaled);
+
+ count = 2;
+ ret = release_sem(objs[0], &count);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, count);
+ check_sem_state(objs[0], 2, 3);
+
+ ret = ioctl(objs[3], NTSYNC_IOC_EVENT_RESET, &signaled);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(1, signaled);
+
+ ret = ioctl(objs[2], NTSYNC_IOC_EVENT_SET, &signaled);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, signaled);
+
+ ret = ioctl(objs[3], NTSYNC_IOC_EVENT_SET, &signaled);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, signaled);
+
+ check_sem_state(objs[0], 1, 3);
+ check_mutex_state(objs[1], 1, 456);
+ check_event_state(objs[2], 1, 1);
+ check_event_state(objs[3], 0, 0);
+
+ ret = wait_for_thread(thread, 100);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, thread_args.ret);
+
+ /* delete an object while it's being waited on */
+
+ wait_args.timeout = get_abs_timeout(200);
+ wait_args.owner = 123;
+ ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
+ EXPECT_EQ(0, ret);
+
+ ret = wait_for_thread(thread, 100);
+ EXPECT_EQ(ETIMEDOUT, ret);
+
+ close(objs[0]);
+ close(objs[1]);
+ close(objs[2]);
+ close(objs[3]);
+
+ ret = wait_for_thread(thread, 200);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(-1, thread_args.ret);
+ EXPECT_EQ(ETIMEDOUT, thread_args.err);
+
+ close(fd);
+}
+
+TEST(alert_any)
+{
+ struct ntsync_event_args event_args = {0};
+ struct ntsync_wait_args wait_args = {0};
+ struct ntsync_sem_args sem_args = {0};
+ __u32 index, count, signaled;
+ struct wait_args thread_args;
+ int objs[2], event, fd, ret;
+ pthread_t thread;
+
+ fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
+ ASSERT_LE(0, fd);
+
+ sem_args.count = 0;
+ sem_args.max = 2;
+ objs[0] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
+ EXPECT_LE(0, objs[0]);
+
+ sem_args.count = 1;
+ sem_args.max = 2;
+ objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
+ EXPECT_LE(0, objs[1]);
+
+ event_args.manual = true;
+ event_args.signaled = true;
+ event = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
+ EXPECT_LE(0, event);
+
+ ret = wait_any_alert(fd, 0, NULL, 123, event, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+
+ ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled);
+ EXPECT_EQ(0, ret);
+
+ ret = wait_any_alert(fd, 0, NULL, 123, event, &index);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(ETIMEDOUT, errno);
+
+ ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled);
+ EXPECT_EQ(0, ret);
+
+ ret = wait_any_alert(fd, 2, objs, 123, event, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(1, index);
+
+ ret = wait_any_alert(fd, 2, objs, 123, event, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(2, index);
+
+ /* test wakeup via alert */
+
+ ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled);
+ EXPECT_EQ(0, ret);
+
+ wait_args.timeout = get_abs_timeout(1000);
+ wait_args.objs = (uintptr_t)objs;
+ wait_args.count = 2;
+ wait_args.owner = 123;
+ wait_args.index = 0xdeadbeef;
+ wait_args.alert = event;
+ thread_args.fd = fd;
+ thread_args.args = &wait_args;
+ thread_args.request = NTSYNC_IOC_WAIT_ANY;
+ ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
+ EXPECT_EQ(0, ret);
+
+ ret = wait_for_thread(thread, 100);
+ EXPECT_EQ(ETIMEDOUT, ret);
+
+ ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled);
+ EXPECT_EQ(0, ret);
+
+ ret = wait_for_thread(thread, 100);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, thread_args.ret);
+ EXPECT_EQ(2, wait_args.index);
+
+ close(event);
+
+ /* test with an auto-reset event */
+
+ event_args.manual = false;
+ event_args.signaled = true;
+ event = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
+ EXPECT_LE(0, event);
+
+ count = 1;
+ ret = release_sem(objs[0], &count);
+ EXPECT_EQ(0, ret);
+
+ ret = wait_any_alert(fd, 2, objs, 123, event, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+
+ ret = wait_any_alert(fd, 2, objs, 123, event, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(2, index);
+
+ ret = wait_any_alert(fd, 2, objs, 123, event, &index);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(ETIMEDOUT, errno);
+
+ close(event);
+
+ close(objs[0]);
+ close(objs[1]);
+
+ close(fd);
+}
+
+TEST(alert_all)
+{
+ struct ntsync_event_args event_args = {0};
+ struct ntsync_wait_args wait_args = {0};
+ struct ntsync_sem_args sem_args = {0};
+ struct wait_args thread_args;
+ __u32 index, count, signaled;
+ int objs[2], event, fd, ret;
+ pthread_t thread;
+
+ fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
+ ASSERT_LE(0, fd);
+
+ sem_args.count = 2;
+ sem_args.max = 2;
+ objs[0] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
+ EXPECT_LE(0, objs[0]);
+
+ sem_args.count = 1;
+ sem_args.max = 2;
+ objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
+ EXPECT_LE(0, objs[1]);
+
+ event_args.manual = true;
+ event_args.signaled = true;
+ event = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
+ EXPECT_LE(0, event);
+
+ ret = wait_all_alert(fd, 2, objs, 123, event, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+
+ ret = wait_all_alert(fd, 2, objs, 123, event, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(2, index);
+
+ /* test wakeup via alert */
+
+ ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled);
+ EXPECT_EQ(0, ret);
+
+ wait_args.timeout = get_abs_timeout(1000);
+ wait_args.objs = (uintptr_t)objs;
+ wait_args.count = 2;
+ wait_args.owner = 123;
+ wait_args.index = 0xdeadbeef;
+ wait_args.alert = event;
+ thread_args.fd = fd;
+ thread_args.args = &wait_args;
+ thread_args.request = NTSYNC_IOC_WAIT_ALL;
+ ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
+ EXPECT_EQ(0, ret);
+
+ ret = wait_for_thread(thread, 100);
+ EXPECT_EQ(ETIMEDOUT, ret);
+
+ ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled);
+ EXPECT_EQ(0, ret);
+
+ ret = wait_for_thread(thread, 100);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, thread_args.ret);
+ EXPECT_EQ(2, wait_args.index);
+
+ close(event);
+
+ /* test with an auto-reset event */
+
+ event_args.manual = false;
+ event_args.signaled = true;
+ event = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
+ EXPECT_LE(0, event);
+
+ count = 2;
+ ret = release_sem(objs[1], &count);
+ EXPECT_EQ(0, ret);
+
+ ret = wait_all_alert(fd, 2, objs, 123, event, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+
+ ret = wait_all_alert(fd, 2, objs, 123, event, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(2, index);
+
+ ret = wait_all_alert(fd, 2, objs, 123, event, &index);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(ETIMEDOUT, errno);
+
+ close(event);
+
+ close(objs[0]);
+ close(objs[1]);
+
+ close(fd);
+}
+
+#define STRESS_LOOPS 10000
+#define STRESS_THREADS 4
+
+static unsigned int stress_counter;
+static int stress_device, stress_start_event, stress_mutex;
+
+static void *stress_thread(void *arg)
+{
+ struct ntsync_wait_args wait_args = {0};
+ __u32 index, count, i;
+ int ret;
+
+ wait_args.timeout = UINT64_MAX;
+ wait_args.count = 1;
+ wait_args.objs = (uintptr_t)&stress_start_event;
+ wait_args.owner = gettid();
+ wait_args.index = 0xdeadbeef;
+
+ ioctl(stress_device, NTSYNC_IOC_WAIT_ANY, &wait_args);
+
+ wait_args.objs = (uintptr_t)&stress_mutex;
+
+ for (i = 0; i < STRESS_LOOPS; ++i) {
+ ioctl(stress_device, NTSYNC_IOC_WAIT_ANY, &wait_args);
+
+ ++stress_counter;
+
+ unlock_mutex(stress_mutex, wait_args.owner, &count);
+ }
+
+ return NULL;
+}
+
+TEST(stress_wait)
+{
+ struct ntsync_event_args event_args;
+ struct ntsync_mutex_args mutex_args;
+ pthread_t threads[STRESS_THREADS];
+ __u32 signaled, i;
+ int ret;
+
+ stress_device = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
+ ASSERT_LE(0, stress_device);
+
+ mutex_args.owner = 0;
+ mutex_args.count = 0;
+ stress_mutex = ioctl(stress_device, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
+ EXPECT_LE(0, stress_mutex);
+
+ event_args.manual = 1;
+ event_args.signaled = 0;
+ stress_start_event = ioctl(stress_device, NTSYNC_IOC_CREATE_EVENT, &event_args);
+ EXPECT_LE(0, stress_start_event);
+
+ for (i = 0; i < STRESS_THREADS; ++i)
+ pthread_create(&threads[i], NULL, stress_thread, NULL);
+
+ ret = ioctl(stress_start_event, NTSYNC_IOC_EVENT_SET, &signaled);
+ EXPECT_EQ(0, ret);
+
+ for (i = 0; i < STRESS_THREADS; ++i) {
+ ret = pthread_join(threads[i], NULL);
+ EXPECT_EQ(0, ret);
+ }
+
+ EXPECT_EQ(STRESS_LOOPS * STRESS_THREADS, stress_counter);
+
+ close(stress_start_event);
+ close(stress_mutex);
+ close(stress_device);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/efivarfs/efivarfs.sh b/tools/testing/selftests/efivarfs/efivarfs.sh
index d374878cc0ba..c62544b966ae 100755
--- a/tools/testing/selftests/efivarfs/efivarfs.sh
+++ b/tools/testing/selftests/efivarfs/efivarfs.sh
@@ -76,11 +76,11 @@ test_create_empty()
: > $file
- if [ ! -e $file ]; then
- echo "$file can not be created without writing" >&2
+ if [ -e $file ]; then
+ echo "$file can be created without writing" >&2
+ file_cleanup $file
exit 1
fi
- file_cleanup $file
}
test_create_read()
@@ -89,10 +89,13 @@ test_create_read()
./create-read $file
if [ $? -ne 0 ]; then
echo "create and read $file failed"
+ exit 1
+ fi
+ if [ -e $file ]; then
+ echo "file still exists and should not"
file_cleanup $file
exit 1
fi
- file_cleanup $file
}
test_delete()
@@ -202,6 +205,158 @@ test_invalid_filenames()
exit $ret
}
+test_no_set_size()
+{
+ local attrs='\x07\x00\x00\x00'
+ local file=$efivarfs_mount/$FUNCNAME-$test_guid
+ local ret=0
+
+ printf "$attrs\x00" > $file
+ [ -e $file -a -s $file ] || exit 1
+ chattr -i $file
+ : > $file
+ if [ $? != 0 ]; then
+ echo "variable file failed to accept truncation"
+ ret=1
+ elif [ -e $file -a ! -s $file ]; then
+ echo "file can be truncated to zero size"
+ ret=1
+ fi
+ rm $file || exit 1
+
+ exit $ret
+}
+
+setup_test_multiple()
+{
+ ##
+ # we're going to do multi-threaded tests, so create a set of
+ # pipes for synchronization. We use pipes 1..3 to start the
+ # stalled shell job and pipes 4..6 as indicators that the job
+ # has started. If you need more than 3 jobs the two +3's below
+ # need increasing
+ ##
+
+ declare -ag p
+
+ # empty is because arrays number from 0 but jobs number from 1
+ p[0]=""
+
+ for f in 1 2 3 4 5 6; do
+ p[$f]=/tmp/efivarfs_pipe${f}
+ mknod ${p[$f]} p
+ done
+
+ declare -g var=$efivarfs_mount/test_multiple-$test_guid
+
+ cleanup() {
+ for f in ${p[@]}; do
+ rm -f ${f}
+ done
+ if [ -e $var ]; then
+ file_cleanup $var
+ fi
+ }
+ trap cleanup exit
+
+ waitstart() {
+ cat ${p[$[$1+3]]} > /dev/null
+ }
+
+ waitpipe() {
+ echo 1 > ${p[$[$1+3]]}
+ cat ${p[$1]} > /dev/null
+ }
+
+ endjob() {
+ echo 1 > ${p[$1]}
+ wait -n %$1
+ }
+}
+
+test_multiple_zero_size()
+{
+ ##
+ # check for remove on last close, set up three threads all
+ # holding the variable (one write and two reads) and then
+ # close them sequentially (waiting for completion) and check
+ # the state of the variable
+ ##
+
+ { waitpipe 1; echo 1; } > $var 2> /dev/null &
+ waitstart 1
+ # zero length file should exist
+ [ -e $var ] || exit 1
+ # second and third delayed close
+ { waitpipe 2; } < $var &
+ waitstart 2
+ { waitpipe 3; } < $var &
+ waitstart 3
+ # close first fd
+ endjob 1
+ # var should only be deleted on last close
+ [ -e $var ] || exit 1
+ # close second fd
+ endjob 2
+ [ -e $var ] || exit 1
+ # file should go on last close
+ endjob 3
+ [ ! -e $var ] || exit 1
+}
+
+test_multiple_create()
+{
+ ##
+ # set multiple threads to access the variable but delay
+ # the final write to check the close of 2 and 3. The
+ # final write should succeed in creating the variable
+ ##
+ { waitpipe 1; printf '\x07\x00\x00\x00\x54'; } > $var &
+ waitstart 1
+ [ -e $var -a ! -s $var ] || exit 1
+ { waitpipe 2; } < $var &
+ waitstart 2
+ { waitpipe 3; } < $var &
+ waitstart 3
+ # close second and third fds
+ endjob 2
+ # var should only be created (have size) on last close
+ [ -e $var -a ! -s $var ] || exit 1
+ endjob 3
+ [ -e $var -a ! -s $var ] || exit 1
+ # close first fd
+ endjob 1
+ # variable should still exist
+ [ -s $var ] || exit 1
+ file_cleanup $var
+}
+
+test_multiple_delete_on_write() {
+ ##
+ # delete the variable on final write; seqencing similar
+ # to test_multiple_create()
+ ##
+ printf '\x07\x00\x00\x00\x54' > $var
+ chattr -i $var
+ { waitpipe 1; printf '\x07\x00\x00\x00'; } > $var &
+ waitstart 1
+ [ -e $var -a -s $var ] || exit 1
+ { waitpipe 2; } < $var &
+ waitstart 2
+ { waitpipe 3; } < $var &
+ waitstart 3
+ # close first fd; write should set variable size to zero
+ endjob 1
+ # var should only be deleted on last close
+ [ -e $var -a ! -s $var ] || exit 1
+ endjob 2
+ [ -e $var ] || exit 1
+ # close last fd
+ endjob 3
+ # variable should now be removed
+ [ ! -e $var ] || exit 1
+}
+
check_prereqs
rc=0
@@ -214,5 +369,10 @@ run_test test_zero_size_delete
run_test test_open_unlink
run_test test_valid_filenames
run_test test_invalid_filenames
+run_test test_no_set_size
+setup_test_multiple
+run_test test_multiple_zero_size
+run_test test_multiple_create
+run_test test_multiple_delete_on_write
exit $rc
diff --git a/tools/testing/selftests/exec/check-exec.c b/tools/testing/selftests/exec/check-exec.c
index 4d3f4525e1e1..55bce47e56b7 100644
--- a/tools/testing/selftests/exec/check-exec.c
+++ b/tools/testing/selftests/exec/check-exec.c
@@ -22,6 +22,7 @@
#include <sys/prctl.h>
#include <sys/socket.h>
#include <sys/stat.h>
+#include <sys/syscall.h>
#include <sys/sysmacros.h>
#include <unistd.h>
@@ -31,6 +32,12 @@
#include "../kselftest_harness.h"
+static int sys_execveat(int dirfd, const char *pathname, char *const argv[],
+ char *const envp[], int flags)
+{
+ return syscall(__NR_execveat, dirfd, pathname, argv, envp, flags);
+}
+
static void drop_privileges(struct __test_metadata *const _metadata)
{
const unsigned int noroot = SECBIT_NOROOT | SECBIT_NOROOT_LOCKED;
@@ -219,8 +226,8 @@ static void test_exec_fd(struct __test_metadata *_metadata, const int fd,
* test framework as an error. With AT_EXECVE_CHECK, we only check a
* potential successful execution.
*/
- access_ret =
- execveat(fd, "", argv, NULL, AT_EMPTY_PATH | AT_EXECVE_CHECK);
+ access_ret = sys_execveat(fd, "", argv, NULL,
+ AT_EMPTY_PATH | AT_EXECVE_CHECK);
access_errno = errno;
if (err_code) {
EXPECT_EQ(-1, access_ret);
diff --git a/tools/testing/selftests/filesystems/mount-notify/.gitignore b/tools/testing/selftests/filesystems/mount-notify/.gitignore
new file mode 100644
index 000000000000..82a4846cbc4b
--- /dev/null
+++ b/tools/testing/selftests/filesystems/mount-notify/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+/*_test
diff --git a/tools/testing/selftests/filesystems/mount-notify/Makefile b/tools/testing/selftests/filesystems/mount-notify/Makefile
new file mode 100644
index 000000000000..10be0227b5ae
--- /dev/null
+++ b/tools/testing/selftests/filesystems/mount-notify/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES)
+TEST_GEN_PROGS := mount-notify_test
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c
new file mode 100644
index 000000000000..4a2d5c454fd1
--- /dev/null
+++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c
@@ -0,0 +1,516 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright (c) 2025 Miklos Szeredi <miklos@szeredi.hu>
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <sched.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <linux/fanotify.h>
+#include <unistd.h>
+#include <sys/fanotify.h>
+#include <sys/syscall.h>
+
+#include "../../kselftest_harness.h"
+#include "../statmount/statmount.h"
+
+#ifndef FAN_MNT_ATTACH
+struct fanotify_event_info_mnt {
+ struct fanotify_event_info_header hdr;
+ __u64 mnt_id;
+};
+#define FAN_MNT_ATTACH 0x01000000 /* Mount was attached */
+#endif
+
+#ifndef FAN_MNT_DETACH
+#define FAN_MNT_DETACH 0x02000000 /* Mount was detached */
+#endif
+
+#ifndef FAN_REPORT_MNT
+#define FAN_REPORT_MNT 0x00004000 /* Report mount events */
+#endif
+
+#ifndef FAN_MARK_MNTNS
+#define FAN_MARK_MNTNS 0x00000110
+#endif
+
+static uint64_t get_mnt_id(struct __test_metadata *const _metadata,
+ const char *path)
+{
+ struct statx sx;
+
+ ASSERT_EQ(statx(AT_FDCWD, path, 0, STATX_MNT_ID_UNIQUE, &sx), 0);
+ ASSERT_TRUE(!!(sx.stx_mask & STATX_MNT_ID_UNIQUE));
+ return sx.stx_mnt_id;
+}
+
+static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX";
+
+FIXTURE(fanotify) {
+ int fan_fd;
+ char buf[256];
+ unsigned int rem;
+ void *next;
+ char root_mntpoint[sizeof(root_mntpoint_templ)];
+ int orig_root;
+ int ns_fd;
+ uint64_t root_id;
+};
+
+FIXTURE_SETUP(fanotify)
+{
+ int ret;
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+
+ self->ns_fd = open("/proc/self/ns/mnt", O_RDONLY);
+ ASSERT_GE(self->ns_fd, 0);
+
+ ASSERT_EQ(mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL), 0);
+
+ strcpy(self->root_mntpoint, root_mntpoint_templ);
+ ASSERT_NE(mkdtemp(self->root_mntpoint), NULL);
+
+ self->orig_root = open("/", O_PATH | O_CLOEXEC);
+ ASSERT_GE(self->orig_root, 0);
+
+ ASSERT_EQ(mount("tmpfs", self->root_mntpoint, "tmpfs", 0, NULL), 0);
+
+ ASSERT_EQ(chroot(self->root_mntpoint), 0);
+
+ ASSERT_EQ(chdir("/"), 0);
+
+ ASSERT_EQ(mkdir("a", 0700), 0);
+
+ ASSERT_EQ(mkdir("b", 0700), 0);
+
+ self->root_id = get_mnt_id(_metadata, "/");
+ ASSERT_NE(self->root_id, 0);
+
+ self->fan_fd = fanotify_init(FAN_REPORT_MNT, 0);
+ ASSERT_GE(self->fan_fd, 0);
+
+ ret = fanotify_mark(self->fan_fd, FAN_MARK_ADD | FAN_MARK_MNTNS,
+ FAN_MNT_ATTACH | FAN_MNT_DETACH, self->ns_fd, NULL);
+ ASSERT_EQ(ret, 0);
+
+ self->rem = 0;
+}
+
+FIXTURE_TEARDOWN(fanotify)
+{
+ ASSERT_EQ(self->rem, 0);
+ close(self->fan_fd);
+
+ ASSERT_EQ(fchdir(self->orig_root), 0);
+
+ ASSERT_EQ(chroot("."), 0);
+
+ EXPECT_EQ(umount2(self->root_mntpoint, MNT_DETACH), 0);
+ EXPECT_EQ(chdir(self->root_mntpoint), 0);
+ EXPECT_EQ(chdir("/"), 0);
+ EXPECT_EQ(rmdir(self->root_mntpoint), 0);
+}
+
+static uint64_t expect_notify(struct __test_metadata *const _metadata,
+ FIXTURE_DATA(fanotify) *self,
+ uint64_t *mask)
+{
+ struct fanotify_event_metadata *meta;
+ struct fanotify_event_info_mnt *mnt;
+ unsigned int thislen;
+
+ if (!self->rem) {
+ ssize_t len = read(self->fan_fd, self->buf, sizeof(self->buf));
+ ASSERT_GT(len, 0);
+
+ self->rem = len;
+ self->next = (void *) self->buf;
+ }
+
+ meta = self->next;
+ ASSERT_TRUE(FAN_EVENT_OK(meta, self->rem));
+
+ thislen = meta->event_len;
+ self->rem -= thislen;
+ self->next += thislen;
+
+ *mask = meta->mask;
+ thislen -= sizeof(*meta);
+
+ mnt = ((void *) meta) + meta->event_len - thislen;
+
+ ASSERT_EQ(thislen, sizeof(*mnt));
+
+ return mnt->mnt_id;
+}
+
+static void expect_notify_n(struct __test_metadata *const _metadata,
+ FIXTURE_DATA(fanotify) *self,
+ unsigned int n, uint64_t mask[], uint64_t mnts[])
+{
+ unsigned int i;
+
+ for (i = 0; i < n; i++)
+ mnts[i] = expect_notify(_metadata, self, &mask[i]);
+}
+
+static uint64_t expect_notify_mask(struct __test_metadata *const _metadata,
+ FIXTURE_DATA(fanotify) *self,
+ uint64_t expect_mask)
+{
+ uint64_t mntid, mask;
+
+ mntid = expect_notify(_metadata, self, &mask);
+ ASSERT_EQ(expect_mask, mask);
+
+ return mntid;
+}
+
+
+static void expect_notify_mask_n(struct __test_metadata *const _metadata,
+ FIXTURE_DATA(fanotify) *self,
+ uint64_t mask, unsigned int n, uint64_t mnts[])
+{
+ unsigned int i;
+
+ for (i = 0; i < n; i++)
+ mnts[i] = expect_notify_mask(_metadata, self, mask);
+}
+
+static void verify_mount_ids(struct __test_metadata *const _metadata,
+ const uint64_t list1[], const uint64_t list2[],
+ size_t num)
+{
+ unsigned int i, j;
+
+ // Check that neither list has any duplicates
+ for (i = 0; i < num; i++) {
+ for (j = 0; j < num; j++) {
+ if (i != j) {
+ ASSERT_NE(list1[i], list1[j]);
+ ASSERT_NE(list2[i], list2[j]);
+ }
+ }
+ }
+ // Check that all list1 memebers can be found in list2. Together with
+ // the above it means that the list1 and list2 represent the same sets.
+ for (i = 0; i < num; i++) {
+ for (j = 0; j < num; j++) {
+ if (list1[i] == list2[j])
+ break;
+ }
+ ASSERT_NE(j, num);
+ }
+}
+
+static void check_mounted(struct __test_metadata *const _metadata,
+ const uint64_t mnts[], size_t num)
+{
+ ssize_t ret;
+ uint64_t *list;
+
+ list = malloc((num + 1) * sizeof(list[0]));
+ ASSERT_NE(list, NULL);
+
+ ret = listmount(LSMT_ROOT, 0, 0, list, num + 1, 0);
+ ASSERT_EQ(ret, num);
+
+ verify_mount_ids(_metadata, mnts, list, num);
+
+ free(list);
+}
+
+static void setup_mount_tree(struct __test_metadata *const _metadata,
+ int log2_num)
+{
+ int ret, i;
+
+ ret = mount("", "/", NULL, MS_SHARED, NULL);
+ ASSERT_EQ(ret, 0);
+
+ for (i = 0; i < log2_num; i++) {
+ ret = mount("/", "/", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+ }
+}
+
+TEST_F(fanotify, bind)
+{
+ int ret;
+ uint64_t mnts[2] = { self->root_id };
+
+ ret = mount("/", "/", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+ ASSERT_NE(mnts[0], mnts[1]);
+
+ check_mounted(_metadata, mnts, 2);
+
+ // Cleanup
+ uint64_t detach_id;
+ ret = umount("/");
+ ASSERT_EQ(ret, 0);
+
+ detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH);
+ ASSERT_EQ(detach_id, mnts[1]);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, move)
+{
+ int ret;
+ uint64_t mnts[2] = { self->root_id };
+ uint64_t move_id;
+
+ ret = mount("/", "/a", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+ ASSERT_NE(mnts[0], mnts[1]);
+
+ check_mounted(_metadata, mnts, 2);
+
+ ret = move_mount(AT_FDCWD, "/a", AT_FDCWD, "/b", 0);
+ ASSERT_EQ(ret, 0);
+
+ move_id = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH);
+ ASSERT_EQ(move_id, mnts[1]);
+
+ // Cleanup
+ ret = umount("/b");
+ ASSERT_EQ(ret, 0);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, propagate)
+{
+ const unsigned int log2_num = 4;
+ const unsigned int num = (1 << log2_num);
+ uint64_t mnts[num];
+
+ setup_mount_tree(_metadata, log2_num);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, num - 1, mnts + 1);
+
+ mnts[0] = self->root_id;
+ check_mounted(_metadata, mnts, num);
+
+ // Cleanup
+ int ret;
+ uint64_t mnts2[num];
+ ret = umount2("/", MNT_DETACH);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("", "/", NULL, MS_PRIVATE, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts2[0] = self->root_id;
+ expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, num - 1, mnts2 + 1);
+ verify_mount_ids(_metadata, mnts, mnts2, num);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, fsmount)
+{
+ int ret, fs, mnt;
+ uint64_t mnts[2] = { self->root_id };
+
+ fs = fsopen("tmpfs", 0);
+ ASSERT_GE(fs, 0);
+
+ ret = fsconfig(fs, FSCONFIG_CMD_CREATE, 0, 0, 0);
+ ASSERT_EQ(ret, 0);
+
+ mnt = fsmount(fs, 0, 0);
+ ASSERT_GE(mnt, 0);
+
+ close(fs);
+
+ ret = move_mount(mnt, "", AT_FDCWD, "/a", MOVE_MOUNT_F_EMPTY_PATH);
+ ASSERT_EQ(ret, 0);
+
+ close(mnt);
+
+ mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+ ASSERT_NE(mnts[0], mnts[1]);
+
+ check_mounted(_metadata, mnts, 2);
+
+ // Cleanup
+ uint64_t detach_id;
+ ret = umount("/a");
+ ASSERT_EQ(ret, 0);
+
+ detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH);
+ ASSERT_EQ(detach_id, mnts[1]);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, reparent)
+{
+ uint64_t mnts[6] = { self->root_id };
+ uint64_t dmnts[3];
+ uint64_t masks[3];
+ unsigned int i;
+ int ret;
+
+ // Create setup with a[1] -> b[2] propagation
+ ret = mount("/", "/a", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("", "/a", NULL, MS_SHARED, NULL);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("/a", "/b", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("", "/b", NULL, MS_SLAVE, NULL);
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1);
+
+ check_mounted(_metadata, mnts, 3);
+
+ // Mount on a[3], which is propagated to b[4]
+ ret = mount("/", "/a", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 3);
+
+ check_mounted(_metadata, mnts, 5);
+
+ // Mount on b[5], not propagated
+ ret = mount("/", "/b", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts[5] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+
+ check_mounted(_metadata, mnts, 6);
+
+ // Umount a[3], which is propagated to b[4], but not b[5]
+ // This will result in b[5] "falling" on b[2]
+ ret = umount("/a");
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_n(_metadata, self, 3, masks, dmnts);
+ verify_mount_ids(_metadata, mnts + 3, dmnts, 3);
+
+ for (i = 0; i < 3; i++) {
+ if (dmnts[i] == mnts[5]) {
+ ASSERT_EQ(masks[i], FAN_MNT_ATTACH | FAN_MNT_DETACH);
+ } else {
+ ASSERT_EQ(masks[i], FAN_MNT_DETACH);
+ }
+ }
+
+ mnts[3] = mnts[5];
+ check_mounted(_metadata, mnts, 4);
+
+ // Cleanup
+ ret = umount("/b");
+ ASSERT_EQ(ret, 0);
+
+ ret = umount("/a");
+ ASSERT_EQ(ret, 0);
+
+ ret = umount("/b");
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 3, dmnts);
+ verify_mount_ids(_metadata, mnts + 1, dmnts, 3);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, rmdir)
+{
+ uint64_t mnts[3] = { self->root_id };
+ int ret;
+
+ ret = mount("/", "/a", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("/", "/a/b", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1);
+
+ check_mounted(_metadata, mnts, 3);
+
+ ret = chdir("/a");
+ ASSERT_EQ(ret, 0);
+
+ ret = fork();
+ ASSERT_GE(ret, 0);
+
+ if (ret == 0) {
+ chdir("/");
+ unshare(CLONE_NEWNS);
+ mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL);
+ umount2("/a", MNT_DETACH);
+ // This triggers a detach in the other namespace
+ rmdir("/a");
+ exit(0);
+ }
+ wait(NULL);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 2, mnts + 1);
+ check_mounted(_metadata, mnts, 1);
+
+ // Cleanup
+ ret = chdir("/");
+ ASSERT_EQ(ret, 0);
+}
+
+TEST_F(fanotify, pivot_root)
+{
+ uint64_t mnts[3] = { self->root_id };
+ uint64_t mnts2[3];
+ int ret;
+
+ ret = mount("tmpfs", "/a", "tmpfs", 0, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts[2] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+
+ ret = mkdir("/a/new", 0700);
+ ASSERT_EQ(ret, 0);
+
+ ret = mkdir("/a/old", 0700);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("/a", "/a/new", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+ check_mounted(_metadata, mnts, 3);
+
+ ret = syscall(SYS_pivot_root, "/a/new", "/a/new/old");
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH, 2, mnts2);
+ verify_mount_ids(_metadata, mnts, mnts2, 2);
+ check_mounted(_metadata, mnts, 3);
+
+ // Cleanup
+ ret = syscall(SYS_pivot_root, "/old", "/old/a/new");
+ ASSERT_EQ(ret, 0);
+
+ ret = umount("/a/new");
+ ASSERT_EQ(ret, 0);
+
+ ret = umount("/a");
+ ASSERT_EQ(ret, 0);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c b/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c
index 457cf76f3c5f..a3d8015897e9 100644
--- a/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c
+++ b/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c
@@ -3,6 +3,8 @@
#define _GNU_SOURCE
#include <fcntl.h>
+#include <linux/auto_dev-ioctl.h>
+#include <linux/errno.h>
#include <sched.h>
#include <stdio.h>
#include <string.h>
@@ -146,4 +148,16 @@ TEST_F(iterate_mount_namespaces, iterate_backward)
}
}
+TEST_F(iterate_mount_namespaces, nfs_valid_ioctl)
+{
+ ASSERT_NE(ioctl(self->fd_mnt_ns[0], AUTOFS_DEV_IOCTL_OPENMOUNT, NULL), 0);
+ ASSERT_EQ(errno, ENOTTY);
+
+ ASSERT_NE(ioctl(self->fd_mnt_ns[0], AUTOFS_DEV_IOCTL_CLOSEMOUNT, NULL), 0);
+ ASSERT_EQ(errno, ENOTTY);
+
+ ASSERT_NE(ioctl(self->fd_mnt_ns[0], AUTOFS_DEV_IOCTL_READY, NULL), 0);
+ ASSERT_EQ(errno, ENOTTY);
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/filesystems/overlayfs/Makefile b/tools/testing/selftests/filesystems/overlayfs/Makefile
index e8d1adb021af..6c661232b3b5 100644
--- a/tools/testing/selftests/filesystems/overlayfs/Makefile
+++ b/tools/testing/selftests/filesystems/overlayfs/Makefile
@@ -1,7 +1,14 @@
# SPDX-License-Identifier: GPL-2.0
-TEST_GEN_PROGS := dev_in_maps set_layers_via_fds
+CFLAGS += -Wall
+CFLAGS += $(KHDR_INCLUDES)
+LDLIBS += -lcap
-CFLAGS := -Wall -Werror
+LOCAL_HDRS += wrappers.h log.h
+
+TEST_GEN_PROGS := dev_in_maps
+TEST_GEN_PROGS += set_layers_via_fds
include ../../lib.mk
+
+$(OUTPUT)/set_layers_via_fds: ../utils.c
diff --git a/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c b/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c
index 1d0ae785a667..5074e64e74a8 100644
--- a/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c
+++ b/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c
@@ -6,26 +6,40 @@
#include <sched.h>
#include <stdio.h>
#include <string.h>
+#include <sys/socket.h>
#include <sys/stat.h>
+#include <sys/sysmacros.h>
#include <sys/mount.h>
#include <unistd.h>
#include "../../kselftest_harness.h"
+#include "../../pidfd/pidfd.h"
#include "log.h"
+#include "../utils.h"
#include "wrappers.h"
FIXTURE(set_layers_via_fds) {
+ int pidfd;
};
FIXTURE_SETUP(set_layers_via_fds)
{
- ASSERT_EQ(mkdir("/set_layers_via_fds", 0755), 0);
+ self->pidfd = -EBADF;
+ EXPECT_EQ(mkdir("/set_layers_via_fds", 0755), 0);
+ EXPECT_EQ(mkdir("/set_layers_via_fds_tmpfs", 0755), 0);
}
FIXTURE_TEARDOWN(set_layers_via_fds)
{
+ if (self->pidfd >= 0) {
+ EXPECT_EQ(sys_pidfd_send_signal(self->pidfd, SIGKILL, NULL, 0), 0);
+ EXPECT_EQ(close(self->pidfd), 0);
+ }
umount2("/set_layers_via_fds", 0);
- ASSERT_EQ(rmdir("/set_layers_via_fds"), 0);
+ EXPECT_EQ(rmdir("/set_layers_via_fds"), 0);
+
+ umount2("/set_layers_via_fds_tmpfs", 0);
+ EXPECT_EQ(rmdir("/set_layers_via_fds_tmpfs"), 0);
}
TEST_F(set_layers_via_fds, set_layers_via_fds)
@@ -214,4 +228,493 @@ TEST_F(set_layers_via_fds, set_500_layers_via_fds)
ASSERT_EQ(close(fd_overlay), 0);
}
+TEST_F(set_layers_via_fds, set_override_creds)
+{
+ int fd_context, fd_tmpfs, fd_overlay;
+ int layer_fds[] = { [0 ... 3] = -EBADF };
+ pid_t pid;
+ int pidfd;
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+ ASSERT_EQ(sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0);
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+ fd_tmpfs = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_tmpfs, 0);
+ ASSERT_EQ(close(fd_context), 0);
+
+ ASSERT_EQ(mkdirat(fd_tmpfs, "w", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "u", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l1", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l2", 0755), 0);
+
+ layer_fds[0] = openat(fd_tmpfs, "w", O_DIRECTORY);
+ ASSERT_GE(layer_fds[0], 0);
+
+ layer_fds[1] = openat(fd_tmpfs, "u", O_DIRECTORY);
+ ASSERT_GE(layer_fds[1], 0);
+
+ layer_fds[2] = openat(fd_tmpfs, "l1", O_DIRECTORY);
+ ASSERT_GE(layer_fds[2], 0);
+
+ layer_fds[3] = openat(fd_tmpfs, "l2", O_DIRECTORY);
+ ASSERT_GE(layer_fds[3], 0);
+
+ ASSERT_EQ(sys_move_mount(fd_tmpfs, "", -EBADF, "/tmp", MOVE_MOUNT_F_EMPTY_PATH), 0);
+ ASSERT_EQ(close(fd_tmpfs), 0);
+
+ fd_context = sys_fsopen("overlay", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_NE(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir", NULL, layer_fds[2]), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "workdir", NULL, layer_fds[0]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "upperdir", NULL, layer_fds[1]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[2]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[3]), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_STRING, "metacopy", "on", 0), 0);
+
+ pid = create_child(&pidfd, 0);
+ ASSERT_GE(pid, 0);
+ if (pid == 0) {
+ if (sys_fsconfig(fd_context, FSCONFIG_SET_FLAG, "override_creds", NULL, 0)) {
+ TH_LOG("sys_fsconfig should have succeeded");
+ _exit(EXIT_FAILURE);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+ ASSERT_GE(sys_waitid(P_PID, pid, NULL, WEXITED), 0);
+ ASSERT_GE(close(pidfd), 0);
+
+ pid = create_child(&pidfd, 0);
+ ASSERT_GE(pid, 0);
+ if (pid == 0) {
+ if (sys_fsconfig(fd_context, FSCONFIG_SET_FLAG, "nooverride_creds", NULL, 0)) {
+ TH_LOG("sys_fsconfig should have succeeded");
+ _exit(EXIT_FAILURE);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+ ASSERT_GE(sys_waitid(P_PID, pid, NULL, WEXITED), 0);
+ ASSERT_GE(close(pidfd), 0);
+
+ pid = create_child(&pidfd, 0);
+ ASSERT_GE(pid, 0);
+ if (pid == 0) {
+ if (sys_fsconfig(fd_context, FSCONFIG_SET_FLAG, "override_creds", NULL, 0)) {
+ TH_LOG("sys_fsconfig should have succeeded");
+ _exit(EXIT_FAILURE);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+ ASSERT_GE(sys_waitid(P_PID, pid, NULL, WEXITED), 0);
+ ASSERT_GE(close(pidfd), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+
+ fd_overlay = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_overlay, 0);
+
+ ASSERT_EQ(sys_move_mount(fd_overlay, "", -EBADF, "/set_layers_via_fds", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+ ASSERT_EQ(close(fd_context), 0);
+ ASSERT_EQ(close(fd_overlay), 0);
+}
+
+TEST_F(set_layers_via_fds, set_override_creds_invalid)
+{
+ int fd_context, fd_tmpfs, fd_overlay, ret;
+ int layer_fds[] = { [0 ... 3] = -EBADF };
+ pid_t pid;
+ int fd_userns1, fd_userns2;
+ int ipc_sockets[2];
+ char c;
+ const unsigned int predictable_fd_context_nr = 123;
+
+ fd_userns1 = get_userns_fd(0, 0, 10000);
+ ASSERT_GE(fd_userns1, 0);
+
+ fd_userns2 = get_userns_fd(0, 1234, 10000);
+ ASSERT_GE(fd_userns2, 0);
+
+ ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_GE(ret, 0);
+
+ pid = create_child(&self->pidfd, 0);
+ ASSERT_GE(pid, 0);
+ if (pid == 0) {
+ if (close(ipc_sockets[0])) {
+ TH_LOG("close should have succeeded");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (!switch_userns(fd_userns2, 0, 0, false)) {
+ TH_LOG("switch_userns should have succeeded");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (read_nointr(ipc_sockets[1], &c, 1) != 1) {
+ TH_LOG("read_nointr should have succeeded");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (close(ipc_sockets[1])) {
+ TH_LOG("close should have succeeded");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (!sys_fsconfig(predictable_fd_context_nr, FSCONFIG_SET_FLAG, "override_creds", NULL, 0)) {
+ TH_LOG("sys_fsconfig should have failed");
+ _exit(EXIT_FAILURE);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ ASSERT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(switch_userns(fd_userns1, 0, 0, false), true);
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+ ASSERT_EQ(sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0);
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+ fd_tmpfs = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_tmpfs, 0);
+ ASSERT_EQ(close(fd_context), 0);
+
+ ASSERT_EQ(mkdirat(fd_tmpfs, "w", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "u", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l1", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l2", 0755), 0);
+
+ layer_fds[0] = openat(fd_tmpfs, "w", O_DIRECTORY);
+ ASSERT_GE(layer_fds[0], 0);
+
+ layer_fds[1] = openat(fd_tmpfs, "u", O_DIRECTORY);
+ ASSERT_GE(layer_fds[1], 0);
+
+ layer_fds[2] = openat(fd_tmpfs, "l1", O_DIRECTORY);
+ ASSERT_GE(layer_fds[2], 0);
+
+ layer_fds[3] = openat(fd_tmpfs, "l2", O_DIRECTORY);
+ ASSERT_GE(layer_fds[3], 0);
+
+ ASSERT_EQ(sys_move_mount(fd_tmpfs, "", -EBADF, "/tmp", MOVE_MOUNT_F_EMPTY_PATH), 0);
+ ASSERT_EQ(close(fd_tmpfs), 0);
+
+ fd_context = sys_fsopen("overlay", 0);
+ ASSERT_GE(fd_context, 0);
+ ASSERT_EQ(dup3(fd_context, predictable_fd_context_nr, 0), predictable_fd_context_nr);
+ ASSERT_EQ(close(fd_context), 0);
+ fd_context = predictable_fd_context_nr;
+ ASSERT_EQ(write_nointr(ipc_sockets[0], "1", 1), 1);
+ ASSERT_EQ(close(ipc_sockets[0]), 0);
+
+ ASSERT_EQ(wait_for_pid(pid), 0);
+ ASSERT_EQ(close(self->pidfd), 0);
+ self->pidfd = -EBADF;
+
+ ASSERT_NE(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir", NULL, layer_fds[2]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "workdir", NULL, layer_fds[0]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "upperdir", NULL, layer_fds[1]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[2]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[3]), 0);
+
+ for (int i = 0; i < ARRAY_SIZE(layer_fds); i++)
+ ASSERT_EQ(close(layer_fds[i]), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FLAG, "userxattr", NULL, 0), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+
+ fd_overlay = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_overlay, 0);
+
+ ASSERT_EQ(sys_move_mount(fd_overlay, "", -EBADF, "/set_layers_via_fds", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+ ASSERT_EQ(close(fd_context), 0);
+ ASSERT_EQ(close(fd_overlay), 0);
+ ASSERT_EQ(close(fd_userns1), 0);
+ ASSERT_EQ(close(fd_userns2), 0);
+}
+
+TEST_F(set_layers_via_fds, set_override_creds_nomknod)
+{
+ int fd_context, fd_tmpfs, fd_overlay;
+ int layer_fds[] = { [0 ... 3] = -EBADF };
+ pid_t pid;
+ int pidfd;
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+ ASSERT_EQ(sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0);
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+ fd_tmpfs = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_tmpfs, 0);
+ ASSERT_EQ(close(fd_context), 0);
+
+ ASSERT_EQ(mkdirat(fd_tmpfs, "w", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "u", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l1", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l2", 0755), 0);
+
+ layer_fds[0] = openat(fd_tmpfs, "w", O_DIRECTORY);
+ ASSERT_GE(layer_fds[0], 0);
+
+ layer_fds[1] = openat(fd_tmpfs, "u", O_DIRECTORY);
+ ASSERT_GE(layer_fds[1], 0);
+
+ layer_fds[2] = openat(fd_tmpfs, "l1", O_DIRECTORY);
+ ASSERT_GE(layer_fds[2], 0);
+
+ layer_fds[3] = openat(fd_tmpfs, "l2", O_DIRECTORY);
+ ASSERT_GE(layer_fds[3], 0);
+
+ ASSERT_EQ(sys_move_mount(fd_tmpfs, "", -EBADF, "/tmp", MOVE_MOUNT_F_EMPTY_PATH), 0);
+ ASSERT_EQ(close(fd_tmpfs), 0);
+
+ fd_context = sys_fsopen("overlay", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_NE(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir", NULL, layer_fds[2]), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "workdir", NULL, layer_fds[0]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "upperdir", NULL, layer_fds[1]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[2]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[3]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FLAG, "userxattr", NULL, 0), 0);
+
+ pid = create_child(&pidfd, 0);
+ ASSERT_GE(pid, 0);
+ if (pid == 0) {
+ if (!cap_down(CAP_MKNOD))
+ _exit(EXIT_FAILURE);
+
+ if (!cap_down(CAP_SYS_ADMIN))
+ _exit(EXIT_FAILURE);
+
+ if (sys_fsconfig(fd_context, FSCONFIG_SET_FLAG, "override_creds", NULL, 0))
+ _exit(EXIT_FAILURE);
+
+ _exit(EXIT_SUCCESS);
+ }
+ ASSERT_EQ(sys_waitid(P_PID, pid, NULL, WEXITED), 0);
+ ASSERT_GE(close(pidfd), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+
+ fd_overlay = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_overlay, 0);
+
+ ASSERT_EQ(sys_move_mount(fd_overlay, "", -EBADF, "/set_layers_via_fds", MOVE_MOUNT_F_EMPTY_PATH), 0);
+ ASSERT_EQ(mknodat(fd_overlay, "dev-zero", S_IFCHR | 0644, makedev(1, 5)), -1);
+ ASSERT_EQ(errno, EPERM);
+
+ ASSERT_EQ(close(fd_context), 0);
+ ASSERT_EQ(close(fd_overlay), 0);
+}
+
+TEST_F(set_layers_via_fds, set_500_layers_via_opath_fds)
+{
+ int fd_context, fd_tmpfs, fd_overlay, fd_work, fd_upper, fd_lower;
+ int layer_fds[500] = { [0 ... 499] = -EBADF };
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+ ASSERT_EQ(sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0);
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+ fd_tmpfs = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_tmpfs, 0);
+ ASSERT_EQ(close(fd_context), 0);
+
+ for (int i = 0; i < ARRAY_SIZE(layer_fds); i++) {
+ char path[100];
+
+ sprintf(path, "l%d", i);
+ ASSERT_EQ(mkdirat(fd_tmpfs, path, 0755), 0);
+ layer_fds[i] = openat(fd_tmpfs, path, O_DIRECTORY | O_PATH);
+ ASSERT_GE(layer_fds[i], 0);
+ }
+
+ ASSERT_EQ(mkdirat(fd_tmpfs, "w", 0755), 0);
+ fd_work = openat(fd_tmpfs, "w", O_DIRECTORY | O_PATH);
+ ASSERT_GE(fd_work, 0);
+
+ ASSERT_EQ(mkdirat(fd_tmpfs, "u", 0755), 0);
+ fd_upper = openat(fd_tmpfs, "u", O_DIRECTORY | O_PATH);
+ ASSERT_GE(fd_upper, 0);
+
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l501", 0755), 0);
+ fd_lower = openat(fd_tmpfs, "l501", O_DIRECTORY | O_PATH);
+ ASSERT_GE(fd_lower, 0);
+
+ ASSERT_EQ(sys_move_mount(fd_tmpfs, "", -EBADF, "/tmp", MOVE_MOUNT_F_EMPTY_PATH), 0);
+ ASSERT_EQ(close(fd_tmpfs), 0);
+
+ fd_context = sys_fsopen("overlay", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "workdir", NULL, fd_work), 0);
+ ASSERT_EQ(close(fd_work), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "upperdir", NULL, fd_upper), 0);
+ ASSERT_EQ(close(fd_upper), 0);
+
+ for (int i = 0; i < ARRAY_SIZE(layer_fds); i++) {
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[i]), 0);
+ ASSERT_EQ(close(layer_fds[i]), 0);
+ }
+
+ ASSERT_NE(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, fd_lower), 0);
+ ASSERT_EQ(close(fd_lower), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+
+ fd_overlay = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_overlay, 0);
+ ASSERT_EQ(close(fd_context), 0);
+ ASSERT_EQ(close(fd_overlay), 0);
+}
+
+TEST_F(set_layers_via_fds, set_layers_via_detached_mount_fds)
+{
+ int fd_context, fd_tmpfs, fd_overlay, fd_tmp;
+ int layer_fds[] = { [0 ... 8] = -EBADF };
+ bool layers_found[] = { [0 ... 8] = false };
+ size_t len = 0;
+ char *line = NULL;
+ FILE *f_mountinfo;
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+ ASSERT_EQ(sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0);
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+ fd_tmpfs = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_tmpfs, 0);
+ ASSERT_EQ(close(fd_context), 0);
+
+ ASSERT_EQ(mkdirat(fd_tmpfs, "u", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "u/upper", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "u/work", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l1", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l2", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l3", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l4", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "d1", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "d2", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "d3", 0755), 0);
+
+ ASSERT_EQ(sys_move_mount(fd_tmpfs, "", -EBADF, "/set_layers_via_fds_tmpfs", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+ fd_tmp = open_tree(fd_tmpfs, "u", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+ ASSERT_GE(fd_tmp, 0);
+
+ layer_fds[0] = openat(fd_tmp, "upper", O_CLOEXEC | O_DIRECTORY | O_PATH);
+ ASSERT_GE(layer_fds[0], 0);
+
+ layer_fds[1] = openat(fd_tmp, "work", O_CLOEXEC | O_DIRECTORY | O_PATH);
+ ASSERT_GE(layer_fds[1], 0);
+
+ layer_fds[2] = open_tree(fd_tmpfs, "l1", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+ ASSERT_GE(layer_fds[2], 0);
+
+ layer_fds[3] = open_tree(fd_tmpfs, "l2", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+ ASSERT_GE(layer_fds[3], 0);
+
+ layer_fds[4] = open_tree(fd_tmpfs, "l3", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+ ASSERT_GE(layer_fds[4], 0);
+
+ layer_fds[5] = open_tree(fd_tmpfs, "l4", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+ ASSERT_GE(layer_fds[5], 0);
+
+ layer_fds[6] = open_tree(fd_tmpfs, "d1", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+ ASSERT_GE(layer_fds[6], 0);
+
+ layer_fds[7] = open_tree(fd_tmpfs, "d2", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+ ASSERT_GE(layer_fds[7], 0);
+
+ layer_fds[8] = open_tree(fd_tmpfs, "d3", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+ ASSERT_GE(layer_fds[8], 0);
+
+ ASSERT_EQ(close(fd_tmpfs), 0);
+
+ fd_context = sys_fsopen("overlay", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_NE(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir", NULL, layer_fds[2]), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "upperdir", NULL, layer_fds[0]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "workdir", NULL, layer_fds[1]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[2]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[3]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[4]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[5]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "datadir+", NULL, layer_fds[6]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "datadir+", NULL, layer_fds[7]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "datadir+", NULL, layer_fds[8]), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_STRING, "metacopy", "on", 0), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+
+ fd_overlay = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_overlay, 0);
+
+ ASSERT_EQ(sys_move_mount(fd_overlay, "", -EBADF, "/set_layers_via_fds", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+ f_mountinfo = fopen("/proc/self/mountinfo", "r");
+ ASSERT_NE(f_mountinfo, NULL);
+
+ while (getline(&line, &len, f_mountinfo) != -1) {
+ char *haystack = line;
+
+ if (strstr(haystack, "workdir=/tmp/w"))
+ layers_found[0] = true;
+ if (strstr(haystack, "upperdir=/tmp/u"))
+ layers_found[1] = true;
+ if (strstr(haystack, "lowerdir+=/tmp/l1"))
+ layers_found[2] = true;
+ if (strstr(haystack, "lowerdir+=/tmp/l2"))
+ layers_found[3] = true;
+ if (strstr(haystack, "lowerdir+=/tmp/l3"))
+ layers_found[4] = true;
+ if (strstr(haystack, "lowerdir+=/tmp/l4"))
+ layers_found[5] = true;
+ if (strstr(haystack, "datadir+=/tmp/d1"))
+ layers_found[6] = true;
+ if (strstr(haystack, "datadir+=/tmp/d2"))
+ layers_found[7] = true;
+ if (strstr(haystack, "datadir+=/tmp/d3"))
+ layers_found[8] = true;
+ }
+ free(line);
+
+ for (int i = 0; i < ARRAY_SIZE(layer_fds); i++) {
+ ASSERT_EQ(layers_found[i], true);
+ ASSERT_EQ(close(layer_fds[i]), 0);
+ }
+
+ ASSERT_EQ(close(fd_context), 0);
+ ASSERT_EQ(close(fd_overlay), 0);
+ ASSERT_EQ(fclose(f_mountinfo), 0);
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/filesystems/overlayfs/wrappers.h b/tools/testing/selftests/filesystems/overlayfs/wrappers.h
index 071b95fd2ac0..c38bc48e0cfa 100644
--- a/tools/testing/selftests/filesystems/overlayfs/wrappers.h
+++ b/tools/testing/selftests/filesystems/overlayfs/wrappers.h
@@ -44,4 +44,21 @@ static inline int sys_move_mount(int from_dfd, const char *from_pathname,
to_pathname, flags);
}
+#ifndef OPEN_TREE_CLONE
+#define OPEN_TREE_CLONE 1
+#endif
+
+#ifndef OPEN_TREE_CLOEXEC
+#define OPEN_TREE_CLOEXEC O_CLOEXEC
+#endif
+
+#ifndef AT_RECURSIVE
+#define AT_RECURSIVE 0x8000
+#endif
+
+static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags)
+{
+ return syscall(__NR_open_tree, dfd, filename, flags);
+}
+
#endif
diff --git a/tools/testing/selftests/filesystems/statmount/statmount.h b/tools/testing/selftests/filesystems/statmount/statmount.h
index f4294bab9d73..a7a5289ddae9 100644
--- a/tools/testing/selftests/filesystems/statmount/statmount.h
+++ b/tools/testing/selftests/filesystems/statmount/statmount.h
@@ -25,7 +25,7 @@ static inline int statmount(uint64_t mnt_id, uint64_t mnt_ns_id, uint64_t mask,
return syscall(__NR_statmount, &req, buf, bufsize, flags);
}
-static ssize_t listmount(uint64_t mnt_id, uint64_t mnt_ns_id,
+static inline ssize_t listmount(uint64_t mnt_id, uint64_t mnt_ns_id,
uint64_t last_mnt_id, uint64_t list[], size_t num,
unsigned int flags)
{
diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test.c b/tools/testing/selftests/filesystems/statmount/statmount_test.c
index 8eb6aa606a0d..f048042e53e9 100644
--- a/tools/testing/selftests/filesystems/statmount/statmount_test.c
+++ b/tools/testing/selftests/filesystems/statmount/statmount_test.c
@@ -26,13 +26,12 @@ static const char *const known_fs[] = {
"hfsplus", "hostfs", "hpfs", "hugetlbfs", "ibmasmfs", "iomem",
"ipathfs", "iso9660", "jffs2", "jfs", "minix", "mqueue", "msdos",
"nfs", "nfs4", "nfsd", "nilfs2", "nsfs", "ntfs", "ntfs3", "ocfs2",
- "ocfs2_dlmfs", "ocxlflash", "omfs", "openpromfs", "overlay", "pipefs",
- "proc", "pstore", "pvfs2", "qnx4", "qnx6", "ramfs",
- "resctrl", "romfs", "rootfs", "rpc_pipefs", "s390_hypfs", "secretmem",
- "securityfs", "selinuxfs", "smackfs", "smb3", "sockfs", "spufs",
- "squashfs", "sysfs", "sysv", "tmpfs", "tracefs", "ubifs", "udf",
- "ufs", "v7", "vboxsf", "vfat", "virtiofs", "vxfs", "xenfs", "xfs",
- "zonefs", NULL };
+ "ocfs2_dlmfs", "omfs", "openpromfs", "overlay", "pipefs", "proc",
+ "pstore", "pvfs2", "qnx4", "qnx6", "ramfs", "resctrl", "romfs",
+ "rootfs", "rpc_pipefs", "s390_hypfs", "secretmem", "securityfs",
+ "selinuxfs", "smackfs", "smb3", "sockfs", "spufs", "squashfs", "sysfs",
+ "sysv", "tmpfs", "tracefs", "ubifs", "udf", "ufs", "v7", "vboxsf",
+ "vfat", "virtiofs", "vxfs", "xenfs", "xfs", "zonefs", NULL };
static struct statmount *statmount_alloc(uint64_t mnt_id, uint64_t mask, unsigned int flags)
{
@@ -383,6 +382,10 @@ static void test_statmount_mnt_point(void)
return;
}
+ if (!(sm->mask & STATMOUNT_MNT_POINT)) {
+ ksft_test_result_fail("missing STATMOUNT_MNT_POINT in mask\n");
+ return;
+ }
if (strcmp(sm->str + sm->mnt_point, "/") != 0) {
ksft_test_result_fail("unexpected mount point: '%s' != '/'\n",
sm->str + sm->mnt_point);
@@ -408,6 +411,10 @@ static void test_statmount_mnt_root(void)
strerror(errno));
return;
}
+ if (!(sm->mask & STATMOUNT_MNT_ROOT)) {
+ ksft_test_result_fail("missing STATMOUNT_MNT_ROOT in mask\n");
+ return;
+ }
mnt_root = sm->str + sm->mnt_root;
last_root = strrchr(mnt_root, '/');
if (last_root)
@@ -437,6 +444,10 @@ static void test_statmount_fs_type(void)
strerror(errno));
return;
}
+ if (!(sm->mask & STATMOUNT_FS_TYPE)) {
+ ksft_test_result_fail("missing STATMOUNT_FS_TYPE in mask\n");
+ return;
+ }
fs_type = sm->str + sm->fs_type;
for (s = known_fs; s != NULL; s++) {
if (strcmp(fs_type, *s) == 0)
@@ -464,6 +475,11 @@ static void test_statmount_mnt_opts(void)
return;
}
+ if (!(sm->mask & STATMOUNT_MNT_BASIC)) {
+ ksft_test_result_fail("missing STATMOUNT_MNT_BASIC in mask\n");
+ return;
+ }
+
while (getline(&line, &len, f_mountinfo) != -1) {
int i;
char *p, *p2;
@@ -514,7 +530,10 @@ static void test_statmount_mnt_opts(void)
if (p2)
*p2 = '\0';
- statmount_opts = sm->str + sm->mnt_opts;
+ if (sm->mask & STATMOUNT_MNT_OPTS)
+ statmount_opts = sm->str + sm->mnt_opts;
+ else
+ statmount_opts = "";
if (strcmp(statmount_opts, p) != 0)
ksft_test_result_fail(
"unexpected mount options: '%s' != '%s'\n",
diff --git a/tools/testing/selftests/filesystems/utils.c b/tools/testing/selftests/filesystems/utils.c
new file mode 100644
index 000000000000..e553c89c5b19
--- /dev/null
+++ b/tools/testing/selftests/filesystems/utils.c
@@ -0,0 +1,501 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <fcntl.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <grp.h>
+#include <linux/limits.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/eventfd.h>
+#include <sys/fsuid.h>
+#include <sys/prctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/xattr.h>
+
+#include "utils.h"
+
+#define MAX_USERNS_LEVEL 32
+
+#define syserror(format, ...) \
+ ({ \
+ fprintf(stderr, "%m - " format "\n", ##__VA_ARGS__); \
+ (-errno); \
+ })
+
+#define syserror_set(__ret__, format, ...) \
+ ({ \
+ typeof(__ret__) __internal_ret__ = (__ret__); \
+ errno = labs(__ret__); \
+ fprintf(stderr, "%m - " format "\n", ##__VA_ARGS__); \
+ __internal_ret__; \
+ })
+
+#define STRLITERALLEN(x) (sizeof(""x"") - 1)
+
+#define INTTYPE_TO_STRLEN(type) \
+ (2 + (sizeof(type) <= 1 \
+ ? 3 \
+ : sizeof(type) <= 2 \
+ ? 5 \
+ : sizeof(type) <= 4 \
+ ? 10 \
+ : sizeof(type) <= 8 ? 20 : sizeof(int[-2 * (sizeof(type) > 8)])))
+
+#define list_for_each(__iterator, __list) \
+ for (__iterator = (__list)->next; __iterator != __list; __iterator = __iterator->next)
+
+typedef enum idmap_type_t {
+ ID_TYPE_UID,
+ ID_TYPE_GID
+} idmap_type_t;
+
+struct id_map {
+ idmap_type_t map_type;
+ __u32 nsid;
+ __u32 hostid;
+ __u32 range;
+};
+
+struct list {
+ void *elem;
+ struct list *next;
+ struct list *prev;
+};
+
+struct userns_hierarchy {
+ int fd_userns;
+ int fd_event;
+ unsigned int level;
+ struct list id_map;
+};
+
+static inline void list_init(struct list *list)
+{
+ list->elem = NULL;
+ list->next = list->prev = list;
+}
+
+static inline int list_empty(const struct list *list)
+{
+ return list == list->next;
+}
+
+static inline void __list_add(struct list *new, struct list *prev, struct list *next)
+{
+ next->prev = new;
+ new->next = next;
+ new->prev = prev;
+ prev->next = new;
+}
+
+static inline void list_add_tail(struct list *head, struct list *list)
+{
+ __list_add(list, head->prev, head);
+}
+
+static inline void list_del(struct list *list)
+{
+ struct list *next, *prev;
+
+ next = list->next;
+ prev = list->prev;
+ next->prev = prev;
+ prev->next = next;
+}
+
+static ssize_t read_nointr(int fd, void *buf, size_t count)
+{
+ ssize_t ret;
+
+ do {
+ ret = read(fd, buf, count);
+ } while (ret < 0 && errno == EINTR);
+
+ return ret;
+}
+
+static ssize_t write_nointr(int fd, const void *buf, size_t count)
+{
+ ssize_t ret;
+
+ do {
+ ret = write(fd, buf, count);
+ } while (ret < 0 && errno == EINTR);
+
+ return ret;
+}
+
+#define __STACK_SIZE (8 * 1024 * 1024)
+static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
+{
+ void *stack;
+
+ stack = malloc(__STACK_SIZE);
+ if (!stack)
+ return -ENOMEM;
+
+#ifdef __ia64__
+ return __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg, NULL);
+#else
+ return clone(fn, stack + __STACK_SIZE, flags | SIGCHLD, arg, NULL);
+#endif
+}
+
+static int get_userns_fd_cb(void *data)
+{
+ for (;;)
+ pause();
+ _exit(0);
+}
+
+static int wait_for_pid(pid_t pid)
+{
+ int status, ret;
+
+again:
+ ret = waitpid(pid, &status, 0);
+ if (ret == -1) {
+ if (errno == EINTR)
+ goto again;
+
+ return -1;
+ }
+
+ if (!WIFEXITED(status))
+ return -1;
+
+ return WEXITSTATUS(status);
+}
+
+static int write_id_mapping(idmap_type_t map_type, pid_t pid, const char *buf, size_t buf_size)
+{
+ int fd = -EBADF, setgroups_fd = -EBADF;
+ int fret = -1;
+ int ret;
+ char path[STRLITERALLEN("/proc/") + INTTYPE_TO_STRLEN(pid_t) +
+ STRLITERALLEN("/setgroups") + 1];
+
+ if (geteuid() != 0 && map_type == ID_TYPE_GID) {
+ ret = snprintf(path, sizeof(path), "/proc/%d/setgroups", pid);
+ if (ret < 0 || ret >= sizeof(path))
+ goto out;
+
+ setgroups_fd = open(path, O_WRONLY | O_CLOEXEC);
+ if (setgroups_fd < 0 && errno != ENOENT) {
+ syserror("Failed to open \"%s\"", path);
+ goto out;
+ }
+
+ if (setgroups_fd >= 0) {
+ ret = write_nointr(setgroups_fd, "deny\n", STRLITERALLEN("deny\n"));
+ if (ret != STRLITERALLEN("deny\n")) {
+ syserror("Failed to write \"deny\" to \"/proc/%d/setgroups\"", pid);
+ goto out;
+ }
+ }
+ }
+
+ ret = snprintf(path, sizeof(path), "/proc/%d/%cid_map", pid, map_type == ID_TYPE_UID ? 'u' : 'g');
+ if (ret < 0 || ret >= sizeof(path))
+ goto out;
+
+ fd = open(path, O_WRONLY | O_CLOEXEC);
+ if (fd < 0) {
+ syserror("Failed to open \"%s\"", path);
+ goto out;
+ }
+
+ ret = write_nointr(fd, buf, buf_size);
+ if (ret != buf_size) {
+ syserror("Failed to write %cid mapping to \"%s\"",
+ map_type == ID_TYPE_UID ? 'u' : 'g', path);
+ goto out;
+ }
+
+ fret = 0;
+out:
+ close(fd);
+ close(setgroups_fd);
+
+ return fret;
+}
+
+static int map_ids_from_idmap(struct list *idmap, pid_t pid)
+{
+ int fill, left;
+ char mapbuf[4096] = {};
+ bool had_entry = false;
+ idmap_type_t map_type, u_or_g;
+
+ if (list_empty(idmap))
+ return 0;
+
+ for (map_type = ID_TYPE_UID, u_or_g = 'u';
+ map_type <= ID_TYPE_GID; map_type++, u_or_g = 'g') {
+ char *pos = mapbuf;
+ int ret;
+ struct list *iterator;
+
+
+ list_for_each(iterator, idmap) {
+ struct id_map *map = iterator->elem;
+ if (map->map_type != map_type)
+ continue;
+
+ had_entry = true;
+
+ left = 4096 - (pos - mapbuf);
+ fill = snprintf(pos, left, "%u %u %u\n", map->nsid, map->hostid, map->range);
+ /*
+ * The kernel only takes <= 4k for writes to
+ * /proc/<pid>/{g,u}id_map
+ */
+ if (fill <= 0 || fill >= left)
+ return syserror_set(-E2BIG, "Too many %cid mappings defined", u_or_g);
+
+ pos += fill;
+ }
+ if (!had_entry)
+ continue;
+
+ ret = write_id_mapping(map_type, pid, mapbuf, pos - mapbuf);
+ if (ret < 0)
+ return syserror("Failed to write mapping: %s", mapbuf);
+
+ memset(mapbuf, 0, sizeof(mapbuf));
+ }
+
+ return 0;
+}
+
+static int get_userns_fd_from_idmap(struct list *idmap)
+{
+ int ret;
+ pid_t pid;
+ char path_ns[STRLITERALLEN("/proc/") + INTTYPE_TO_STRLEN(pid_t) +
+ STRLITERALLEN("/ns/user") + 1];
+
+ pid = do_clone(get_userns_fd_cb, NULL, CLONE_NEWUSER | CLONE_NEWNS);
+ if (pid < 0)
+ return -errno;
+
+ ret = map_ids_from_idmap(idmap, pid);
+ if (ret < 0)
+ return ret;
+
+ ret = snprintf(path_ns, sizeof(path_ns), "/proc/%d/ns/user", pid);
+ if (ret < 0 || (size_t)ret >= sizeof(path_ns))
+ ret = -EIO;
+ else
+ ret = open(path_ns, O_RDONLY | O_CLOEXEC | O_NOCTTY);
+
+ (void)kill(pid, SIGKILL);
+ (void)wait_for_pid(pid);
+ return ret;
+}
+
+int get_userns_fd(unsigned long nsid, unsigned long hostid, unsigned long range)
+{
+ struct list head, uid_mapl, gid_mapl;
+ struct id_map uid_map = {
+ .map_type = ID_TYPE_UID,
+ .nsid = nsid,
+ .hostid = hostid,
+ .range = range,
+ };
+ struct id_map gid_map = {
+ .map_type = ID_TYPE_GID,
+ .nsid = nsid,
+ .hostid = hostid,
+ .range = range,
+ };
+
+ list_init(&head);
+ uid_mapl.elem = &uid_map;
+ gid_mapl.elem = &gid_map;
+ list_add_tail(&head, &uid_mapl);
+ list_add_tail(&head, &gid_mapl);
+
+ return get_userns_fd_from_idmap(&head);
+}
+
+bool switch_ids(uid_t uid, gid_t gid)
+{
+ if (setgroups(0, NULL))
+ return syserror("failure: setgroups");
+
+ if (setresgid(gid, gid, gid))
+ return syserror("failure: setresgid");
+
+ if (setresuid(uid, uid, uid))
+ return syserror("failure: setresuid");
+
+ /* Ensure we can access proc files from processes we can ptrace. */
+ if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0))
+ return syserror("failure: make dumpable");
+
+ return true;
+}
+
+static int create_userns_hierarchy(struct userns_hierarchy *h);
+
+static int userns_fd_cb(void *data)
+{
+ struct userns_hierarchy *h = data;
+ char c;
+ int ret;
+
+ ret = read_nointr(h->fd_event, &c, 1);
+ if (ret < 0)
+ return syserror("failure: read from socketpair");
+
+ /* Only switch ids if someone actually wrote a mapping for us. */
+ if (c == '1') {
+ if (!switch_ids(0, 0))
+ return syserror("failure: switch ids to 0");
+ }
+
+ ret = write_nointr(h->fd_event, "1", 1);
+ if (ret < 0)
+ return syserror("failure: write to socketpair");
+
+ ret = create_userns_hierarchy(++h);
+ if (ret < 0)
+ return syserror("failure: userns level %d", h->level);
+
+ return 0;
+}
+
+static int create_userns_hierarchy(struct userns_hierarchy *h)
+{
+ int fret = -1;
+ char c;
+ int fd_socket[2];
+ int fd_userns = -EBADF, ret = -1;
+ ssize_t bytes;
+ pid_t pid;
+ char path[256];
+
+ if (h->level == MAX_USERNS_LEVEL)
+ return 0;
+
+ ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, fd_socket);
+ if (ret < 0)
+ return syserror("failure: create socketpair");
+
+ /* Note the CLONE_FILES | CLONE_VM when mucking with fds and memory. */
+ h->fd_event = fd_socket[1];
+ pid = do_clone(userns_fd_cb, h, CLONE_NEWUSER | CLONE_FILES | CLONE_VM);
+ if (pid < 0) {
+ syserror("failure: userns level %d", h->level);
+ goto out_close;
+ }
+
+ ret = map_ids_from_idmap(&h->id_map, pid);
+ if (ret < 0) {
+ kill(pid, SIGKILL);
+ syserror("failure: writing id mapping for userns level %d for %d", h->level, pid);
+ goto out_wait;
+ }
+
+ if (!list_empty(&h->id_map))
+ bytes = write_nointr(fd_socket[0], "1", 1); /* Inform the child we wrote a mapping. */
+ else
+ bytes = write_nointr(fd_socket[0], "0", 1); /* Inform the child we didn't write a mapping. */
+ if (bytes < 0) {
+ kill(pid, SIGKILL);
+ syserror("failure: write to socketpair");
+ goto out_wait;
+ }
+
+ /* Wait for child to set*id() and become dumpable. */
+ bytes = read_nointr(fd_socket[0], &c, 1);
+ if (bytes < 0) {
+ kill(pid, SIGKILL);
+ syserror("failure: read from socketpair");
+ goto out_wait;
+ }
+
+ snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
+ fd_userns = open(path, O_RDONLY | O_CLOEXEC);
+ if (fd_userns < 0) {
+ kill(pid, SIGKILL);
+ syserror("failure: open userns level %d for %d", h->level, pid);
+ goto out_wait;
+ }
+
+ fret = 0;
+
+out_wait:
+ if (!wait_for_pid(pid) && !fret) {
+ h->fd_userns = fd_userns;
+ fd_userns = -EBADF;
+ }
+
+out_close:
+ if (fd_userns >= 0)
+ close(fd_userns);
+ close(fd_socket[0]);
+ close(fd_socket[1]);
+ return fret;
+}
+
+/* caps_down - lower all effective caps */
+int caps_down(void)
+{
+ bool fret = false;
+ cap_t caps = NULL;
+ int ret = -1;
+
+ caps = cap_get_proc();
+ if (!caps)
+ goto out;
+
+ ret = cap_clear_flag(caps, CAP_EFFECTIVE);
+ if (ret)
+ goto out;
+
+ ret = cap_set_proc(caps);
+ if (ret)
+ goto out;
+
+ fret = true;
+
+out:
+ cap_free(caps);
+ return fret;
+}
+
+/* cap_down - lower an effective cap */
+int cap_down(cap_value_t down)
+{
+ bool fret = false;
+ cap_t caps = NULL;
+ cap_value_t cap = down;
+ int ret = -1;
+
+ caps = cap_get_proc();
+ if (!caps)
+ goto out;
+
+ ret = cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap, 0);
+ if (ret)
+ goto out;
+
+ ret = cap_set_proc(caps);
+ if (ret)
+ goto out;
+
+ fret = true;
+
+out:
+ cap_free(caps);
+ return fret;
+}
diff --git a/tools/testing/selftests/filesystems/utils.h b/tools/testing/selftests/filesystems/utils.h
new file mode 100644
index 000000000000..7f1df2a3e94c
--- /dev/null
+++ b/tools/testing/selftests/filesystems/utils.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __IDMAP_UTILS_H
+#define __IDMAP_UTILS_H
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <errno.h>
+#include <linux/types.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syscall.h>
+#include <sys/capability.h>
+#include <sys/fsuid.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+extern int get_userns_fd(unsigned long nsid, unsigned long hostid,
+ unsigned long range);
+
+extern int caps_down(void);
+extern int cap_down(cap_value_t down);
+
+extern bool switch_ids(uid_t uid, gid_t gid);
+
+static inline bool switch_userns(int fd, uid_t uid, gid_t gid, bool drop_caps)
+{
+ if (setns(fd, CLONE_NEWUSER))
+ return false;
+
+ if (!switch_ids(uid, gid))
+ return false;
+
+ if (drop_caps && !caps_down())
+ return false;
+
+ return true;
+}
+
+#endif /* __IDMAP_UTILS_H */
diff --git a/tools/testing/selftests/ftrace/.gitignore b/tools/testing/selftests/ftrace/.gitignore
index 2659417cb2c7..4d7fcb828850 100644
--- a/tools/testing/selftests/ftrace/.gitignore
+++ b/tools/testing/selftests/ftrace/.gitignore
@@ -1,2 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
logs
+poll
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc
index dc25bcf4f9e2..73f6c6fcecab 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc
@@ -7,12 +7,42 @@ echo 0 > events/enable
echo > dynamic_events
PLACE=$FUNCTION_FORK
+PLACE2="kmem_cache_free"
+PLACE3="schedule_timeout"
+
+# Some functions may have BPF programs attached, therefore
+# count already enabled_functions before tests start
+ocnt=`cat enabled_functions | wc -l`
echo "f:myevent1 $PLACE" >> dynamic_events
+
+# Make sure the event is attached and is the only one
+grep -q $PLACE enabled_functions
+cnt=`cat enabled_functions | wc -l`
+if [ $cnt -ne $((ocnt + 1)) ]; then
+ exit_fail
+fi
+
echo "f:myevent2 $PLACE%return" >> dynamic_events
+# It should till be the only attached function
+cnt=`cat enabled_functions | wc -l`
+if [ $cnt -ne $((ocnt + 1)) ]; then
+ exit_fail
+fi
+
+# add another event
+echo "f:myevent3 $PLACE2" >> dynamic_events
+
+grep -q $PLACE2 enabled_functions
+cnt=`cat enabled_functions | wc -l`
+if [ $cnt -ne $((ocnt + 2)) ]; then
+ exit_fail
+fi
+
grep -q myevent1 dynamic_events
grep -q myevent2 dynamic_events
+grep -q myevent3 dynamic_events
test -d events/fprobes/myevent1
test -d events/fprobes/myevent2
@@ -21,6 +51,34 @@ echo "-:myevent2" >> dynamic_events
grep -q myevent1 dynamic_events
! grep -q myevent2 dynamic_events
+# should still have 2 left
+cnt=`cat enabled_functions | wc -l`
+if [ $cnt -ne $((ocnt + 2)) ]; then
+ exit_fail
+fi
+
echo > dynamic_events
+# Should have none left
+cnt=`cat enabled_functions | wc -l`
+if [ $cnt -ne $ocnt ]; then
+ exit_fail
+fi
+
+echo "f:myevent4 $PLACE" >> dynamic_events
+
+# Should only have one enabled
+cnt=`cat enabled_functions | wc -l`
+if [ $cnt -ne $((ocnt + 1)) ]; then
+ exit_fail
+fi
+
+echo > dynamic_events
+
+# Should have none left
+cnt=`cat enabled_functions | wc -l`
+if [ $cnt -ne $ocnt ]; then
+ exit_fail
+fi
+
clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe.tc
index 155792eaeee5..f271c4238b72 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe.tc
@@ -6,6 +6,7 @@
echo 0 > events/enable
echo > dynamic_events
+SUBSYSTEM=kmem
TRACEPOINT1=kmem_cache_alloc
TRACEPOINT2=kmem_cache_free
@@ -24,4 +25,17 @@ grep -q myevent1 dynamic_events
echo > dynamic_events
+# auto naming check
+echo "t $TRACEPOINT1" >> dynamic_events
+
+test -d events/tracepoints/$TRACEPOINT1
+
+echo > dynamic_events
+
+# SUBSYSTEM is not supported
+echo "t $SUBSYSTEM/$TRACEPOINT1" >> dynamic_events && exit_fail ||:
+echo "t $SUBSYSTEM:$TRACEPOINT1" >> dynamic_events && exit_fail ||:
+echo "t:myevent3 $SUBSYSTEM/$TRACEPOINT1" >> dynamic_events && exit_fail ||:
+echo "t:myevent3 $SUBSYSTEM:$TRACEPOINT1" >> dynamic_events && exit_fail ||:
+
clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_uprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_uprobe.tc
index 86c76679c56e..f2048c244526 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_uprobe.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_uprobe.tc
@@ -3,14 +3,18 @@
# description: Generic dynamic event - add/remove/test uprobe events
# requires: uprobe_events
+if ! which readelf > /dev/null 2>&1 ; then
+ echo "No readelf found. skipped."
+ exit_unresolved
+fi
+
echo 0 > events/enable
echo > dynamic_events
REALBIN=`readlink -f /bin/sh`
+ENTRYPOINT=`readelf -h ${REALBIN} | grep Entry | sed -e 's/[^0]*//'`
-echo 'cat /proc/$$/maps' | /bin/sh | \
- grep "r-xp .*${REALBIN}$" | \
- awk '{printf "p:myevent %s:0x%s\n", $6,$3 }' >> uprobe_events
+echo "p:myevent ${REALBIN}:${ENTRYPOINT}" >> uprobe_events
grep -q myevent uprobe_events
test -d events/uprobes/myevent
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc b/tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc
new file mode 100644
index 000000000000..6b94b678741a
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc
@@ -0,0 +1,42 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Checking dynamic events limitations
+# requires: dynamic_events "imm-value":README
+
+# Max arguments limitation
+MAX_ARGS=128
+EXCEED_ARGS=$((MAX_ARGS + 1))
+
+check_max_args() { # event_header
+ TEST_STRING=$1
+ # Acceptable
+ for i in `seq 1 $MAX_ARGS`; do
+ TEST_STRING="$TEST_STRING \\$i"
+ done
+ echo "$TEST_STRING" >> dynamic_events
+ echo > dynamic_events
+ # Error
+ TEST_STRING="$TEST_STRING \\$EXCEED_ARGS"
+ ! echo "$TEST_STRING" >> dynamic_events
+ return 0
+}
+
+# Kprobe max args limitation
+if grep -q "kprobe_events" README; then
+ check_max_args "p vfs_read"
+fi
+
+# Fprobe max args limitation
+if grep -q "f[:[<group>/][<event>]] <func-name>[%return] [<args>]" README; then
+ check_max_args "f vfs_read"
+fi
+
+# Tprobe max args limitation
+if grep -q "t[:[<group>/][<event>]] <tracepoint> [<args>]" README; then
+ check_max_args "t kfree"
+fi
+
+# Uprobe max args limitation
+if grep -q "uprobe_events" README; then
+ check_max_args "p /bin/sh:10"
+fi
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
index c9425a34fae3..fee479295e2f 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
@@ -27,6 +27,7 @@ check_error 'f:^foo.1/bar vfs_read' # BAD_GROUP_NAME
check_error 'f:^ vfs_read' # NO_EVENT_NAME
check_error 'f:foo/^12345678901234567890123456789012345678901234567890123456789012345 vfs_read' # EVENT_TOO_LONG
check_error 'f:foo/^bar.1 vfs_read' # BAD_EVENT_NAME
+check_error 't kmem^/kfree' # BAD_TP_NAME
check_error 'f vfs_read ^$stack10000' # BAD_STACK_NUM
diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions
index 84d6a9c7ad67..a1052bf460fc 100644
--- a/tools/testing/selftests/ftrace/test.d/functions
+++ b/tools/testing/selftests/ftrace/test.d/functions
@@ -156,7 +156,13 @@ check_requires() { # Check required files and tracers
exit_unsupported
fi
elif [ "$r" != "$i" ]; then
- if ! grep -Fq "$r" README ; then
+ # If this is an instance, check the top directory
+ if echo $TRACING_DIR | grep -q "/instances/"; then
+ test="$TRACING_DIR/../.."
+ else
+ test=$TRACING_DIR
+ fi
+ if ! grep -Fq "$r" $test/README ; then
echo "Required feature pattern \"$r\" is not in README."
exit_unsupported
fi
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc
index 1590d6bfb857..20a35fea13f8 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc
@@ -2,6 +2,7 @@
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test inter-event histogram trigger expected fail actions
# requires: set_event snapshot "snapshot()":README
+# flags: instance
fail() { #msg
echo $1
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc
index 91339c130832..55ab0270e5f7 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc
@@ -2,6 +2,7 @@
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test inter-event histogram trigger onchange action
# requires: set_event "onchange(var)":README ping:program
+# flags: instance
fail() { #msg
echo $1
@@ -19,4 +20,6 @@ if ! grep -q "changed:" events/sched/sched_waking/hist; then
fail "Failed to create onchange action inter-event histogram"
fi
+echo '!hist:keys=comm:newprio=prio:onchange($newprio).save(comm,prio) if comm=="ping"' >> events/sched/sched_waking/trigger
+
exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc
index 147967e86584..9eb37c2fa417 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc
@@ -2,6 +2,7 @@
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test inter-event histogram trigger snapshot action
# requires: set_event snapshot events/sched/sched_process_fork/hist "onchange(var)":README "snapshot()":README ping:program
+# flags: instance
fail() { #msg
echo $1
@@ -27,4 +28,6 @@ if ! grep -q "comm=ping" snapshot; then
fail "Failed to create snapshot action inter-event histogram"
fi
+echo '!hist:keys=comm:newprio=prio:onchange($newprio).save(comm,prio):onchange($newprio).snapshot() if comm=="ping"' >> events/sched/sched_waking/trigger
+
exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-expressions.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-expressions.tc
index 05ffba299dbf..0ebda2068a00 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-expressions.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-expressions.tc
@@ -2,6 +2,7 @@
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test histogram expression parsing
# requires: set_event events/sched/sched_process_fork/trigger events/sched/sched_process_fork/hist error_log "<var1>=<field|var_ref|numeric_literal>":README
+# flags: instance
fail() { #msg
diff --git a/tools/testing/selftests/gpio/gpio-sim.sh b/tools/testing/selftests/gpio/gpio-sim.sh
index 6fb66a687f17..bbc29ed9c60a 100755
--- a/tools/testing/selftests/gpio/gpio-sim.sh
+++ b/tools/testing/selftests/gpio/gpio-sim.sh
@@ -46,12 +46,6 @@ remove_chip() {
rmdir $CONFIGFS_DIR/$CHIP || fail "Unable to remove the chip"
}
-configfs_cleanup() {
- for CHIP in `ls $CONFIGFS_DIR/`; do
- remove_chip $CHIP
- done
-}
-
create_chip() {
local CHIP=$1
@@ -105,6 +99,13 @@ disable_chip() {
echo 0 > $CONFIGFS_DIR/$CHIP/live || fail "Unable to disable the chip"
}
+configfs_cleanup() {
+ for CHIP in `ls $CONFIGFS_DIR/`; do
+ disable_chip $CHIP
+ remove_chip $CHIP
+ done
+}
+
configfs_chip_name() {
local CHIP=$1
local BANK=$2
@@ -181,6 +182,7 @@ create_chip chip
create_bank chip bank
enable_chip chip
test -n `cat $CONFIGFS_DIR/chip/bank/chip_name` || fail "chip_name doesn't work"
+disable_chip chip
remove_chip chip
echo "1.2. chip_name returns 'none' if the chip is still pending"
@@ -195,6 +197,7 @@ create_chip chip
create_bank chip bank
enable_chip chip
test -n `cat $CONFIGFS_DIR/chip/dev_name` || fail "dev_name doesn't work"
+disable_chip chip
remove_chip chip
echo "2. Creating and configuring simulated chips"
@@ -204,6 +207,7 @@ create_chip chip
create_bank chip bank
enable_chip chip
test "`get_chip_num_lines chip bank`" = "1" || fail "default number of lines is not 1"
+disable_chip chip
remove_chip chip
echo "2.2. Number of lines can be specified"
@@ -212,6 +216,7 @@ create_bank chip bank
set_num_lines chip bank 16
enable_chip chip
test "`get_chip_num_lines chip bank`" = "16" || fail "number of lines is not 16"
+disable_chip chip
remove_chip chip
echo "2.3. Label can be set"
@@ -220,6 +225,7 @@ create_bank chip bank
set_label chip bank foobar
enable_chip chip
test "`get_chip_label chip bank`" = "foobar" || fail "label is incorrect"
+disable_chip chip
remove_chip chip
echo "2.4. Label can be left empty"
@@ -227,6 +233,7 @@ create_chip chip
create_bank chip bank
enable_chip chip
test -z "`cat $CONFIGFS_DIR/chip/bank/label`" || fail "label is not empty"
+disable_chip chip
remove_chip chip
echo "2.5. Line names can be configured"
@@ -238,6 +245,7 @@ set_line_name chip bank 2 bar
enable_chip chip
test "`get_line_name chip bank 0`" = "foo" || fail "line name is incorrect"
test "`get_line_name chip bank 2`" = "bar" || fail "line name is incorrect"
+disable_chip chip
remove_chip chip
echo "2.6. Line config can remain unused if offset is greater than number of lines"
@@ -248,6 +256,7 @@ set_line_name chip bank 5 foobar
enable_chip chip
test "`get_line_name chip bank 0`" = "" || fail "line name is incorrect"
test "`get_line_name chip bank 1`" = "" || fail "line name is incorrect"
+disable_chip chip
remove_chip chip
echo "2.7. Line configfs directory names are sanitized"
@@ -267,6 +276,7 @@ for CHIP in $CHIPS; do
enable_chip $CHIP
done
for CHIP in $CHIPS; do
+ disable_chip $CHIP
remove_chip $CHIP
done
@@ -278,6 +288,7 @@ echo foobar > $CONFIGFS_DIR/chip/bank/label 2> /dev/null && \
fail "Setting label of a live chip should fail"
echo 8 > $CONFIGFS_DIR/chip/bank/num_lines 2> /dev/null && \
fail "Setting number of lines of a live chip should fail"
+disable_chip chip
remove_chip chip
echo "2.10. Can't create line items when chip is live"
@@ -285,6 +296,7 @@ create_chip chip
create_bank chip bank
enable_chip chip
mkdir $CONFIGFS_DIR/chip/bank/line0 2> /dev/null && fail "Creating line item should fail"
+disable_chip chip
remove_chip chip
echo "2.11. Probe errors are propagated to user-space"
@@ -316,6 +328,7 @@ mkdir -p $CONFIGFS_DIR/chip/bank/line4/hog
enable_chip chip
$BASE_DIR/gpio-mockup-cdev -s 1 /dev/`configfs_chip_name chip bank` 4 2> /dev/null && \
fail "Setting the value of a hogged line shouldn't succeed"
+disable_chip chip
remove_chip chip
echo "3. Controlling simulated chips"
@@ -331,6 +344,7 @@ test "$?" = "1" || fail "pull set incorrectly"
sysfs_set_pull chip bank 0 pull-down
$BASE_DIR/gpio-mockup-cdev /dev/`configfs_chip_name chip bank` 1
test "$?" = "0" || fail "pull set incorrectly"
+disable_chip chip
remove_chip chip
echo "3.2. Pull can be read from sysfs"
@@ -344,6 +358,7 @@ SYSFS_PATH=/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/pull
test `cat $SYSFS_PATH` = "pull-down" || fail "reading the pull failed"
sysfs_set_pull chip bank 0 pull-up
test `cat $SYSFS_PATH` = "pull-up" || fail "reading the pull failed"
+disable_chip chip
remove_chip chip
echo "3.3. Incorrect input in sysfs is rejected"
@@ -355,6 +370,7 @@ DEVNAME=`configfs_dev_name chip`
CHIPNAME=`configfs_chip_name chip bank`
SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/pull"
echo foobar > $SYSFS_PATH 2> /dev/null && fail "invalid input not detected"
+disable_chip chip
remove_chip chip
echo "3.4. Can't write to value"
@@ -365,6 +381,7 @@ DEVNAME=`configfs_dev_name chip`
CHIPNAME=`configfs_chip_name chip bank`
SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/value"
echo 1 > $SYSFS_PATH 2> /dev/null && fail "writing to 'value' succeeded unexpectedly"
+disable_chip chip
remove_chip chip
echo "4. Simulated GPIO chips are functional"
@@ -382,6 +399,7 @@ $BASE_DIR/gpio-mockup-cdev -s 1 /dev/`configfs_chip_name chip bank` 0 &
sleep 0.1 # FIXME Any better way?
test `cat $SYSFS_PATH` = "1" || fail "incorrect value read from sysfs"
kill $!
+disable_chip chip
remove_chip chip
echo "4.2. Bias settings work correctly"
@@ -394,6 +412,7 @@ CHIPNAME=`configfs_chip_name chip bank`
SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/value"
$BASE_DIR/gpio-mockup-cdev -b pull-up /dev/`configfs_chip_name chip bank` 0
test `cat $SYSFS_PATH` = "1" || fail "bias setting does not work"
+disable_chip chip
remove_chip chip
echo "GPIO $MODULE test PASS"
diff --git a/tools/testing/selftests/hid/Makefile b/tools/testing/selftests/hid/Makefile
index 0336353bd15f..2839d2612ce3 100644
--- a/tools/testing/selftests/hid/Makefile
+++ b/tools/testing/selftests/hid/Makefile
@@ -43,10 +43,8 @@ TEST_GEN_PROGS = hid_bpf hidraw
# $3 - target (assumed to be file); only file name will be emitted;
# $4 - optional extra arg, emitted as-is, if provided.
ifeq ($(V),1)
-Q =
msg =
else
-Q = @
msg = @printf ' %-8s%s %s%s\n' "$(1)" "$(if $(2), [$(2)])" "$(notdir $(3))" "$(if $(4), $(4))";
MAKEFLAGS += --no-print-directory
submake_extras := feature_display=0
diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
index cdf91b0ca40f..c3b6d2604b1e 100644
--- a/tools/testing/selftests/kselftest.h
+++ b/tools/testing/selftests/kselftest.h
@@ -444,10 +444,6 @@ static inline __noreturn __printf(1, 2) void ksft_exit_skip(const char *msg, ...
static inline int ksft_min_kernel_version(unsigned int min_major,
unsigned int min_minor)
{
-#ifdef NOLIBC
- ksft_print_msg("NOLIBC: Can't check kernel version: Function not implemented\n");
- return 0;
-#else
unsigned int major, minor;
struct utsname info;
@@ -455,7 +451,6 @@ static inline int ksft_min_kernel_version(unsigned int min_major,
ksft_exit_fail_msg("Can't parse kernel version\n");
return major > min_major || (major == min_major && minor >= min_minor);
-#endif
}
#endif /* __KSELFTEST_H */
diff --git a/tools/testing/selftests/kselftest/module.sh b/tools/testing/selftests/kselftest/module.sh
index fb4733faff12..51fb65159932 100755
--- a/tools/testing/selftests/kselftest/module.sh
+++ b/tools/testing/selftests/kselftest/module.sh
@@ -11,7 +11,7 @@
# SPDX-License-Identifier: GPL-2.0+
# $(dirname $0)/../kselftest/module.sh "description" module_name
#
-# Example: tools/testing/selftests/lib/printf.sh
+# Example: tools/testing/selftests/lib/bitmap.sh
desc="" # Output prefix.
module="" # Filename (without the .ko).
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 7f57abf936e7..1d41a046a7bf 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -9,3 +9,4 @@
!config
!settings
!Makefile
+!Makefile.kvm \ No newline at end of file
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 41593d2e7de9..20af35a91d6f 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -1,347 +1,16 @@
# SPDX-License-Identifier: GPL-2.0-only
-include ../../../build/Build.include
-
-all:
-
top_srcdir = ../../../..
include $(top_srcdir)/scripts/subarch.include
ARCH ?= $(SUBARCH)
-ifeq ($(ARCH),x86)
- ARCH_DIR := x86_64
-else ifeq ($(ARCH),arm64)
- ARCH_DIR := aarch64
-else ifeq ($(ARCH),s390)
- ARCH_DIR := s390x
-else
- ARCH_DIR := $(ARCH)
-endif
-
-LIBKVM += lib/assert.c
-LIBKVM += lib/elf.c
-LIBKVM += lib/guest_modes.c
-LIBKVM += lib/io.c
-LIBKVM += lib/kvm_util.c
-LIBKVM += lib/memstress.c
-LIBKVM += lib/guest_sprintf.c
-LIBKVM += lib/rbtree.c
-LIBKVM += lib/sparsebit.c
-LIBKVM += lib/test_util.c
-LIBKVM += lib/ucall_common.c
-LIBKVM += lib/userfaultfd_util.c
-
-LIBKVM_STRING += lib/string_override.c
-
-LIBKVM_x86_64 += lib/x86_64/apic.c
-LIBKVM_x86_64 += lib/x86_64/handlers.S
-LIBKVM_x86_64 += lib/x86_64/hyperv.c
-LIBKVM_x86_64 += lib/x86_64/memstress.c
-LIBKVM_x86_64 += lib/x86_64/pmu.c
-LIBKVM_x86_64 += lib/x86_64/processor.c
-LIBKVM_x86_64 += lib/x86_64/sev.c
-LIBKVM_x86_64 += lib/x86_64/svm.c
-LIBKVM_x86_64 += lib/x86_64/ucall.c
-LIBKVM_x86_64 += lib/x86_64/vmx.c
-
-LIBKVM_aarch64 += lib/aarch64/gic.c
-LIBKVM_aarch64 += lib/aarch64/gic_v3.c
-LIBKVM_aarch64 += lib/aarch64/gic_v3_its.c
-LIBKVM_aarch64 += lib/aarch64/handlers.S
-LIBKVM_aarch64 += lib/aarch64/processor.c
-LIBKVM_aarch64 += lib/aarch64/spinlock.c
-LIBKVM_aarch64 += lib/aarch64/ucall.c
-LIBKVM_aarch64 += lib/aarch64/vgic.c
-
-LIBKVM_s390x += lib/s390x/diag318_test_handler.c
-LIBKVM_s390x += lib/s390x/processor.c
-LIBKVM_s390x += lib/s390x/ucall.c
-LIBKVM_s390x += lib/s390x/facility.c
-
-LIBKVM_riscv += lib/riscv/handlers.S
-LIBKVM_riscv += lib/riscv/processor.c
-LIBKVM_riscv += lib/riscv/ucall.c
-
-# Non-compiled test targets
-TEST_PROGS_x86_64 += x86_64/nx_huge_pages_test.sh
-
-# Compiled test targets
-TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test
-TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test
-TEST_GEN_PROGS_x86_64 += x86_64/dirty_log_page_splitting_test
-TEST_GEN_PROGS_x86_64 += x86_64/feature_msrs_test
-TEST_GEN_PROGS_x86_64 += x86_64/exit_on_emulation_failure_test
-TEST_GEN_PROGS_x86_64 += x86_64/fix_hypercall_test
-TEST_GEN_PROGS_x86_64 += x86_64/hwcr_msr_test
-TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock
-TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
-TEST_GEN_PROGS_x86_64 += x86_64/hyperv_evmcs
-TEST_GEN_PROGS_x86_64 += x86_64/hyperv_extended_hypercalls
-TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features
-TEST_GEN_PROGS_x86_64 += x86_64/hyperv_ipi
-TEST_GEN_PROGS_x86_64 += x86_64/hyperv_svm_test
-TEST_GEN_PROGS_x86_64 += x86_64/hyperv_tlb_flush
-TEST_GEN_PROGS_x86_64 += x86_64/kvm_clock_test
-TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test
-TEST_GEN_PROGS_x86_64 += x86_64/monitor_mwait_test
-TEST_GEN_PROGS_x86_64 += x86_64/nested_exceptions_test
-TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
-TEST_GEN_PROGS_x86_64 += x86_64/pmu_counters_test
-TEST_GEN_PROGS_x86_64 += x86_64/pmu_event_filter_test
-TEST_GEN_PROGS_x86_64 += x86_64/private_mem_conversions_test
-TEST_GEN_PROGS_x86_64 += x86_64/private_mem_kvm_exits_test
-TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id
-TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
-TEST_GEN_PROGS_x86_64 += x86_64/smaller_maxphyaddr_emulation_test
-TEST_GEN_PROGS_x86_64 += x86_64/smm_test
-TEST_GEN_PROGS_x86_64 += x86_64/state_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
-TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
-TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test
-TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_shutdown_test
-TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_soft_inject_test
-TEST_GEN_PROGS_x86_64 += x86_64/tsc_scaling_sync
-TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
-TEST_GEN_PROGS_x86_64 += x86_64/ucna_injection_test
-TEST_GEN_PROGS_x86_64 += x86_64/userspace_io_test
-TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_exception_with_invalid_guest_state
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_msrs_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_invalid_nested_guest_state
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_nested_tsc_scaling_test
-TEST_GEN_PROGS_x86_64 += x86_64/apic_bus_clock_test
-TEST_GEN_PROGS_x86_64 += x86_64/xapic_ipi_test
-TEST_GEN_PROGS_x86_64 += x86_64/xapic_state_test
-TEST_GEN_PROGS_x86_64 += x86_64/xcr0_cpuid_test
-TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test
-TEST_GEN_PROGS_x86_64 += x86_64/debug_regs
-TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_caps_test
-TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test
-TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test
-TEST_GEN_PROGS_x86_64 += x86_64/sev_init2_tests
-TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests
-TEST_GEN_PROGS_x86_64 += x86_64/sev_smoke_test
-TEST_GEN_PROGS_x86_64 += x86_64/amx_test
-TEST_GEN_PROGS_x86_64 += x86_64/max_vcpuid_cap_test
-TEST_GEN_PROGS_x86_64 += x86_64/triple_fault_event_test
-TEST_GEN_PROGS_x86_64 += x86_64/recalc_apic_map_test
-TEST_GEN_PROGS_x86_64 += access_tracking_perf_test
-TEST_GEN_PROGS_x86_64 += coalesced_io_test
-TEST_GEN_PROGS_x86_64 += demand_paging_test
-TEST_GEN_PROGS_x86_64 += dirty_log_test
-TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
-TEST_GEN_PROGS_x86_64 += guest_memfd_test
-TEST_GEN_PROGS_x86_64 += guest_print_test
-TEST_GEN_PROGS_x86_64 += hardware_disable_test
-TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
-TEST_GEN_PROGS_x86_64 += kvm_page_table_test
-TEST_GEN_PROGS_x86_64 += max_guest_memory_test
-TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test
-TEST_GEN_PROGS_x86_64 += memslot_perf_test
-TEST_GEN_PROGS_x86_64 += rseq_test
-TEST_GEN_PROGS_x86_64 += set_memory_region_test
-TEST_GEN_PROGS_x86_64 += steal_time
-TEST_GEN_PROGS_x86_64 += kvm_binary_stats_test
-TEST_GEN_PROGS_x86_64 += system_counter_offset_test
-TEST_GEN_PROGS_x86_64 += pre_fault_memory_test
-
-# Compiled outputs used by test targets
-TEST_GEN_PROGS_EXTENDED_x86_64 += x86_64/nx_huge_pages_test
-
-TEST_GEN_PROGS_aarch64 += aarch64/aarch32_id_regs
-TEST_GEN_PROGS_aarch64 += aarch64/arch_timer_edge_cases
-TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
-TEST_GEN_PROGS_aarch64 += aarch64/hypercalls
-TEST_GEN_PROGS_aarch64 += aarch64/mmio_abort
-TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test
-TEST_GEN_PROGS_aarch64 += aarch64/psci_test
-TEST_GEN_PROGS_aarch64 += aarch64/set_id_regs
-TEST_GEN_PROGS_aarch64 += aarch64/smccc_filter
-TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config
-TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
-TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq
-TEST_GEN_PROGS_aarch64 += aarch64/vgic_lpi_stress
-TEST_GEN_PROGS_aarch64 += aarch64/vpmu_counter_access
-TEST_GEN_PROGS_aarch64 += aarch64/no-vgic-v3
-TEST_GEN_PROGS_aarch64 += access_tracking_perf_test
-TEST_GEN_PROGS_aarch64 += arch_timer
-TEST_GEN_PROGS_aarch64 += coalesced_io_test
-TEST_GEN_PROGS_aarch64 += demand_paging_test
-TEST_GEN_PROGS_aarch64 += dirty_log_test
-TEST_GEN_PROGS_aarch64 += dirty_log_perf_test
-TEST_GEN_PROGS_aarch64 += guest_print_test
-TEST_GEN_PROGS_aarch64 += get-reg-list
-TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus
-TEST_GEN_PROGS_aarch64 += kvm_page_table_test
-TEST_GEN_PROGS_aarch64 += memslot_modification_stress_test
-TEST_GEN_PROGS_aarch64 += memslot_perf_test
-TEST_GEN_PROGS_aarch64 += rseq_test
-TEST_GEN_PROGS_aarch64 += set_memory_region_test
-TEST_GEN_PROGS_aarch64 += steal_time
-TEST_GEN_PROGS_aarch64 += kvm_binary_stats_test
-
-TEST_GEN_PROGS_s390x = s390x/memop
-TEST_GEN_PROGS_s390x += s390x/resets
-TEST_GEN_PROGS_s390x += s390x/sync_regs_test
-TEST_GEN_PROGS_s390x += s390x/tprot
-TEST_GEN_PROGS_s390x += s390x/cmma_test
-TEST_GEN_PROGS_s390x += s390x/debug_test
-TEST_GEN_PROGS_s390x += s390x/cpumodel_subfuncs_test
-TEST_GEN_PROGS_s390x += s390x/shared_zeropage_test
-TEST_GEN_PROGS_s390x += s390x/ucontrol_test
-TEST_GEN_PROGS_s390x += demand_paging_test
-TEST_GEN_PROGS_s390x += dirty_log_test
-TEST_GEN_PROGS_s390x += guest_print_test
-TEST_GEN_PROGS_s390x += kvm_create_max_vcpus
-TEST_GEN_PROGS_s390x += kvm_page_table_test
-TEST_GEN_PROGS_s390x += rseq_test
-TEST_GEN_PROGS_s390x += set_memory_region_test
-TEST_GEN_PROGS_s390x += kvm_binary_stats_test
-
-TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test
-TEST_GEN_PROGS_riscv += riscv/ebreak_test
-TEST_GEN_PROGS_riscv += arch_timer
-TEST_GEN_PROGS_riscv += coalesced_io_test
-TEST_GEN_PROGS_riscv += demand_paging_test
-TEST_GEN_PROGS_riscv += dirty_log_test
-TEST_GEN_PROGS_riscv += get-reg-list
-TEST_GEN_PROGS_riscv += guest_print_test
-TEST_GEN_PROGS_riscv += kvm_binary_stats_test
-TEST_GEN_PROGS_riscv += kvm_create_max_vcpus
-TEST_GEN_PROGS_riscv += kvm_page_table_test
-TEST_GEN_PROGS_riscv += set_memory_region_test
-TEST_GEN_PROGS_riscv += steal_time
-
-SPLIT_TESTS += arch_timer
-SPLIT_TESTS += get-reg-list
-
-TEST_PROGS += $(TEST_PROGS_$(ARCH_DIR))
-TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(ARCH_DIR))
-TEST_GEN_PROGS_EXTENDED += $(TEST_GEN_PROGS_EXTENDED_$(ARCH_DIR))
-LIBKVM += $(LIBKVM_$(ARCH_DIR))
-
-OVERRIDE_TARGETS = 1
-
-# lib.mak defines $(OUTPUT), prepends $(OUTPUT)/ to $(TEST_GEN_PROGS), and most
-# importantly defines, i.e. overwrites, $(CC) (unless `make -e` or `make CC=`,
-# which causes the environment variable to override the makefile).
-include ../lib.mk
-
-INSTALL_HDR_PATH = $(top_srcdir)/usr
-LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
-LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include
+ifeq ($(ARCH),$(filter $(ARCH),arm64 s390 riscv x86 x86_64))
+# Top-level selftests allows ARCH=x86_64 :-(
ifeq ($(ARCH),x86_64)
-LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/x86/include
-else
-LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include
+ ARCH := x86
endif
-CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
- -Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \
- -fno-builtin-memcmp -fno-builtin-memcpy \
- -fno-builtin-memset -fno-builtin-strnlen \
- -fno-stack-protector -fno-PIE -fno-strict-aliasing \
- -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_TOOL_ARCH_INCLUDE) \
- -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -Iinclude/$(ARCH_DIR) \
- -I ../rseq -I.. $(EXTRA_CFLAGS) $(KHDR_INCLUDES)
-ifeq ($(ARCH),s390)
- CFLAGS += -march=z10
-endif
-ifeq ($(ARCH),x86)
-ifeq ($(shell echo "void foo(void) { }" | $(CC) -march=x86-64-v2 -x c - -c -o /dev/null 2>/dev/null; echo "$$?"),0)
- CFLAGS += -march=x86-64-v2
-endif
-endif
-ifeq ($(ARCH),arm64)
-tools_dir := $(top_srcdir)/tools
-arm64_tools_dir := $(tools_dir)/arch/arm64/tools/
-
-ifneq ($(abs_objdir),)
-arm64_hdr_outdir := $(abs_objdir)/tools/
+include Makefile.kvm
else
-arm64_hdr_outdir := $(tools_dir)/
-endif
-
-GEN_HDRS := $(arm64_hdr_outdir)arch/arm64/include/generated/
-CFLAGS += -I$(GEN_HDRS)
-
-$(GEN_HDRS): $(wildcard $(arm64_tools_dir)/*)
- $(MAKE) -C $(arm64_tools_dir) OUTPUT=$(arm64_hdr_outdir)
+# Empty targets for unsupported architectures
+all:
+clean:
endif
-
-no-pie-option := $(call try-run, echo 'int main(void) { return 0; }' | \
- $(CC) -Werror $(CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie)
-
-# On s390, build the testcases KVM-enabled
-pgste-option = $(call try-run, echo 'int main(void) { return 0; }' | \
- $(CC) -Werror -Wl$(comma)--s390-pgste -x c - -o "$$TMP",-Wl$(comma)--s390-pgste)
-
-LDLIBS += -ldl
-LDFLAGS += -pthread $(no-pie-option) $(pgste-option)
-
-LIBKVM_C := $(filter %.c,$(LIBKVM))
-LIBKVM_S := $(filter %.S,$(LIBKVM))
-LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C))
-LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S))
-LIBKVM_STRING_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_STRING))
-LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ)
-SPLIT_TEST_GEN_PROGS := $(patsubst %, $(OUTPUT)/%, $(SPLIT_TESTS))
-SPLIT_TEST_GEN_OBJ := $(patsubst %, $(OUTPUT)/$(ARCH_DIR)/%.o, $(SPLIT_TESTS))
-
-TEST_GEN_OBJ = $(patsubst %, %.o, $(TEST_GEN_PROGS))
-TEST_GEN_OBJ += $(patsubst %, %.o, $(TEST_GEN_PROGS_EXTENDED))
-TEST_DEP_FILES = $(patsubst %.o, %.d, $(TEST_GEN_OBJ))
-TEST_DEP_FILES += $(patsubst %.o, %.d, $(LIBKVM_OBJS))
-TEST_DEP_FILES += $(patsubst %.o, %.d, $(SPLIT_TEST_GEN_OBJ))
--include $(TEST_DEP_FILES)
-
-$(shell mkdir -p $(sort $(OUTPUT)/$(ARCH_DIR) $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ))))
-
-$(filter-out $(SPLIT_TEST_GEN_PROGS), $(TEST_GEN_PROGS)) \
-$(TEST_GEN_PROGS_EXTENDED): %: %.o
- $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $< $(LIBKVM_OBJS) $(LDLIBS) -o $@
-$(TEST_GEN_OBJ): $(OUTPUT)/%.o: %.c
- $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
-
-$(SPLIT_TEST_GEN_PROGS): $(OUTPUT)/%: $(OUTPUT)/%.o $(OUTPUT)/$(ARCH_DIR)/%.o
- $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $^ $(LDLIBS) -o $@
-$(SPLIT_TEST_GEN_OBJ): $(OUTPUT)/$(ARCH_DIR)/%.o: $(ARCH_DIR)/%.c
- $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
-
-EXTRA_CLEAN += $(GEN_HDRS) \
- $(LIBKVM_OBJS) \
- $(SPLIT_TEST_GEN_OBJ) \
- $(TEST_DEP_FILES) \
- $(TEST_GEN_OBJ) \
- cscope.*
-
-$(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c $(GEN_HDRS)
- $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
-
-$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S $(GEN_HDRS)
- $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
-
-# Compile the string overrides as freestanding to prevent the compiler from
-# generating self-referential code, e.g. without "freestanding" the compiler may
-# "optimize" memcmp() by invoking memcmp(), thus causing infinite recursion.
-$(LIBKVM_STRING_OBJ): $(OUTPUT)/%.o: %.c
- $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c -ffreestanding $< -o $@
-
-$(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS))))
-$(SPLIT_TEST_GEN_OBJ): $(GEN_HDRS)
-$(TEST_GEN_PROGS): $(LIBKVM_OBJS)
-$(TEST_GEN_PROGS_EXTENDED): $(LIBKVM_OBJS)
-$(TEST_GEN_OBJ): $(GEN_HDRS)
-
-cscope: include_paths = $(LINUX_TOOL_INCLUDE) $(LINUX_HDR_PATH) include lib ..
-cscope:
- $(RM) cscope.*
- (find $(include_paths) -name '*.h' \
- -exec realpath --relative-base=$(PWD) {} \;; \
- find . -name '*.c' \
- -exec realpath --relative-base=$(PWD) {} \;) | sort -u > cscope.files
- cscope -b
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
new file mode 100644
index 000000000000..f773f8f99249
--- /dev/null
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -0,0 +1,331 @@
+# SPDX-License-Identifier: GPL-2.0-only
+include ../../../build/Build.include
+
+all:
+
+LIBKVM += lib/assert.c
+LIBKVM += lib/elf.c
+LIBKVM += lib/guest_modes.c
+LIBKVM += lib/io.c
+LIBKVM += lib/kvm_util.c
+LIBKVM += lib/memstress.c
+LIBKVM += lib/guest_sprintf.c
+LIBKVM += lib/rbtree.c
+LIBKVM += lib/sparsebit.c
+LIBKVM += lib/test_util.c
+LIBKVM += lib/ucall_common.c
+LIBKVM += lib/userfaultfd_util.c
+
+LIBKVM_STRING += lib/string_override.c
+
+LIBKVM_x86 += lib/x86/apic.c
+LIBKVM_x86 += lib/x86/handlers.S
+LIBKVM_x86 += lib/x86/hyperv.c
+LIBKVM_x86 += lib/x86/memstress.c
+LIBKVM_x86 += lib/x86/pmu.c
+LIBKVM_x86 += lib/x86/processor.c
+LIBKVM_x86 += lib/x86/sev.c
+LIBKVM_x86 += lib/x86/svm.c
+LIBKVM_x86 += lib/x86/ucall.c
+LIBKVM_x86 += lib/x86/vmx.c
+
+LIBKVM_arm64 += lib/arm64/gic.c
+LIBKVM_arm64 += lib/arm64/gic_v3.c
+LIBKVM_arm64 += lib/arm64/gic_v3_its.c
+LIBKVM_arm64 += lib/arm64/handlers.S
+LIBKVM_arm64 += lib/arm64/processor.c
+LIBKVM_arm64 += lib/arm64/spinlock.c
+LIBKVM_arm64 += lib/arm64/ucall.c
+LIBKVM_arm64 += lib/arm64/vgic.c
+
+LIBKVM_s390 += lib/s390/diag318_test_handler.c
+LIBKVM_s390 += lib/s390/processor.c
+LIBKVM_s390 += lib/s390/ucall.c
+LIBKVM_s390 += lib/s390/facility.c
+
+LIBKVM_riscv += lib/riscv/handlers.S
+LIBKVM_riscv += lib/riscv/processor.c
+LIBKVM_riscv += lib/riscv/ucall.c
+
+# Non-compiled test targets
+TEST_PROGS_x86 += x86/nx_huge_pages_test.sh
+
+# Compiled test targets
+TEST_GEN_PROGS_x86 = x86/cpuid_test
+TEST_GEN_PROGS_x86 += x86/cr4_cpuid_sync_test
+TEST_GEN_PROGS_x86 += x86/dirty_log_page_splitting_test
+TEST_GEN_PROGS_x86 += x86/feature_msrs_test
+TEST_GEN_PROGS_x86 += x86/exit_on_emulation_failure_test
+TEST_GEN_PROGS_x86 += x86/fix_hypercall_test
+TEST_GEN_PROGS_x86 += x86/hwcr_msr_test
+TEST_GEN_PROGS_x86 += x86/hyperv_clock
+TEST_GEN_PROGS_x86 += x86/hyperv_cpuid
+TEST_GEN_PROGS_x86 += x86/hyperv_evmcs
+TEST_GEN_PROGS_x86 += x86/hyperv_extended_hypercalls
+TEST_GEN_PROGS_x86 += x86/hyperv_features
+TEST_GEN_PROGS_x86 += x86/hyperv_ipi
+TEST_GEN_PROGS_x86 += x86/hyperv_svm_test
+TEST_GEN_PROGS_x86 += x86/hyperv_tlb_flush
+TEST_GEN_PROGS_x86 += x86/kvm_clock_test
+TEST_GEN_PROGS_x86 += x86/kvm_pv_test
+TEST_GEN_PROGS_x86 += x86/monitor_mwait_test
+TEST_GEN_PROGS_x86 += x86/nested_emulation_test
+TEST_GEN_PROGS_x86 += x86/nested_exceptions_test
+TEST_GEN_PROGS_x86 += x86/platform_info_test
+TEST_GEN_PROGS_x86 += x86/pmu_counters_test
+TEST_GEN_PROGS_x86 += x86/pmu_event_filter_test
+TEST_GEN_PROGS_x86 += x86/private_mem_conversions_test
+TEST_GEN_PROGS_x86 += x86/private_mem_kvm_exits_test
+TEST_GEN_PROGS_x86 += x86/set_boot_cpu_id
+TEST_GEN_PROGS_x86 += x86/set_sregs_test
+TEST_GEN_PROGS_x86 += x86/smaller_maxphyaddr_emulation_test
+TEST_GEN_PROGS_x86 += x86/smm_test
+TEST_GEN_PROGS_x86 += x86/state_test
+TEST_GEN_PROGS_x86 += x86/vmx_preemption_timer_test
+TEST_GEN_PROGS_x86 += x86/svm_vmcall_test
+TEST_GEN_PROGS_x86 += x86/svm_int_ctl_test
+TEST_GEN_PROGS_x86 += x86/svm_nested_shutdown_test
+TEST_GEN_PROGS_x86 += x86/svm_nested_soft_inject_test
+TEST_GEN_PROGS_x86 += x86/tsc_scaling_sync
+TEST_GEN_PROGS_x86 += x86/sync_regs_test
+TEST_GEN_PROGS_x86 += x86/ucna_injection_test
+TEST_GEN_PROGS_x86 += x86/userspace_io_test
+TEST_GEN_PROGS_x86 += x86/userspace_msr_exit_test
+TEST_GEN_PROGS_x86 += x86/vmx_apic_access_test
+TEST_GEN_PROGS_x86 += x86/vmx_close_while_nested_test
+TEST_GEN_PROGS_x86 += x86/vmx_dirty_log_test
+TEST_GEN_PROGS_x86 += x86/vmx_exception_with_invalid_guest_state
+TEST_GEN_PROGS_x86 += x86/vmx_msrs_test
+TEST_GEN_PROGS_x86 += x86/vmx_invalid_nested_guest_state
+TEST_GEN_PROGS_x86 += x86/vmx_set_nested_state_test
+TEST_GEN_PROGS_x86 += x86/vmx_tsc_adjust_test
+TEST_GEN_PROGS_x86 += x86/vmx_nested_tsc_scaling_test
+TEST_GEN_PROGS_x86 += x86/apic_bus_clock_test
+TEST_GEN_PROGS_x86 += x86/xapic_ipi_test
+TEST_GEN_PROGS_x86 += x86/xapic_state_test
+TEST_GEN_PROGS_x86 += x86/xcr0_cpuid_test
+TEST_GEN_PROGS_x86 += x86/xss_msr_test
+TEST_GEN_PROGS_x86 += x86/debug_regs
+TEST_GEN_PROGS_x86 += x86/tsc_msrs_test
+TEST_GEN_PROGS_x86 += x86/vmx_pmu_caps_test
+TEST_GEN_PROGS_x86 += x86/xen_shinfo_test
+TEST_GEN_PROGS_x86 += x86/xen_vmcall_test
+TEST_GEN_PROGS_x86 += x86/sev_init2_tests
+TEST_GEN_PROGS_x86 += x86/sev_migrate_tests
+TEST_GEN_PROGS_x86 += x86/sev_smoke_test
+TEST_GEN_PROGS_x86 += x86/amx_test
+TEST_GEN_PROGS_x86 += x86/max_vcpuid_cap_test
+TEST_GEN_PROGS_x86 += x86/triple_fault_event_test
+TEST_GEN_PROGS_x86 += x86/recalc_apic_map_test
+TEST_GEN_PROGS_x86 += access_tracking_perf_test
+TEST_GEN_PROGS_x86 += coalesced_io_test
+TEST_GEN_PROGS_x86 += demand_paging_test
+TEST_GEN_PROGS_x86 += dirty_log_test
+TEST_GEN_PROGS_x86 += dirty_log_perf_test
+TEST_GEN_PROGS_x86 += guest_memfd_test
+TEST_GEN_PROGS_x86 += guest_print_test
+TEST_GEN_PROGS_x86 += hardware_disable_test
+TEST_GEN_PROGS_x86 += kvm_create_max_vcpus
+TEST_GEN_PROGS_x86 += kvm_page_table_test
+TEST_GEN_PROGS_x86 += memslot_modification_stress_test
+TEST_GEN_PROGS_x86 += memslot_perf_test
+TEST_GEN_PROGS_x86 += mmu_stress_test
+TEST_GEN_PROGS_x86 += rseq_test
+TEST_GEN_PROGS_x86 += set_memory_region_test
+TEST_GEN_PROGS_x86 += steal_time
+TEST_GEN_PROGS_x86 += kvm_binary_stats_test
+TEST_GEN_PROGS_x86 += system_counter_offset_test
+TEST_GEN_PROGS_x86 += pre_fault_memory_test
+
+# Compiled outputs used by test targets
+TEST_GEN_PROGS_EXTENDED_x86 += x86/nx_huge_pages_test
+
+TEST_GEN_PROGS_arm64 += arm64/aarch32_id_regs
+TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases
+TEST_GEN_PROGS_arm64 += arm64/debug-exceptions
+TEST_GEN_PROGS_arm64 += arm64/hypercalls
+TEST_GEN_PROGS_arm64 += arm64/mmio_abort
+TEST_GEN_PROGS_arm64 += arm64/page_fault_test
+TEST_GEN_PROGS_arm64 += arm64/psci_test
+TEST_GEN_PROGS_arm64 += arm64/set_id_regs
+TEST_GEN_PROGS_arm64 += arm64/smccc_filter
+TEST_GEN_PROGS_arm64 += arm64/vcpu_width_config
+TEST_GEN_PROGS_arm64 += arm64/vgic_init
+TEST_GEN_PROGS_arm64 += arm64/vgic_irq
+TEST_GEN_PROGS_arm64 += arm64/vgic_lpi_stress
+TEST_GEN_PROGS_arm64 += arm64/vpmu_counter_access
+TEST_GEN_PROGS_arm64 += arm64/no-vgic-v3
+TEST_GEN_PROGS_arm64 += access_tracking_perf_test
+TEST_GEN_PROGS_arm64 += arch_timer
+TEST_GEN_PROGS_arm64 += coalesced_io_test
+TEST_GEN_PROGS_arm64 += demand_paging_test
+TEST_GEN_PROGS_arm64 += dirty_log_test
+TEST_GEN_PROGS_arm64 += dirty_log_perf_test
+TEST_GEN_PROGS_arm64 += guest_print_test
+TEST_GEN_PROGS_arm64 += get-reg-list
+TEST_GEN_PROGS_arm64 += kvm_create_max_vcpus
+TEST_GEN_PROGS_arm64 += kvm_page_table_test
+TEST_GEN_PROGS_arm64 += memslot_modification_stress_test
+TEST_GEN_PROGS_arm64 += memslot_perf_test
+TEST_GEN_PROGS_arm64 += mmu_stress_test
+TEST_GEN_PROGS_arm64 += rseq_test
+TEST_GEN_PROGS_arm64 += set_memory_region_test
+TEST_GEN_PROGS_arm64 += steal_time
+TEST_GEN_PROGS_arm64 += kvm_binary_stats_test
+
+TEST_GEN_PROGS_s390 = s390/memop
+TEST_GEN_PROGS_s390 += s390/resets
+TEST_GEN_PROGS_s390 += s390/sync_regs_test
+TEST_GEN_PROGS_s390 += s390/tprot
+TEST_GEN_PROGS_s390 += s390/cmma_test
+TEST_GEN_PROGS_s390 += s390/debug_test
+TEST_GEN_PROGS_s390 += s390/cpumodel_subfuncs_test
+TEST_GEN_PROGS_s390 += s390/shared_zeropage_test
+TEST_GEN_PROGS_s390 += s390/ucontrol_test
+TEST_GEN_PROGS_s390 += demand_paging_test
+TEST_GEN_PROGS_s390 += dirty_log_test
+TEST_GEN_PROGS_s390 += guest_print_test
+TEST_GEN_PROGS_s390 += kvm_create_max_vcpus
+TEST_GEN_PROGS_s390 += kvm_page_table_test
+TEST_GEN_PROGS_s390 += rseq_test
+TEST_GEN_PROGS_s390 += set_memory_region_test
+TEST_GEN_PROGS_s390 += kvm_binary_stats_test
+
+TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test
+TEST_GEN_PROGS_riscv += riscv/ebreak_test
+TEST_GEN_PROGS_riscv += arch_timer
+TEST_GEN_PROGS_riscv += coalesced_io_test
+TEST_GEN_PROGS_riscv += demand_paging_test
+TEST_GEN_PROGS_riscv += dirty_log_test
+TEST_GEN_PROGS_riscv += get-reg-list
+TEST_GEN_PROGS_riscv += guest_print_test
+TEST_GEN_PROGS_riscv += kvm_binary_stats_test
+TEST_GEN_PROGS_riscv += kvm_create_max_vcpus
+TEST_GEN_PROGS_riscv += kvm_page_table_test
+TEST_GEN_PROGS_riscv += set_memory_region_test
+TEST_GEN_PROGS_riscv += steal_time
+
+SPLIT_TESTS += arch_timer
+SPLIT_TESTS += get-reg-list
+
+TEST_PROGS += $(TEST_PROGS_$(ARCH))
+TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(ARCH))
+TEST_GEN_PROGS_EXTENDED += $(TEST_GEN_PROGS_EXTENDED_$(ARCH))
+LIBKVM += $(LIBKVM_$(ARCH))
+
+OVERRIDE_TARGETS = 1
+
+# lib.mak defines $(OUTPUT), prepends $(OUTPUT)/ to $(TEST_GEN_PROGS), and most
+# importantly defines, i.e. overwrites, $(CC) (unless `make -e` or `make CC=`,
+# which causes the environment variable to override the makefile).
+include ../lib.mk
+
+INSTALL_HDR_PATH = $(top_srcdir)/usr
+LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
+LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include
+LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include
+CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
+ -Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \
+ -fno-builtin-memcmp -fno-builtin-memcpy \
+ -fno-builtin-memset -fno-builtin-strnlen \
+ -fno-stack-protector -fno-PIE -fno-strict-aliasing \
+ -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_TOOL_ARCH_INCLUDE) \
+ -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -Iinclude/$(ARCH) \
+ -I ../rseq -I.. $(EXTRA_CFLAGS) $(KHDR_INCLUDES)
+ifeq ($(ARCH),s390)
+ CFLAGS += -march=z10
+endif
+ifeq ($(ARCH),x86)
+ifeq ($(shell echo "void foo(void) { }" | $(CC) -march=x86-64-v2 -x c - -c -o /dev/null 2>/dev/null; echo "$$?"),0)
+ CFLAGS += -march=x86-64-v2
+endif
+endif
+ifeq ($(ARCH),arm64)
+tools_dir := $(top_srcdir)/tools
+arm64_tools_dir := $(tools_dir)/arch/arm64/tools/
+
+ifneq ($(abs_objdir),)
+arm64_hdr_outdir := $(abs_objdir)/tools/
+else
+arm64_hdr_outdir := $(tools_dir)/
+endif
+
+GEN_HDRS := $(arm64_hdr_outdir)arch/arm64/include/generated/
+CFLAGS += -I$(GEN_HDRS)
+
+$(GEN_HDRS): $(wildcard $(arm64_tools_dir)/*)
+ $(MAKE) -C $(arm64_tools_dir) OUTPUT=$(arm64_hdr_outdir)
+endif
+
+no-pie-option := $(call try-run, echo 'int main(void) { return 0; }' | \
+ $(CC) -Werror $(CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie)
+
+# On s390, build the testcases KVM-enabled
+pgste-option = $(call try-run, echo 'int main(void) { return 0; }' | \
+ $(CC) -Werror -Wl$(comma)--s390-pgste -x c - -o "$$TMP",-Wl$(comma)--s390-pgste)
+
+LDLIBS += -ldl
+LDFLAGS += -pthread $(no-pie-option) $(pgste-option)
+
+LIBKVM_C := $(filter %.c,$(LIBKVM))
+LIBKVM_S := $(filter %.S,$(LIBKVM))
+LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C))
+LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S))
+LIBKVM_STRING_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_STRING))
+LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ)
+SPLIT_TEST_GEN_PROGS := $(patsubst %, $(OUTPUT)/%, $(SPLIT_TESTS))
+SPLIT_TEST_GEN_OBJ := $(patsubst %, $(OUTPUT)/$(ARCH)/%.o, $(SPLIT_TESTS))
+
+TEST_GEN_OBJ = $(patsubst %, %.o, $(TEST_GEN_PROGS))
+TEST_GEN_OBJ += $(patsubst %, %.o, $(TEST_GEN_PROGS_EXTENDED))
+TEST_DEP_FILES = $(patsubst %.o, %.d, $(TEST_GEN_OBJ))
+TEST_DEP_FILES += $(patsubst %.o, %.d, $(LIBKVM_OBJS))
+TEST_DEP_FILES += $(patsubst %.o, %.d, $(SPLIT_TEST_GEN_OBJ))
+-include $(TEST_DEP_FILES)
+
+$(shell mkdir -p $(sort $(OUTPUT)/$(ARCH) $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ))))
+
+$(filter-out $(SPLIT_TEST_GEN_PROGS), $(TEST_GEN_PROGS)) \
+$(TEST_GEN_PROGS_EXTENDED): %: %.o
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $< $(LIBKVM_OBJS) $(LDLIBS) -o $@
+$(TEST_GEN_OBJ): $(OUTPUT)/%.o: %.c
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
+
+$(SPLIT_TEST_GEN_PROGS): $(OUTPUT)/%: $(OUTPUT)/%.o $(OUTPUT)/$(ARCH)/%.o
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $^ $(LDLIBS) -o $@
+$(SPLIT_TEST_GEN_OBJ): $(OUTPUT)/$(ARCH)/%.o: $(ARCH)/%.c
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
+
+EXTRA_CLEAN += $(GEN_HDRS) \
+ $(LIBKVM_OBJS) \
+ $(SPLIT_TEST_GEN_OBJ) \
+ $(TEST_DEP_FILES) \
+ $(TEST_GEN_OBJ) \
+ cscope.*
+
+$(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c $(GEN_HDRS)
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
+
+$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S $(GEN_HDRS)
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
+
+# Compile the string overrides as freestanding to prevent the compiler from
+# generating self-referential code, e.g. without "freestanding" the compiler may
+# "optimize" memcmp() by invoking memcmp(), thus causing infinite recursion.
+$(LIBKVM_STRING_OBJ): $(OUTPUT)/%.o: %.c
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c -ffreestanding $< -o $@
+
+$(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS))))
+$(SPLIT_TEST_GEN_OBJ): $(GEN_HDRS)
+$(TEST_GEN_PROGS): $(LIBKVM_OBJS)
+$(TEST_GEN_PROGS_EXTENDED): $(LIBKVM_OBJS)
+$(TEST_GEN_OBJ): $(GEN_HDRS)
+
+cscope: include_paths = $(LINUX_TOOL_INCLUDE) $(LINUX_HDR_PATH) include lib ..
+cscope:
+ $(RM) cscope.*
+ (find $(include_paths) -name '*.h' \
+ -exec realpath --relative-base=$(PWD) {} \;; \
+ find . -name '*.c' \
+ -exec realpath --relative-base=$(PWD) {} \;) | sort -u > cscope.files
+ cscope -b
diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c
index 3c7defd34f56..447e619cf856 100644
--- a/tools/testing/selftests/kvm/access_tracking_perf_test.c
+++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c
@@ -239,7 +239,7 @@ static void vcpu_thread_main(struct memstress_vcpu_args *vcpu_args)
case ITERATION_MARK_IDLE:
mark_vcpu_memory_idle(vm, vcpu_args);
break;
- };
+ }
vcpu_last_completed_iteration[vcpu_idx] = current_iteration;
}
diff --git a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c b/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c
index 8e5bd07a3727..cef8f7323ceb 100644
--- a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c
+++ b/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c
@@ -97,7 +97,7 @@ static void test_user_raz_wi(struct kvm_vcpu *vcpu)
uint64_t reg_id = raz_wi_reg_ids[i];
uint64_t val;
- vcpu_get_reg(vcpu, reg_id, &val);
+ val = vcpu_get_reg(vcpu, reg_id);
TEST_ASSERT_EQ(val, 0);
/*
@@ -106,7 +106,7 @@ static void test_user_raz_wi(struct kvm_vcpu *vcpu)
*/
vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL);
- vcpu_get_reg(vcpu, reg_id, &val);
+ val = vcpu_get_reg(vcpu, reg_id);
TEST_ASSERT_EQ(val, 0);
}
}
@@ -126,14 +126,14 @@ static void test_user_raz_invariant(struct kvm_vcpu *vcpu)
uint64_t reg_id = raz_invariant_reg_ids[i];
uint64_t val;
- vcpu_get_reg(vcpu, reg_id, &val);
+ val = vcpu_get_reg(vcpu, reg_id);
TEST_ASSERT_EQ(val, 0);
r = __vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL);
TEST_ASSERT(r < 0 && errno == EINVAL,
"unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno);
- vcpu_get_reg(vcpu, reg_id, &val);
+ val = vcpu_get_reg(vcpu, reg_id);
TEST_ASSERT_EQ(val, 0);
}
}
@@ -144,10 +144,10 @@ static bool vcpu_aarch64_only(struct kvm_vcpu *vcpu)
{
uint64_t val, el0;
- vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &val);
+ val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val);
- return el0 == ID_AA64PFR0_EL1_ELx_64BIT_ONLY;
+ return el0 == ID_AA64PFR0_EL1_EL0_IMP;
}
int main(void)
diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/arm64/arch_timer.c
index eeba1cc87ff8..eeba1cc87ff8 100644
--- a/tools/testing/selftests/kvm/aarch64/arch_timer.c
+++ b/tools/testing/selftests/kvm/arm64/arch_timer.c
diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c
index a36a7e2db434..a36a7e2db434 100644
--- a/tools/testing/selftests/kvm/aarch64/arch_timer_edge_cases.c
+++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c
diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/arm64/debug-exceptions.c
index ff7a949fc96a..c7fb55c9135b 100644
--- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
+++ b/tools/testing/selftests/kvm/arm64/debug-exceptions.c
@@ -501,7 +501,7 @@ void test_single_step_from_userspace(int test_cnt)
TEST_ASSERT(ss_enable, "Unexpected KVM_EXIT_DEBUG");
/* Check if the current pc is expected. */
- vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc), &pc);
+ pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc));
TEST_ASSERT(!test_pc || pc == test_pc,
"Unexpected pc 0x%lx (expected 0x%lx)",
pc, test_pc);
@@ -583,7 +583,7 @@ int main(int argc, char *argv[])
uint64_t aa64dfr0;
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
- vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1), &aa64dfr0);
+ aa64dfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1));
__TEST_REQUIRE(debug_version(aa64dfr0) >= 6,
"Armv8 debug architecture not supported.");
kvm_vm_free(vm);
diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/arm64/get-reg-list.c
index d43fb3f49050..d01798b6b3b4 100644
--- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c
+++ b/tools/testing/selftests/kvm/arm64/get-reg-list.c
@@ -332,6 +332,7 @@ static __u64 base_regs[] = {
KVM_REG_ARM_FW_FEAT_BMAP_REG(0), /* KVM_REG_ARM_STD_BMAP */
KVM_REG_ARM_FW_FEAT_BMAP_REG(1), /* KVM_REG_ARM_STD_HYP_BMAP */
KVM_REG_ARM_FW_FEAT_BMAP_REG(2), /* KVM_REG_ARM_VENDOR_HYP_BMAP */
+ KVM_REG_ARM_FW_FEAT_BMAP_REG(3), /* KVM_REG_ARM_VENDOR_HYP_BMAP_2 */
ARM64_SYS_REG(3, 3, 14, 3, 1), /* CNTV_CTL_EL0 */
ARM64_SYS_REG(3, 3, 14, 3, 2), /* CNTV_CVAL_EL0 */
ARM64_SYS_REG(3, 3, 14, 0, 2),
diff --git a/tools/testing/selftests/kvm/aarch64/hypercalls.c b/tools/testing/selftests/kvm/arm64/hypercalls.c
index 9d192ce0078d..44cfcf8a7f46 100644
--- a/tools/testing/selftests/kvm/aarch64/hypercalls.c
+++ b/tools/testing/selftests/kvm/arm64/hypercalls.c
@@ -21,22 +21,31 @@
#define KVM_REG_ARM_STD_BMAP_BIT_MAX 0
#define KVM_REG_ARM_STD_HYP_BMAP_BIT_MAX 0
#define KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_MAX 1
+#define KVM_REG_ARM_VENDOR_HYP_BMAP_2_BIT_MAX 1
+
+#define KVM_REG_ARM_STD_BMAP_RESET_VAL FW_REG_ULIMIT_VAL(KVM_REG_ARM_STD_BMAP_BIT_MAX)
+#define KVM_REG_ARM_STD_HYP_BMAP_RESET_VAL FW_REG_ULIMIT_VAL(KVM_REG_ARM_STD_HYP_BMAP_BIT_MAX)
+#define KVM_REG_ARM_VENDOR_HYP_BMAP_RESET_VAL FW_REG_ULIMIT_VAL(KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_MAX)
+#define KVM_REG_ARM_VENDOR_HYP_BMAP_2_RESET_VAL 0
struct kvm_fw_reg_info {
uint64_t reg; /* Register definition */
uint64_t max_feat_bit; /* Bit that represents the upper limit of the feature-map */
+ uint64_t reset_val; /* Reset value for the register */
};
#define FW_REG_INFO(r) \
{ \
.reg = r, \
.max_feat_bit = r##_BIT_MAX, \
+ .reset_val = r##_RESET_VAL \
}
static const struct kvm_fw_reg_info fw_reg_info[] = {
FW_REG_INFO(KVM_REG_ARM_STD_BMAP),
FW_REG_INFO(KVM_REG_ARM_STD_HYP_BMAP),
FW_REG_INFO(KVM_REG_ARM_VENDOR_HYP_BMAP),
+ FW_REG_INFO(KVM_REG_ARM_VENDOR_HYP_BMAP_2),
};
enum test_stage {
@@ -171,22 +180,39 @@ static void test_fw_regs_before_vm_start(struct kvm_vcpu *vcpu)
for (i = 0; i < ARRAY_SIZE(fw_reg_info); i++) {
const struct kvm_fw_reg_info *reg_info = &fw_reg_info[i];
+ uint64_t set_val;
+
+ /* First 'read' should be the reset value for the reg */
+ val = vcpu_get_reg(vcpu, reg_info->reg);
+ TEST_ASSERT(val == reg_info->reset_val,
+ "Unexpected reset value for reg: 0x%lx; expected: 0x%lx; read: 0x%lx",
+ reg_info->reg, reg_info->reset_val, val);
- /* First 'read' should be an upper limit of the features supported */
- vcpu_get_reg(vcpu, reg_info->reg, &val);
- TEST_ASSERT(val == FW_REG_ULIMIT_VAL(reg_info->max_feat_bit),
- "Expected all the features to be set for reg: 0x%lx; expected: 0x%lx; read: 0x%lx",
- reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), val);
+ if (reg_info->reset_val)
+ set_val = 0;
+ else
+ set_val = FW_REG_ULIMIT_VAL(reg_info->max_feat_bit);
- /* Test a 'write' by disabling all the features of the register map */
- ret = __vcpu_set_reg(vcpu, reg_info->reg, 0);
+ ret = __vcpu_set_reg(vcpu, reg_info->reg, set_val);
TEST_ASSERT(ret == 0,
+ "Failed to %s all the features of reg: 0x%lx; ret: %d",
+ (set_val ? "set" : "clear"), reg_info->reg, errno);
+
+ val = vcpu_get_reg(vcpu, reg_info->reg);
+ TEST_ASSERT(val == set_val,
+ "Expected all the features to be %s for reg: 0x%lx",
+ (set_val ? "set" : "cleared"), reg_info->reg);
+
+ /*
+ * If the reg has been set, clear it as test_fw_regs_after_vm_start()
+ * expects it to be cleared.
+ */
+ if (set_val) {
+ ret = __vcpu_set_reg(vcpu, reg_info->reg, 0);
+ TEST_ASSERT(ret == 0,
"Failed to clear all the features of reg: 0x%lx; ret: %d",
reg_info->reg, errno);
-
- vcpu_get_reg(vcpu, reg_info->reg, &val);
- TEST_ASSERT(val == 0,
- "Expected all the features to be cleared for reg: 0x%lx", reg_info->reg);
+ }
/*
* Test enabling a feature that's not supported.
@@ -214,7 +240,7 @@ static void test_fw_regs_after_vm_start(struct kvm_vcpu *vcpu)
* Before starting the VM, the test clears all the bits.
* Check if that's still the case.
*/
- vcpu_get_reg(vcpu, reg_info->reg, &val);
+ val = vcpu_get_reg(vcpu, reg_info->reg);
TEST_ASSERT(val == 0,
"Expected all the features to be cleared for reg: 0x%lx",
reg_info->reg);
diff --git a/tools/testing/selftests/kvm/aarch64/mmio_abort.c b/tools/testing/selftests/kvm/arm64/mmio_abort.c
index 8b7a80a51b1c..8b7a80a51b1c 100644
--- a/tools/testing/selftests/kvm/aarch64/mmio_abort.c
+++ b/tools/testing/selftests/kvm/arm64/mmio_abort.c
diff --git a/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c b/tools/testing/selftests/kvm/arm64/no-vgic-v3.c
index 58304bbc2036..ebd70430c89d 100644
--- a/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c
+++ b/tools/testing/selftests/kvm/arm64/no-vgic-v3.c
@@ -164,7 +164,7 @@ int main(int argc, char *argv[])
uint64_t pfr0;
vm = vm_create_with_one_vcpu(&vcpu, NULL);
- vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &pfr0);
+ pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
__TEST_REQUIRE(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), pfr0),
"GICv3 not supported.");
kvm_vm_free(vm);
diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/arm64/page_fault_test.c
index ec33a8f9c908..ec33a8f9c908 100644
--- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c
+++ b/tools/testing/selftests/kvm/arm64/page_fault_test.c
diff --git a/tools/testing/selftests/kvm/aarch64/psci_test.c b/tools/testing/selftests/kvm/arm64/psci_test.c
index eaa7655fefc1..ab491ee9e5f7 100644
--- a/tools/testing/selftests/kvm/aarch64/psci_test.c
+++ b/tools/testing/selftests/kvm/arm64/psci_test.c
@@ -111,8 +111,8 @@ static void assert_vcpu_reset(struct kvm_vcpu *vcpu)
{
uint64_t obs_pc, obs_x0;
- vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc), &obs_pc);
- vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.regs[0]), &obs_x0);
+ obs_pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc));
+ obs_x0 = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.regs[0]));
TEST_ASSERT(obs_pc == CPU_ON_ENTRY_ADDR,
"unexpected target cpu pc: %lx (expected: %lx)",
@@ -152,7 +152,7 @@ static void host_test_cpu_on(void)
*/
vcpu_power_off(target);
- vcpu_get_reg(target, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), &target_mpidr);
+ target_mpidr = vcpu_get_reg(target, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1));
vcpu_args_set(source, 1, target_mpidr & MPIDR_HWID_BITMASK);
enter_guest(source);
@@ -244,7 +244,7 @@ static void host_test_system_off2(void)
setup_vm(guest_test_system_off2, &source, &target);
- vcpu_get_reg(target, KVM_REG_ARM_PSCI_VERSION, &psci_version);
+ psci_version = vcpu_get_reg(target, KVM_REG_ARM_PSCI_VERSION);
TEST_ASSERT(psci_version >= PSCI_VERSION(1, 3),
"Unexpected PSCI version %lu.%lu",
diff --git a/tools/testing/selftests/kvm/aarch64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c
index 3a97c160b5fe..322b9d3b0125 100644
--- a/tools/testing/selftests/kvm/aarch64/set_id_regs.c
+++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c
@@ -146,6 +146,9 @@ static const struct reg_ftr_bits ftr_id_aa64pfr1_el1[] = {
static const struct reg_ftr_bits ftr_id_aa64mmfr0_el1[] = {
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, ECV, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, EXS, 0),
+ REG_FTR_BITS(FTR_EXACT, ID_AA64MMFR0_EL1, TGRAN4_2, 1),
+ REG_FTR_BITS(FTR_EXACT, ID_AA64MMFR0_EL1, TGRAN64_2, 1),
+ REG_FTR_BITS(FTR_EXACT, ID_AA64MMFR0_EL1, TGRAN16_2, 1),
S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN4, 0),
S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN64, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN16, 0),
@@ -230,6 +233,9 @@ static void guest_code(void)
GUEST_REG_SYNC(SYS_ID_AA64MMFR2_EL1);
GUEST_REG_SYNC(SYS_ID_AA64ZFR0_EL1);
GUEST_REG_SYNC(SYS_CTR_EL0);
+ GUEST_REG_SYNC(SYS_MIDR_EL1);
+ GUEST_REG_SYNC(SYS_REVIDR_EL1);
+ GUEST_REG_SYNC(SYS_AIDR_EL1);
GUEST_DONE();
}
@@ -345,7 +351,7 @@ static uint64_t test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg,
uint64_t mask = ftr_bits->mask;
uint64_t val, new_val, ftr;
- vcpu_get_reg(vcpu, reg, &val);
+ val = vcpu_get_reg(vcpu, reg);
ftr = (val & mask) >> shift;
ftr = get_safe_value(ftr_bits, ftr);
@@ -355,7 +361,7 @@ static uint64_t test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg,
val |= ftr;
vcpu_set_reg(vcpu, reg, val);
- vcpu_get_reg(vcpu, reg, &new_val);
+ new_val = vcpu_get_reg(vcpu, reg);
TEST_ASSERT_EQ(new_val, val);
return new_val;
@@ -369,7 +375,7 @@ static void test_reg_set_fail(struct kvm_vcpu *vcpu, uint64_t reg,
uint64_t val, old_val, ftr;
int r;
- vcpu_get_reg(vcpu, reg, &val);
+ val = vcpu_get_reg(vcpu, reg);
ftr = (val & mask) >> shift;
ftr = get_invalid_value(ftr_bits, ftr);
@@ -383,7 +389,7 @@ static void test_reg_set_fail(struct kvm_vcpu *vcpu, uint64_t reg,
TEST_ASSERT(r < 0 && errno == EINVAL,
"Unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno);
- vcpu_get_reg(vcpu, reg, &val);
+ val = vcpu_get_reg(vcpu, reg);
TEST_ASSERT_EQ(val, old_val);
}
@@ -470,7 +476,7 @@ static void test_user_set_mpam_reg(struct kvm_vcpu *vcpu)
}
/* Get the id register value */
- vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &val);
+ val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
/* Try to set MPAM=0. This should always be possible. */
val &= ~ID_AA64PFR0_EL1_MPAM_MASK;
@@ -507,7 +513,7 @@ static void test_user_set_mpam_reg(struct kvm_vcpu *vcpu)
}
/* Get the id register value */
- vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), &val);
+ val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1));
/* Try to set MPAM_frac=0. This should always be possible. */
val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK;
@@ -575,7 +581,7 @@ static void test_clidr(struct kvm_vcpu *vcpu)
uint64_t clidr;
int level;
- vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1), &clidr);
+ clidr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1));
/* find the first empty level in the cache hierarchy */
for (level = 1; level < 7; level++) {
@@ -600,7 +606,7 @@ static void test_ctr(struct kvm_vcpu *vcpu)
{
u64 ctr;
- vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CTR_EL0), &ctr);
+ ctr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CTR_EL0));
ctr &= ~CTR_EL0_DIC_MASK;
if (ctr & CTR_EL0_IminLine_MASK)
ctr--;
@@ -609,18 +615,31 @@ static void test_ctr(struct kvm_vcpu *vcpu)
test_reg_vals[encoding_to_range_idx(SYS_CTR_EL0)] = ctr;
}
-static void test_vcpu_ftr_id_regs(struct kvm_vcpu *vcpu)
+static void test_id_reg(struct kvm_vcpu *vcpu, u32 id)
{
u64 val;
+ val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(id));
+ val++;
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(id), val);
+ test_reg_vals[encoding_to_range_idx(id)] = val;
+}
+
+static void test_vcpu_ftr_id_regs(struct kvm_vcpu *vcpu)
+{
test_clidr(vcpu);
test_ctr(vcpu);
- vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), &val);
- val++;
- vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), val);
+ test_id_reg(vcpu, SYS_MPIDR_EL1);
+ ksft_test_result_pass("%s\n", __func__);
+}
+
+static void test_vcpu_non_ftr_id_regs(struct kvm_vcpu *vcpu)
+{
+ test_id_reg(vcpu, SYS_MIDR_EL1);
+ test_id_reg(vcpu, SYS_REVIDR_EL1);
+ test_id_reg(vcpu, SYS_AIDR_EL1);
- test_reg_vals[encoding_to_range_idx(SYS_MPIDR_EL1)] = val;
ksft_test_result_pass("%s\n", __func__);
}
@@ -629,7 +648,7 @@ static void test_assert_id_reg_unchanged(struct kvm_vcpu *vcpu, uint32_t encodin
size_t idx = encoding_to_range_idx(encoding);
uint64_t observed;
- vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(encoding), &observed);
+ observed = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(encoding));
TEST_ASSERT_EQ(test_reg_vals[idx], observed);
}
@@ -647,6 +666,9 @@ static void test_reset_preserves_id_regs(struct kvm_vcpu *vcpu)
test_assert_id_reg_unchanged(vcpu, SYS_MPIDR_EL1);
test_assert_id_reg_unchanged(vcpu, SYS_CLIDR_EL1);
test_assert_id_reg_unchanged(vcpu, SYS_CTR_EL0);
+ test_assert_id_reg_unchanged(vcpu, SYS_MIDR_EL1);
+ test_assert_id_reg_unchanged(vcpu, SYS_REVIDR_EL1);
+ test_assert_id_reg_unchanged(vcpu, SYS_AIDR_EL1);
ksft_test_result_pass("%s\n", __func__);
}
@@ -660,13 +682,16 @@ int main(void)
int test_cnt;
TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_WRITABLE_IMP_ID_REGS));
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ vm = vm_create(1);
+ vm_enable_cap(vm, KVM_CAP_ARM_WRITABLE_IMP_ID_REGS, 0);
+ vcpu = vm_vcpu_add(vm, 0, guest_code);
/* Check for AARCH64 only system */
- vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &val);
+ val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val);
- aarch64_only = (el0 == ID_AA64PFR0_EL1_ELx_64BIT_ONLY);
+ aarch64_only = (el0 == ID_AA64PFR0_EL1_EL0_IMP);
ksft_print_header();
@@ -675,13 +700,14 @@ int main(void)
ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) +
ARRAY_SIZE(ftr_id_aa64pfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr0_el1) +
ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr2_el1) +
- ARRAY_SIZE(ftr_id_aa64zfr0_el1) - ARRAY_SIZE(test_regs) + 2 +
+ ARRAY_SIZE(ftr_id_aa64zfr0_el1) - ARRAY_SIZE(test_regs) + 3 +
MPAM_IDREG_TEST;
ksft_set_plan(test_cnt);
test_vm_ftr_id_regs(vcpu, aarch64_only);
test_vcpu_ftr_id_regs(vcpu);
+ test_vcpu_non_ftr_id_regs(vcpu);
test_user_set_mpam_reg(vcpu);
test_guest_reg_read(vcpu);
diff --git a/tools/testing/selftests/kvm/aarch64/smccc_filter.c b/tools/testing/selftests/kvm/arm64/smccc_filter.c
index 2d189f3da228..2d189f3da228 100644
--- a/tools/testing/selftests/kvm/aarch64/smccc_filter.c
+++ b/tools/testing/selftests/kvm/arm64/smccc_filter.c
diff --git a/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c b/tools/testing/selftests/kvm/arm64/vcpu_width_config.c
index 80b74c6f152b..80b74c6f152b 100644
--- a/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c
+++ b/tools/testing/selftests/kvm/arm64/vcpu_width_config.c
diff --git a/tools/testing/selftests/kvm/aarch64/vgic_init.c b/tools/testing/selftests/kvm/arm64/vgic_init.c
index b3b5fb0ff0a9..b3b5fb0ff0a9 100644
--- a/tools/testing/selftests/kvm/aarch64/vgic_init.c
+++ b/tools/testing/selftests/kvm/arm64/vgic_init.c
diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/arm64/vgic_irq.c
index f4ac28d53747..f4ac28d53747 100644
--- a/tools/testing/selftests/kvm/aarch64/vgic_irq.c
+++ b/tools/testing/selftests/kvm/arm64/vgic_irq.c
diff --git a/tools/testing/selftests/kvm/aarch64/vgic_lpi_stress.c b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c
index fc4fe52fb6f8..fc4fe52fb6f8 100644
--- a/tools/testing/selftests/kvm/aarch64/vgic_lpi_stress.c
+++ b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c
diff --git a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c b/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c
index f9c0c86d7e85..f16b3b27e32e 100644
--- a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c
+++ b/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c
@@ -440,8 +440,7 @@ static void create_vpmu_vm(void *guest_code)
"Failed to create vgic-v3, skipping");
/* Make sure that PMUv3 support is indicated in the ID register */
- vcpu_get_reg(vpmu_vm.vcpu,
- KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1), &dfr0);
+ dfr0 = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1));
pmuver = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), dfr0);
TEST_ASSERT(pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF &&
pmuver >= ID_AA64DFR0_EL1_PMUVer_IMP,
@@ -484,7 +483,7 @@ static void test_create_vpmu_vm_with_pmcr_n(uint64_t pmcr_n, bool expect_fail)
create_vpmu_vm(guest_code);
vcpu = vpmu_vm.vcpu;
- vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), &pmcr_orig);
+ pmcr_orig = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0));
pmcr = pmcr_orig;
/*
@@ -493,7 +492,7 @@ static void test_create_vpmu_vm_with_pmcr_n(uint64_t pmcr_n, bool expect_fail)
*/
set_pmcr_n(&pmcr, pmcr_n);
vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), pmcr);
- vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), &pmcr);
+ pmcr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0));
if (expect_fail)
TEST_ASSERT(pmcr_orig == pmcr,
@@ -521,7 +520,7 @@ static void run_access_test(uint64_t pmcr_n)
vcpu = vpmu_vm.vcpu;
/* Save the initial sp to restore them later to run the guest again */
- vcpu_get_reg(vcpu, ARM64_CORE_REG(sp_el1), &sp);
+ sp = vcpu_get_reg(vcpu, ARM64_CORE_REG(sp_el1));
run_vcpu(vcpu, pmcr_n);
@@ -572,12 +571,12 @@ static void run_pmregs_validity_test(uint64_t pmcr_n)
* Test if the 'set' and 'clr' variants of the registers
* are initialized based on the number of valid counters.
*/
- vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), &reg_val);
+ reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id));
TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
"Initial read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
KVM_ARM64_SYS_REG(set_reg_id), reg_val);
- vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id), &reg_val);
+ reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id));
TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
"Initial read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
KVM_ARM64_SYS_REG(clr_reg_id), reg_val);
@@ -589,12 +588,12 @@ static void run_pmregs_validity_test(uint64_t pmcr_n)
*/
vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), max_counters_mask);
- vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), &reg_val);
+ reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id));
TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
"Read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
KVM_ARM64_SYS_REG(set_reg_id), reg_val);
- vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id), &reg_val);
+ reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id));
TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
"Read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
KVM_ARM64_SYS_REG(clr_reg_id), reg_val);
@@ -625,7 +624,7 @@ static uint64_t get_pmcr_n_limit(void)
uint64_t pmcr;
create_vpmu_vm(guest_code);
- vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), &pmcr);
+ pmcr = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0));
destroy_vpmu_vm();
return get_pmcr_n(pmcr);
}
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index 9f24303acb8c..e79817bd0e29 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -21,7 +21,7 @@
#include "ucall_common.h"
#ifdef __aarch64__
-#include "aarch64/vgic.h"
+#include "arm64/vgic.h"
static int gic_fd;
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index aacf80f57439..23593d9eeba9 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -31,15 +31,18 @@
/* Default guest test virtual memory offset */
#define DEFAULT_GUEST_TEST_MEM 0xc0000000
-/* How many pages to dirty for each guest loop */
-#define TEST_PAGES_PER_LOOP 1024
-
/* How many host loops to run (one KVM_GET_DIRTY_LOG for each loop) */
#define TEST_HOST_LOOP_N 32UL
/* Interval for each host loop (ms) */
#define TEST_HOST_LOOP_INTERVAL 10UL
+/*
+ * Ensure the vCPU is able to perform a reasonable number of writes in each
+ * iteration to provide a lower bound on coverage.
+ */
+#define TEST_MIN_WRITES_PER_ITERATION 0x100
+
/* Dirty bitmaps are always little endian, so we need to swap on big endian */
#if defined(__s390x__)
# define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7)
@@ -75,6 +78,8 @@ static uint64_t host_page_size;
static uint64_t guest_page_size;
static uint64_t guest_num_pages;
static uint64_t iteration;
+static uint64_t nr_writes;
+static bool vcpu_stop;
/*
* Guest physical memory offset of the testing memory slot.
@@ -96,7 +101,9 @@ static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
static void guest_code(void)
{
uint64_t addr;
- int i;
+
+#ifdef __s390x__
+ uint64_t i;
/*
* On s390x, all pages of a 1M segment are initially marked as dirty
@@ -107,16 +114,19 @@ static void guest_code(void)
for (i = 0; i < guest_num_pages; i++) {
addr = guest_test_virt_mem + i * guest_page_size;
vcpu_arch_put_guest(*(uint64_t *)addr, READ_ONCE(iteration));
+ nr_writes++;
}
+#endif
while (true) {
- for (i = 0; i < TEST_PAGES_PER_LOOP; i++) {
+ while (!READ_ONCE(vcpu_stop)) {
addr = guest_test_virt_mem;
addr += (guest_random_u64(&guest_rng) % guest_num_pages)
* guest_page_size;
addr = align_down(addr, host_page_size);
vcpu_arch_put_guest(*(uint64_t *)addr, READ_ONCE(iteration));
+ nr_writes++;
}
GUEST_SYNC(1);
@@ -133,25 +143,18 @@ static uint64_t host_num_pages;
/* For statistics only */
static uint64_t host_dirty_count;
static uint64_t host_clear_count;
-static uint64_t host_track_next_count;
/* Whether dirty ring reset is requested, or finished */
static sem_t sem_vcpu_stop;
static sem_t sem_vcpu_cont;
-/*
- * This is only set by main thread, and only cleared by vcpu thread. It is
- * used to request vcpu thread to stop at the next GUEST_SYNC, since GUEST_SYNC
- * is the only place that we'll guarantee both "dirty bit" and "dirty data"
- * will match. E.g., SIG_IPI won't guarantee that if the vcpu is interrupted
- * after setting dirty bit but before the data is written.
- */
-static atomic_t vcpu_sync_stop_requested;
+
/*
* This is updated by the vcpu thread to tell the host whether it's a
* ring-full event. It should only be read until a sem_wait() of
* sem_vcpu_stop and before vcpu continues to run.
*/
static bool dirty_ring_vcpu_ring_full;
+
/*
* This is only used for verifying the dirty pages. Dirty ring has a very
* tricky case when the ring just got full, kvm will do userspace exit due to
@@ -166,7 +169,51 @@ static bool dirty_ring_vcpu_ring_full;
* dirty gfn we've collected, so that if a mismatch of data found later in the
* verifying process, we let it pass.
*/
-static uint64_t dirty_ring_last_page;
+static uint64_t dirty_ring_last_page = -1ULL;
+
+/*
+ * In addition to the above, it is possible (especially if this
+ * test is run nested) for the above scenario to repeat multiple times:
+ *
+ * The following can happen:
+ *
+ * - L1 vCPU: Memory write is logged to PML but not committed.
+ *
+ * - L1 test thread: Ignores the write because its last dirty ring entry
+ * Resets the dirty ring which:
+ * - Resets the A/D bits in EPT
+ * - Issues tlb flush (invept), which is intercepted by L0
+ *
+ * - L0: frees the whole nested ept mmu root as the response to invept,
+ * and thus ensures that when memory write is retried, it will fault again
+ *
+ * - L1 vCPU: Same memory write is logged to the PML but not committed again.
+ *
+ * - L1 test thread: Ignores the write because its last dirty ring entry (again)
+ * Resets the dirty ring which:
+ * - Resets the A/D bits in EPT (again)
+ * - Issues tlb flush (again) which is intercepted by L0
+ *
+ * ...
+ *
+ * N times
+ *
+ * - L1 vCPU: Memory write is logged in the PML and then committed.
+ * Lots of other memory writes are logged and committed.
+ * ...
+ *
+ * - L1 test thread: Sees the memory write along with other memory writes
+ * in the dirty ring, and since the write is usually not
+ * the last entry in the dirty-ring and has a very outdated
+ * iteration, the test fails.
+ *
+ *
+ * Note that this is only possible when the write was the last log entry
+ * write during iteration N-1, thus remember last iteration last log entry
+ * and also don't fail when it is reported in the next iteration, together with
+ * an outdated iteration count.
+ */
+static uint64_t dirty_ring_prev_iteration_last_page;
enum log_mode_t {
/* Only use KVM_GET_DIRTY_LOG for logging */
@@ -191,24 +238,6 @@ static enum log_mode_t host_log_mode;
static pthread_t vcpu_thread;
static uint32_t test_dirty_ring_count = TEST_DIRTY_RING_COUNT;
-static void vcpu_kick(void)
-{
- pthread_kill(vcpu_thread, SIG_IPI);
-}
-
-/*
- * In our test we do signal tricks, let's use a better version of
- * sem_wait to avoid signal interrupts
- */
-static void sem_wait_until(sem_t *sem)
-{
- int ret;
-
- do
- ret = sem_wait(sem);
- while (ret == -1 && errno == EINTR);
-}
-
static bool clear_log_supported(void)
{
return kvm_has_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
@@ -243,21 +272,16 @@ static void clear_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
/* Should only be called after a GUEST_SYNC */
static void vcpu_handle_sync_stop(void)
{
- if (atomic_read(&vcpu_sync_stop_requested)) {
- /* It means main thread is sleeping waiting */
- atomic_set(&vcpu_sync_stop_requested, false);
+ if (READ_ONCE(vcpu_stop)) {
sem_post(&sem_vcpu_stop);
- sem_wait_until(&sem_vcpu_cont);
+ sem_wait(&sem_vcpu_cont);
}
}
-static void default_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err)
+static void default_after_vcpu_run(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
- TEST_ASSERT(ret == 0 || (ret == -1 && err == EINTR),
- "vcpu run failed: errno=%d", err);
-
TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
"Invalid guest sync status: exit_reason=%s",
exit_reason_str(run->exit_reason));
@@ -324,7 +348,6 @@ static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns,
"%u != %u", cur->slot, slot);
TEST_ASSERT(cur->offset < num_pages, "Offset overflow: "
"0x%llx >= 0x%x", cur->offset, num_pages);
- //pr_info("fetch 0x%x page %llu\n", *fetch_index, cur->offset);
__set_bit_le(cur->offset, bitmap);
dirty_ring_last_page = cur->offset;
dirty_gfn_set_collected(cur);
@@ -335,36 +358,11 @@ static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns,
return count;
}
-static void dirty_ring_wait_vcpu(void)
-{
- /* This makes sure that hardware PML cache flushed */
- vcpu_kick();
- sem_wait_until(&sem_vcpu_stop);
-}
-
-static void dirty_ring_continue_vcpu(void)
-{
- pr_info("Notifying vcpu to continue\n");
- sem_post(&sem_vcpu_cont);
-}
-
static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
void *bitmap, uint32_t num_pages,
uint32_t *ring_buf_idx)
{
- uint32_t count = 0, cleared;
- bool continued_vcpu = false;
-
- dirty_ring_wait_vcpu();
-
- if (!dirty_ring_vcpu_ring_full) {
- /*
- * This is not a ring-full event, it's safe to allow
- * vcpu to continue
- */
- dirty_ring_continue_vcpu();
- continued_vcpu = true;
- }
+ uint32_t count, cleared;
/* Only have one vcpu */
count = dirty_ring_collect_one(vcpu_map_dirty_ring(vcpu),
@@ -379,35 +377,18 @@ static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
*/
TEST_ASSERT(cleared == count, "Reset dirty pages (%u) mismatch "
"with collected (%u)", cleared, count);
-
- if (!continued_vcpu) {
- TEST_ASSERT(dirty_ring_vcpu_ring_full,
- "Didn't continue vcpu even without ring full");
- dirty_ring_continue_vcpu();
- }
-
- pr_info("Iteration %ld collected %u pages\n", iteration, count);
}
-static void dirty_ring_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err)
+static void dirty_ring_after_vcpu_run(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
/* A ucall-sync or ring-full event is allowed */
if (get_ucall(vcpu, NULL) == UCALL_SYNC) {
- /* We should allow this to continue */
- ;
- } else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL ||
- (ret == -1 && err == EINTR)) {
- /* Update the flag first before pause */
- WRITE_ONCE(dirty_ring_vcpu_ring_full,
- run->exit_reason == KVM_EXIT_DIRTY_RING_FULL);
- sem_post(&sem_vcpu_stop);
- pr_info("vcpu stops because %s...\n",
- dirty_ring_vcpu_ring_full ?
- "dirty ring is full" : "vcpu is kicked out");
- sem_wait_until(&sem_vcpu_cont);
- pr_info("vcpu continues now.\n");
+ vcpu_handle_sync_stop();
+ } else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL) {
+ WRITE_ONCE(dirty_ring_vcpu_ring_full, true);
+ vcpu_handle_sync_stop();
} else {
TEST_ASSERT(false, "Invalid guest sync status: "
"exit_reason=%s",
@@ -426,7 +407,7 @@ struct log_mode {
void *bitmap, uint32_t num_pages,
uint32_t *ring_buf_idx);
/* Hook to call when after each vcpu run */
- void (*after_vcpu_run)(struct kvm_vcpu *vcpu, int ret, int err);
+ void (*after_vcpu_run)(struct kvm_vcpu *vcpu);
} log_modes[LOG_MODE_NUM] = {
{
.name = "dirty-log",
@@ -449,15 +430,6 @@ struct log_mode {
},
};
-/*
- * We use this bitmap to track some pages that should have its dirty
- * bit set in the _next_ iteration. For example, if we detected the
- * page value changed to current iteration but at the same time the
- * page bit is cleared in the latest bitmap, then the system must
- * report that write in the next get dirty log call.
- */
-static unsigned long *host_bmap_track;
-
static void log_modes_dump(void)
{
int i;
@@ -497,170 +469,109 @@ static void log_mode_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
mode->collect_dirty_pages(vcpu, slot, bitmap, num_pages, ring_buf_idx);
}
-static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err)
+static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu)
{
struct log_mode *mode = &log_modes[host_log_mode];
if (mode->after_vcpu_run)
- mode->after_vcpu_run(vcpu, ret, err);
+ mode->after_vcpu_run(vcpu);
}
static void *vcpu_worker(void *data)
{
- int ret;
struct kvm_vcpu *vcpu = data;
- uint64_t pages_count = 0;
- struct kvm_signal_mask *sigmask = alloca(offsetof(struct kvm_signal_mask, sigset)
- + sizeof(sigset_t));
- sigset_t *sigset = (sigset_t *) &sigmask->sigset;
- /*
- * SIG_IPI is unblocked atomically while in KVM_RUN. It causes the
- * ioctl to return with -EINTR, but it is still pending and we need
- * to accept it with the sigwait.
- */
- sigmask->len = 8;
- pthread_sigmask(0, NULL, sigset);
- sigdelset(sigset, SIG_IPI);
- vcpu_ioctl(vcpu, KVM_SET_SIGNAL_MASK, sigmask);
-
- sigemptyset(sigset);
- sigaddset(sigset, SIG_IPI);
+ sem_wait(&sem_vcpu_cont);
while (!READ_ONCE(host_quit)) {
- /* Clear any existing kick signals */
- pages_count += TEST_PAGES_PER_LOOP;
/* Let the guest dirty the random pages */
- ret = __vcpu_run(vcpu);
- if (ret == -1 && errno == EINTR) {
- int sig = -1;
- sigwait(sigset, &sig);
- assert(sig == SIG_IPI);
- }
- log_mode_after_vcpu_run(vcpu, ret, errno);
+ vcpu_run(vcpu);
+ log_mode_after_vcpu_run(vcpu);
}
- pr_info("Dirtied %"PRIu64" pages\n", pages_count);
-
return NULL;
}
-static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap)
+static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long **bmap)
{
+ uint64_t page, nr_dirty_pages = 0, nr_clean_pages = 0;
uint64_t step = vm_num_host_pages(mode, 1);
- uint64_t page;
- uint64_t *value_ptr;
- uint64_t min_iter = 0;
for (page = 0; page < host_num_pages; page += step) {
- value_ptr = host_test_mem + page * host_page_size;
-
- /* If this is a special page that we were tracking... */
- if (__test_and_clear_bit_le(page, host_bmap_track)) {
- host_track_next_count++;
- TEST_ASSERT(test_bit_le(page, bmap),
- "Page %"PRIu64" should have its dirty bit "
- "set in this iteration but it is missing",
- page);
- }
+ uint64_t val = *(uint64_t *)(host_test_mem + page * host_page_size);
+ bool bmap0_dirty = __test_and_clear_bit_le(page, bmap[0]);
- if (__test_and_clear_bit_le(page, bmap)) {
- bool matched;
-
- host_dirty_count++;
+ /*
+ * Ensure both bitmaps are cleared, as a page can be written
+ * multiple times per iteration, i.e. can show up in both
+ * bitmaps, and the dirty ring is additive, i.e. doesn't purge
+ * bitmap entries from previous collections.
+ */
+ if (__test_and_clear_bit_le(page, bmap[1]) || bmap0_dirty) {
+ nr_dirty_pages++;
/*
- * If the bit is set, the value written onto
- * the corresponding page should be either the
- * previous iteration number or the current one.
+ * If the page is dirty, the value written to memory
+ * should be the current iteration number.
*/
- matched = (*value_ptr == iteration ||
- *value_ptr == iteration - 1);
-
- if (host_log_mode == LOG_MODE_DIRTY_RING && !matched) {
- if (*value_ptr == iteration - 2 && min_iter <= iteration - 2) {
- /*
- * Short answer: this case is special
- * only for dirty ring test where the
- * page is the last page before a kvm
- * dirty ring full in iteration N-2.
- *
- * Long answer: Assuming ring size R,
- * one possible condition is:
- *
- * main thr vcpu thr
- * -------- --------
- * iter=1
- * write 1 to page 0~(R-1)
- * full, vmexit
- * collect 0~(R-1)
- * kick vcpu
- * write 1 to (R-1)~(2R-2)
- * full, vmexit
- * iter=2
- * collect (R-1)~(2R-2)
- * kick vcpu
- * write 1 to (2R-2)
- * (NOTE!!! "1" cached in cpu reg)
- * write 2 to (2R-1)~(3R-3)
- * full, vmexit
- * iter=3
- * collect (2R-2)~(3R-3)
- * (here if we read value on page
- * "2R-2" is 1, while iter=3!!!)
- *
- * This however can only happen once per iteration.
- */
- min_iter = iteration - 1;
+ if (val == iteration)
+ continue;
+
+ if (host_log_mode == LOG_MODE_DIRTY_RING) {
+ /*
+ * The last page in the ring from previous
+ * iteration can be written with the value
+ * from the previous iteration, as the value to
+ * be written may be cached in a CPU register.
+ */
+ if (page == dirty_ring_prev_iteration_last_page &&
+ val == iteration - 1)
continue;
- } else if (page == dirty_ring_last_page) {
- /*
- * Please refer to comments in
- * dirty_ring_last_page.
- */
+
+ /*
+ * Any value from a previous iteration is legal
+ * for the last entry, as the write may not yet
+ * have retired, i.e. the page may hold whatever
+ * it had before this iteration started.
+ */
+ if (page == dirty_ring_last_page &&
+ val < iteration)
continue;
- }
+ } else if (!val && iteration == 1 && bmap0_dirty) {
+ /*
+ * When testing get+clear, the dirty bitmap
+ * starts with all bits set, and so the first
+ * iteration can observe a "dirty" page that
+ * was never written, but only in the first
+ * bitmap (collecting the bitmap also clears
+ * all dirty pages).
+ */
+ continue;
}
- TEST_ASSERT(matched,
- "Set page %"PRIu64" value %"PRIu64
- " incorrect (iteration=%"PRIu64")",
- page, *value_ptr, iteration);
+ TEST_FAIL("Dirty page %lu value (%lu) != iteration (%lu) "
+ "(last = %lu, prev_last = %lu)",
+ page, val, iteration, dirty_ring_last_page,
+ dirty_ring_prev_iteration_last_page);
} else {
- host_clear_count++;
+ nr_clean_pages++;
/*
* If cleared, the value written can be any
- * value smaller or equals to the iteration
- * number. Note that the value can be exactly
- * (iteration-1) if that write can happen
- * like this:
- *
- * (1) increase loop count to "iteration-1"
- * (2) write to page P happens (with value
- * "iteration-1")
- * (3) get dirty log for "iteration-1"; we'll
- * see that page P bit is set (dirtied),
- * and not set the bit in host_bmap_track
- * (4) increase loop count to "iteration"
- * (which is current iteration)
- * (5) get dirty log for current iteration,
- * we'll see that page P is cleared, with
- * value "iteration-1".
+ * value smaller than the iteration number.
*/
- TEST_ASSERT(*value_ptr <= iteration,
- "Clear page %"PRIu64" value %"PRIu64
- " incorrect (iteration=%"PRIu64")",
- page, *value_ptr, iteration);
- if (*value_ptr == iteration) {
- /*
- * This page is _just_ modified; it
- * should report its dirtyness in the
- * next run
- */
- __set_bit_le(page, host_bmap_track);
- }
+ TEST_ASSERT(val < iteration,
+ "Clear page %lu value (%lu) >= iteration (%lu) "
+ "(last = %lu, prev_last = %lu)",
+ page, val, iteration, dirty_ring_last_page,
+ dirty_ring_prev_iteration_last_page);
}
}
+
+ pr_info("Iteration %2ld: dirty: %-6lu clean: %-6lu writes: %-6lu\n",
+ iteration, nr_dirty_pages, nr_clean_pages, nr_writes);
+
+ host_dirty_count += nr_dirty_pages;
+ host_clear_count += nr_clean_pages;
}
static struct kvm_vm *create_vm(enum vm_guest_mode mode, struct kvm_vcpu **vcpu,
@@ -688,7 +599,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
struct test_params *p = arg;
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
- unsigned long *bmap;
+ unsigned long *bmap[2];
uint32_t ring_buf_idx = 0;
int sem_val;
@@ -731,12 +642,21 @@ static void run_test(enum vm_guest_mode mode, void *arg)
#ifdef __s390x__
/* Align to 1M (segment size) */
guest_test_phys_mem = align_down(guest_test_phys_mem, 1 << 20);
+
+ /*
+ * The workaround in guest_code() to write all pages prior to the first
+ * iteration isn't compatible with the dirty ring, as the dirty ring
+ * support relies on the vCPU to actually stop when vcpu_stop is set so
+ * that the vCPU doesn't hang waiting for the dirty ring to be emptied.
+ */
+ TEST_ASSERT(host_log_mode != LOG_MODE_DIRTY_RING,
+ "Test needs to be updated to support s390 dirty ring");
#endif
pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
- bmap = bitmap_zalloc(host_num_pages);
- host_bmap_track = bitmap_zalloc(host_num_pages);
+ bmap[0] = bitmap_zalloc(host_num_pages);
+ bmap[1] = bitmap_zalloc(host_num_pages);
/* Add an extra memory slot for testing dirty logging */
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
@@ -757,14 +677,9 @@ static void run_test(enum vm_guest_mode mode, void *arg)
sync_global_to_guest(vm, guest_test_virt_mem);
sync_global_to_guest(vm, guest_num_pages);
- /* Start the iterations */
- iteration = 1;
- sync_global_to_guest(vm, iteration);
- WRITE_ONCE(host_quit, false);
host_dirty_count = 0;
host_clear_count = 0;
- host_track_next_count = 0;
- WRITE_ONCE(dirty_ring_vcpu_ring_full, false);
+ WRITE_ONCE(host_quit, false);
/*
* Ensure the previous iteration didn't leave a dangling semaphore, i.e.
@@ -776,21 +691,95 @@ static void run_test(enum vm_guest_mode mode, void *arg)
sem_getvalue(&sem_vcpu_cont, &sem_val);
TEST_ASSERT_EQ(sem_val, 0);
+ TEST_ASSERT_EQ(vcpu_stop, false);
+
pthread_create(&vcpu_thread, NULL, vcpu_worker, vcpu);
- while (iteration < p->iterations) {
- /* Give the vcpu thread some time to dirty some pages */
- usleep(p->interval * 1000);
- log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX,
- bmap, host_num_pages,
- &ring_buf_idx);
+ for (iteration = 1; iteration <= p->iterations; iteration++) {
+ unsigned long i;
+
+ sync_global_to_guest(vm, iteration);
+
+ WRITE_ONCE(nr_writes, 0);
+ sync_global_to_guest(vm, nr_writes);
+
+ dirty_ring_prev_iteration_last_page = dirty_ring_last_page;
+ WRITE_ONCE(dirty_ring_vcpu_ring_full, false);
+
+ sem_post(&sem_vcpu_cont);
+
+ /*
+ * Let the vCPU run beyond the configured interval until it has
+ * performed the minimum number of writes. This verifies the
+ * guest is making forward progress, e.g. isn't stuck because
+ * of a KVM bug, and puts a firm floor on test coverage.
+ */
+ for (i = 0; i < p->interval || nr_writes < TEST_MIN_WRITES_PER_ITERATION; i++) {
+ /*
+ * Sleep in 1ms chunks to keep the interval math simple
+ * and so that the test doesn't run too far beyond the
+ * specified interval.
+ */
+ usleep(1000);
+
+ sync_global_from_guest(vm, nr_writes);
+
+ /*
+ * Reap dirty pages while the guest is running so that
+ * dirty ring full events are resolved, i.e. so that a
+ * larger interval doesn't always end up with a vCPU
+ * that's effectively blocked. Collecting while the
+ * guest is running also verifies KVM doesn't lose any
+ * state.
+ *
+ * For bitmap modes, KVM overwrites the entire bitmap,
+ * i.e. collecting the bitmaps is destructive. Collect
+ * the bitmap only on the first pass, otherwise this
+ * test would lose track of dirty pages.
+ */
+ if (i && host_log_mode != LOG_MODE_DIRTY_RING)
+ continue;
+
+ /*
+ * For the dirty ring, empty the ring on subsequent
+ * passes only if the ring was filled at least once,
+ * to verify KVM's handling of a full ring (emptying
+ * the ring on every pass would make it unlikely the
+ * vCPU would ever fill the fing).
+ */
+ if (i && !READ_ONCE(dirty_ring_vcpu_ring_full))
+ continue;
+
+ log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX,
+ bmap[0], host_num_pages,
+ &ring_buf_idx);
+ }
+
+ /*
+ * Stop the vCPU prior to collecting and verifying the dirty
+ * log. If the vCPU is allowed to run during collection, then
+ * pages that are written during this iteration may be missed,
+ * i.e. collected in the next iteration. And if the vCPU is
+ * writing memory during verification, pages that this thread
+ * sees as clean may be written with this iteration's value.
+ */
+ WRITE_ONCE(vcpu_stop, true);
+ sync_global_to_guest(vm, vcpu_stop);
+ sem_wait(&sem_vcpu_stop);
/*
- * See vcpu_sync_stop_requested definition for details on why
- * we need to stop vcpu when verify data.
+ * Clear vcpu_stop after the vCPU thread has acknowledge the
+ * stop request and is waiting, i.e. is definitely not running!
*/
- atomic_set(&vcpu_sync_stop_requested, true);
- sem_wait_until(&sem_vcpu_stop);
+ WRITE_ONCE(vcpu_stop, false);
+ sync_global_to_guest(vm, vcpu_stop);
+
+ /*
+ * Sync the number of writes performed before verification, the
+ * info will be printed along with the dirty/clean page counts.
+ */
+ sync_global_from_guest(vm, nr_writes);
+
/*
* NOTE: for dirty ring, it's possible that we didn't stop at
* GUEST_SYNC but instead we stopped because ring is full;
@@ -798,32 +787,22 @@ static void run_test(enum vm_guest_mode mode, void *arg)
* the flush of the last page, and since we handle the last
* page specially verification will succeed anyway.
*/
- assert(host_log_mode == LOG_MODE_DIRTY_RING ||
- atomic_read(&vcpu_sync_stop_requested) == false);
+ log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX,
+ bmap[1], host_num_pages,
+ &ring_buf_idx);
vm_dirty_log_verify(mode, bmap);
-
- /*
- * Set host_quit before sem_vcpu_cont in the final iteration to
- * ensure that the vCPU worker doesn't resume the guest. As
- * above, the dirty ring test may stop and wait even when not
- * explicitly request to do so, i.e. would hang waiting for a
- * "continue" if it's allowed to resume the guest.
- */
- if (++iteration == p->iterations)
- WRITE_ONCE(host_quit, true);
-
- sem_post(&sem_vcpu_cont);
- sync_global_to_guest(vm, iteration);
}
+ WRITE_ONCE(host_quit, true);
+ sem_post(&sem_vcpu_cont);
+
pthread_join(vcpu_thread, NULL);
- pr_info("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), "
- "track_next (%"PRIu64")\n", host_dirty_count, host_clear_count,
- host_track_next_count);
+ pr_info("Total bits checked: dirty (%lu), clear (%lu)\n",
+ host_dirty_count, host_clear_count);
- free(bmap);
- free(host_bmap_track);
+ free(bmap[0]);
+ free(bmap[1]);
kvm_vm_free(vm);
}
@@ -857,7 +836,6 @@ int main(int argc, char *argv[])
.interval = TEST_HOST_LOOP_INTERVAL,
};
int opt, i;
- sigset_t sigset;
sem_init(&sem_vcpu_stop, 0, 0);
sem_init(&sem_vcpu_cont, 0, 0);
@@ -908,19 +886,12 @@ int main(int argc, char *argv[])
}
}
- TEST_ASSERT(p.iterations > 2, "Iterations must be greater than two");
+ TEST_ASSERT(p.iterations > 0, "Iterations must be greater than zero");
TEST_ASSERT(p.interval > 0, "Interval must be greater than zero");
pr_info("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n",
p.iterations, p.interval);
- srandom(time(0));
-
- /* Ensure that vCPU threads start with SIG_IPI blocked. */
- sigemptyset(&sigset);
- sigaddset(&sigset, SIG_IPI);
- pthread_sigmask(SIG_BLOCK, &sigset, NULL);
-
if (host_log_mode_option == LOG_MODE_ALL) {
/* Run each log mode */
for (i = 0; i < LOG_MODE_NUM; i++) {
diff --git a/tools/testing/selftests/kvm/include/aarch64/arch_timer.h b/tools/testing/selftests/kvm/include/arm64/arch_timer.h
index bf461de34785..bf461de34785 100644
--- a/tools/testing/selftests/kvm/include/aarch64/arch_timer.h
+++ b/tools/testing/selftests/kvm/include/arm64/arch_timer.h
diff --git a/tools/testing/selftests/kvm/include/aarch64/delay.h b/tools/testing/selftests/kvm/include/arm64/delay.h
index 329e4f5079ea..329e4f5079ea 100644
--- a/tools/testing/selftests/kvm/include/aarch64/delay.h
+++ b/tools/testing/selftests/kvm/include/arm64/delay.h
diff --git a/tools/testing/selftests/kvm/include/aarch64/gic.h b/tools/testing/selftests/kvm/include/arm64/gic.h
index baeb3c859389..baeb3c859389 100644
--- a/tools/testing/selftests/kvm/include/aarch64/gic.h
+++ b/tools/testing/selftests/kvm/include/arm64/gic.h
diff --git a/tools/testing/selftests/kvm/include/aarch64/gic_v3.h b/tools/testing/selftests/kvm/include/arm64/gic_v3.h
index a76615fa39a1..a76615fa39a1 100644
--- a/tools/testing/selftests/kvm/include/aarch64/gic_v3.h
+++ b/tools/testing/selftests/kvm/include/arm64/gic_v3.h
diff --git a/tools/testing/selftests/kvm/include/aarch64/gic_v3_its.h b/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h
index 3722ed9c8f96..3722ed9c8f96 100644
--- a/tools/testing/selftests/kvm/include/aarch64/gic_v3_its.h
+++ b/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h
diff --git a/tools/testing/selftests/kvm/include/aarch64/kvm_util_arch.h b/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h
index e43a57d99b56..e43a57d99b56 100644
--- a/tools/testing/selftests/kvm/include/aarch64/kvm_util_arch.h
+++ b/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h
diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h
index 1e8d0d531fbd..1e8d0d531fbd 100644
--- a/tools/testing/selftests/kvm/include/aarch64/processor.h
+++ b/tools/testing/selftests/kvm/include/arm64/processor.h
diff --git a/tools/testing/selftests/kvm/include/aarch64/spinlock.h b/tools/testing/selftests/kvm/include/arm64/spinlock.h
index cf0984106d14..cf0984106d14 100644
--- a/tools/testing/selftests/kvm/include/aarch64/spinlock.h
+++ b/tools/testing/selftests/kvm/include/arm64/spinlock.h
diff --git a/tools/testing/selftests/kvm/include/aarch64/ucall.h b/tools/testing/selftests/kvm/include/arm64/ucall.h
index 4ec801f37f00..4ec801f37f00 100644
--- a/tools/testing/selftests/kvm/include/aarch64/ucall.h
+++ b/tools/testing/selftests/kvm/include/arm64/ucall.h
diff --git a/tools/testing/selftests/kvm/include/aarch64/vgic.h b/tools/testing/selftests/kvm/include/arm64/vgic.h
index c481d0c00a5d..c481d0c00a5d 100644
--- a/tools/testing/selftests/kvm/include/aarch64/vgic.h
+++ b/tools/testing/selftests/kvm/include/arm64/vgic.h
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index bc7c242480d6..373912464fb4 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -46,6 +46,12 @@ struct userspace_mem_region {
struct hlist_node slot_node;
};
+struct kvm_binary_stats {
+ int fd;
+ struct kvm_stats_header header;
+ struct kvm_stats_desc *desc;
+};
+
struct kvm_vcpu {
struct list_head list;
uint32_t id;
@@ -55,6 +61,7 @@ struct kvm_vcpu {
#ifdef __x86_64__
struct kvm_cpuid2 *cpuid;
#endif
+ struct kvm_binary_stats stats;
struct kvm_dirty_gfn *dirty_gfns;
uint32_t fetch_index;
uint32_t dirty_gfns_count;
@@ -99,10 +106,7 @@ struct kvm_vm {
struct kvm_vm_arch arch;
- /* Cache of information for binary stats interface */
- int stats_fd;
- struct kvm_stats_header stats_header;
- struct kvm_stats_desc *stats_desc;
+ struct kvm_binary_stats stats;
/*
* KVM region slots. These are the default memslots used by page
@@ -531,16 +535,19 @@ void read_stat_data(int stats_fd, struct kvm_stats_header *header,
struct kvm_stats_desc *desc, uint64_t *data,
size_t max_elements);
-void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data,
- size_t max_elements);
+void kvm_get_stat(struct kvm_binary_stats *stats, const char *name,
+ uint64_t *data, size_t max_elements);
-static inline uint64_t vm_get_stat(struct kvm_vm *vm, const char *stat_name)
-{
- uint64_t data;
+#define __get_stat(stats, stat) \
+({ \
+ uint64_t data; \
+ \
+ kvm_get_stat(stats, #stat, &data, 1); \
+ data; \
+})
- __vm_get_stat(vm, stat_name, &data, 1);
- return data;
-}
+#define vm_get_stat(vm, stat) __get_stat(&(vm)->stats, stat)
+#define vcpu_get_stat(vcpu, stat) __get_stat(&(vcpu)->stats, stat)
void vm_create_irqchip(struct kvm_vm *vm);
@@ -702,16 +709,22 @@ static inline int __vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t va
return __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
}
-static inline void vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id, void *addr)
+static inline uint64_t vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id)
{
- struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)addr };
+ uint64_t val;
+ struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val };
+
+ TEST_ASSERT(KVM_REG_SIZE(id) <= sizeof(val), "Reg %lx too big", id);
vcpu_ioctl(vcpu, KVM_GET_ONE_REG, &reg);
+ return val;
}
static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val)
{
struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val };
+ TEST_ASSERT(KVM_REG_SIZE(id) <= sizeof(val), "Reg %lx too big", id);
+
vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
}
@@ -957,6 +970,8 @@ static inline struct kvm_vm *vm_create_shape_with_one_vcpu(struct vm_shape shape
struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm);
+void kvm_set_files_rlimit(uint32_t nr_vcpus);
+
void kvm_pin_this_task_to_pcpu(uint32_t pcpu);
void kvm_print_vcpu_pinning_help(void);
void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],
diff --git a/tools/testing/selftests/kvm/include/s390x/debug_print.h b/tools/testing/selftests/kvm/include/s390/debug_print.h
index 1bf275631cc6..1bf275631cc6 100644
--- a/tools/testing/selftests/kvm/include/s390x/debug_print.h
+++ b/tools/testing/selftests/kvm/include/s390/debug_print.h
diff --git a/tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h b/tools/testing/selftests/kvm/include/s390/diag318_test_handler.h
index b0ed71302722..b0ed71302722 100644
--- a/tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h
+++ b/tools/testing/selftests/kvm/include/s390/diag318_test_handler.h
diff --git a/tools/testing/selftests/kvm/include/s390x/facility.h b/tools/testing/selftests/kvm/include/s390/facility.h
index 00a1ced6538b..00a1ced6538b 100644
--- a/tools/testing/selftests/kvm/include/s390x/facility.h
+++ b/tools/testing/selftests/kvm/include/s390/facility.h
diff --git a/tools/testing/selftests/kvm/include/s390x/kvm_util_arch.h b/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h
index e43a57d99b56..e43a57d99b56 100644
--- a/tools/testing/selftests/kvm/include/s390x/kvm_util_arch.h
+++ b/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h
diff --git a/tools/testing/selftests/kvm/include/s390x/processor.h b/tools/testing/selftests/kvm/include/s390/processor.h
index 33fef6fd9617..33fef6fd9617 100644
--- a/tools/testing/selftests/kvm/include/s390x/processor.h
+++ b/tools/testing/selftests/kvm/include/s390/processor.h
diff --git a/tools/testing/selftests/kvm/include/s390x/sie.h b/tools/testing/selftests/kvm/include/s390/sie.h
index 160acd4a1db9..160acd4a1db9 100644
--- a/tools/testing/selftests/kvm/include/s390x/sie.h
+++ b/tools/testing/selftests/kvm/include/s390/sie.h
diff --git a/tools/testing/selftests/kvm/include/s390x/ucall.h b/tools/testing/selftests/kvm/include/s390/ucall.h
index 8035a872a351..8035a872a351 100644
--- a/tools/testing/selftests/kvm/include/s390x/ucall.h
+++ b/tools/testing/selftests/kvm/include/s390/ucall.h
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index 3e473058849f..77d13d7920cb 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -22,7 +22,7 @@
#define msecs_to_usecs(msec) ((msec) * 1000ULL)
-static inline int _no_printf(const char *format, ...) { return 0; }
+static inline __printf(1, 2) int _no_printf(const char *format, ...) { return 0; }
#ifdef DEBUG
#define pr_debug(...) printf(__VA_ARGS__)
diff --git a/tools/testing/selftests/kvm/include/x86_64/apic.h b/tools/testing/selftests/kvm/include/x86/apic.h
index 51990094effd..80fe9f69b38d 100644
--- a/tools/testing/selftests/kvm/include/x86_64/apic.h
+++ b/tools/testing/selftests/kvm/include/x86/apic.h
@@ -1,7 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * tools/testing/selftests/kvm/include/x86_64/apic.h
- *
* Copyright (C) 2021, Google LLC.
*/
diff --git a/tools/testing/selftests/kvm/include/x86_64/evmcs.h b/tools/testing/selftests/kvm/include/x86/evmcs.h
index 901caf0e0939..5a74bb30e2f8 100644
--- a/tools/testing/selftests/kvm/include/x86_64/evmcs.h
+++ b/tools/testing/selftests/kvm/include/x86/evmcs.h
@@ -1,9 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
- * tools/testing/selftests/kvm/include/x86_64/evmcs.h
- *
* Copyright (C) 2018, Red Hat, Inc.
- *
*/
#ifndef SELFTEST_KVM_EVMCS_H
diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86/hyperv.h
index 6849e2552f1b..f13e532be240 100644
--- a/tools/testing/selftests/kvm/include/x86_64/hyperv.h
+++ b/tools/testing/selftests/kvm/include/x86/hyperv.h
@@ -1,9 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
- * tools/testing/selftests/kvm/include/x86_64/hyperv.h
- *
* Copyright (C) 2021, Red Hat, Inc.
- *
*/
#ifndef SELFTEST_KVM_HYPERV_H
diff --git a/tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h b/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h
index 972bb1c4ab4c..972bb1c4ab4c 100644
--- a/tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h
+++ b/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h
diff --git a/tools/testing/selftests/kvm/include/x86_64/mce.h b/tools/testing/selftests/kvm/include/x86/mce.h
index 6119321f3f5d..295f2d554754 100644
--- a/tools/testing/selftests/kvm/include/x86_64/mce.h
+++ b/tools/testing/selftests/kvm/include/x86/mce.h
@@ -1,7 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * tools/testing/selftests/kvm/include/x86_64/mce.h
- *
* Copyright (C) 2022, Google LLC.
*/
diff --git a/tools/testing/selftests/kvm/include/x86_64/pmu.h b/tools/testing/selftests/kvm/include/x86/pmu.h
index 3c10c4dc0ae8..3c10c4dc0ae8 100644
--- a/tools/testing/selftests/kvm/include/x86_64/pmu.h
+++ b/tools/testing/selftests/kvm/include/x86/pmu.h
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h
index 645200e95f89..32ab6ca7ec32 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86/processor.h
@@ -1,7 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * tools/testing/selftests/kvm/include/x86_64/processor.h
- *
* Copyright (C) 2018, Google LLC.
*/
@@ -29,6 +27,8 @@ extern uint64_t guest_tsc_khz;
#define MAX_NR_CPUID_ENTRIES 100
#endif
+#define NONCANONICAL 0xaaaaaaaaaaaaaaaaull
+
/* Forced emulation prefix, used to invoke the emulator unconditionally. */
#define KVM_FEP "ud2; .byte 'k', 'v', 'm';"
@@ -183,6 +183,9 @@ struct kvm_x86_cpu_feature {
* Extended Leafs, a.k.a. AMD defined
*/
#define X86_FEATURE_SVM KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 2)
+#define X86_FEATURE_PERFCTR_CORE KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 23)
+#define X86_FEATURE_PERFCTR_NB KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 24)
+#define X86_FEATURE_PERFCTR_LLC KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 28)
#define X86_FEATURE_NX KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 20)
#define X86_FEATURE_GBPAGES KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 26)
#define X86_FEATURE_RDTSCP KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 27)
@@ -197,8 +200,11 @@ struct kvm_x86_cpu_feature {
#define X86_FEATURE_PAUSEFILTER KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 10)
#define X86_FEATURE_PFTHRESHOLD KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 12)
#define X86_FEATURE_VGIF KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 16)
+#define X86_FEATURE_IDLE_HLT KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 30)
#define X86_FEATURE_SEV KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 1)
#define X86_FEATURE_SEV_ES KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 3)
+#define X86_FEATURE_PERFMON_V2 KVM_X86_CPU_FEATURE(0x80000022, 0, EAX, 0)
+#define X86_FEATURE_LBR_PMC_FREEZE KVM_X86_CPU_FEATURE(0x80000022, 0, EAX, 2)
/*
* KVM defined paravirt features.
@@ -285,6 +291,8 @@ struct kvm_x86_cpu_property {
#define X86_PROPERTY_GUEST_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 16, 23)
#define X86_PROPERTY_SEV_C_BIT KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 0, 5)
#define X86_PROPERTY_PHYS_ADDR_REDUCTION KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 6, 11)
+#define X86_PROPERTY_NR_PERFCTR_CORE KVM_X86_CPU_PROPERTY(0x80000022, 0, EBX, 0, 3)
+#define X86_PROPERTY_NR_PERFCTR_NB KVM_X86_CPU_PROPERTY(0x80000022, 0, EBX, 10, 15)
#define X86_PROPERTY_MAX_CENTAUR_LEAF KVM_X86_CPU_PROPERTY(0xC0000000, 0, EAX, 0, 31)
@@ -571,6 +579,11 @@ static inline void set_cr4(uint64_t val)
__asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory");
}
+static inline void set_idt(const struct desc_ptr *idt_desc)
+{
+ __asm__ __volatile__("lidt %0"::"m"(*idt_desc));
+}
+
static inline u64 xgetbv(u32 index)
{
u32 eax, edx;
@@ -1012,10 +1025,19 @@ static inline struct kvm_cpuid2 *allocate_kvm_cpuid2(int nr_entries)
void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid);
+static inline void vcpu_get_cpuid(struct kvm_vcpu *vcpu)
+{
+ vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
+}
+
static inline struct kvm_cpuid_entry2 *__vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu,
uint32_t function,
uint32_t index)
{
+ TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first (or equivalent)");
+
+ vcpu_get_cpuid(vcpu);
+
return (struct kvm_cpuid_entry2 *)get_cpuid_entry(vcpu->cpuid,
function, index);
}
@@ -1036,7 +1058,7 @@ static inline int __vcpu_set_cpuid(struct kvm_vcpu *vcpu)
return r;
/* On success, refresh the cache to pick up adjustments made by KVM. */
- vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
+ vcpu_get_cpuid(vcpu);
return 0;
}
@@ -1046,12 +1068,7 @@ static inline void vcpu_set_cpuid(struct kvm_vcpu *vcpu)
vcpu_ioctl(vcpu, KVM_SET_CPUID2, vcpu->cpuid);
/* Refresh the cache to pick up adjustments made by KVM. */
- vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
-}
-
-static inline void vcpu_get_cpuid(struct kvm_vcpu *vcpu)
-{
- vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
+ vcpu_get_cpuid(vcpu);
}
void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu,
@@ -1235,7 +1252,7 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector,
uint64_t ign_error_code; \
uint8_t vector; \
\
- asm volatile(KVM_ASM_SAFE(insn) \
+ asm volatile(KVM_ASM_SAFE_FEP(insn) \
: KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code) \
: inputs \
: KVM_ASM_SAFE_CLOBBERS); \
@@ -1330,6 +1347,46 @@ static inline void kvm_hypercall_map_gpa_range(uint64_t gpa, uint64_t size,
GUEST_ASSERT(!ret);
}
+/*
+ * Execute HLT in an STI interrupt shadow to ensure that a pending IRQ that's
+ * intended to be a wake event arrives *after* HLT is executed. Modern CPUs,
+ * except for a few oddballs that KVM is unlikely to run on, block IRQs for one
+ * instruction after STI, *if* RFLAGS.IF=0 before STI. Note, Intel CPUs may
+ * block other events beyond regular IRQs, e.g. may block NMIs and SMIs too.
+ */
+static inline void safe_halt(void)
+{
+ asm volatile("sti; hlt");
+}
+
+/*
+ * Enable interrupts and ensure that interrupts are evaluated upon return from
+ * this function, i.e. execute a nop to consume the STi interrupt shadow.
+ */
+static inline void sti_nop(void)
+{
+ asm volatile ("sti; nop");
+}
+
+/*
+ * Enable interrupts for one instruction (nop), to allow the CPU to process all
+ * interrupts that are already pending.
+ */
+static inline void sti_nop_cli(void)
+{
+ asm volatile ("sti; nop; cli");
+}
+
+static inline void sti(void)
+{
+ asm volatile("sti");
+}
+
+static inline void cli(void)
+{
+ asm volatile ("cli");
+}
+
void __vm_xsave_require_permission(uint64_t xfeature, const char *name);
#define vm_xsave_require_permission(xfeature) \
diff --git a/tools/testing/selftests/kvm/include/x86_64/sev.h b/tools/testing/selftests/kvm/include/x86/sev.h
index 82c11c81a956..82c11c81a956 100644
--- a/tools/testing/selftests/kvm/include/x86_64/sev.h
+++ b/tools/testing/selftests/kvm/include/x86/sev.h
diff --git a/tools/testing/selftests/kvm/include/x86_64/svm.h b/tools/testing/selftests/kvm/include/x86/svm.h
index 4803e1056055..29cffd0a9181 100644
--- a/tools/testing/selftests/kvm/include/x86_64/svm.h
+++ b/tools/testing/selftests/kvm/include/x86/svm.h
@@ -1,10 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * tools/testing/selftests/kvm/include/x86_64/svm.h
- * This is a copy of arch/x86/include/asm/svm.h
- *
- */
-
#ifndef SELFTEST_KVM_SVM_H
#define SELFTEST_KVM_SVM_H
diff --git a/tools/testing/selftests/kvm/include/x86_64/svm_util.h b/tools/testing/selftests/kvm/include/x86/svm_util.h
index 044f0f872ba9..b74c6dcddcbd 100644
--- a/tools/testing/selftests/kvm/include/x86_64/svm_util.h
+++ b/tools/testing/selftests/kvm/include/x86/svm_util.h
@@ -1,8 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * tools/testing/selftests/kvm/include/x86_64/svm_utils.h
- * Header for nested SVM testing
- *
* Copyright (C) 2020, Red Hat, Inc.
*/
diff --git a/tools/testing/selftests/kvm/include/x86_64/ucall.h b/tools/testing/selftests/kvm/include/x86/ucall.h
index d3825dcc3cd9..d3825dcc3cd9 100644
--- a/tools/testing/selftests/kvm/include/x86_64/ucall.h
+++ b/tools/testing/selftests/kvm/include/x86/ucall.h
diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86/vmx.h
index 5f0c0a29c556..edb3c391b982 100644
--- a/tools/testing/selftests/kvm/include/x86_64/vmx.h
+++ b/tools/testing/selftests/kvm/include/x86/vmx.h
@@ -1,7 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * tools/testing/selftests/kvm/include/x86_64/vmx.h
- *
* Copyright (C) 2018, Google LLC.
*/
diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
index c78f34699f73..c5310736ed06 100644
--- a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
+++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
@@ -10,7 +10,6 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <sys/resource.h>
#include "test_util.h"
@@ -39,36 +38,11 @@ int main(int argc, char *argv[])
{
int kvm_max_vcpu_id = kvm_check_cap(KVM_CAP_MAX_VCPU_ID);
int kvm_max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
- /*
- * Number of file descriptors reqired, KVM_CAP_MAX_VCPUS for vCPU fds +
- * an arbitrary number for everything else.
- */
- int nr_fds_wanted = kvm_max_vcpus + 100;
- struct rlimit rl;
pr_info("KVM_CAP_MAX_VCPU_ID: %d\n", kvm_max_vcpu_id);
pr_info("KVM_CAP_MAX_VCPUS: %d\n", kvm_max_vcpus);
- /*
- * Check that we're allowed to open nr_fds_wanted file descriptors and
- * try raising the limits if needed.
- */
- TEST_ASSERT(!getrlimit(RLIMIT_NOFILE, &rl), "getrlimit() failed!");
-
- if (rl.rlim_cur < nr_fds_wanted) {
- rl.rlim_cur = nr_fds_wanted;
- if (rl.rlim_max < nr_fds_wanted) {
- int old_rlim_max = rl.rlim_max;
- rl.rlim_max = nr_fds_wanted;
-
- int r = setrlimit(RLIMIT_NOFILE, &rl);
- __TEST_REQUIRE(r >= 0,
- "RLIMIT_NOFILE hard limit is too low (%d, wanted %d)",
- old_rlim_max, nr_fds_wanted);
- } else {
- TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!");
- }
- }
+ kvm_set_files_rlimit(kvm_max_vcpus);
/*
* Upstream KVM prior to 4.8 does not support KVM_CAP_MAX_VCPU_ID.
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic.c b/tools/testing/selftests/kvm/lib/arm64/gic.c
index 7abbf8866512..7abbf8866512 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/gic.c
+++ b/tools/testing/selftests/kvm/lib/arm64/gic.c
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_private.h b/tools/testing/selftests/kvm/lib/arm64/gic_private.h
index d24e9ecc96c6..d24e9ecc96c6 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/gic_private.h
+++ b/tools/testing/selftests/kvm/lib/arm64/gic_private.h
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3.c
index 66d05506f78b..66d05506f78b 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c
+++ b/tools/testing/selftests/kvm/lib/arm64/gic_v3.c
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3_its.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c
index 09f270545646..09f270545646 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/gic_v3_its.c
+++ b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c
diff --git a/tools/testing/selftests/kvm/lib/aarch64/handlers.S b/tools/testing/selftests/kvm/lib/arm64/handlers.S
index 0e443eadfac6..0e443eadfac6 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/handlers.S
+++ b/tools/testing/selftests/kvm/lib/arm64/handlers.S
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c
index 698e34f39241..7ba3aa3755f3 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/arm64/processor.c
@@ -281,8 +281,8 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init)
*/
vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CPACR_EL1), 3 << 20);
- vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), &sctlr_el1);
- vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), &tcr_el1);
+ sctlr_el1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1));
+ tcr_el1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1));
/* Configure base granule size */
switch (vm->mode) {
@@ -360,8 +360,8 @@ void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
{
uint64_t pstate, pc;
- vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pstate), &pstate);
- vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc), &pc);
+ pstate = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pstate));
+ pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc));
fprintf(stream, "%*spstate: 0x%.16lx pc: 0x%.16lx\n",
indent, "", pstate, pc);
diff --git a/tools/testing/selftests/kvm/lib/aarch64/spinlock.c b/tools/testing/selftests/kvm/lib/arm64/spinlock.c
index a076e780be5d..a076e780be5d 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/spinlock.c
+++ b/tools/testing/selftests/kvm/lib/arm64/spinlock.c
diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/arm64/ucall.c
index ddab0ce89d4d..ddab0ce89d4d 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c
+++ b/tools/testing/selftests/kvm/lib/arm64/ucall.c
diff --git a/tools/testing/selftests/kvm/lib/aarch64/vgic.c b/tools/testing/selftests/kvm/lib/arm64/vgic.c
index 4427f43f73ea..4427f43f73ea 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/vgic.c
+++ b/tools/testing/selftests/kvm/lib/arm64/vgic.c
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 480e3a40d197..279ad8946040 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -12,6 +12,7 @@
#include <assert.h>
#include <sched.h>
#include <sys/mman.h>
+#include <sys/resource.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
@@ -196,6 +197,11 @@ static void vm_open(struct kvm_vm *vm)
vm->fd = __kvm_ioctl(vm->kvm_fd, KVM_CREATE_VM, (void *)vm->type);
TEST_ASSERT(vm->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm->fd));
+
+ if (kvm_has_cap(KVM_CAP_BINARY_STATS_FD))
+ vm->stats.fd = vm_get_stats_fd(vm);
+ else
+ vm->stats.fd = -1;
}
const char *vm_guest_mode_string(uint32_t i)
@@ -406,6 +412,38 @@ static uint64_t vm_nr_pages_required(enum vm_guest_mode mode,
return vm_adjust_num_guest_pages(mode, nr_pages);
}
+void kvm_set_files_rlimit(uint32_t nr_vcpus)
+{
+ /*
+ * Each vCPU will open two file descriptors: the vCPU itself and the
+ * vCPU's binary stats file descriptor. Add an arbitrary amount of
+ * buffer for all other files a test may open.
+ */
+ int nr_fds_wanted = nr_vcpus * 2 + 100;
+ struct rlimit rl;
+
+ /*
+ * Check that we're allowed to open nr_fds_wanted file descriptors and
+ * try raising the limits if needed.
+ */
+ TEST_ASSERT(!getrlimit(RLIMIT_NOFILE, &rl), "getrlimit() failed!");
+
+ if (rl.rlim_cur < nr_fds_wanted) {
+ rl.rlim_cur = nr_fds_wanted;
+ if (rl.rlim_max < nr_fds_wanted) {
+ int old_rlim_max = rl.rlim_max;
+
+ rl.rlim_max = nr_fds_wanted;
+ __TEST_REQUIRE(setrlimit(RLIMIT_NOFILE, &rl) >= 0,
+ "RLIMIT_NOFILE hard limit is too low (%d, wanted %d)",
+ old_rlim_max, nr_fds_wanted);
+ } else {
+ TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!");
+ }
+ }
+
+}
+
struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
uint64_t nr_extra_pages)
{
@@ -415,6 +453,8 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
struct kvm_vm *vm;
int i;
+ kvm_set_files_rlimit(nr_runnable_vcpus);
+
pr_debug("%s: mode='%s' type='%d', pages='%ld'\n", __func__,
vm_guest_mode_string(shape.mode), shape.type, nr_pages);
@@ -657,6 +697,23 @@ userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
return NULL;
}
+static void kvm_stats_release(struct kvm_binary_stats *stats)
+{
+ int ret;
+
+ if (stats->fd < 0)
+ return;
+
+ if (stats->desc) {
+ free(stats->desc);
+ stats->desc = NULL;
+ }
+
+ ret = close(stats->fd);
+ TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
+ stats->fd = -1;
+}
+
__weak void vcpu_arch_free(struct kvm_vcpu *vcpu)
{
@@ -690,6 +747,8 @@ static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
ret = close(vcpu->fd);
TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
+ kvm_stats_release(&vcpu->stats);
+
list_del(&vcpu->list);
vcpu_arch_free(vcpu);
@@ -709,6 +768,9 @@ void kvm_vm_release(struct kvm_vm *vmp)
ret = close(vmp->kvm_fd);
TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
+
+ /* Free cached stats metadata and close FD */
+ kvm_stats_release(&vmp->stats);
}
static void __vm_mem_region_delete(struct kvm_vm *vm,
@@ -748,12 +810,6 @@ void kvm_vm_free(struct kvm_vm *vmp)
if (vmp == NULL)
return;
- /* Free cached stats metadata and close FD */
- if (vmp->stats_fd) {
- free(vmp->stats_desc);
- close(vmp->stats_fd);
- }
-
/* Free userspace_mem_regions. */
hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node)
__vm_mem_region_delete(vmp, region);
@@ -1286,6 +1342,11 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
TEST_ASSERT(vcpu->run != MAP_FAILED,
__KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));
+ if (kvm_has_cap(KVM_CAP_BINARY_STATS_FD))
+ vcpu->stats.fd = vcpu_get_stats_fd(vcpu);
+ else
+ vcpu->stats.fd = -1;
+
/* Add to linked-list of VCPUs. */
list_add(&vcpu->list, &vm->vcpus);
@@ -1648,7 +1709,8 @@ int _vcpu_run(struct kvm_vcpu *vcpu)
rc = __vcpu_run(vcpu);
} while (rc == -1 && errno == EINTR);
- assert_on_unhandled_exception(vcpu);
+ if (!rc)
+ assert_on_unhandled_exception(vcpu);
return rc;
}
@@ -2197,46 +2259,31 @@ void read_stat_data(int stats_fd, struct kvm_stats_header *header,
desc->name, size, ret);
}
-/*
- * Read the data of the named stat
- *
- * Input Args:
- * vm - the VM for which the stat should be read
- * stat_name - the name of the stat to read
- * max_elements - the maximum number of 8-byte values to read into data
- *
- * Output Args:
- * data - the buffer into which stat data should be read
- *
- * Read the data values of a specified stat from the binary stats interface.
- */
-void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data,
- size_t max_elements)
+void kvm_get_stat(struct kvm_binary_stats *stats, const char *name,
+ uint64_t *data, size_t max_elements)
{
struct kvm_stats_desc *desc;
size_t size_desc;
int i;
- if (!vm->stats_fd) {
- vm->stats_fd = vm_get_stats_fd(vm);
- read_stats_header(vm->stats_fd, &vm->stats_header);
- vm->stats_desc = read_stats_descriptors(vm->stats_fd,
- &vm->stats_header);
+ if (!stats->desc) {
+ read_stats_header(stats->fd, &stats->header);
+ stats->desc = read_stats_descriptors(stats->fd, &stats->header);
}
- size_desc = get_stats_descriptor_size(&vm->stats_header);
+ size_desc = get_stats_descriptor_size(&stats->header);
- for (i = 0; i < vm->stats_header.num_desc; ++i) {
- desc = (void *)vm->stats_desc + (i * size_desc);
+ for (i = 0; i < stats->header.num_desc; ++i) {
+ desc = (void *)stats->desc + (i * size_desc);
- if (strcmp(desc->name, stat_name))
+ if (strcmp(desc->name, name))
continue;
- read_stat_data(vm->stats_fd, &vm->stats_header, desc,
- data, max_elements);
-
- break;
+ read_stat_data(stats->fd, &stats->header, desc, data, max_elements);
+ return;
}
+
+ TEST_FAIL("Unable to find stat '%s'", name);
}
__weak void kvm_arch_vm_post_create(struct kvm_vm *vm)
diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c
index 6ae47b3d6b25..dd663bcf0cc0 100644
--- a/tools/testing/selftests/kvm/lib/riscv/processor.c
+++ b/tools/testing/selftests/kvm/lib/riscv/processor.c
@@ -221,39 +221,39 @@ void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
{
struct kvm_riscv_core core;
- vcpu_get_reg(vcpu, RISCV_CORE_REG(mode), &core.mode);
- vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc), &core.regs.pc);
- vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.ra), &core.regs.ra);
- vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.sp), &core.regs.sp);
- vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.gp), &core.regs.gp);
- vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.tp), &core.regs.tp);
- vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t0), &core.regs.t0);
- vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t1), &core.regs.t1);
- vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t2), &core.regs.t2);
- vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s0), &core.regs.s0);
- vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s1), &core.regs.s1);
- vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a0), &core.regs.a0);
- vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a1), &core.regs.a1);
- vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a2), &core.regs.a2);
- vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a3), &core.regs.a3);
- vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a4), &core.regs.a4);
- vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a5), &core.regs.a5);
- vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a6), &core.regs.a6);
- vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a7), &core.regs.a7);
- vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s2), &core.regs.s2);
- vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s3), &core.regs.s3);
- vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s4), &core.regs.s4);
- vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s5), &core.regs.s5);
- vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s6), &core.regs.s6);
- vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s7), &core.regs.s7);
- vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s8), &core.regs.s8);
- vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s9), &core.regs.s9);
- vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s10), &core.regs.s10);
- vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s11), &core.regs.s11);
- vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t3), &core.regs.t3);
- vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t4), &core.regs.t4);
- vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t5), &core.regs.t5);
- vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t6), &core.regs.t6);
+ core.mode = vcpu_get_reg(vcpu, RISCV_CORE_REG(mode));
+ core.regs.pc = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc));
+ core.regs.ra = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.ra));
+ core.regs.sp = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.sp));
+ core.regs.gp = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.gp));
+ core.regs.tp = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.tp));
+ core.regs.t0 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t0));
+ core.regs.t1 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t1));
+ core.regs.t2 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t2));
+ core.regs.s0 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s0));
+ core.regs.s1 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s1));
+ core.regs.a0 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a0));
+ core.regs.a1 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a1));
+ core.regs.a2 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a2));
+ core.regs.a3 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a3));
+ core.regs.a4 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a4));
+ core.regs.a5 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a5));
+ core.regs.a6 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a6));
+ core.regs.a7 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a7));
+ core.regs.s2 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s2));
+ core.regs.s3 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s3));
+ core.regs.s4 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s4));
+ core.regs.s5 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s5));
+ core.regs.s6 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s6));
+ core.regs.s7 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s7));
+ core.regs.s8 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s8));
+ core.regs.s9 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s9));
+ core.regs.s10 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s10));
+ core.regs.s11 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s11));
+ core.regs.t3 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t3));
+ core.regs.t4 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t4));
+ core.regs.t5 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t5));
+ core.regs.t6 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t6));
fprintf(stream,
" MODE: 0x%lx\n", core.mode);
diff --git a/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c b/tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c
index 2c432fa164f1..2c432fa164f1 100644
--- a/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c
+++ b/tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c
diff --git a/tools/testing/selftests/kvm/lib/s390x/facility.c b/tools/testing/selftests/kvm/lib/s390/facility.c
index d540812d911a..d540812d911a 100644
--- a/tools/testing/selftests/kvm/lib/s390x/facility.c
+++ b/tools/testing/selftests/kvm/lib/s390/facility.c
diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390/processor.c
index 20cfe970e3e3..20cfe970e3e3 100644
--- a/tools/testing/selftests/kvm/lib/s390x/processor.c
+++ b/tools/testing/selftests/kvm/lib/s390/processor.c
diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390/ucall.c
index cca98734653d..cca98734653d 100644
--- a/tools/testing/selftests/kvm/lib/s390x/ucall.c
+++ b/tools/testing/selftests/kvm/lib/s390/ucall.c
diff --git a/tools/testing/selftests/kvm/lib/userfaultfd_util.c b/tools/testing/selftests/kvm/lib/userfaultfd_util.c
index 7c9de8414462..5bde176cedd5 100644
--- a/tools/testing/selftests/kvm/lib/userfaultfd_util.c
+++ b/tools/testing/selftests/kvm/lib/userfaultfd_util.c
@@ -114,7 +114,7 @@ struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay,
PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n",
is_minor ? "MINOR" : "MISSING",
- is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY");
+ is_minor ? "UFFDIO_CONTINUE" : "UFFDIO_COPY");
uffd_desc = malloc(sizeof(struct uffd_desc));
TEST_ASSERT(uffd_desc, "Failed to malloc uffd descriptor");
diff --git a/tools/testing/selftests/kvm/lib/x86_64/apic.c b/tools/testing/selftests/kvm/lib/x86/apic.c
index 89153a333e83..89153a333e83 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/apic.c
+++ b/tools/testing/selftests/kvm/lib/x86/apic.c
diff --git a/tools/testing/selftests/kvm/lib/x86_64/handlers.S b/tools/testing/selftests/kvm/lib/x86/handlers.S
index 7629819734af..7629819734af 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/handlers.S
+++ b/tools/testing/selftests/kvm/lib/x86/handlers.S
diff --git a/tools/testing/selftests/kvm/lib/x86_64/hyperv.c b/tools/testing/selftests/kvm/lib/x86/hyperv.c
index 15bc8cd583aa..15bc8cd583aa 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/hyperv.c
+++ b/tools/testing/selftests/kvm/lib/x86/hyperv.c
diff --git a/tools/testing/selftests/kvm/lib/x86_64/memstress.c b/tools/testing/selftests/kvm/lib/x86/memstress.c
index d61e623afc8c..7f5d62a65c68 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/memstress.c
+++ b/tools/testing/selftests/kvm/lib/x86/memstress.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * x86_64-specific extensions to memstress.c.
+ * x86-specific extensions to memstress.c.
*
* Copyright (C) 2022, Google, Inc.
*/
diff --git a/tools/testing/selftests/kvm/lib/x86_64/pmu.c b/tools/testing/selftests/kvm/lib/x86/pmu.c
index f31f0427c17c..f31f0427c17c 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/pmu.c
+++ b/tools/testing/selftests/kvm/lib/x86/pmu.c
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c
index 636b29ba8985..bd5a802fa7a5 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86/processor.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * tools/testing/selftests/kvm/lib/x86_64/processor.c
- *
* Copyright (C) 2018, Google LLC.
*/
diff --git a/tools/testing/selftests/kvm/lib/x86_64/sev.c b/tools/testing/selftests/kvm/lib/x86/sev.c
index e9535ee20b7f..e9535ee20b7f 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/sev.c
+++ b/tools/testing/selftests/kvm/lib/x86/sev.c
diff --git a/tools/testing/selftests/kvm/lib/x86_64/svm.c b/tools/testing/selftests/kvm/lib/x86/svm.c
index 5495a92dfd5a..d239c2097391 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/svm.c
+++ b/tools/testing/selftests/kvm/lib/x86/svm.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * tools/testing/selftests/kvm/lib/x86_64/svm.c
* Helpers used for nested SVM testing
* Largely inspired from KVM unit test svm.c
*
diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86/ucall.c
index 1265cecc7dd1..1265cecc7dd1 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c
+++ b/tools/testing/selftests/kvm/lib/x86/ucall.c
diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86/vmx.c
index d7ac122820bf..d4d1208dd023 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86/vmx.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * tools/testing/selftests/kvm/lib/x86_64/vmx.c
- *
* Copyright (C) 2018, Google LLC.
*/
diff --git a/tools/testing/selftests/kvm/max_guest_memory_test.c b/tools/testing/selftests/kvm/mmu_stress_test.c
index 0b9678858b6d..6a437d2be9fa 100644
--- a/tools/testing/selftests/kvm/max_guest_memory_test.c
+++ b/tools/testing/selftests/kvm/mmu_stress_test.c
@@ -15,16 +15,63 @@
#include "test_util.h"
#include "guest_modes.h"
#include "processor.h"
+#include "ucall_common.h"
+
+static bool mprotect_ro_done;
+static bool all_vcpus_hit_ro_fault;
static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride)
{
uint64_t gpa;
+ int i;
- for (;;) {
+ for (i = 0; i < 2; i++) {
for (gpa = start_gpa; gpa < end_gpa; gpa += stride)
- *((volatile uint64_t *)gpa) = gpa;
- GUEST_SYNC(0);
+ vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa);
+ GUEST_SYNC(i);
}
+
+ for (gpa = start_gpa; gpa < end_gpa; gpa += stride)
+ *((volatile uint64_t *)gpa);
+ GUEST_SYNC(2);
+
+ /*
+ * Write to the region while mprotect(PROT_READ) is underway. Keep
+ * looping until the memory is guaranteed to be read-only and a fault
+ * has occurred, otherwise vCPUs may complete their writes and advance
+ * to the next stage prematurely.
+ *
+ * For architectures that support skipping the faulting instruction,
+ * generate the store via inline assembly to ensure the exact length
+ * of the instruction is known and stable (vcpu_arch_put_guest() on
+ * fixed-length architectures should work, but the cost of paranoia
+ * is low in this case). For x86, hand-code the exact opcode so that
+ * there is no room for variability in the generated instruction.
+ */
+ do {
+ for (gpa = start_gpa; gpa < end_gpa; gpa += stride)
+#ifdef __x86_64__
+ asm volatile(".byte 0x48,0x89,0x00" :: "a"(gpa) : "memory"); /* mov %rax, (%rax) */
+#elif defined(__aarch64__)
+ asm volatile("str %0, [%0]" :: "r" (gpa) : "memory");
+#else
+ vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa);
+#endif
+ } while (!READ_ONCE(mprotect_ro_done) || !READ_ONCE(all_vcpus_hit_ro_fault));
+
+ /*
+ * Only architectures that write the entire range can explicitly sync,
+ * as other architectures will be stuck on the write fault.
+ */
+#if defined(__x86_64__) || defined(__aarch64__)
+ GUEST_SYNC(3);
+#endif
+
+ for (gpa = start_gpa; gpa < end_gpa; gpa += stride)
+ vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa);
+ GUEST_SYNC(4);
+
+ GUEST_ASSERT(0);
}
struct vcpu_info {
@@ -35,6 +82,7 @@ struct vcpu_info {
static int nr_vcpus;
static atomic_t rendezvous;
+static atomic_t nr_ro_faults;
static void rendezvous_with_boss(void)
{
@@ -51,34 +99,104 @@ static void rendezvous_with_boss(void)
}
}
-static void run_vcpu(struct kvm_vcpu *vcpu)
+static void assert_sync_stage(struct kvm_vcpu *vcpu, int stage)
+{
+ struct ucall uc;
+
+ TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC);
+ TEST_ASSERT_EQ(uc.args[1], stage);
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu, int stage)
{
vcpu_run(vcpu);
- TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC);
+ assert_sync_stage(vcpu, stage);
}
static void *vcpu_worker(void *data)
{
+ struct kvm_sregs __maybe_unused sregs;
struct vcpu_info *info = data;
struct kvm_vcpu *vcpu = info->vcpu;
struct kvm_vm *vm = vcpu->vm;
- struct kvm_sregs sregs;
+ int r;
vcpu_args_set(vcpu, 3, info->start_gpa, info->end_gpa, vm->page_size);
rendezvous_with_boss();
- run_vcpu(vcpu);
+ /* Stage 0, write all of guest memory. */
+ run_vcpu(vcpu, 0);
rendezvous_with_boss();
- vcpu_sregs_get(vcpu, &sregs);
#ifdef __x86_64__
+ vcpu_sregs_get(vcpu, &sregs);
/* Toggle CR0.WP to trigger a MMU context reset. */
sregs.cr0 ^= X86_CR0_WP;
-#endif
vcpu_sregs_set(vcpu, &sregs);
+#endif
rendezvous_with_boss();
- run_vcpu(vcpu);
+ /* Stage 1, re-write all of guest memory. */
+ run_vcpu(vcpu, 1);
+ rendezvous_with_boss();
+
+ /* Stage 2, read all of guest memory, which is now read-only. */
+ run_vcpu(vcpu, 2);
+
+ /*
+ * Stage 3, write guest memory and verify KVM returns -EFAULT for once
+ * the mprotect(PROT_READ) lands. Only architectures that support
+ * validating *all* of guest memory sync for this stage, as vCPUs will
+ * be stuck on the faulting instruction for other architectures. Go to
+ * stage 3 without a rendezvous
+ */
+ r = _vcpu_run(vcpu);
+ TEST_ASSERT(r == -1 && errno == EFAULT,
+ "Expected EFAULT on write to RO memory, got r = %d, errno = %d", r, errno);
+
+ atomic_inc(&nr_ro_faults);
+ if (atomic_read(&nr_ro_faults) == nr_vcpus) {
+ WRITE_ONCE(all_vcpus_hit_ro_fault, true);
+ sync_global_to_guest(vm, all_vcpus_hit_ro_fault);
+ }
+
+#if defined(__x86_64__) || defined(__aarch64__)
+ /*
+ * Verify *all* writes from the guest hit EFAULT due to the VMA now
+ * being read-only. x86 and arm64 only at this time as skipping the
+ * instruction that hits the EFAULT requires advancing the program
+ * counter, which is arch specific and relies on inline assembly.
+ */
+#ifdef __x86_64__
+ vcpu->run->kvm_valid_regs = KVM_SYNC_X86_REGS;
+#endif
+ for (;;) {
+ r = _vcpu_run(vcpu);
+ if (!r)
+ break;
+ TEST_ASSERT_EQ(errno, EFAULT);
+#if defined(__x86_64__)
+ WRITE_ONCE(vcpu->run->kvm_dirty_regs, KVM_SYNC_X86_REGS);
+ vcpu->run->s.regs.regs.rip += 3;
+#elif defined(__aarch64__)
+ vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc),
+ vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc)) + 4);
+#endif
+
+ }
+ assert_sync_stage(vcpu, 3);
+#endif /* __x86_64__ || __aarch64__ */
+ rendezvous_with_boss();
+
+ /*
+ * Stage 4. Run to completion, waiting for mprotect(PROT_WRITE) to
+ * make the memory writable again.
+ */
+ do {
+ r = _vcpu_run(vcpu);
+ } while (r && errno == EFAULT);
+ TEST_ASSERT_EQ(r, 0);
+ assert_sync_stage(vcpu, 4);
rendezvous_with_boss();
return NULL;
@@ -161,7 +279,7 @@ int main(int argc, char *argv[])
const uint64_t start_gpa = SZ_4G;
const int first_slot = 1;
- struct timespec time_start, time_run1, time_reset, time_run2;
+ struct timespec time_start, time_run1, time_reset, time_run2, time_ro, time_rw;
uint64_t max_gpa, gpa, slot_size, max_mem, i;
int max_slots, slot, opt, fd;
bool hugepages = false;
@@ -209,7 +327,13 @@ int main(int argc, char *argv[])
vcpus = malloc(nr_vcpus * sizeof(*vcpus));
TEST_ASSERT(vcpus, "Failed to allocate vCPU array");
- vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
+ vm = __vm_create_with_vcpus(VM_SHAPE_DEFAULT, nr_vcpus,
+#ifdef __x86_64__
+ max_mem / SZ_1G,
+#else
+ max_mem / vm_guest_mode_params[VM_MODE_DEFAULT].page_size,
+#endif
+ guest_code, vcpus);
max_gpa = vm->max_gfn << vm->page_shift;
TEST_ASSERT(max_gpa > (4 * slot_size), "MAXPHYADDR <4gb ");
@@ -259,14 +383,27 @@ int main(int argc, char *argv[])
rendezvous_with_vcpus(&time_reset, "reset");
rendezvous_with_vcpus(&time_run2, "run 2");
+ mprotect(mem, slot_size, PROT_READ);
+ mprotect_ro_done = true;
+ sync_global_to_guest(vm, mprotect_ro_done);
+
+ rendezvous_with_vcpus(&time_ro, "mprotect RO");
+ mprotect(mem, slot_size, PROT_READ | PROT_WRITE);
+ rendezvous_with_vcpus(&time_rw, "mprotect RW");
+
+ time_rw = timespec_sub(time_rw, time_ro);
+ time_ro = timespec_sub(time_ro, time_run2);
time_run2 = timespec_sub(time_run2, time_reset);
- time_reset = timespec_sub(time_reset, time_run1);
+ time_reset = timespec_sub(time_reset, time_run1);
time_run1 = timespec_sub(time_run1, time_start);
- pr_info("run1 = %ld.%.9lds, reset = %ld.%.9lds, run2 = %ld.%.9lds\n",
+ pr_info("run1 = %ld.%.9lds, reset = %ld.%.9lds, run2 = %ld.%.9lds, "
+ "ro = %ld.%.9lds, rw = %ld.%.9lds\n",
time_run1.tv_sec, time_run1.tv_nsec,
time_reset.tv_sec, time_reset.tv_nsec,
- time_run2.tv_sec, time_run2.tv_nsec);
+ time_run2.tv_sec, time_run2.tv_nsec,
+ time_ro.tv_sec, time_ro.tv_nsec,
+ time_rw.tv_sec, time_rw.tv_nsec);
/*
* Delete even numbered slots (arbitrary) and unmap the first half of
diff --git a/tools/testing/selftests/kvm/riscv/arch_timer.c b/tools/testing/selftests/kvm/riscv/arch_timer.c
index 2c792228ac0b..9e370800a6a2 100644
--- a/tools/testing/selftests/kvm/riscv/arch_timer.c
+++ b/tools/testing/selftests/kvm/riscv/arch_timer.c
@@ -93,7 +93,7 @@ struct kvm_vm *test_vm_create(void)
vcpu_init_vector_tables(vcpus[i]);
/* Initialize guest timer frequency. */
- vcpu_get_reg(vcpus[0], RISCV_TIMER_REG(frequency), &timer_freq);
+ timer_freq = vcpu_get_reg(vcpus[0], RISCV_TIMER_REG(frequency));
sync_global_to_guest(vm, timer_freq);
pr_debug("timer_freq: %lu\n", timer_freq);
diff --git a/tools/testing/selftests/kvm/riscv/ebreak_test.c b/tools/testing/selftests/kvm/riscv/ebreak_test.c
index 0e0712854953..cfed6c727bfc 100644
--- a/tools/testing/selftests/kvm/riscv/ebreak_test.c
+++ b/tools/testing/selftests/kvm/riscv/ebreak_test.c
@@ -60,7 +60,7 @@ int main(void)
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG);
- vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc), &pc);
+ pc = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc));
TEST_ASSERT_EQ(pc, LABEL_ADDRESS(sw_bp_1));
/* skip sw_bp_1 */
diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c
index 4bc1051848e5..8515921dfdbf 100644
--- a/tools/testing/selftests/kvm/riscv/get-reg-list.c
+++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c
@@ -52,6 +52,8 @@ bool filter_reg(__u64 reg)
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVINVAL:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVNAPOT:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVPBMT:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVVPTC:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZABHA:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZACAS:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZAWRS:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBA:
@@ -71,6 +73,7 @@ bool filter_reg(__u64 reg)
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFHMIN:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOM:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOZ:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICCRSE:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICNTR:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICOND:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICSR:
@@ -112,6 +115,7 @@ bool filter_reg(__u64 reg)
case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_HSM:
case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_PMU:
case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_DBCN:
+ case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_SUSP:
case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_STA:
case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_EXPERIMENTAL:
case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_VENDOR:
@@ -429,6 +433,8 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off)
KVM_ISA_EXT_ARR(SVINVAL),
KVM_ISA_EXT_ARR(SVNAPOT),
KVM_ISA_EXT_ARR(SVPBMT),
+ KVM_ISA_EXT_ARR(SVVPTC),
+ KVM_ISA_EXT_ARR(ZABHA),
KVM_ISA_EXT_ARR(ZACAS),
KVM_ISA_EXT_ARR(ZAWRS),
KVM_ISA_EXT_ARR(ZBA),
@@ -448,6 +454,7 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off)
KVM_ISA_EXT_ARR(ZFHMIN),
KVM_ISA_EXT_ARR(ZICBOM),
KVM_ISA_EXT_ARR(ZICBOZ),
+ KVM_ISA_EXT_ARR(ZICCRSE),
KVM_ISA_EXT_ARR(ZICNTR),
KVM_ISA_EXT_ARR(ZICOND),
KVM_ISA_EXT_ARR(ZICSR),
@@ -535,10 +542,11 @@ static const char *sbi_ext_single_id_to_str(__u64 reg_off)
KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_SRST),
KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_HSM),
KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_PMU),
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_DBCN),
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_SUSP),
KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_STA),
KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_EXPERIMENTAL),
KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_VENDOR),
- KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_DBCN),
};
if (reg_off >= ARRAY_SIZE(kvm_sbi_ext_reg_name))
@@ -949,6 +957,7 @@ KVM_SBI_EXT_SUBLIST_CONFIG(base, BASE);
KVM_SBI_EXT_SUBLIST_CONFIG(sta, STA);
KVM_SBI_EXT_SIMPLE_CONFIG(pmu, PMU);
KVM_SBI_EXT_SIMPLE_CONFIG(dbcn, DBCN);
+KVM_SBI_EXT_SIMPLE_CONFIG(susp, SUSP);
KVM_ISA_EXT_SUBLIST_CONFIG(aia, AIA);
KVM_ISA_EXT_SUBLIST_CONFIG(fp_f, FP_F);
@@ -964,6 +973,8 @@ KVM_ISA_EXT_SIMPLE_CONFIG(svadu, SVADU);
KVM_ISA_EXT_SIMPLE_CONFIG(svinval, SVINVAL);
KVM_ISA_EXT_SIMPLE_CONFIG(svnapot, SVNAPOT);
KVM_ISA_EXT_SIMPLE_CONFIG(svpbmt, SVPBMT);
+KVM_ISA_EXT_SIMPLE_CONFIG(svvptc, SVVPTC);
+KVM_ISA_EXT_SIMPLE_CONFIG(zabha, ZABHA);
KVM_ISA_EXT_SIMPLE_CONFIG(zacas, ZACAS);
KVM_ISA_EXT_SIMPLE_CONFIG(zawrs, ZAWRS);
KVM_ISA_EXT_SIMPLE_CONFIG(zba, ZBA);
@@ -983,6 +994,7 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zfh, ZFH);
KVM_ISA_EXT_SIMPLE_CONFIG(zfhmin, ZFHMIN);
KVM_ISA_EXT_SUBLIST_CONFIG(zicbom, ZICBOM);
KVM_ISA_EXT_SUBLIST_CONFIG(zicboz, ZICBOZ);
+KVM_ISA_EXT_SIMPLE_CONFIG(ziccrse, ZICCRSE);
KVM_ISA_EXT_SIMPLE_CONFIG(zicntr, ZICNTR);
KVM_ISA_EXT_SIMPLE_CONFIG(zicond, ZICOND);
KVM_ISA_EXT_SIMPLE_CONFIG(zicsr, ZICSR);
@@ -1017,6 +1029,7 @@ struct vcpu_reg_list *vcpu_configs[] = {
&config_sbi_sta,
&config_sbi_pmu,
&config_sbi_dbcn,
+ &config_sbi_susp,
&config_aia,
&config_fp_f,
&config_fp_d,
@@ -1031,6 +1044,8 @@ struct vcpu_reg_list *vcpu_configs[] = {
&config_svinval,
&config_svnapot,
&config_svpbmt,
+ &config_svvptc,
+ &config_zabha,
&config_zacas,
&config_zawrs,
&config_zba,
@@ -1050,6 +1065,7 @@ struct vcpu_reg_list *vcpu_configs[] = {
&config_zfhmin,
&config_zicbom,
&config_zicboz,
+ &config_ziccrse,
&config_zicntr,
&config_zicond,
&config_zicsr,
diff --git a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
index f299cbfd23ca..03406de4989d 100644
--- a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
+++ b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
@@ -39,7 +39,13 @@ static bool illegal_handler_invoked;
#define SBI_PMU_TEST_SNAPSHOT BIT(2)
#define SBI_PMU_TEST_OVERFLOW BIT(3)
-static int disabled_tests;
+#define SBI_PMU_OVERFLOW_IRQNUM_DEFAULT 5
+struct test_args {
+ int disabled_tests;
+ int overflow_irqnum;
+};
+
+static struct test_args targs;
unsigned long pmu_csr_read_num(int csr_num)
{
@@ -118,8 +124,8 @@ static void stop_counter(unsigned long counter, unsigned long stop_flags)
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, counter, 1, stop_flags,
0, 0, 0);
- __GUEST_ASSERT(ret.error == 0, "Unable to stop counter %ld error %ld\n",
- counter, ret.error);
+ __GUEST_ASSERT(ret.error == 0 || ret.error == SBI_ERR_ALREADY_STOPPED,
+ "Unable to stop counter %ld error %ld\n", counter, ret.error);
}
static void guest_illegal_exception_handler(struct ex_regs *regs)
@@ -137,7 +143,6 @@ static void guest_irq_handler(struct ex_regs *regs)
unsigned int irq_num = regs->cause & ~CAUSE_IRQ_FLAG;
struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva;
unsigned long overflown_mask;
- unsigned long counter_val = 0;
/* Validate that we are in the correct irq handler */
GUEST_ASSERT_EQ(irq_num, IRQ_PMU_OVF);
@@ -151,10 +156,6 @@ static void guest_irq_handler(struct ex_regs *regs)
GUEST_ASSERT(overflown_mask & 0x01);
WRITE_ONCE(vcpu_shared_irq_count, vcpu_shared_irq_count+1);
-
- counter_val = READ_ONCE(snapshot_data->ctr_values[0]);
- /* Now start the counter to mimick the real driver behavior */
- start_counter(counter_in_use, SBI_PMU_START_FLAG_SET_INIT_VALUE, counter_val);
}
static unsigned long get_counter_index(unsigned long cbase, unsigned long cmask,
@@ -479,7 +480,7 @@ static void test_pmu_events_snaphost(void)
static void test_pmu_events_overflow(void)
{
- int num_counters = 0;
+ int num_counters = 0, i = 0;
/* Verify presence of SBI PMU and minimum requrired SBI version */
verify_sbi_requirement_assert();
@@ -496,11 +497,15 @@ static void test_pmu_events_overflow(void)
* Qemu supports overflow for cycle/instruction.
* This test may fail on any platform that do not support overflow for these two events.
*/
- test_pmu_event_overflow(SBI_PMU_HW_CPU_CYCLES);
- GUEST_ASSERT_EQ(vcpu_shared_irq_count, 1);
+ for (i = 0; i < targs.overflow_irqnum; i++)
+ test_pmu_event_overflow(SBI_PMU_HW_CPU_CYCLES);
+ GUEST_ASSERT_EQ(vcpu_shared_irq_count, targs.overflow_irqnum);
+
+ vcpu_shared_irq_count = 0;
- test_pmu_event_overflow(SBI_PMU_HW_INSTRUCTIONS);
- GUEST_ASSERT_EQ(vcpu_shared_irq_count, 2);
+ for (i = 0; i < targs.overflow_irqnum; i++)
+ test_pmu_event_overflow(SBI_PMU_HW_INSTRUCTIONS);
+ GUEST_ASSERT_EQ(vcpu_shared_irq_count, targs.overflow_irqnum);
GUEST_DONE();
}
@@ -608,8 +613,12 @@ static void test_vm_events_overflow(void *guest_code)
vcpu_init_vector_tables(vcpu);
/* Initialize guest timer frequency. */
- vcpu_get_reg(vcpu, RISCV_TIMER_REG(frequency), &timer_freq);
+ timer_freq = vcpu_get_reg(vcpu, RISCV_TIMER_REG(frequency));
+
+ /* Export the shared variables to the guest */
sync_global_to_guest(vm, timer_freq);
+ sync_global_to_guest(vm, vcpu_shared_irq_count);
+ sync_global_to_guest(vm, targs);
run_vcpu(vcpu);
@@ -618,28 +627,38 @@ static void test_vm_events_overflow(void *guest_code)
static void test_print_help(char *name)
{
- pr_info("Usage: %s [-h] [-d <test name>]\n", name);
- pr_info("\t-d: Test to disable. Available tests are 'basic', 'events', 'snapshot', 'overflow'\n");
+ pr_info("Usage: %s [-h] [-t <test name>] [-n <number of LCOFI interrupt for overflow test>]\n",
+ name);
+ pr_info("\t-t: Test to run (default all). Available tests are 'basic', 'events', 'snapshot', 'overflow'\n");
+ pr_info("\t-n: Number of LCOFI interrupt to trigger for each event in overflow test (default: %d)\n",
+ SBI_PMU_OVERFLOW_IRQNUM_DEFAULT);
pr_info("\t-h: print this help screen\n");
}
static bool parse_args(int argc, char *argv[])
{
int opt;
+ int temp_disabled_tests = SBI_PMU_TEST_BASIC | SBI_PMU_TEST_EVENTS | SBI_PMU_TEST_SNAPSHOT |
+ SBI_PMU_TEST_OVERFLOW;
+ int overflow_interrupts = 0;
- while ((opt = getopt(argc, argv, "hd:")) != -1) {
+ while ((opt = getopt(argc, argv, "ht:n:")) != -1) {
switch (opt) {
- case 'd':
+ case 't':
if (!strncmp("basic", optarg, 5))
- disabled_tests |= SBI_PMU_TEST_BASIC;
+ temp_disabled_tests &= ~SBI_PMU_TEST_BASIC;
else if (!strncmp("events", optarg, 6))
- disabled_tests |= SBI_PMU_TEST_EVENTS;
+ temp_disabled_tests &= ~SBI_PMU_TEST_EVENTS;
else if (!strncmp("snapshot", optarg, 8))
- disabled_tests |= SBI_PMU_TEST_SNAPSHOT;
+ temp_disabled_tests &= ~SBI_PMU_TEST_SNAPSHOT;
else if (!strncmp("overflow", optarg, 8))
- disabled_tests |= SBI_PMU_TEST_OVERFLOW;
+ temp_disabled_tests &= ~SBI_PMU_TEST_OVERFLOW;
else
goto done;
+ targs.disabled_tests = temp_disabled_tests;
+ break;
+ case 'n':
+ overflow_interrupts = atoi_positive("Number of LCOFI", optarg);
break;
case 'h':
default:
@@ -647,6 +666,15 @@ static bool parse_args(int argc, char *argv[])
}
}
+ if (overflow_interrupts > 0) {
+ if (targs.disabled_tests & SBI_PMU_TEST_OVERFLOW) {
+ pr_info("-n option is only available for overflow test\n");
+ goto done;
+ } else {
+ targs.overflow_irqnum = overflow_interrupts;
+ }
+ }
+
return true;
done:
test_print_help(argv[0]);
@@ -655,25 +683,28 @@ done:
int main(int argc, char *argv[])
{
+ targs.disabled_tests = 0;
+ targs.overflow_irqnum = SBI_PMU_OVERFLOW_IRQNUM_DEFAULT;
+
if (!parse_args(argc, argv))
exit(KSFT_SKIP);
- if (!(disabled_tests & SBI_PMU_TEST_BASIC)) {
+ if (!(targs.disabled_tests & SBI_PMU_TEST_BASIC)) {
test_vm_basic_test(test_pmu_basic_sanity);
pr_info("SBI PMU basic test : PASS\n");
}
- if (!(disabled_tests & SBI_PMU_TEST_EVENTS)) {
+ if (!(targs.disabled_tests & SBI_PMU_TEST_EVENTS)) {
test_vm_events_test(test_pmu_events);
pr_info("SBI PMU event verification test : PASS\n");
}
- if (!(disabled_tests & SBI_PMU_TEST_SNAPSHOT)) {
+ if (!(targs.disabled_tests & SBI_PMU_TEST_SNAPSHOT)) {
test_vm_events_snapshot_test(test_pmu_events_snaphost);
pr_info("SBI PMU event verification with snapshot test : PASS\n");
}
- if (!(disabled_tests & SBI_PMU_TEST_OVERFLOW)) {
+ if (!(targs.disabled_tests & SBI_PMU_TEST_OVERFLOW)) {
test_vm_events_overflow(test_pmu_events_overflow);
pr_info("SBI PMU event verification with overflow test : PASS\n");
}
diff --git a/tools/testing/selftests/kvm/s390x/cmma_test.c b/tools/testing/selftests/kvm/s390/cmma_test.c
index e32dd59703a0..85cc8c18d6e7 100644
--- a/tools/testing/selftests/kvm/s390x/cmma_test.c
+++ b/tools/testing/selftests/kvm/s390/cmma_test.c
@@ -444,7 +444,7 @@ static void assert_no_pages_cmma_dirty(struct kvm_vm *vm)
);
}
-static void test_get_inital_dirty(void)
+static void test_get_initial_dirty(void)
{
struct kvm_vm *vm = create_vm_two_memslots();
struct kvm_vcpu *vcpu;
@@ -651,7 +651,7 @@ struct testdef {
} testlist[] = {
{ "migration mode and dirty tracking", test_migration_mode },
{ "GET_CMMA_BITS: basic calls", test_get_cmma_basic },
- { "GET_CMMA_BITS: all pages are dirty initally", test_get_inital_dirty },
+ { "GET_CMMA_BITS: all pages are dirty initially", test_get_initial_dirty },
{ "GET_CMMA_BITS: holes are skipped", test_get_skip_holes },
};
diff --git a/tools/testing/selftests/kvm/s390x/config b/tools/testing/selftests/kvm/s390/config
index 23270f2d679f..23270f2d679f 100644
--- a/tools/testing/selftests/kvm/s390x/config
+++ b/tools/testing/selftests/kvm/s390/config
diff --git a/tools/testing/selftests/kvm/s390x/cpumodel_subfuncs_test.c b/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c
index 27255880dabd..27255880dabd 100644
--- a/tools/testing/selftests/kvm/s390x/cpumodel_subfuncs_test.c
+++ b/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c
diff --git a/tools/testing/selftests/kvm/s390x/debug_test.c b/tools/testing/selftests/kvm/s390/debug_test.c
index ad8095968601..ad8095968601 100644
--- a/tools/testing/selftests/kvm/s390x/debug_test.c
+++ b/tools/testing/selftests/kvm/s390/debug_test.c
diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390/memop.c
index 4374b4cd2a80..4374b4cd2a80 100644
--- a/tools/testing/selftests/kvm/s390x/memop.c
+++ b/tools/testing/selftests/kvm/s390/memop.c
diff --git a/tools/testing/selftests/kvm/s390x/resets.c b/tools/testing/selftests/kvm/s390/resets.c
index 357943f2bea8..b58f75b381e5 100644
--- a/tools/testing/selftests/kvm/s390x/resets.c
+++ b/tools/testing/selftests/kvm/s390/resets.c
@@ -61,7 +61,7 @@ static void test_one_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t value)
{
uint64_t eval_reg;
- vcpu_get_reg(vcpu, id, &eval_reg);
+ eval_reg = vcpu_get_reg(vcpu, id);
TEST_ASSERT(eval_reg == value, "value == 0x%lx", value);
}
diff --git a/tools/testing/selftests/kvm/s390x/shared_zeropage_test.c b/tools/testing/selftests/kvm/s390/shared_zeropage_test.c
index bba0d9a6dcc8..bba0d9a6dcc8 100644
--- a/tools/testing/selftests/kvm/s390x/shared_zeropage_test.c
+++ b/tools/testing/selftests/kvm/s390/shared_zeropage_test.c
diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390/sync_regs_test.c
index 53def355ccba..53def355ccba 100644
--- a/tools/testing/selftests/kvm/s390x/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/s390/sync_regs_test.c
diff --git a/tools/testing/selftests/kvm/s390x/tprot.c b/tools/testing/selftests/kvm/s390/tprot.c
index 12d5e1cb62e3..12d5e1cb62e3 100644
--- a/tools/testing/selftests/kvm/s390x/tprot.c
+++ b/tools/testing/selftests/kvm/s390/tprot.c
diff --git a/tools/testing/selftests/kvm/s390x/ucontrol_test.c b/tools/testing/selftests/kvm/s390/ucontrol_test.c
index 135ee22856cf..d265b34c54be 100644
--- a/tools/testing/selftests/kvm/s390x/ucontrol_test.c
+++ b/tools/testing/selftests/kvm/s390/ucontrol_test.c
@@ -88,10 +88,6 @@ asm("test_skey_asm:\n"
" ahi %r0,1\n"
" st %r1,0(%r5,%r6)\n"
- " iske %r1,%r6\n"
- " ahi %r0,1\n"
- " diag 0,0,0x44\n"
-
" sske %r1,%r6\n"
" xgr %r1,%r1\n"
" iske %r1,%r6\n"
@@ -459,10 +455,14 @@ TEST_F(uc_kvm, uc_no_user_region)
};
ASSERT_EQ(-1, ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION, &region));
- ASSERT_EQ(EINVAL, errno);
+ ASSERT_TRUE(errno == EEXIST || errno == EINVAL)
+ TH_LOG("errno %s (%i) not expected for ioctl KVM_SET_USER_MEMORY_REGION",
+ strerror(errno), errno);
ASSERT_EQ(-1, ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION2, &region2));
- ASSERT_EQ(EINVAL, errno);
+ ASSERT_TRUE(errno == EEXIST || errno == EINVAL)
+ TH_LOG("errno %s (%i) not expected for ioctl KVM_SET_USER_MEMORY_REGION2",
+ strerror(errno), errno);
}
TEST_F(uc_kvm, uc_map_unmap)
@@ -596,7 +596,9 @@ TEST_F(uc_kvm, uc_skey)
ASSERT_EQ(true, uc_handle_exit(self));
ASSERT_EQ(1, sync_regs->gprs[0]);
- /* ISKE */
+ /* SSKE + ISKE */
+ sync_regs->gprs[1] = skeyvalue;
+ run->kvm_dirty_regs |= KVM_SYNC_GPRS;
ASSERT_EQ(0, uc_run_once(self));
/*
@@ -608,21 +610,11 @@ TEST_F(uc_kvm, uc_skey)
TEST_ASSERT_EQ(0, sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE));
TEST_ASSERT_EQ(KVM_EXIT_S390_SIEIC, self->run->exit_reason);
TEST_ASSERT_EQ(ICPT_INST, sie_block->icptcode);
- TEST_REQUIRE(sie_block->ipa != 0xb229);
+ TEST_REQUIRE(sie_block->ipa != 0xb22b);
- /* ISKE contd. */
+ /* SSKE + ISKE contd. */
ASSERT_EQ(false, uc_handle_exit(self));
ASSERT_EQ(2, sync_regs->gprs[0]);
- /* assert initial skey (ACC = 0, R & C = 1) */
- ASSERT_EQ(0x06, sync_regs->gprs[1]);
- uc_assert_diag44(self);
-
- /* SSKE + ISKE */
- sync_regs->gprs[1] = skeyvalue;
- run->kvm_dirty_regs |= KVM_SYNC_GPRS;
- ASSERT_EQ(0, uc_run_once(self));
- ASSERT_EQ(false, uc_handle_exit(self));
- ASSERT_EQ(3, sync_regs->gprs[0]);
ASSERT_EQ(skeyvalue, sync_regs->gprs[1]);
uc_assert_diag44(self);
@@ -631,7 +623,7 @@ TEST_F(uc_kvm, uc_skey)
run->kvm_dirty_regs |= KVM_SYNC_GPRS;
ASSERT_EQ(0, uc_run_once(self));
ASSERT_EQ(false, uc_handle_exit(self));
- ASSERT_EQ(4, sync_regs->gprs[0]);
+ ASSERT_EQ(3, sync_regs->gprs[0]);
/* assert R reset but rest of skey unchanged */
ASSERT_EQ(skeyvalue & 0xfa, sync_regs->gprs[1]);
ASSERT_EQ(0, sync_regs->gprs[1] & 0x04);
diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c
index a8267628e9ed..bc440d5aba57 100644
--- a/tools/testing/selftests/kvm/set_memory_region_test.c
+++ b/tools/testing/selftests/kvm/set_memory_region_test.c
@@ -17,9 +17,9 @@
#include <processor.h>
/*
- * s390x needs at least 1MB alignment, and the x86_64 MOVE/DELETE tests need a
- * 2MB sized and aligned region so that the initial region corresponds to
- * exactly one large page.
+ * s390 needs at least 1MB alignment, and the x86 MOVE/DELETE tests need a 2MB
+ * sized and aligned region so that the initial region corresponds to exactly
+ * one large page.
*/
#define MEM_REGION_SIZE 0x200000
@@ -235,7 +235,7 @@ static void guest_code_delete_memory_region(void)
* in the guest will never succeed, and so isn't an option.
*/
memset(&idt, 0, sizeof(idt));
- __asm__ __volatile__("lidt %0" :: "m"(idt));
+ set_idt(&idt);
GUEST_SYNC(0);
@@ -553,6 +553,56 @@ static void test_add_overlapping_private_memory_regions(void)
close(memfd);
kvm_vm_free(vm);
}
+
+static void guest_code_mmio_during_vectoring(void)
+{
+ const struct desc_ptr idt_desc = {
+ .address = MEM_REGION_GPA,
+ .size = 0xFFF,
+ };
+
+ set_idt(&idt_desc);
+
+ /* Generate a #GP by dereferencing a non-canonical address */
+ *((uint8_t *)NONCANONICAL) = 0x1;
+
+ GUEST_ASSERT(0);
+}
+
+/*
+ * This test points the IDT descriptor base to an MMIO address. It should cause
+ * a KVM internal error when an event occurs in the guest.
+ */
+static void test_mmio_during_vectoring(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+ u64 expected_gpa;
+
+ pr_info("Testing MMIO during vectoring error handling\n");
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_mmio_during_vectoring);
+ virt_map(vm, MEM_REGION_GPA, MEM_REGION_GPA, 1);
+
+ run = vcpu->run;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
+ TEST_ASSERT(run->internal.suberror == KVM_INTERNAL_ERROR_DELIVERY_EV,
+ "Unexpected suberror = %d", vcpu->run->internal.suberror);
+ TEST_ASSERT(run->internal.ndata != 4, "Unexpected internal error data array size = %d",
+ run->internal.ndata);
+
+ /* The reported GPA should be IDT base + offset of the GP vector */
+ expected_gpa = MEM_REGION_GPA + GP_VECTOR * sizeof(struct idt_entry);
+
+ TEST_ASSERT(run->internal.data[3] == expected_gpa,
+ "Unexpected GPA = %llx (expected %lx)",
+ vcpu->run->internal.data[3], expected_gpa);
+
+ kvm_vm_free(vm);
+}
#endif
int main(int argc, char *argv[])
@@ -568,6 +618,7 @@ int main(int argc, char *argv[])
* KVM_RUN fails with ENOEXEC or EFAULT.
*/
test_zero_memory_regions();
+ test_mmio_during_vectoring();
#endif
test_invalid_memory_region_flags();
diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c
index a8d3afa0b86b..cce2520af720 100644
--- a/tools/testing/selftests/kvm/steal_time.c
+++ b/tools/testing/selftests/kvm/steal_time.c
@@ -269,9 +269,8 @@ static void guest_code(int cpu)
static bool is_steal_time_supported(struct kvm_vcpu *vcpu)
{
uint64_t id = RISCV_SBI_EXT_REG(KVM_RISCV_SBI_EXT_STA);
- unsigned long enabled;
+ unsigned long enabled = vcpu_get_reg(vcpu, id);
- vcpu_get_reg(vcpu, id, &enabled);
TEST_ASSERT(enabled == 0 || enabled == 1, "Expected boolean result");
return enabled;
diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86/amx_test.c
index f4ce5a185a7d..f4ce5a185a7d 100644
--- a/tools/testing/selftests/kvm/x86_64/amx_test.c
+++ b/tools/testing/selftests/kvm/x86/amx_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/apic_bus_clock_test.c b/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c
index f8916bb34405..f8916bb34405 100644
--- a/tools/testing/selftests/kvm/x86_64/apic_bus_clock_test.c
+++ b/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/cpuid_test.c b/tools/testing/selftests/kvm/x86/cpuid_test.c
index 7b3fda6842bc..7b3fda6842bc 100644
--- a/tools/testing/selftests/kvm/x86_64/cpuid_test.c
+++ b/tools/testing/selftests/kvm/x86/cpuid_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86/cr4_cpuid_sync_test.c
index 28cc66454601..28cc66454601 100644
--- a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
+++ b/tools/testing/selftests/kvm/x86/cr4_cpuid_sync_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86/debug_regs.c
index 2d814c1d1dc4..2d814c1d1dc4 100644
--- a/tools/testing/selftests/kvm/x86_64/debug_regs.c
+++ b/tools/testing/selftests/kvm/x86/debug_regs.c
diff --git a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c b/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c
index 2929c067c207..b0d2b04a7ff2 100644
--- a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c
+++ b/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c
@@ -41,9 +41,9 @@ struct kvm_page_stats {
static void get_page_stats(struct kvm_vm *vm, struct kvm_page_stats *stats, const char *stage)
{
- stats->pages_4k = vm_get_stat(vm, "pages_4k");
- stats->pages_2m = vm_get_stat(vm, "pages_2m");
- stats->pages_1g = vm_get_stat(vm, "pages_1g");
+ stats->pages_4k = vm_get_stat(vm, pages_4k);
+ stats->pages_2m = vm_get_stat(vm, pages_2m);
+ stats->pages_1g = vm_get_stat(vm, pages_1g);
stats->hugepages = stats->pages_2m + stats->pages_1g;
pr_debug("\nPage stats after %s: 4K: %ld 2M: %ld 1G: %ld huge: %ld\n",
diff --git a/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c b/tools/testing/selftests/kvm/x86/exit_on_emulation_failure_test.c
index 81055476d394..81055476d394 100644
--- a/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c
+++ b/tools/testing/selftests/kvm/x86/exit_on_emulation_failure_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/feature_msrs_test.c b/tools/testing/selftests/kvm/x86/feature_msrs_test.c
index a72f13ae2edb..a72f13ae2edb 100644
--- a/tools/testing/selftests/kvm/x86_64/feature_msrs_test.c
+++ b/tools/testing/selftests/kvm/x86/feature_msrs_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c b/tools/testing/selftests/kvm/x86/fix_hypercall_test.c
index 762628f7d4ba..762628f7d4ba 100644
--- a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c
+++ b/tools/testing/selftests/kvm/x86/fix_hypercall_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/flds_emulation.h b/tools/testing/selftests/kvm/x86/flds_emulation.h
index 37b1a9f52864..37b1a9f52864 100644
--- a/tools/testing/selftests/kvm/x86_64/flds_emulation.h
+++ b/tools/testing/selftests/kvm/x86/flds_emulation.h
diff --git a/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c b/tools/testing/selftests/kvm/x86/hwcr_msr_test.c
index 10b1b0ba374e..10b1b0ba374e 100644
--- a/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c
+++ b/tools/testing/selftests/kvm/x86/hwcr_msr_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86/hyperv_clock.c
index e058bc676cd6..e058bc676cd6 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_clock.c
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c
index 4f5881d4ef66..4e920705681a 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c
@@ -41,13 +41,19 @@ static bool smt_possible(void)
return res;
}
-static void test_hv_cpuid(const struct kvm_cpuid2 *hv_cpuid_entries,
- bool evmcs_expected)
+static void test_hv_cpuid(struct kvm_vcpu *vcpu, bool evmcs_expected)
{
+ const bool has_irqchip = !vcpu || vcpu->vm->has_irqchip;
+ const struct kvm_cpuid2 *hv_cpuid_entries;
int i;
int nent_expected = 10;
u32 test_val;
+ if (vcpu)
+ hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu);
+ else
+ hv_cpuid_entries = kvm_get_supported_hv_cpuid();
+
TEST_ASSERT(hv_cpuid_entries->nent == nent_expected,
"KVM_GET_SUPPORTED_HV_CPUID should return %d entries"
" (returned %d)",
@@ -80,12 +86,19 @@ static void test_hv_cpuid(const struct kvm_cpuid2 *hv_cpuid_entries,
entry->eax, evmcs_expected
);
break;
+ case 0x40000003:
+ TEST_ASSERT(has_irqchip || !(entry->edx & BIT(19)),
+ "\"Direct\" Synthetic Timers should require in-kernel APIC");
+ break;
case 0x40000004:
test_val = entry->eax & (1UL << 18);
TEST_ASSERT(!!test_val == !smt_possible(),
"NoNonArchitecturalCoreSharing bit"
" doesn't reflect SMT setting");
+
+ TEST_ASSERT(has_irqchip || !(entry->eax & BIT(10)),
+ "Cluster IPI (i.e. SEND_IPI) should require in-kernel APIC");
break;
case 0x4000000A:
TEST_ASSERT(entry->eax & (1UL << 19),
@@ -109,9 +122,16 @@ static void test_hv_cpuid(const struct kvm_cpuid2 *hv_cpuid_entries,
* entry->edx);
*/
}
+
+ /*
+ * Note, the CPUID array returned by the system-scoped helper is a one-
+ * time allocation, i.e. must not be freed.
+ */
+ if (vcpu)
+ free((void *)hv_cpuid_entries);
}
-void test_hv_cpuid_e2big(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+static void test_hv_cpuid_e2big(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
static struct kvm_cpuid2 cpuid = {.nent = 0};
int ret;
@@ -129,19 +149,20 @@ void test_hv_cpuid_e2big(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
int main(int argc, char *argv[])
{
struct kvm_vm *vm;
- const struct kvm_cpuid2 *hv_cpuid_entries;
struct kvm_vcpu *vcpu;
TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID));
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ /* Test the vCPU ioctl without an in-kernel local APIC. */
+ vm = vm_create_barebones();
+ vcpu = __vm_vcpu_add(vm, 0);
+ test_hv_cpuid(vcpu, false);
+ kvm_vm_free(vm);
/* Test vCPU ioctl version */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
test_hv_cpuid_e2big(vm, vcpu);
-
- hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu);
- test_hv_cpuid(hv_cpuid_entries, false);
- free((void *)hv_cpuid_entries);
+ test_hv_cpuid(vcpu, false);
if (!kvm_cpu_has(X86_FEATURE_VMX) ||
!kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
@@ -149,9 +170,7 @@ int main(int argc, char *argv[])
goto do_sys;
}
vcpu_enable_evmcs(vcpu);
- hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu);
- test_hv_cpuid(hv_cpuid_entries, true);
- free((void *)hv_cpuid_entries);
+ test_hv_cpuid(vcpu, true);
do_sys:
/* Test system ioctl version */
@@ -161,9 +180,7 @@ do_sys:
}
test_hv_cpuid_e2big(vm, NULL);
-
- hv_cpuid_entries = kvm_get_supported_hv_cpuid();
- test_hv_cpuid(hv_cpuid_entries, kvm_cpu_has(X86_FEATURE_VMX));
+ test_hv_cpuid(NULL, kvm_cpu_has(X86_FEATURE_VMX));
out:
kvm_vm_free(vm);
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c b/tools/testing/selftests/kvm/x86/hyperv_evmcs.c
index 74cf19661309..74cf19661309 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_evmcs.c
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c b/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c
index 949e08e98f31..949e08e98f31 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86/hyperv_features.c
index 068e9c69710d..068e9c69710d 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_features.c
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c b/tools/testing/selftests/kvm/x86/hyperv_ipi.c
index 22c0c124582f..2b5b4bc6ef7e 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_ipi.c
@@ -63,8 +63,10 @@ static void receiver_code(void *hcall_page, vm_vaddr_t pgs_gpa)
/* Signal sender vCPU we're ready */
ipis_rcvd[vcpu_id] = (u64)-1;
- for (;;)
- asm volatile("sti; hlt; cli");
+ for (;;) {
+ safe_halt();
+ cli();
+ }
}
static void guest_ipi_handler(struct ex_regs *regs)
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86/hyperv_svm_test.c
index 0ddb63229bcb..0ddb63229bcb 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_svm_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c
index 077cd0ec3040..077cd0ec3040 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c
diff --git a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c b/tools/testing/selftests/kvm/x86/kvm_clock_test.c
index 5bc12222d87a..5bc12222d87a 100644
--- a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c
+++ b/tools/testing/selftests/kvm/x86/kvm_clock_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86/kvm_pv_test.c
index 78878b3a2725..1b805cbdb47b 100644
--- a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
+++ b/tools/testing/selftests/kvm/x86/kvm_pv_test.c
@@ -139,10 +139,12 @@ static void test_pv_unhalt(void)
struct kvm_vm *vm;
struct kvm_cpuid_entry2 *ent;
u32 kvm_sig_old;
+ int r;
- pr_info("testing KVM_FEATURE_PV_UNHALT\n");
+ if (!(kvm_check_cap(KVM_CAP_X86_DISABLE_EXITS) & KVM_X86_DISABLE_EXITS_HLT))
+ return;
- TEST_REQUIRE(KVM_CAP_X86_DISABLE_EXITS);
+ pr_info("testing KVM_FEATURE_PV_UNHALT\n");
/* KVM_PV_UNHALT test */
vm = vm_create_with_one_vcpu(&vcpu, guest_main);
@@ -151,19 +153,45 @@ static void test_pv_unhalt(void)
TEST_ASSERT(vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
"Enabling X86_FEATURE_KVM_PV_UNHALT had no effect");
- /* Make sure KVM clears vcpu->arch.kvm_cpuid */
+ /* Verify KVM disallows disabling exits after vCPU creation. */
+ r = __vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT);
+ TEST_ASSERT(r && errno == EINVAL,
+ "Disabling exits after vCPU creation didn't fail as expected");
+
+ kvm_vm_free(vm);
+
+ /* Verify that KVM clear PV_UNHALT from guest CPUID. */
+ vm = vm_create(1);
+ vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT);
+
+ vcpu = vm_vcpu_add(vm, 0, NULL);
+ TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
+ "vCPU created with PV_UNHALT set by default");
+
+ vcpu_set_cpuid_feature(vcpu, X86_FEATURE_KVM_PV_UNHALT);
+ TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
+ "PV_UNHALT set in guest CPUID when HLT-exiting is disabled");
+
+ /*
+ * Clobber the KVM PV signature and verify KVM does NOT clear PV_UNHALT
+ * when KVM PV is not present, and DOES clear PV_UNHALT when switching
+ * back to the correct signature..
+ */
ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE);
kvm_sig_old = ent->ebx;
ent->ebx = 0xdeadbeef;
vcpu_set_cpuid(vcpu);
- vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT);
+ vcpu_set_cpuid_feature(vcpu, X86_FEATURE_KVM_PV_UNHALT);
+ TEST_ASSERT(vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
+ "PV_UNHALT cleared when using bogus KVM PV signature");
+
ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE);
ent->ebx = kvm_sig_old;
vcpu_set_cpuid(vcpu);
TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
- "KVM_FEATURE_PV_UNHALT is set with KVM_CAP_X86_DISABLE_EXITS");
+ "PV_UNHALT set in guest CPUID when HLT-exiting is disabled");
/* FIXME: actually test KVM_FEATURE_PV_UNHALT feature */
diff --git a/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c b/tools/testing/selftests/kvm/x86/max_vcpuid_cap_test.c
index 7e2bfb3c3f3b..7e2bfb3c3f3b 100644
--- a/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c
+++ b/tools/testing/selftests/kvm/x86/max_vcpuid_cap_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c
index 2b550eff35f1..2b550eff35f1 100644
--- a/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c
+++ b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c
diff --git a/tools/testing/selftests/kvm/x86/nested_emulation_test.c b/tools/testing/selftests/kvm/x86/nested_emulation_test.c
new file mode 100644
index 000000000000..abc824dba04f
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/nested_emulation_test.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+#include "svm_util.h"
+
+enum {
+ SVM_F,
+ VMX_F,
+ NR_VIRTUALIZATION_FLAVORS,
+};
+
+struct emulated_instruction {
+ const char name[32];
+ uint8_t opcode[15];
+ uint32_t exit_reason[NR_VIRTUALIZATION_FLAVORS];
+};
+
+static struct emulated_instruction instructions[] = {
+ {
+ .name = "pause",
+ .opcode = { 0xf3, 0x90 },
+ .exit_reason = { SVM_EXIT_PAUSE,
+ EXIT_REASON_PAUSE_INSTRUCTION, }
+ },
+ {
+ .name = "hlt",
+ .opcode = { 0xf4 },
+ .exit_reason = { SVM_EXIT_HLT,
+ EXIT_REASON_HLT, }
+ },
+};
+
+static uint8_t kvm_fep[] = { 0x0f, 0x0b, 0x6b, 0x76, 0x6d }; /* ud2 ; .ascii "kvm" */
+static uint8_t l2_guest_code[sizeof(kvm_fep) + 15];
+static uint8_t *l2_instruction = &l2_guest_code[sizeof(kvm_fep)];
+
+static uint32_t get_instruction_length(struct emulated_instruction *insn)
+{
+ uint32_t i;
+
+ for (i = 0; i < ARRAY_SIZE(insn->opcode) && insn->opcode[i]; i++)
+ ;
+
+ return i;
+}
+
+static void guest_code(void *test_data)
+{
+ int f = this_cpu_has(X86_FEATURE_SVM) ? SVM_F : VMX_F;
+ int i;
+
+ memcpy(l2_guest_code, kvm_fep, sizeof(kvm_fep));
+
+ if (f == SVM_F) {
+ struct svm_test_data *svm = test_data;
+ struct vmcb *vmcb = svm->vmcb;
+
+ generic_svm_setup(svm, NULL, NULL);
+ vmcb->save.idtr.limit = 0;
+ vmcb->save.rip = (u64)l2_guest_code;
+
+ vmcb->control.intercept |= BIT_ULL(INTERCEPT_SHUTDOWN) |
+ BIT_ULL(INTERCEPT_PAUSE) |
+ BIT_ULL(INTERCEPT_HLT);
+ vmcb->control.intercept_exceptions = 0;
+ } else {
+ GUEST_ASSERT(prepare_for_vmx_operation(test_data));
+ GUEST_ASSERT(load_vmcs(test_data));
+
+ prepare_vmcs(test_data, NULL, NULL);
+ GUEST_ASSERT(!vmwrite(GUEST_IDTR_LIMIT, 0));
+ GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_guest_code));
+ GUEST_ASSERT(!vmwrite(EXCEPTION_BITMAP, 0));
+
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmreadz(CPU_BASED_VM_EXEC_CONTROL) |
+ CPU_BASED_PAUSE_EXITING |
+ CPU_BASED_HLT_EXITING);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(instructions); i++) {
+ struct emulated_instruction *insn = &instructions[i];
+ uint32_t insn_len = get_instruction_length(insn);
+ uint32_t exit_insn_len;
+ u32 exit_reason;
+
+ /*
+ * Copy the target instruction to the L2 code stream, and fill
+ * the remaining bytes with INT3s so that a missed intercept
+ * results in a consistent failure mode (SHUTDOWN).
+ */
+ memcpy(l2_instruction, insn->opcode, insn_len);
+ memset(l2_instruction + insn_len, 0xcc, sizeof(insn->opcode) - insn_len);
+
+ if (f == SVM_F) {
+ struct svm_test_data *svm = test_data;
+ struct vmcb *vmcb = svm->vmcb;
+
+ run_guest(vmcb, svm->vmcb_gpa);
+ exit_reason = vmcb->control.exit_code;
+ exit_insn_len = vmcb->control.next_rip - vmcb->save.rip;
+ GUEST_ASSERT_EQ(vmcb->save.rip, (u64)l2_instruction);
+ } else {
+ GUEST_ASSERT_EQ(i ? vmresume() : vmlaunch(), 0);
+ exit_reason = vmreadz(VM_EXIT_REASON);
+ exit_insn_len = vmreadz(VM_EXIT_INSTRUCTION_LEN);
+ GUEST_ASSERT_EQ(vmreadz(GUEST_RIP), (u64)l2_instruction);
+ }
+
+ __GUEST_ASSERT(exit_reason == insn->exit_reason[f],
+ "Wanted exit_reason '0x%x' for '%s', got '0x%x'",
+ insn->exit_reason[f], insn->name, exit_reason);
+
+ __GUEST_ASSERT(exit_insn_len == insn_len,
+ "Wanted insn_len '%u' for '%s', got '%u'",
+ insn_len, insn->name, exit_insn_len);
+ }
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t nested_test_data_gva;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(is_forced_emulation_enabled);
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ vm_enable_cap(vm, KVM_CAP_EXCEPTION_PAYLOAD, -2ul);
+
+ if (kvm_cpu_has(X86_FEATURE_SVM))
+ vcpu_alloc_svm(vm, &nested_test_data_gva);
+ else
+ vcpu_alloc_vmx(vm, &nested_test_data_gva);
+
+ vcpu_args_set(vcpu, 1, nested_test_data_gva);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c b/tools/testing/selftests/kvm/x86/nested_exceptions_test.c
index 3eb0313ffa39..3641a42934ac 100644
--- a/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c
+++ b/tools/testing/selftests/kvm/x86/nested_exceptions_test.c
@@ -85,6 +85,7 @@ static void svm_run_l2(struct svm_test_data *svm, void *l2_code, int vector,
GUEST_ASSERT_EQ(ctrl->exit_code, (SVM_EXIT_EXCP_BASE + vector));
GUEST_ASSERT_EQ(ctrl->exit_info_1, error_code);
+ GUEST_ASSERT(!ctrl->int_state);
}
static void l1_svm_code(struct svm_test_data *svm)
@@ -122,6 +123,7 @@ static void vmx_run_l2(void *l2_code, int vector, uint32_t error_code)
GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI);
GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), vector);
GUEST_ASSERT_EQ(vmreadz(VM_EXIT_INTR_ERROR_CODE), error_code);
+ GUEST_ASSERT(!vmreadz(GUEST_INTERRUPTIBILITY_INFO));
}
static void l1_vmx_code(struct vmx_pages *vmx)
diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c
index e7efb2b35f8b..c0d84827f736 100644
--- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
+++ b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c
@@ -73,7 +73,7 @@ static void check_2m_page_count(struct kvm_vm *vm, int expected_pages_2m)
{
int actual_pages_2m;
- actual_pages_2m = vm_get_stat(vm, "pages_2m");
+ actual_pages_2m = vm_get_stat(vm, pages_2m);
TEST_ASSERT(actual_pages_2m == expected_pages_2m,
"Unexpected 2m page count. Expected %d, got %d",
@@ -84,7 +84,7 @@ static void check_split_count(struct kvm_vm *vm, int expected_splits)
{
int actual_splits;
- actual_splits = vm_get_stat(vm, "nx_lpage_splits");
+ actual_splits = vm_get_stat(vm, nx_lpage_splits);
TEST_ASSERT(actual_splits == expected_splits,
"Unexpected NX huge page split count. Expected %d, got %d",
diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.sh
index caad084b8bfd..caad084b8bfd 100755
--- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh
+++ b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.sh
diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86/platform_info_test.c
index 9cbf283ebc55..9cbf283ebc55 100644
--- a/tools/testing/selftests/kvm/x86_64/platform_info_test.c
+++ b/tools/testing/selftests/kvm/x86/platform_info_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c b/tools/testing/selftests/kvm/x86/pmu_counters_test.c
index 698cb36989db..8aaaf25b6111 100644
--- a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
+++ b/tools/testing/selftests/kvm/x86/pmu_counters_test.c
@@ -17,7 +17,7 @@
* Number of instructions in each loop. 1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE,
* 1 LOOP.
*/
-#define NUM_INSNS_PER_LOOP 3
+#define NUM_INSNS_PER_LOOP 4
/*
* Number of "extra" instructions that will be counted, i.e. the number of
@@ -29,10 +29,59 @@
/* Total number of instructions retired within the measured section. */
#define NUM_INSNS_RETIRED (NUM_LOOPS * NUM_INSNS_PER_LOOP + NUM_EXTRA_INSNS)
+/* Track which architectural events are supported by hardware. */
+static uint32_t hardware_pmu_arch_events;
static uint8_t kvm_pmu_version;
static bool kvm_has_perf_caps;
+#define X86_PMU_FEATURE_NULL \
+({ \
+ struct kvm_x86_pmu_feature feature = {}; \
+ \
+ feature; \
+})
+
+static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event)
+{
+ return !(*(u64 *)&event);
+}
+
+struct kvm_intel_pmu_event {
+ struct kvm_x86_pmu_feature gp_event;
+ struct kvm_x86_pmu_feature fixed_event;
+};
+
+/*
+ * Wrap the array to appease the compiler, as the macros used to construct each
+ * kvm_x86_pmu_feature use syntax that's only valid in function scope, and the
+ * compiler often thinks the feature definitions aren't compile-time constants.
+ */
+static struct kvm_intel_pmu_event intel_event_to_feature(uint8_t idx)
+{
+ const struct kvm_intel_pmu_event __intel_event_to_feature[] = {
+ [INTEL_ARCH_CPU_CYCLES_INDEX] = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED },
+ [INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX] = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED },
+ /*
+ * Note, the fixed counter for reference cycles is NOT the same as the
+ * general purpose architectural event. The fixed counter explicitly
+ * counts at the same frequency as the TSC, whereas the GP event counts
+ * at a fixed, but uarch specific, frequency. Bundle them here for
+ * simplicity.
+ */
+ [INTEL_ARCH_REFERENCE_CYCLES_INDEX] = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED },
+ [INTEL_ARCH_LLC_REFERENCES_INDEX] = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_LLC_MISSES_INDEX] = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_BRANCHES_RETIRED_INDEX] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_TOPDOWN_SLOTS_INDEX] = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED },
+ };
+
+ kvm_static_assert(ARRAY_SIZE(__intel_event_to_feature) == NR_INTEL_ARCH_EVENTS);
+
+ return __intel_event_to_feature[idx];
+}
+
static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
void *guest_code,
uint8_t pmu_version,
@@ -42,6 +91,7 @@ static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
vm = vm_create_with_one_vcpu(vcpu, guest_code);
sync_global_to_guest(vm, kvm_pmu_version);
+ sync_global_to_guest(vm, hardware_pmu_arch_events);
/*
* Set PERF_CAPABILITIES before PMU version as KVM disallows enabling
@@ -98,14 +148,12 @@ static uint8_t guest_get_pmu_version(void)
* Sanity check that in all cases, the event doesn't count when it's disabled,
* and that KVM correctly emulates the write of an arbitrary value.
*/
-static void guest_assert_event_count(uint8_t idx,
- struct kvm_x86_pmu_feature event,
- uint32_t pmc, uint32_t pmc_msr)
+static void guest_assert_event_count(uint8_t idx, uint32_t pmc, uint32_t pmc_msr)
{
uint64_t count;
count = _rdpmc(pmc);
- if (!this_pmu_has(event))
+ if (!(hardware_pmu_arch_events & BIT(idx)))
goto sanity_checks;
switch (idx) {
@@ -126,7 +174,9 @@ static void guest_assert_event_count(uint8_t idx,
GUEST_ASSERT_NE(count, 0);
break;
case INTEL_ARCH_TOPDOWN_SLOTS_INDEX:
- GUEST_ASSERT(count >= NUM_INSNS_RETIRED);
+ __GUEST_ASSERT(count >= NUM_INSNS_RETIRED,
+ "Expected top-down slots >= %u, got count = %lu",
+ NUM_INSNS_RETIRED, count);
break;
default:
break;
@@ -162,75 +212,42 @@ do { \
"1:\n\t" \
clflush "\n\t" \
"mfence\n\t" \
+ "mov %[m], %%eax\n\t" \
FEP "loop 1b\n\t" \
FEP "mov %%edi, %%ecx\n\t" \
FEP "xor %%eax, %%eax\n\t" \
FEP "xor %%edx, %%edx\n\t" \
"wrmsr\n\t" \
:: "a"((uint32_t)_value), "d"(_value >> 32), \
- "c"(_msr), "D"(_msr) \
+ "c"(_msr), "D"(_msr), [m]"m"(kvm_pmu_version) \
); \
} while (0)
-#define GUEST_TEST_EVENT(_idx, _event, _pmc, _pmc_msr, _ctrl_msr, _value, FEP) \
+#define GUEST_TEST_EVENT(_idx, _pmc, _pmc_msr, _ctrl_msr, _value, FEP) \
do { \
- wrmsr(pmc_msr, 0); \
+ wrmsr(_pmc_msr, 0); \
\
if (this_cpu_has(X86_FEATURE_CLFLUSHOPT)) \
- GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt .", FEP); \
+ GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt %[m]", FEP); \
else if (this_cpu_has(X86_FEATURE_CLFLUSH)) \
- GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush .", FEP); \
+ GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush %[m]", FEP); \
else \
GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP); \
\
- guest_assert_event_count(_idx, _event, _pmc, _pmc_msr); \
+ guest_assert_event_count(_idx, _pmc, _pmc_msr); \
} while (0)
-static void __guest_test_arch_event(uint8_t idx, struct kvm_x86_pmu_feature event,
- uint32_t pmc, uint32_t pmc_msr,
+static void __guest_test_arch_event(uint8_t idx, uint32_t pmc, uint32_t pmc_msr,
uint32_t ctrl_msr, uint64_t ctrl_msr_value)
{
- GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, "");
+ GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, "");
if (is_forced_emulation_enabled)
- GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP);
-}
-
-#define X86_PMU_FEATURE_NULL \
-({ \
- struct kvm_x86_pmu_feature feature = {}; \
- \
- feature; \
-})
-
-static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event)
-{
- return !(*(u64 *)&event);
+ GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP);
}
static void guest_test_arch_event(uint8_t idx)
{
- const struct {
- struct kvm_x86_pmu_feature gp_event;
- struct kvm_x86_pmu_feature fixed_event;
- } intel_event_to_feature[] = {
- [INTEL_ARCH_CPU_CYCLES_INDEX] = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED },
- [INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX] = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED },
- /*
- * Note, the fixed counter for reference cycles is NOT the same
- * as the general purpose architectural event. The fixed counter
- * explicitly counts at the same frequency as the TSC, whereas
- * the GP event counts at a fixed, but uarch specific, frequency.
- * Bundle them here for simplicity.
- */
- [INTEL_ARCH_REFERENCE_CYCLES_INDEX] = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED },
- [INTEL_ARCH_LLC_REFERENCES_INDEX] = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL },
- [INTEL_ARCH_LLC_MISSES_INDEX] = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL },
- [INTEL_ARCH_BRANCHES_RETIRED_INDEX] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL },
- [INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL },
- [INTEL_ARCH_TOPDOWN_SLOTS_INDEX] = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED },
- };
-
uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
uint32_t pmu_version = guest_get_pmu_version();
/* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */
@@ -248,7 +265,7 @@ static void guest_test_arch_event(uint8_t idx)
else
base_pmc_msr = MSR_IA32_PERFCTR0;
- gp_event = intel_event_to_feature[idx].gp_event;
+ gp_event = intel_event_to_feature(idx).gp_event;
GUEST_ASSERT_EQ(idx, gp_event.f.bit);
GUEST_ASSERT(nr_gp_counters);
@@ -262,14 +279,14 @@ static void guest_test_arch_event(uint8_t idx)
if (guest_has_perf_global_ctrl)
wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i));
- __guest_test_arch_event(idx, gp_event, i, base_pmc_msr + i,
+ __guest_test_arch_event(idx, i, base_pmc_msr + i,
MSR_P6_EVNTSEL0 + i, eventsel);
}
if (!guest_has_perf_global_ctrl)
return;
- fixed_event = intel_event_to_feature[idx].fixed_event;
+ fixed_event = intel_event_to_feature(idx).fixed_event;
if (pmu_is_null_feature(fixed_event) || !this_pmu_has(fixed_event))
return;
@@ -277,7 +294,7 @@ static void guest_test_arch_event(uint8_t idx)
wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
- __guest_test_arch_event(idx, fixed_event, i | INTEL_RDPMC_FIXED,
+ __guest_test_arch_event(idx, i | INTEL_RDPMC_FIXED,
MSR_CORE_PERF_FIXED_CTR0 + i,
MSR_CORE_PERF_GLOBAL_CTRL,
FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
@@ -331,9 +348,9 @@ __GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector, \
expect_gp ? "#GP" : "no fault", msr, vector) \
#define GUEST_ASSERT_PMC_VALUE(insn, msr, val, expected) \
- __GUEST_ASSERT(val == expected_val, \
+ __GUEST_ASSERT(val == expected, \
"Expected " #insn "(0x%x) to yield 0x%lx, got 0x%lx", \
- msr, expected_val, val);
+ msr, expected, val);
static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success,
uint64_t expected_val)
@@ -545,7 +562,6 @@ static void test_fixed_counters(uint8_t pmu_version, uint64_t perf_capabilities,
static void test_intel_counters(void)
{
- uint8_t nr_arch_events = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
uint8_t nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
@@ -567,18 +583,26 @@ static void test_intel_counters(void)
/*
* Detect the existence of events that aren't supported by selftests.
- * This will (obviously) fail any time the kernel adds support for a
- * new event, but it's worth paying that price to keep the test fresh.
+ * This will (obviously) fail any time hardware adds support for a new
+ * event, but it's worth paying that price to keep the test fresh.
*/
- TEST_ASSERT(nr_arch_events <= NR_INTEL_ARCH_EVENTS,
+ TEST_ASSERT(this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH) <= NR_INTEL_ARCH_EVENTS,
"New architectural event(s) detected; please update this test (length = %u, mask = %x)",
- nr_arch_events, kvm_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK));
+ this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH),
+ this_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK));
/*
- * Force iterating over known arch events regardless of whether or not
- * KVM/hardware supports a given event.
+ * Iterate over known arch events irrespective of KVM/hardware support
+ * to verify that KVM doesn't reject programming of events just because
+ * the *architectural* encoding is unsupported. Track which events are
+ * supported in hardware; the guest side will validate supported events
+ * count correctly, even if *enumeration* of the event is unsupported
+ * by KVM and/or isn't exposed to the guest.
*/
- nr_arch_events = max_t(typeof(nr_arch_events), nr_arch_events, NR_INTEL_ARCH_EVENTS);
+ for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++) {
+ if (this_pmu_has(intel_event_to_feature(i).gp_event))
+ hardware_pmu_arch_events |= BIT(i);
+ }
for (v = 0; v <= max_pmu_version; v++) {
for (i = 0; i < ARRAY_SIZE(perf_caps); i++) {
@@ -594,8 +618,8 @@ static void test_intel_counters(void)
* vector length.
*/
if (v == pmu_version) {
- for (k = 1; k < (BIT(nr_arch_events) - 1); k++)
- test_arch_events(v, perf_caps[i], nr_arch_events, k);
+ for (k = 1; k < (BIT(NR_INTEL_ARCH_EVENTS) - 1); k++)
+ test_arch_events(v, perf_caps[i], NR_INTEL_ARCH_EVENTS, k);
}
/*
* Test single bits for all PMU version and lengths up
@@ -604,11 +628,11 @@ static void test_intel_counters(void)
* host length). Explicitly test a mask of '0' and all
* ones i.e. all events being available and unavailable.
*/
- for (j = 0; j <= nr_arch_events + 1; j++) {
+ for (j = 0; j <= NR_INTEL_ARCH_EVENTS + 1; j++) {
test_arch_events(v, perf_caps[i], j, 0);
test_arch_events(v, perf_caps[i], j, 0xff);
- for (k = 0; k < nr_arch_events; k++)
+ for (k = 0; k < NR_INTEL_ARCH_EVENTS; k++)
test_arch_events(v, perf_caps[i], j, BIT(k));
}
diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c
index c15513cd74d1..c15513cd74d1 100644
--- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
+++ b/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c
index 82a8d88b5338..82a8d88b5338 100644
--- a/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c
+++ b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/private_mem_kvm_exits_test.c b/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c
index 13e72fcec8dd..13e72fcec8dd 100644
--- a/tools/testing/selftests/kvm/x86_64/private_mem_kvm_exits_test.c
+++ b/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c b/tools/testing/selftests/kvm/x86/recalc_apic_map_test.c
index cbc92a862ea9..cbc92a862ea9 100644
--- a/tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c
+++ b/tools/testing/selftests/kvm/x86/recalc_apic_map_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86/set_boot_cpu_id.c
index 49913784bc82..49913784bc82 100644
--- a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
+++ b/tools/testing/selftests/kvm/x86/set_boot_cpu_id.c
diff --git a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c b/tools/testing/selftests/kvm/x86/set_sregs_test.c
index c021c0795a96..f4095a3d1278 100644
--- a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
+++ b/tools/testing/selftests/kvm/x86/set_sregs_test.c
@@ -41,13 +41,15 @@ do { \
TEST_ASSERT(!memcmp(&new, &orig, sizeof(new)), "KVM modified sregs"); \
} while (0)
+#define KVM_ALWAYS_ALLOWED_CR4 (X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | \
+ X86_CR4_DE | X86_CR4_PSE | X86_CR4_PAE | \
+ X86_CR4_MCE | X86_CR4_PGE | X86_CR4_PCE | \
+ X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT)
+
static uint64_t calc_supported_cr4_feature_bits(void)
{
- uint64_t cr4;
+ uint64_t cr4 = KVM_ALWAYS_ALLOWED_CR4;
- cr4 = X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE |
- X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE | X86_CR4_PGE |
- X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT;
if (kvm_cpu_has(X86_FEATURE_UMIP))
cr4 |= X86_CR4_UMIP;
if (kvm_cpu_has(X86_FEATURE_LA57))
@@ -72,36 +74,31 @@ static uint64_t calc_supported_cr4_feature_bits(void)
return cr4;
}
-int main(int argc, char *argv[])
+static void test_cr_bits(struct kvm_vcpu *vcpu, uint64_t cr4)
{
struct kvm_sregs sregs;
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
- uint64_t cr4;
int rc, i;
- /*
- * Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and
- * use it to verify all supported CR4 bits can be set prior to defining
- * the vCPU model, i.e. without doing KVM_SET_CPUID2.
- */
- vm = vm_create_barebones();
- vcpu = __vm_vcpu_add(vm, 0);
-
vcpu_sregs_get(vcpu, &sregs);
-
- sregs.cr0 = 0;
- sregs.cr4 |= calc_supported_cr4_feature_bits();
- cr4 = sregs.cr4;
-
+ sregs.cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
+ sregs.cr4 |= cr4;
rc = _vcpu_sregs_set(vcpu, &sregs);
TEST_ASSERT(!rc, "Failed to set supported CR4 bits (0x%lx)", cr4);
+ TEST_ASSERT(!!(sregs.cr4 & X86_CR4_OSXSAVE) ==
+ (vcpu->cpuid && vcpu_cpuid_has(vcpu, X86_FEATURE_OSXSAVE)),
+ "KVM didn't %s OSXSAVE in CPUID as expected",
+ (sregs.cr4 & X86_CR4_OSXSAVE) ? "set" : "clear");
+
+ TEST_ASSERT(!!(sregs.cr4 & X86_CR4_PKE) ==
+ (vcpu->cpuid && vcpu_cpuid_has(vcpu, X86_FEATURE_OSPKE)),
+ "KVM didn't %s OSPKE in CPUID as expected",
+ (sregs.cr4 & X86_CR4_PKE) ? "set" : "clear");
+
vcpu_sregs_get(vcpu, &sregs);
TEST_ASSERT(sregs.cr4 == cr4, "sregs.CR4 (0x%llx) != CR4 (0x%lx)",
sregs.cr4, cr4);
- /* Verify all unsupported features are rejected by KVM. */
TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_UMIP);
TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_LA57);
TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_VMXE);
@@ -119,10 +116,28 @@ int main(int argc, char *argv[])
/* NW without CD is illegal, as is PG without PE. */
TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_NW);
TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_PG);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_sregs sregs;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int rc;
+ /*
+ * Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and
+ * use it to verify KVM enforces guest CPUID even if *userspace* never
+ * sets CPUID.
+ */
+ vm = vm_create_barebones();
+ vcpu = __vm_vcpu_add(vm, 0);
+ test_cr_bits(vcpu, KVM_ALWAYS_ALLOWED_CR4);
kvm_vm_free(vm);
- /* Create a "real" VM and verify APIC_BASE can be set. */
+ /* Create a "real" VM with a fully populated guest CPUID and verify
+ * APIC_BASE and all supported CR4 can be set.
+ */
vm = vm_create_with_one_vcpu(&vcpu, NULL);
vcpu_sregs_get(vcpu, &sregs);
@@ -135,6 +150,8 @@ int main(int argc, char *argv[])
TEST_ASSERT(!rc, "Couldn't set IA32_APIC_BASE to %llx (valid)",
sregs.apic_base);
+ test_cr_bits(vcpu, calc_supported_cr4_feature_bits());
+
kvm_vm_free(vm);
return 0;
diff --git a/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c b/tools/testing/selftests/kvm/x86/sev_init2_tests.c
index 3fb967f40c6a..3fb967f40c6a 100644
--- a/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c
+++ b/tools/testing/selftests/kvm/x86/sev_init2_tests.c
diff --git a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c b/tools/testing/selftests/kvm/x86/sev_migrate_tests.c
index 0a6dfba3905b..0a6dfba3905b 100644
--- a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c
+++ b/tools/testing/selftests/kvm/x86/sev_migrate_tests.c
diff --git a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c b/tools/testing/selftests/kvm/x86/sev_smoke_test.c
index ae77698e6e97..d97816dc476a 100644
--- a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c
+++ b/tools/testing/selftests/kvm/x86/sev_smoke_test.c
@@ -52,7 +52,8 @@ static void compare_xsave(u8 *from_host, u8 *from_guest)
bool bad = false;
for (i = 0; i < 4095; i++) {
if (from_host[i] != from_guest[i]) {
- printf("mismatch at %02hhx | %02hhx %02hhx\n", i, from_host[i], from_guest[i]);
+ printf("mismatch at %u | %02hhx %02hhx\n",
+ i, from_host[i], from_guest[i]);
bad = true;
}
}
@@ -155,7 +156,7 @@ static void guest_shutdown_code(void)
/* Clobber the IDT so that #UD is guaranteed to trigger SHUTDOWN. */
memset(&idt, 0, sizeof(idt));
- __asm__ __volatile__("lidt %0" :: "m"(idt));
+ set_idt(&idt);
__asm__ __volatile__("ud2");
}
diff --git a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c b/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c
index fabeeaddfb3a..fabeeaddfb3a 100644
--- a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c
+++ b/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86/smm_test.c
index 55c88d664a94..55c88d664a94 100644
--- a/tools/testing/selftests/kvm/x86_64/smm_test.c
+++ b/tools/testing/selftests/kvm/x86/smm_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86/state_test.c
index 141b7fc0c965..141b7fc0c965 100644
--- a/tools/testing/selftests/kvm/x86_64/state_test.c
+++ b/tools/testing/selftests/kvm/x86/state_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c b/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c
index 916e04248fbb..917b6066cfc1 100644
--- a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c
+++ b/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c
@@ -42,10 +42,7 @@ static void l2_guest_code(struct svm_test_data *svm)
x2apic_write_reg(APIC_ICR,
APIC_DEST_SELF | APIC_INT_ASSERT | INTR_IRQ_NUMBER);
- __asm__ __volatile__(
- "sti\n"
- "nop\n"
- );
+ sti_nop();
GUEST_ASSERT(vintr_irq_called);
GUEST_ASSERT(intr_irq_called);
diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c b/tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c
index 00135cbba35e..00135cbba35e 100644
--- a/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c
+++ b/tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c
index 7b6481d6c0d3..7b6481d6c0d3 100644
--- a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c
+++ b/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c b/tools/testing/selftests/kvm/x86/svm_vmcall_test.c
index 8a62cca28cfb..8a62cca28cfb 100644
--- a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c
+++ b/tools/testing/selftests/kvm/x86/svm_vmcall_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86/sync_regs_test.c
index 8fa3948b0170..8fa3948b0170 100644
--- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/x86/sync_regs_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c b/tools/testing/selftests/kvm/x86/triple_fault_event_test.c
index 56306a19144a..56306a19144a 100644
--- a/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c
+++ b/tools/testing/selftests/kvm/x86/triple_fault_event_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86/tsc_msrs_test.c
index 12b0964f4f13..12b0964f4f13 100644
--- a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
+++ b/tools/testing/selftests/kvm/x86/tsc_msrs_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c b/tools/testing/selftests/kvm/x86/tsc_scaling_sync.c
index 59c7304f805e..59c7304f805e 100644
--- a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c
+++ b/tools/testing/selftests/kvm/x86/tsc_scaling_sync.c
diff --git a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c b/tools/testing/selftests/kvm/x86/ucna_injection_test.c
index 57f157c06b39..1e5e564523b3 100644
--- a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c
+++ b/tools/testing/selftests/kvm/x86/ucna_injection_test.c
@@ -86,7 +86,7 @@ static void ucna_injection_guest_code(void)
wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN);
/* Enables interrupt in guest. */
- asm volatile("sti");
+ sti();
/* Let user space inject the first UCNA */
GUEST_SYNC(SYNC_FIRST_UCNA);
diff --git a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c b/tools/testing/selftests/kvm/x86/userspace_io_test.c
index 9481cbcf284f..9481cbcf284f 100644
--- a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c
+++ b/tools/testing/selftests/kvm/x86/userspace_io_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c
index 32b2794b78fe..32b2794b78fe 100644
--- a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
+++ b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86/vmx_apic_access_test.c
index a81a24761aac..a81a24761aac 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
+++ b/tools/testing/selftests/kvm/x86/vmx_apic_access_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c
index dad988351493..dad988351493 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
+++ b/tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c
index fa512d033205..fa512d033205 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
+++ b/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c
index 3fd6eceab46f..3fd6eceab46f 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c
+++ b/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c b/tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c
index a100ee5f0009..a100ee5f0009 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c
+++ b/tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c b/tools/testing/selftests/kvm/x86/vmx_msrs_test.c
index 90720b6205f4..90720b6205f4 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c
+++ b/tools/testing/selftests/kvm/x86/vmx_msrs_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c
index 1759fa5cb3f2..1759fa5cb3f2 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c
+++ b/tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c
index a1f5ff45d518..a1f5ff45d518 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
+++ b/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c
index 00dd2ac07a61..00dd2ac07a61 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
+++ b/tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c
index 67a62a5a8895..67a62a5a8895 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
+++ b/tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c
index 2ceb5c78c442..2ceb5c78c442 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
+++ b/tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c
index a76078a08ff8..35cb9de54a82 100644
--- a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
+++ b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c
@@ -106,7 +106,8 @@ static void halter_guest_code(struct test_data_page *data)
data->halter_tpr = xapic_read_reg(APIC_TASKPRI);
data->halter_ppr = xapic_read_reg(APIC_PROCPRI);
data->hlt_count++;
- asm volatile("sti; hlt; cli");
+ safe_halt();
+ cli();
data->wake_count++;
}
}
@@ -465,6 +466,19 @@ int main(int argc, char *argv[])
cancel_join_vcpu_thread(threads[0], params[0].vcpu);
cancel_join_vcpu_thread(threads[1], params[1].vcpu);
+ /*
+ * If the host support Idle HLT, i.e. KVM *might* be using Idle HLT,
+ * then the number of HLT exits may be less than the number of HLTs
+ * that were executed, as Idle HLT elides the exit if the vCPU has an
+ * unmasked, pending IRQ (or NMI).
+ */
+ if (this_cpu_has(X86_FEATURE_IDLE_HLT))
+ TEST_ASSERT(data->hlt_count >= vcpu_get_stat(params[0].vcpu, halt_exits),
+ "HLT insns = %lu, HLT exits = %lu",
+ data->hlt_count, vcpu_get_stat(params[0].vcpu, halt_exits));
+ else
+ TEST_ASSERT_EQ(data->hlt_count, vcpu_get_stat(params[0].vcpu, halt_exits));
+
fprintf(stderr,
"Test successful after running for %d seconds.\n"
"Sending vCPU sent %lu IPIs to halting vCPU\n"
diff --git a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c b/tools/testing/selftests/kvm/x86/xapic_state_test.c
index 88bcca188799..fdebff1165c7 100644
--- a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c
+++ b/tools/testing/selftests/kvm/x86/xapic_state_test.c
@@ -18,7 +18,7 @@ struct xapic_vcpu {
static void xapic_guest_code(void)
{
- asm volatile("cli");
+ cli();
xapic_enable();
@@ -38,7 +38,7 @@ static void xapic_guest_code(void)
static void x2apic_guest_code(void)
{
- asm volatile("cli");
+ cli();
x2apic_enable();
diff --git a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c b/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c
index c8a5c5e51661..c8a5c5e51661 100644
--- a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c
+++ b/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c
index a59b3c799bb2..287829f850f7 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c
@@ -191,10 +191,7 @@ static void guest_code(void)
struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR;
int i;
- __asm__ __volatile__(
- "sti\n"
- "nop\n"
- );
+ sti_nop();
/* Trigger an interrupt injection */
GUEST_SYNC(TEST_INJECT_VECTOR);
diff --git a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c b/tools/testing/selftests/kvm/x86/xen_vmcall_test.c
index 2585087cdf5c..2585087cdf5c 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
+++ b/tools/testing/selftests/kvm/x86/xen_vmcall_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c b/tools/testing/selftests/kvm/x86/xss_msr_test.c
index f331a4e9bae3..f331a4e9bae3 100644
--- a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c
+++ b/tools/testing/selftests/kvm/x86/xss_msr_test.c
diff --git a/tools/testing/selftests/landlock/.gitignore b/tools/testing/selftests/landlock/.gitignore
index 470203a7cd73..a820329cae0d 100644
--- a/tools/testing/selftests/landlock/.gitignore
+++ b/tools/testing/selftests/landlock/.gitignore
@@ -1,2 +1,5 @@
/*_test
+/sandbox-and-launch
/true
+/wait-pipe
+/wait-pipe-sandbox
diff --git a/tools/testing/selftests/landlock/Makefile b/tools/testing/selftests/landlock/Makefile
index 5cb0828f0514..a3f449914bf9 100644
--- a/tools/testing/selftests/landlock/Makefile
+++ b/tools/testing/selftests/landlock/Makefile
@@ -10,7 +10,11 @@ src_test := $(wildcard *_test.c)
TEST_GEN_PROGS := $(src_test:.c=)
-TEST_GEN_PROGS_EXTENDED := true sandbox-and-launch wait-pipe
+TEST_GEN_PROGS_EXTENDED := \
+ true \
+ sandbox-and-launch \
+ wait-pipe \
+ wait-pipe-sandbox
# Short targets:
$(TEST_GEN_PROGS): LDLIBS += -lcap -lpthread
diff --git a/tools/testing/selftests/landlock/audit.h b/tools/testing/selftests/landlock/audit.h
new file mode 100644
index 000000000000..b9054086a0c9
--- /dev/null
+++ b/tools/testing/selftests/landlock/audit.h
@@ -0,0 +1,472 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Landlock audit helpers
+ *
+ * Copyright © 2024-2025 Microsoft Corporation
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <linux/audit.h>
+#include <linux/limits.h>
+#include <linux/netlink.h>
+#include <regex.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#endif
+
+#ifndef __maybe_unused
+#define __maybe_unused __attribute__((__unused__))
+#endif
+
+#define REGEX_LANDLOCK_PREFIX "^audit([0-9.:]\\+): domain=\\([0-9a-f]\\+\\)"
+
+struct audit_filter {
+ __u32 record_type;
+ size_t exe_len;
+ char exe[PATH_MAX];
+};
+
+struct audit_message {
+ struct nlmsghdr header;
+ union {
+ struct audit_status status;
+ struct audit_features features;
+ struct audit_rule_data rule;
+ struct nlmsgerr err;
+ char data[PATH_MAX + 200];
+ };
+};
+
+static const struct timeval audit_tv_dom_drop = {
+ /*
+ * Because domain deallocation is tied to asynchronous credential
+ * freeing, receiving such event may take some time. In practice,
+ * on a small VM, it should not exceed 100k usec, but let's wait up
+ * to 1 second to be safe.
+ */
+ .tv_sec = 1,
+};
+
+static const struct timeval audit_tv_default = {
+ .tv_usec = 1,
+};
+
+static int audit_send(const int fd, const struct audit_message *const msg)
+{
+ struct sockaddr_nl addr = {
+ .nl_family = AF_NETLINK,
+ };
+ int ret;
+
+ do {
+ ret = sendto(fd, msg, msg->header.nlmsg_len, 0,
+ (struct sockaddr *)&addr, sizeof(addr));
+ } while (ret < 0 && errno == EINTR);
+
+ if (ret < 0)
+ return -errno;
+
+ if (ret != msg->header.nlmsg_len)
+ return -E2BIG;
+
+ return 0;
+}
+
+static int audit_recv(const int fd, struct audit_message *msg)
+{
+ struct sockaddr_nl addr;
+ socklen_t addrlen = sizeof(addr);
+ struct audit_message msg_tmp;
+ int err;
+
+ if (!msg)
+ msg = &msg_tmp;
+
+ do {
+ err = recvfrom(fd, msg, sizeof(*msg), 0,
+ (struct sockaddr *)&addr, &addrlen);
+ } while (err < 0 && errno == EINTR);
+
+ if (err < 0)
+ return -errno;
+
+ if (addrlen != sizeof(addr) || addr.nl_pid != 0)
+ return -EINVAL;
+
+ /* Checks Netlink error or end of messages. */
+ if (msg->header.nlmsg_type == NLMSG_ERROR)
+ return msg->err.error;
+
+ return 0;
+}
+
+static int audit_request(const int fd,
+ const struct audit_message *const request,
+ struct audit_message *reply)
+{
+ struct audit_message msg_tmp;
+ bool first_reply = true;
+ int err;
+
+ err = audit_send(fd, request);
+ if (err)
+ return err;
+
+ if (!reply)
+ reply = &msg_tmp;
+
+ do {
+ if (first_reply)
+ first_reply = false;
+ else
+ reply = &msg_tmp;
+
+ err = audit_recv(fd, reply);
+ if (err)
+ return err;
+ } while (reply->header.nlmsg_type != NLMSG_ERROR &&
+ reply->err.msg.nlmsg_type != request->header.nlmsg_type);
+
+ return reply->err.error;
+}
+
+static int audit_filter_exe(const int audit_fd,
+ const struct audit_filter *const filter,
+ const __u16 type)
+{
+ struct audit_message msg = {
+ .header = {
+ .nlmsg_len = NLMSG_SPACE(sizeof(msg.rule)) +
+ NLMSG_ALIGN(filter->exe_len),
+ .nlmsg_type = type,
+ .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
+ },
+ .rule = {
+ .flags = AUDIT_FILTER_EXCLUDE,
+ .action = AUDIT_NEVER,
+ .field_count = 1,
+ .fields[0] = filter->record_type,
+ .fieldflags[0] = AUDIT_NOT_EQUAL,
+ .values[0] = filter->exe_len,
+ .buflen = filter->exe_len,
+ }
+ };
+
+ if (filter->record_type != AUDIT_EXE)
+ return -EINVAL;
+
+ memcpy(msg.rule.buf, filter->exe, filter->exe_len);
+ return audit_request(audit_fd, &msg, NULL);
+}
+
+static int audit_filter_drop(const int audit_fd, const __u16 type)
+{
+ struct audit_message msg = {
+ .header = {
+ .nlmsg_len = NLMSG_SPACE(sizeof(msg.rule)),
+ .nlmsg_type = type,
+ .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
+ },
+ .rule = {
+ .flags = AUDIT_FILTER_EXCLUDE,
+ .action = AUDIT_NEVER,
+ .field_count = 1,
+ .fields[0] = AUDIT_MSGTYPE,
+ .fieldflags[0] = AUDIT_NOT_EQUAL,
+ .values[0] = AUDIT_LANDLOCK_DOMAIN,
+ }
+ };
+
+ return audit_request(audit_fd, &msg, NULL);
+}
+
+static int audit_set_status(int fd, __u32 key, __u32 val)
+{
+ const struct audit_message msg = {
+ .header = {
+ .nlmsg_len = NLMSG_SPACE(sizeof(msg.status)),
+ .nlmsg_type = AUDIT_SET,
+ .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
+ },
+ .status = {
+ .mask = key,
+ .enabled = key == AUDIT_STATUS_ENABLED ? val : 0,
+ .pid = key == AUDIT_STATUS_PID ? val : 0,
+ }
+ };
+
+ return audit_request(fd, &msg, NULL);
+}
+
+/* Returns a pointer to the last filled character of @dst, which is `\0`. */
+static __maybe_unused char *regex_escape(const char *const src, char *dst,
+ size_t dst_size)
+{
+ char *d = dst;
+
+ for (const char *s = src; *s; s++) {
+ switch (*s) {
+ case '$':
+ case '*':
+ case '.':
+ case '[':
+ case '\\':
+ case ']':
+ case '^':
+ if (d >= dst + dst_size - 2)
+ return (char *)-ENOMEM;
+
+ *d++ = '\\';
+ *d++ = *s;
+ break;
+ default:
+ if (d >= dst + dst_size - 1)
+ return (char *)-ENOMEM;
+
+ *d++ = *s;
+ }
+ }
+ if (d >= dst + dst_size - 1)
+ return (char *)-ENOMEM;
+
+ *d = '\0';
+ return d;
+}
+
+/*
+ * @domain_id: The domain ID extracted from the audit message (if the first part
+ * of @pattern is REGEX_LANDLOCK_PREFIX). It is set to 0 if the domain ID is
+ * not found.
+ */
+static int audit_match_record(int audit_fd, const __u16 type,
+ const char *const pattern, __u64 *domain_id)
+{
+ struct audit_message msg;
+ int ret, err = 0;
+ bool matches_record = !type;
+ regmatch_t matches[2];
+ regex_t regex;
+
+ ret = regcomp(&regex, pattern, 0);
+ if (ret)
+ return -EINVAL;
+
+ do {
+ memset(&msg, 0, sizeof(msg));
+ err = audit_recv(audit_fd, &msg);
+ if (err)
+ goto out;
+
+ if (msg.header.nlmsg_type == type)
+ matches_record = true;
+ } while (!matches_record);
+
+ ret = regexec(&regex, msg.data, ARRAY_SIZE(matches), matches, 0);
+ if (ret) {
+ printf("DATA: %s\n", msg.data);
+ printf("ERROR: no match for pattern: %s\n", pattern);
+ err = -ENOENT;
+ }
+
+ if (domain_id) {
+ *domain_id = 0;
+ if (matches[1].rm_so != -1) {
+ int match_len = matches[1].rm_eo - matches[1].rm_so;
+ /* The maximal characters of a 2^64 hexadecimal number is 17. */
+ char dom_id[18];
+
+ if (match_len > 0 && match_len < sizeof(dom_id)) {
+ memcpy(dom_id, msg.data + matches[1].rm_so,
+ match_len);
+ dom_id[match_len] = '\0';
+ if (domain_id)
+ *domain_id = strtoull(dom_id, NULL, 16);
+ }
+ }
+ }
+
+out:
+ regfree(&regex);
+ return err;
+}
+
+static int __maybe_unused matches_log_domain_allocated(int audit_fd,
+ __u64 *domain_id)
+{
+ return audit_match_record(
+ audit_fd, AUDIT_LANDLOCK_DOMAIN,
+ REGEX_LANDLOCK_PREFIX
+ " status=allocated mode=enforcing pid=[0-9]\\+ uid=[0-9]\\+"
+ " exe=\"[^\"]\\+\" comm=\".*_test\"$",
+ domain_id);
+}
+
+static int __maybe_unused matches_log_domain_deallocated(
+ int audit_fd, unsigned int num_denials, __u64 *domain_id)
+{
+ static const char log_template[] = REGEX_LANDLOCK_PREFIX
+ " status=deallocated denials=%u$";
+ char log_match[sizeof(log_template) + 10];
+ int log_match_len;
+
+ log_match_len = snprintf(log_match, sizeof(log_match), log_template,
+ num_denials);
+ if (log_match_len > sizeof(log_match))
+ return -E2BIG;
+
+ return audit_match_record(audit_fd, AUDIT_LANDLOCK_DOMAIN, log_match,
+ domain_id);
+}
+
+struct audit_records {
+ size_t access;
+ size_t domain;
+};
+
+static int audit_count_records(int audit_fd, struct audit_records *records)
+{
+ struct audit_message msg;
+ int err;
+
+ records->access = 0;
+ records->domain = 0;
+
+ do {
+ memset(&msg, 0, sizeof(msg));
+ err = audit_recv(audit_fd, &msg);
+ if (err) {
+ if (err == -EAGAIN)
+ return 0;
+ else
+ return err;
+ }
+
+ switch (msg.header.nlmsg_type) {
+ case AUDIT_LANDLOCK_ACCESS:
+ records->access++;
+ break;
+ case AUDIT_LANDLOCK_DOMAIN:
+ records->domain++;
+ break;
+ }
+ } while (true);
+
+ return 0;
+}
+
+static int audit_init(void)
+{
+ int fd, err;
+
+ fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_AUDIT);
+ if (fd < 0)
+ return -errno;
+
+ err = audit_set_status(fd, AUDIT_STATUS_ENABLED, 1);
+ if (err)
+ return err;
+
+ err = audit_set_status(fd, AUDIT_STATUS_PID, getpid());
+ if (err)
+ return err;
+
+ /* Sets a timeout for negative tests. */
+ err = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &audit_tv_default,
+ sizeof(audit_tv_default));
+ if (err)
+ return -errno;
+
+ return fd;
+}
+
+static int audit_init_filter_exe(struct audit_filter *filter, const char *path)
+{
+ char *absolute_path = NULL;
+
+ /* It is assume that there is not already filtering rules. */
+ filter->record_type = AUDIT_EXE;
+ if (!path) {
+ filter->exe_len = readlink("/proc/self/exe", filter->exe,
+ sizeof(filter->exe) - 1);
+ if (filter->exe_len < 0)
+ return -errno;
+
+ return 0;
+ }
+
+ absolute_path = realpath(path, NULL);
+ if (!absolute_path)
+ return -errno;
+
+ /* No need for the terminating NULL byte. */
+ filter->exe_len = strlen(absolute_path);
+ if (filter->exe_len > sizeof(filter->exe))
+ return -E2BIG;
+
+ memcpy(filter->exe, absolute_path, filter->exe_len);
+ free(absolute_path);
+ return 0;
+}
+
+static int audit_cleanup(int audit_fd, struct audit_filter *filter)
+{
+ struct audit_filter new_filter;
+
+ if (audit_fd < 0 || !filter) {
+ int err;
+
+ /*
+ * Simulates audit_init_with_exe_filter() when called from
+ * FIXTURE_TEARDOWN_PARENT().
+ */
+ audit_fd = audit_init();
+ if (audit_fd < 0)
+ return audit_fd;
+
+ filter = &new_filter;
+ err = audit_init_filter_exe(filter, NULL);
+ if (err)
+ return err;
+ }
+
+ /* Filters might not be in place. */
+ audit_filter_exe(audit_fd, filter, AUDIT_DEL_RULE);
+ audit_filter_drop(audit_fd, AUDIT_DEL_RULE);
+
+ /*
+ * Because audit_cleanup() might not be called by the test auditd
+ * process, it might not be possible to explicitly set it. Anyway,
+ * AUDIT_STATUS_ENABLED will implicitly be set to 0 when the auditd
+ * process will exit.
+ */
+ return close(audit_fd);
+}
+
+static int audit_init_with_exe_filter(struct audit_filter *filter)
+{
+ int fd, err;
+
+ fd = audit_init();
+ if (fd < 0)
+ return fd;
+
+ err = audit_init_filter_exe(filter, NULL);
+ if (err)
+ return err;
+
+ err = audit_filter_exe(fd, filter, AUDIT_ADD_RULE);
+ if (err)
+ return err;
+
+ return fd;
+}
diff --git a/tools/testing/selftests/landlock/audit_test.c b/tools/testing/selftests/landlock/audit_test.c
new file mode 100644
index 000000000000..a0643070c403
--- /dev/null
+++ b/tools/testing/selftests/landlock/audit_test.c
@@ -0,0 +1,551 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Landlock tests - Audit
+ *
+ * Copyright © 2024-2025 Microsoft Corporation
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <limits.h>
+#include <linux/landlock.h>
+#include <stdlib.h>
+#include <sys/mount.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "audit.h"
+#include "common.h"
+
+static int matches_log_signal(struct __test_metadata *const _metadata,
+ int audit_fd, const pid_t opid, __u64 *domain_id)
+{
+ static const char log_template[] = REGEX_LANDLOCK_PREFIX
+ " blockers=scope\\.signal opid=%d ocomm=\"audit_test\"$";
+ char log_match[sizeof(log_template) + 10];
+ int log_match_len;
+
+ log_match_len =
+ snprintf(log_match, sizeof(log_match), log_template, opid);
+ if (log_match_len > sizeof(log_match))
+ return -E2BIG;
+
+ return audit_match_record(audit_fd, AUDIT_LANDLOCK_ACCESS, log_match,
+ domain_id);
+}
+
+FIXTURE(audit)
+{
+ struct audit_filter audit_filter;
+ int audit_fd;
+ __u64(*domain_stack)[16];
+};
+
+FIXTURE_SETUP(audit)
+{
+ disable_caps(_metadata);
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ self->audit_fd = audit_init_with_exe_filter(&self->audit_filter);
+ EXPECT_LE(0, self->audit_fd)
+ {
+ const char *error_msg;
+
+ /* kill "$(auditctl -s | sed -ne 's/^pid \([0-9]\+\)$/\1/p')" */
+ if (self->audit_fd == -EEXIST)
+ error_msg = "socket already in use (e.g. auditd)";
+ else
+ error_msg = strerror(-self->audit_fd);
+ TH_LOG("Failed to initialize audit: %s", error_msg);
+ }
+ clear_cap(_metadata, CAP_AUDIT_CONTROL);
+
+ self->domain_stack = mmap(NULL, sizeof(*self->domain_stack),
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(MAP_FAILED, self->domain_stack);
+ memset(self->domain_stack, 0, sizeof(*self->domain_stack));
+}
+
+FIXTURE_TEARDOWN(audit)
+{
+ EXPECT_EQ(0, munmap(self->domain_stack, sizeof(*self->domain_stack)));
+
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ EXPECT_EQ(0, audit_cleanup(self->audit_fd, &self->audit_filter));
+ clear_cap(_metadata, CAP_AUDIT_CONTROL);
+}
+
+TEST_F(audit, layers)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .scoped = LANDLOCK_SCOPE_SIGNAL,
+ };
+ int status, ruleset_fd, i;
+ __u64 prev_dom = 3;
+ pid_t child;
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ for (i = 0; i < ARRAY_SIZE(*self->domain_stack); i++) {
+ __u64 denial_dom = 1;
+ __u64 allocated_dom = 2;
+
+ EXPECT_EQ(0, landlock_restrict_self(ruleset_fd, 0));
+
+ /* Creates a denial to get the domain ID. */
+ EXPECT_EQ(-1, kill(getppid(), 0));
+ EXPECT_EQ(EPERM, errno);
+ EXPECT_EQ(0,
+ matches_log_signal(_metadata, self->audit_fd,
+ getppid(), &denial_dom));
+ EXPECT_EQ(0, matches_log_domain_allocated(
+ self->audit_fd, &allocated_dom));
+ EXPECT_NE(denial_dom, 1);
+ EXPECT_NE(denial_dom, 0);
+ EXPECT_EQ(denial_dom, allocated_dom);
+
+ /* Checks that the new domain is younger than the previous one. */
+ EXPECT_GT(allocated_dom, prev_dom);
+ prev_dom = allocated_dom;
+ (*self->domain_stack)[i] = allocated_dom;
+ }
+
+ /* Checks that we reached the maximum number of layers. */
+ EXPECT_EQ(-1, landlock_restrict_self(ruleset_fd, 0));
+ EXPECT_EQ(E2BIG, errno);
+
+ /* Updates filter rules to match the drop record. */
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ EXPECT_EQ(0, audit_filter_drop(self->audit_fd, AUDIT_ADD_RULE));
+ EXPECT_EQ(0,
+ audit_filter_exe(self->audit_fd, &self->audit_filter,
+ AUDIT_DEL_RULE));
+ clear_cap(_metadata, CAP_AUDIT_CONTROL);
+
+ _exit(_metadata->exit_code);
+ return;
+ }
+
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ if (WIFSIGNALED(status) || !WIFEXITED(status) ||
+ WEXITSTATUS(status) != EXIT_SUCCESS)
+ _metadata->exit_code = KSFT_FAIL;
+
+ /* Purges log from deallocated domains. */
+ EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO,
+ &audit_tv_dom_drop, sizeof(audit_tv_dom_drop)));
+ for (i = ARRAY_SIZE(*self->domain_stack) - 1; i >= 0; i--) {
+ __u64 deallocated_dom = 2;
+
+ EXPECT_EQ(0, matches_log_domain_deallocated(self->audit_fd, 1,
+ &deallocated_dom));
+ EXPECT_EQ((*self->domain_stack)[i], deallocated_dom)
+ {
+ TH_LOG("Failed to match domain %llx (#%d)",
+ (*self->domain_stack)[i], i);
+ }
+ }
+ EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO,
+ &audit_tv_default, sizeof(audit_tv_default)));
+
+ EXPECT_EQ(0, close(ruleset_fd));
+}
+
+FIXTURE(audit_flags)
+{
+ struct audit_filter audit_filter;
+ int audit_fd;
+ __u64 *domain_id;
+};
+
+FIXTURE_VARIANT(audit_flags)
+{
+ const int restrict_flags;
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(audit_flags, default) {
+ /* clang-format on */
+ .restrict_flags = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(audit_flags, same_exec_off) {
+ /* clang-format on */
+ .restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(audit_flags, subdomains_off) {
+ /* clang-format on */
+ .restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(audit_flags, cross_exec_on) {
+ /* clang-format on */
+ .restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON,
+};
+
+FIXTURE_SETUP(audit_flags)
+{
+ disable_caps(_metadata);
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ self->audit_fd = audit_init_with_exe_filter(&self->audit_filter);
+ EXPECT_LE(0, self->audit_fd)
+ {
+ const char *error_msg;
+
+ /* kill "$(auditctl -s | sed -ne 's/^pid \([0-9]\+\)$/\1/p')" */
+ if (self->audit_fd == -EEXIST)
+ error_msg = "socket already in use (e.g. auditd)";
+ else
+ error_msg = strerror(-self->audit_fd);
+ TH_LOG("Failed to initialize audit: %s", error_msg);
+ }
+ clear_cap(_metadata, CAP_AUDIT_CONTROL);
+
+ self->domain_id = mmap(NULL, sizeof(*self->domain_id),
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(MAP_FAILED, self->domain_id);
+ /* Domain IDs are greater or equal to 2^32. */
+ *self->domain_id = 1;
+}
+
+FIXTURE_TEARDOWN(audit_flags)
+{
+ EXPECT_EQ(0, munmap(self->domain_id, sizeof(*self->domain_id)));
+
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ EXPECT_EQ(0, audit_cleanup(self->audit_fd, &self->audit_filter));
+ clear_cap(_metadata, CAP_AUDIT_CONTROL);
+}
+
+TEST_F(audit_flags, signal)
+{
+ int status;
+ pid_t child;
+ struct audit_records records;
+ __u64 deallocated_dom = 2;
+
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .scoped = LANDLOCK_SCOPE_SIGNAL,
+ };
+ int ruleset_fd;
+
+ /* Add filesystem restrictions. */
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+ ASSERT_EQ(0, landlock_restrict_self(ruleset_fd,
+ variant->restrict_flags));
+ EXPECT_EQ(0, close(ruleset_fd));
+
+ /* First signal checks to test log entries. */
+ EXPECT_EQ(-1, kill(getppid(), 0));
+ EXPECT_EQ(EPERM, errno);
+
+ if (variant->restrict_flags &
+ LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF) {
+ EXPECT_EQ(-EAGAIN, matches_log_signal(
+ _metadata, self->audit_fd,
+ getppid(), self->domain_id));
+ EXPECT_EQ(*self->domain_id, 1);
+ } else {
+ __u64 allocated_dom = 3;
+
+ EXPECT_EQ(0, matches_log_signal(
+ _metadata, self->audit_fd,
+ getppid(), self->domain_id));
+
+ /* Checks domain information records. */
+ EXPECT_EQ(0, matches_log_domain_allocated(
+ self->audit_fd, &allocated_dom));
+ EXPECT_NE(*self->domain_id, 1);
+ EXPECT_NE(*self->domain_id, 0);
+ EXPECT_EQ(*self->domain_id, allocated_dom);
+ }
+
+ /* Second signal checks to test audit_count_records(). */
+ EXPECT_EQ(-1, kill(getppid(), 0));
+ EXPECT_EQ(EPERM, errno);
+
+ /* Makes sure there is no superfluous logged records. */
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ if (variant->restrict_flags &
+ LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF) {
+ EXPECT_EQ(0, records.access);
+ } else {
+ EXPECT_EQ(1, records.access);
+ }
+ EXPECT_EQ(0, records.domain);
+
+ /* Updates filter rules to match the drop record. */
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ EXPECT_EQ(0, audit_filter_drop(self->audit_fd, AUDIT_ADD_RULE));
+ EXPECT_EQ(0,
+ audit_filter_exe(self->audit_fd, &self->audit_filter,
+ AUDIT_DEL_RULE));
+ clear_cap(_metadata, CAP_AUDIT_CONTROL);
+
+ _exit(_metadata->exit_code);
+ return;
+ }
+
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ if (WIFSIGNALED(status) || !WIFEXITED(status) ||
+ WEXITSTATUS(status) != EXIT_SUCCESS)
+ _metadata->exit_code = KSFT_FAIL;
+
+ if (variant->restrict_flags &
+ LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF) {
+ EXPECT_EQ(-EAGAIN,
+ matches_log_domain_deallocated(self->audit_fd, 0,
+ &deallocated_dom));
+ EXPECT_EQ(deallocated_dom, 2);
+ } else {
+ EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO,
+ &audit_tv_dom_drop,
+ sizeof(audit_tv_dom_drop)));
+ EXPECT_EQ(0, matches_log_domain_deallocated(self->audit_fd, 2,
+ &deallocated_dom));
+ EXPECT_NE(deallocated_dom, 2);
+ EXPECT_NE(deallocated_dom, 0);
+ EXPECT_EQ(deallocated_dom, *self->domain_id);
+ EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO,
+ &audit_tv_default,
+ sizeof(audit_tv_default)));
+ }
+}
+
+static int matches_log_fs_read_root(int audit_fd)
+{
+ return audit_match_record(
+ audit_fd, AUDIT_LANDLOCK_ACCESS,
+ REGEX_LANDLOCK_PREFIX
+ " blockers=fs\\.read_dir path=\"/\" dev=\"[^\"]\\+\" ino=[0-9]\\+$",
+ NULL);
+}
+
+FIXTURE(audit_exec)
+{
+ struct audit_filter audit_filter;
+ int audit_fd;
+};
+
+FIXTURE_VARIANT(audit_exec)
+{
+ const int restrict_flags;
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(audit_exec, default) {
+ /* clang-format on */
+ .restrict_flags = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(audit_exec, same_exec_off) {
+ /* clang-format on */
+ .restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(audit_exec, subdomains_off) {
+ /* clang-format on */
+ .restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(audit_exec, cross_exec_on) {
+ /* clang-format on */
+ .restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(audit_exec, subdomains_off_and_cross_exec_on) {
+ /* clang-format on */
+ .restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF |
+ LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON,
+};
+
+FIXTURE_SETUP(audit_exec)
+{
+ disable_caps(_metadata);
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+
+ self->audit_fd = audit_init();
+ EXPECT_LE(0, self->audit_fd)
+ {
+ const char *error_msg;
+
+ /* kill "$(auditctl -s | sed -ne 's/^pid \([0-9]\+\)$/\1/p')" */
+ if (self->audit_fd == -EEXIST)
+ error_msg = "socket already in use (e.g. auditd)";
+ else
+ error_msg = strerror(-self->audit_fd);
+ TH_LOG("Failed to initialize audit: %s", error_msg);
+ }
+
+ /* Applies test filter for the bin_wait_pipe_sandbox program. */
+ EXPECT_EQ(0, audit_init_filter_exe(&self->audit_filter,
+ bin_wait_pipe_sandbox));
+ EXPECT_EQ(0, audit_filter_exe(self->audit_fd, &self->audit_filter,
+ AUDIT_ADD_RULE));
+
+ clear_cap(_metadata, CAP_AUDIT_CONTROL);
+}
+
+FIXTURE_TEARDOWN(audit_exec)
+{
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ EXPECT_EQ(0, audit_filter_exe(self->audit_fd, &self->audit_filter,
+ AUDIT_DEL_RULE));
+ clear_cap(_metadata, CAP_AUDIT_CONTROL);
+ EXPECT_EQ(0, close(self->audit_fd));
+}
+
+TEST_F(audit_exec, signal_and_open)
+{
+ struct audit_records records;
+ int pipe_child[2], pipe_parent[2];
+ char buf_parent;
+ pid_t child;
+ int status;
+
+ ASSERT_EQ(0, pipe2(pipe_child, 0));
+ ASSERT_EQ(0, pipe2(pipe_parent, 0));
+
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ const struct landlock_ruleset_attr layer1 = {
+ .scoped = LANDLOCK_SCOPE_SIGNAL,
+ };
+ char pipe_child_str[12], pipe_parent_str[12];
+ char *const argv[] = { (char *)bin_wait_pipe_sandbox,
+ pipe_child_str, pipe_parent_str, NULL };
+ int ruleset_fd;
+
+ /* Passes the pipe FDs to the executed binary. */
+ EXPECT_EQ(0, close(pipe_child[0]));
+ EXPECT_EQ(0, close(pipe_parent[1]));
+ snprintf(pipe_child_str, sizeof(pipe_child_str), "%d",
+ pipe_child[1]);
+ snprintf(pipe_parent_str, sizeof(pipe_parent_str), "%d",
+ pipe_parent[0]);
+
+ ruleset_fd =
+ landlock_create_ruleset(&layer1, sizeof(layer1), 0);
+ if (ruleset_fd < 0) {
+ perror("Failed to create a ruleset");
+ _exit(1);
+ }
+ prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ if (landlock_restrict_self(ruleset_fd,
+ variant->restrict_flags)) {
+ perror("Failed to restrict self");
+ _exit(1);
+ }
+ close(ruleset_fd);
+
+ ASSERT_EQ(0, execve(argv[0], argv, NULL))
+ {
+ TH_LOG("Failed to execute \"%s\": %s", argv[0],
+ strerror(errno));
+ };
+ _exit(1);
+ return;
+ }
+
+ EXPECT_EQ(0, close(pipe_child[1]));
+ EXPECT_EQ(0, close(pipe_parent[0]));
+
+ /* Waits for the child. */
+ EXPECT_EQ(1, read(pipe_child[0], &buf_parent, 1));
+
+ /* Tests that there was no denial until now. */
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
+
+ /*
+ * Wait for the child to do a first denied action by layer1 and
+ * sandbox itself with layer2.
+ */
+ EXPECT_EQ(1, write(pipe_parent[1], ".", 1));
+ EXPECT_EQ(1, read(pipe_child[0], &buf_parent, 1));
+
+ /* Tests that the audit record only matches the child. */
+ if (variant->restrict_flags & LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON) {
+ /* Matches the current domain. */
+ EXPECT_EQ(0, matches_log_signal(_metadata, self->audit_fd,
+ getpid(), NULL));
+ }
+
+ /* Checks that we didn't miss anything. */
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+
+ /*
+ * Wait for the child to do a second denied action by layer1 and
+ * layer2, and sandbox itself with layer3.
+ */
+ EXPECT_EQ(1, write(pipe_parent[1], ".", 1));
+ EXPECT_EQ(1, read(pipe_child[0], &buf_parent, 1));
+
+ /* Tests that the audit record only matches the child. */
+ if (variant->restrict_flags & LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON) {
+ /* Matches the current domain. */
+ EXPECT_EQ(0, matches_log_signal(_metadata, self->audit_fd,
+ getpid(), NULL));
+ }
+
+ if (!(variant->restrict_flags &
+ LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF)) {
+ /* Matches the child domain. */
+ EXPECT_EQ(0, matches_log_fs_read_root(self->audit_fd));
+ }
+
+ /* Checks that we didn't miss anything. */
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+
+ /* Waits for the child to terminate. */
+ EXPECT_EQ(1, write(pipe_parent[1], ".", 1));
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ ASSERT_EQ(1, WIFEXITED(status));
+ ASSERT_EQ(0, WEXITSTATUS(status));
+
+ /* Tests that the audit record only matches the child. */
+ if (!(variant->restrict_flags &
+ LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF)) {
+ /*
+ * Matches the child domains, which tests that the
+ * llcred->domain_exec bitmask is correctly updated with a new
+ * domain.
+ */
+ EXPECT_EQ(0, matches_log_fs_read_root(self->audit_fd));
+ EXPECT_EQ(0, matches_log_signal(_metadata, self->audit_fd,
+ getpid(), NULL));
+ }
+
+ /* Checks that we didn't miss anything. */
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c
index 1bc16fde2e8a..7b69002239d7 100644
--- a/tools/testing/selftests/landlock/base_test.c
+++ b/tools/testing/selftests/landlock/base_test.c
@@ -76,7 +76,7 @@ TEST(abi_version)
const struct landlock_ruleset_attr ruleset_attr = {
.handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE,
};
- ASSERT_EQ(6, landlock_create_ruleset(NULL, 0,
+ ASSERT_EQ(7, landlock_create_ruleset(NULL, 0,
LANDLOCK_CREATE_RULESET_VERSION));
ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0,
@@ -98,10 +98,54 @@ TEST(abi_version)
ASSERT_EQ(EINVAL, errno);
}
+/*
+ * Old source trees might not have the set of Kselftest fixes related to kernel
+ * UAPI headers.
+ */
+#ifndef LANDLOCK_CREATE_RULESET_ERRATA
+#define LANDLOCK_CREATE_RULESET_ERRATA (1U << 1)
+#endif
+
+TEST(errata)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE,
+ };
+ int errata;
+
+ errata = landlock_create_ruleset(NULL, 0,
+ LANDLOCK_CREATE_RULESET_ERRATA);
+ /* The errata bitmask will not be backported to tests. */
+ ASSERT_LE(0, errata);
+ TH_LOG("errata: 0x%x", errata);
+
+ ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0,
+ LANDLOCK_CREATE_RULESET_ERRATA));
+ ASSERT_EQ(EINVAL, errno);
+
+ ASSERT_EQ(-1, landlock_create_ruleset(NULL, sizeof(ruleset_attr),
+ LANDLOCK_CREATE_RULESET_ERRATA));
+ ASSERT_EQ(EINVAL, errno);
+
+ ASSERT_EQ(-1,
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr),
+ LANDLOCK_CREATE_RULESET_ERRATA));
+ ASSERT_EQ(EINVAL, errno);
+
+ ASSERT_EQ(-1, landlock_create_ruleset(
+ NULL, 0,
+ LANDLOCK_CREATE_RULESET_VERSION |
+ LANDLOCK_CREATE_RULESET_ERRATA));
+ ASSERT_EQ(-1, landlock_create_ruleset(NULL, 0,
+ LANDLOCK_CREATE_RULESET_ERRATA |
+ 1 << 31));
+ ASSERT_EQ(EINVAL, errno);
+}
+
/* Tests ordering of syscall argument checks. */
TEST(create_ruleset_checks_ordering)
{
- const int last_flag = LANDLOCK_CREATE_RULESET_VERSION;
+ const int last_flag = LANDLOCK_CREATE_RULESET_ERRATA;
const int invalid_flag = last_flag << 1;
int ruleset_fd;
const struct landlock_ruleset_attr ruleset_attr = {
@@ -233,6 +277,88 @@ TEST(restrict_self_checks_ordering)
ASSERT_EQ(0, close(ruleset_fd));
}
+TEST(restrict_self_fd)
+{
+ int fd;
+
+ fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
+ ASSERT_LE(0, fd);
+
+ EXPECT_EQ(-1, landlock_restrict_self(fd, 0));
+ EXPECT_EQ(EBADFD, errno);
+}
+
+TEST(restrict_self_fd_flags)
+{
+ int fd;
+
+ fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
+ ASSERT_LE(0, fd);
+
+ /*
+ * LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF accepts -1 but not any file
+ * descriptor.
+ */
+ EXPECT_EQ(-1, landlock_restrict_self(
+ fd, LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF));
+ EXPECT_EQ(EBADFD, errno);
+}
+
+TEST(restrict_self_flags)
+{
+ const __u32 last_flag = LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF;
+
+ /* Tests invalid flag combinations. */
+
+ EXPECT_EQ(-1, landlock_restrict_self(-1, last_flag << 1));
+ EXPECT_EQ(EINVAL, errno);
+
+ EXPECT_EQ(-1, landlock_restrict_self(-1, -1));
+ EXPECT_EQ(EINVAL, errno);
+
+ /* Tests valid flag combinations. */
+
+ EXPECT_EQ(-1, landlock_restrict_self(-1, 0));
+ EXPECT_EQ(EBADF, errno);
+
+ EXPECT_EQ(-1, landlock_restrict_self(
+ -1, LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF));
+ EXPECT_EQ(EBADF, errno);
+
+ EXPECT_EQ(-1,
+ landlock_restrict_self(
+ -1,
+ LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF |
+ LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF));
+ EXPECT_EQ(EBADF, errno);
+
+ EXPECT_EQ(-1,
+ landlock_restrict_self(
+ -1,
+ LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON |
+ LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF));
+ EXPECT_EQ(EBADF, errno);
+
+ EXPECT_EQ(-1, landlock_restrict_self(
+ -1, LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON));
+ EXPECT_EQ(EBADF, errno);
+
+ EXPECT_EQ(-1,
+ landlock_restrict_self(
+ -1, LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF |
+ LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON));
+ EXPECT_EQ(EBADF, errno);
+
+ /* Tests with an invalid ruleset_fd. */
+
+ EXPECT_EQ(-1, landlock_restrict_self(
+ -2, LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF));
+ EXPECT_EQ(EBADF, errno);
+
+ EXPECT_EQ(0, landlock_restrict_self(
+ -1, LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF));
+}
+
TEST(ruleset_fd_io)
{
struct landlock_ruleset_attr ruleset_attr = {
diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h
index a604ea5d8297..88a3c78f5d98 100644
--- a/tools/testing/selftests/landlock/common.h
+++ b/tools/testing/selftests/landlock/common.h
@@ -11,6 +11,7 @@
#include <errno.h>
#include <linux/securebits.h>
#include <sys/capability.h>
+#include <sys/prctl.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/wait.h>
@@ -30,6 +31,7 @@
static const char bin_sandbox_and_launch[] = "./sandbox-and-launch";
static const char bin_wait_pipe[] = "./wait-pipe";
+static const char bin_wait_pipe_sandbox[] = "./wait-pipe-sandbox";
static void _init_caps(struct __test_metadata *const _metadata, bool drop_all)
{
@@ -37,10 +39,12 @@ static void _init_caps(struct __test_metadata *const _metadata, bool drop_all)
/* Only these three capabilities are useful for the tests. */
const cap_value_t caps[] = {
/* clang-format off */
+ CAP_AUDIT_CONTROL,
CAP_DAC_OVERRIDE,
CAP_MKNOD,
CAP_NET_ADMIN,
CAP_NET_BIND_SERVICE,
+ CAP_SETUID,
CAP_SYS_ADMIN,
CAP_SYS_CHROOT,
/* clang-format on */
@@ -204,9 +208,26 @@ enforce_ruleset(struct __test_metadata *const _metadata, const int ruleset_fd)
}
}
+static void __maybe_unused
+drop_access_rights(struct __test_metadata *const _metadata,
+ const struct landlock_ruleset_attr *const ruleset_attr)
+{
+ int ruleset_fd;
+
+ ruleset_fd =
+ landlock_create_ruleset(ruleset_attr, sizeof(*ruleset_attr), 0);
+ EXPECT_LE(0, ruleset_fd)
+ {
+ TH_LOG("Failed to create a ruleset: %s", strerror(errno));
+ }
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+}
+
struct protocol_variant {
int domain;
int type;
+ int protocol;
};
struct service_fixture {
diff --git a/tools/testing/selftests/landlock/config b/tools/testing/selftests/landlock/config
index 29af19c4e9f9..8fe9b461b1fd 100644
--- a/tools/testing/selftests/landlock/config
+++ b/tools/testing/selftests/landlock/config
@@ -1,8 +1,12 @@
+CONFIG_AF_UNIX_OOB=y
+CONFIG_AUDIT=y
CONFIG_CGROUPS=y
CONFIG_CGROUP_SCHED=y
CONFIG_INET=y
CONFIG_IPV6=y
CONFIG_KEYS=y
+CONFIG_MPTCP=y
+CONFIG_MPTCP_IPV6=y
CONFIG_NET=y
CONFIG_NET_NS=y
CONFIG_OVERLAY_FS=y
diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
index 2af86bd796ba..f819011a8798 100644
--- a/tools/testing/selftests/landlock/fs_test.c
+++ b/tools/testing/selftests/landlock/fs_test.c
@@ -41,6 +41,7 @@
#define _ASM_GENERIC_FCNTL_H
#include <linux/fcntl.h>
+#include "audit.h"
#include "common.h"
#ifndef renameat2
@@ -59,6 +60,12 @@ int open_tree(int dfd, const char *filename, unsigned int flags)
}
#endif
+static int sys_execveat(int dirfd, const char *pathname, char *const argv[],
+ char *const envp[], int flags)
+{
+ return syscall(__NR_execveat, dirfd, pathname, argv, envp, flags);
+}
+
#ifndef RENAME_EXCHANGE
#define RENAME_EXCHANGE (1 << 1)
#endif
@@ -2024,8 +2031,8 @@ static void test_check_exec(struct __test_metadata *const _metadata,
int ret;
char *const argv[] = { (char *)path, NULL };
- ret = execveat(AT_FDCWD, path, argv, NULL,
- AT_EMPTY_PATH | AT_EXECVE_CHECK);
+ ret = sys_execveat(AT_FDCWD, path, argv, NULL,
+ AT_EMPTY_PATH | AT_EXECVE_CHECK);
if (err) {
EXPECT_EQ(-1, ret);
EXPECT_EQ(errno, err);
@@ -5548,4 +5555,597 @@ TEST_F_FORK(layout3_fs, release_inodes)
ASSERT_EQ(EACCES, test_open(TMP_DIR, O_RDONLY));
}
+static int matches_log_fs_extra(struct __test_metadata *const _metadata,
+ int audit_fd, const char *const blockers,
+ const char *const path, const char *const extra)
+{
+ static const char log_template[] = REGEX_LANDLOCK_PREFIX
+ " blockers=fs\\.%s path=\"%s\" dev=\"[^\"]\\+\" ino=[0-9]\\+$";
+ char *absolute_path = NULL;
+ size_t log_match_remaining = sizeof(log_template) + strlen(blockers) +
+ PATH_MAX * 2 +
+ (extra ? strlen(extra) : 0) + 1;
+ char log_match[log_match_remaining];
+ char *log_match_cursor = log_match;
+ size_t chunk_len;
+
+ chunk_len = snprintf(log_match_cursor, log_match_remaining,
+ REGEX_LANDLOCK_PREFIX " blockers=%s path=\"",
+ blockers);
+ if (chunk_len < 0 || chunk_len >= log_match_remaining)
+ return -E2BIG;
+
+ /*
+ * It is assume that absolute_path does not contain control characters nor
+ * spaces, see audit_string_contains_control().
+ */
+ absolute_path = realpath(path, NULL);
+ if (!absolute_path)
+ return -errno;
+
+ log_match_remaining -= chunk_len;
+ log_match_cursor += chunk_len;
+ log_match_cursor = regex_escape(absolute_path, log_match_cursor,
+ log_match_remaining);
+ free(absolute_path);
+ if (log_match_cursor < 0)
+ return (long long)log_match_cursor;
+
+ log_match_remaining -= log_match_cursor - log_match;
+ chunk_len = snprintf(log_match_cursor, log_match_remaining,
+ "\" dev=\"[^\"]\\+\" ino=[0-9]\\+%s$",
+ extra ?: "");
+ if (chunk_len < 0 || chunk_len >= log_match_remaining)
+ return -E2BIG;
+
+ return audit_match_record(audit_fd, AUDIT_LANDLOCK_ACCESS, log_match,
+ NULL);
+}
+
+static int matches_log_fs(struct __test_metadata *const _metadata, int audit_fd,
+ const char *const blockers, const char *const path)
+{
+ return matches_log_fs_extra(_metadata, audit_fd, blockers, path, NULL);
+}
+
+FIXTURE(audit_layout1)
+{
+ struct audit_filter audit_filter;
+ int audit_fd;
+};
+
+FIXTURE_SETUP(audit_layout1)
+{
+ prepare_layout(_metadata);
+
+ create_layout1(_metadata);
+
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ self->audit_fd = audit_init_with_exe_filter(&self->audit_filter);
+ EXPECT_LE(0, self->audit_fd);
+ disable_caps(_metadata);
+}
+
+FIXTURE_TEARDOWN_PARENT(audit_layout1)
+{
+ remove_layout1(_metadata);
+
+ cleanup_layout(_metadata);
+
+ EXPECT_EQ(0, audit_cleanup(-1, NULL));
+}
+
+TEST_F(audit_layout1, execute_make)
+{
+ struct audit_records records;
+
+ copy_file(_metadata, bin_true, file1_s1d1);
+ test_execute(_metadata, 0, file1_s1d1);
+ test_check_exec(_metadata, 0, file1_s1d1);
+
+ drop_access_rights(_metadata,
+ &(struct landlock_ruleset_attr){
+ .handled_access_fs =
+ LANDLOCK_ACCESS_FS_EXECUTE,
+ });
+
+ test_execute(_metadata, EACCES, file1_s1d1);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.execute",
+ file1_s1d1));
+ test_check_exec(_metadata, EACCES, file1_s1d1);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.execute",
+ file1_s1d1));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
+}
+
+/*
+ * Using a set of handled/denied access rights make it possible to check that
+ * only the blocked ones are logged.
+ */
+
+/* clang-format off */
+static const __u64 access_fs_16 =
+ LANDLOCK_ACCESS_FS_EXECUTE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE |
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_READ_DIR |
+ LANDLOCK_ACCESS_FS_REMOVE_DIR |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE |
+ LANDLOCK_ACCESS_FS_MAKE_CHAR |
+ LANDLOCK_ACCESS_FS_MAKE_DIR |
+ LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_MAKE_SOCK |
+ LANDLOCK_ACCESS_FS_MAKE_FIFO |
+ LANDLOCK_ACCESS_FS_MAKE_BLOCK |
+ LANDLOCK_ACCESS_FS_MAKE_SYM |
+ LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_TRUNCATE |
+ LANDLOCK_ACCESS_FS_IOCTL_DEV;
+/* clang-format on */
+
+TEST_F(audit_layout1, execute_read)
+{
+ struct audit_records records;
+
+ copy_file(_metadata, bin_true, file1_s1d1);
+ test_execute(_metadata, 0, file1_s1d1);
+ test_check_exec(_metadata, 0, file1_s1d1);
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ /*
+ * The only difference with the previous audit_layout1.execute_read test is
+ * the extra ",fs\\.read_file" blocked by the executable file.
+ */
+ test_execute(_metadata, EACCES, file1_s1d1);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.execute,fs\\.read_file", file1_s1d1));
+ test_check_exec(_metadata, EACCES, file1_s1d1);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.execute,fs\\.read_file", file1_s1d1));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
+}
+
+TEST_F(audit_layout1, write_file)
+{
+ struct audit_records records;
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ EXPECT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.write_file", file1_s1d1));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_F(audit_layout1, read_file)
+{
+ struct audit_records records;
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ EXPECT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.read_file",
+ file1_s1d1));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_F(audit_layout1, read_dir)
+{
+ struct audit_records records;
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ EXPECT_EQ(EACCES, test_open(dir_s1d1, O_DIRECTORY));
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.read_dir",
+ dir_s1d1));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_F(audit_layout1, remove_dir)
+{
+ struct audit_records records;
+
+ EXPECT_EQ(0, unlink(file1_s1d3));
+ EXPECT_EQ(0, unlink(file2_s1d3));
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ EXPECT_EQ(-1, rmdir(dir_s1d3));
+ EXPECT_EQ(EACCES, errno);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.remove_dir", dir_s1d2));
+
+ EXPECT_EQ(-1, unlinkat(AT_FDCWD, dir_s1d3, AT_REMOVEDIR));
+ EXPECT_EQ(EACCES, errno);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.remove_dir", dir_s1d2));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
+}
+
+TEST_F(audit_layout1, remove_file)
+{
+ struct audit_records records;
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ EXPECT_EQ(-1, unlink(file1_s1d3));
+ EXPECT_EQ(EACCES, errno);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.remove_file", dir_s1d3));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_F(audit_layout1, make_char)
+{
+ struct audit_records records;
+
+ EXPECT_EQ(0, unlink(file1_s1d3));
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ EXPECT_EQ(-1, mknod(file1_s1d3, S_IFCHR | 0644, 0));
+ EXPECT_EQ(EACCES, errno);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.make_char",
+ dir_s1d3));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_F(audit_layout1, make_dir)
+{
+ struct audit_records records;
+
+ EXPECT_EQ(0, unlink(file1_s1d3));
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ EXPECT_EQ(-1, mkdir(file1_s1d3, 0755));
+ EXPECT_EQ(EACCES, errno);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.make_dir",
+ dir_s1d3));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_F(audit_layout1, make_reg)
+{
+ struct audit_records records;
+
+ EXPECT_EQ(0, unlink(file1_s1d3));
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ EXPECT_EQ(-1, mknod(file1_s1d3, S_IFREG | 0644, 0));
+ EXPECT_EQ(EACCES, errno);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.make_reg",
+ dir_s1d3));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_F(audit_layout1, make_sock)
+{
+ struct audit_records records;
+
+ EXPECT_EQ(0, unlink(file1_s1d3));
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ EXPECT_EQ(-1, mknod(file1_s1d3, S_IFSOCK | 0644, 0));
+ EXPECT_EQ(EACCES, errno);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.make_sock",
+ dir_s1d3));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_F(audit_layout1, make_fifo)
+{
+ struct audit_records records;
+
+ EXPECT_EQ(0, unlink(file1_s1d3));
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ EXPECT_EQ(-1, mknod(file1_s1d3, S_IFIFO | 0644, 0));
+ EXPECT_EQ(EACCES, errno);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.make_fifo",
+ dir_s1d3));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_F(audit_layout1, make_block)
+{
+ struct audit_records records;
+
+ EXPECT_EQ(0, unlink(file1_s1d3));
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ EXPECT_EQ(-1, mknod(file1_s1d3, S_IFBLK | 0644, 0));
+ EXPECT_EQ(EACCES, errno);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.make_block", dir_s1d3));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_F(audit_layout1, make_sym)
+{
+ struct audit_records records;
+
+ EXPECT_EQ(0, unlink(file1_s1d3));
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ EXPECT_EQ(-1, symlink("target", file1_s1d3));
+ EXPECT_EQ(EACCES, errno);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.make_sym",
+ dir_s1d3));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_F(audit_layout1, refer_handled)
+{
+ struct audit_records records;
+
+ EXPECT_EQ(0, unlink(file1_s1d3));
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs =
+ LANDLOCK_ACCESS_FS_REFER,
+ });
+
+ EXPECT_EQ(-1, link(file1_s1d1, file1_s1d3));
+ EXPECT_EQ(EXDEV, errno);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.refer",
+ dir_s1d1));
+ EXPECT_EQ(0, matches_log_domain_allocated(self->audit_fd, NULL));
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.refer",
+ dir_s1d3));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
+}
+
+TEST_F(audit_layout1, refer_make)
+{
+ struct audit_records records;
+
+ EXPECT_EQ(0, unlink(file1_s1d3));
+
+ drop_access_rights(_metadata,
+ &(struct landlock_ruleset_attr){
+ .handled_access_fs =
+ LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REFER,
+ });
+
+ EXPECT_EQ(-1, link(file1_s1d1, file1_s1d3));
+ EXPECT_EQ(EACCES, errno);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.refer",
+ dir_s1d1));
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.make_reg,fs\\.refer", dir_s1d3));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
+}
+
+TEST_F(audit_layout1, refer_rename)
+{
+ struct audit_records records;
+
+ EXPECT_EQ(0, unlink(file1_s1d3));
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ EXPECT_EQ(EACCES, test_rename(file1_s1d2, file1_s2d3));
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.remove_file,fs\\.refer", dir_s1d2));
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.remove_file,fs\\.make_reg,fs\\.refer",
+ dir_s2d3));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
+}
+
+TEST_F(audit_layout1, refer_exchange)
+{
+ struct audit_records records;
+
+ EXPECT_EQ(0, unlink(file1_s1d3));
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ /*
+ * The only difference with the previous audit_layout1.refer_rename test is
+ * the extra ",fs\\.make_reg" blocked by the source directory.
+ */
+ EXPECT_EQ(EACCES, test_exchange(file1_s1d2, file1_s2d3));
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.remove_file,fs\\.make_reg,fs\\.refer",
+ dir_s1d2));
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.remove_file,fs\\.make_reg,fs\\.refer",
+ dir_s2d3));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
+}
+
+/*
+ * This test checks that the audit record is correctly generated when the
+ * operation is only partially denied. This is the case for rename(2) when the
+ * source file is allowed to be referenced but the destination directory is not.
+ *
+ * This is also a regression test for commit d617f0d72d80 ("landlock: Optimize
+ * file path walks and prepare for audit support") and commit 058518c20920
+ * ("landlock: Align partial refer access checks with final ones").
+ */
+TEST_F(audit_layout1, refer_rename_half)
+{
+ struct audit_records records;
+ const struct rule layer1[] = {
+ {
+ .path = dir_s2d2,
+ .access = LANDLOCK_ACCESS_FS_REFER,
+ },
+ {},
+ };
+ int ruleset_fd =
+ create_ruleset(_metadata, LANDLOCK_ACCESS_FS_REFER, layer1);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(-1, rename(dir_s1d2, dir_s2d3));
+ ASSERT_EQ(EXDEV, errno);
+
+ /* Only half of the request is denied. */
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.refer",
+ dir_s1d1));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_F(audit_layout1, truncate)
+{
+ struct audit_records records;
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ EXPECT_EQ(-1, truncate(file1_s1d3, 0));
+ EXPECT_EQ(EACCES, errno);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.truncate",
+ file1_s1d3));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_F(audit_layout1, ioctl_dev)
+{
+ struct audit_records records;
+ int fd;
+
+ drop_access_rights(_metadata,
+ &(struct landlock_ruleset_attr){
+ .handled_access_fs =
+ access_fs_16 &
+ ~LANDLOCK_ACCESS_FS_READ_FILE,
+ });
+
+ fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
+ ASSERT_LE(0, fd);
+ EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FIONREAD));
+ EXPECT_EQ(0, matches_log_fs_extra(_metadata, self->audit_fd,
+ "fs\\.ioctl_dev", "/dev/null",
+ " ioctlcmd=0x541b"));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_F(audit_layout1, mount)
+{
+ struct audit_records records;
+
+ drop_access_rights(_metadata,
+ &(struct landlock_ruleset_attr){
+ .handled_access_fs =
+ LANDLOCK_ACCESS_FS_EXECUTE,
+ });
+
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ EXPECT_EQ(-1, mount(NULL, dir_s3d2, NULL, MS_RDONLY, NULL));
+ EXPECT_EQ(EPERM, errno);
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.change_topology", dir_s3d2));
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/landlock/net_test.c b/tools/testing/selftests/landlock/net_test.c
index 4e0aeb53b225..2a45208551e6 100644
--- a/tools/testing/selftests/landlock/net_test.c
+++ b/tools/testing/selftests/landlock/net_test.c
@@ -20,6 +20,7 @@
#include <sys/syscall.h>
#include <sys/un.h>
+#include "audit.h"
#include "common.h"
const short sock_port_start = (1 << 10);
@@ -85,18 +86,18 @@ static void setup_loopback(struct __test_metadata *const _metadata)
clear_ambient_cap(_metadata, CAP_NET_ADMIN);
}
+static bool prot_is_tcp(const struct protocol_variant *const prot)
+{
+ return (prot->domain == AF_INET || prot->domain == AF_INET6) &&
+ prot->type == SOCK_STREAM &&
+ (prot->protocol == IPPROTO_TCP || prot->protocol == IPPROTO_IP);
+}
+
static bool is_restricted(const struct protocol_variant *const prot,
const enum sandbox_type sandbox)
{
- switch (prot->domain) {
- case AF_INET:
- case AF_INET6:
- switch (prot->type) {
- case SOCK_STREAM:
- return sandbox == TCP_SANDBOX;
- }
- break;
- }
+ if (sandbox == TCP_SANDBOX)
+ return prot_is_tcp(prot);
return false;
}
@@ -105,7 +106,7 @@ static int socket_variant(const struct service_fixture *const srv)
int ret;
ret = socket(srv->protocol.domain, srv->protocol.type | SOCK_CLOEXEC,
- 0);
+ srv->protocol.protocol);
if (ret < 0)
return -errno;
return ret;
@@ -290,22 +291,70 @@ FIXTURE_TEARDOWN(protocol)
}
/* clang-format off */
-FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_tcp) {
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_tcp1) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ /* IPPROTO_IP == 0 */
+ .protocol = IPPROTO_IP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_tcp2) {
/* clang-format on */
.sandbox = NO_SANDBOX,
.prot = {
.domain = AF_INET,
.type = SOCK_STREAM,
+ .protocol = IPPROTO_TCP,
},
};
/* clang-format off */
-FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_tcp) {
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_mptcp) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ .protocol = IPPROTO_MPTCP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_tcp1) {
/* clang-format on */
.sandbox = NO_SANDBOX,
.prot = {
.domain = AF_INET6,
.type = SOCK_STREAM,
+ /* IPPROTO_IP == 0 */
+ .protocol = IPPROTO_IP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_tcp2) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ .protocol = IPPROTO_TCP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_mptcp) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ .protocol = IPPROTO_MPTCP,
},
};
@@ -350,22 +399,70 @@ FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_unix_datagram) {
};
/* clang-format off */
-FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_tcp) {
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_tcp1) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ /* IPPROTO_IP == 0 */
+ .protocol = IPPROTO_IP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_tcp2) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ .protocol = IPPROTO_TCP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_mptcp) {
/* clang-format on */
.sandbox = TCP_SANDBOX,
.prot = {
.domain = AF_INET,
.type = SOCK_STREAM,
+ .protocol = IPPROTO_MPTCP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_tcp1) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ /* IPPROTO_IP == 0 */
+ .protocol = IPPROTO_IP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_tcp2) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ .protocol = IPPROTO_TCP,
},
};
/* clang-format off */
-FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_tcp) {
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_mptcp) {
/* clang-format on */
.sandbox = TCP_SANDBOX,
.prot = {
.domain = AF_INET6,
.type = SOCK_STREAM,
+ .protocol = IPPROTO_MPTCP,
},
};
@@ -1772,4 +1869,135 @@ TEST_F(port_specific, bind_connect_1023)
EXPECT_EQ(0, close(bind_fd));
}
+static int matches_log_tcp(const int audit_fd, const char *const blockers,
+ const char *const dir_addr, const char *const addr,
+ const char *const dir_port)
+{
+ static const char log_template[] = REGEX_LANDLOCK_PREFIX
+ " blockers=%s %s=%s %s=1024$";
+ /*
+ * Max strlen(blockers): 16
+ * Max strlen(dir_addr): 5
+ * Max strlen(addr): 12
+ * Max strlen(dir_port): 4
+ */
+ char log_match[sizeof(log_template) + 37];
+ int log_match_len;
+
+ log_match_len = snprintf(log_match, sizeof(log_match), log_template,
+ blockers, dir_addr, addr, dir_port);
+ if (log_match_len > sizeof(log_match))
+ return -E2BIG;
+
+ return audit_match_record(audit_fd, AUDIT_LANDLOCK_ACCESS, log_match,
+ NULL);
+}
+
+FIXTURE(audit)
+{
+ struct service_fixture srv0;
+ struct audit_filter audit_filter;
+ int audit_fd;
+};
+
+FIXTURE_VARIANT(audit)
+{
+ const char *const addr;
+ const struct protocol_variant prot;
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(audit, ipv4) {
+ /* clang-format on */
+ .addr = "127\\.0\\.0\\.1",
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(audit, ipv6) {
+ /* clang-format on */
+ .addr = "::1",
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ },
+};
+
+FIXTURE_SETUP(audit)
+{
+ ASSERT_EQ(0, set_service(&self->srv0, variant->prot, 0));
+ setup_loopback(_metadata);
+
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ self->audit_fd = audit_init_with_exe_filter(&self->audit_filter);
+ EXPECT_LE(0, self->audit_fd);
+ disable_caps(_metadata);
+};
+
+FIXTURE_TEARDOWN(audit)
+{
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ EXPECT_EQ(0, audit_cleanup(self->audit_fd, &self->audit_filter));
+ clear_cap(_metadata, CAP_AUDIT_CONTROL);
+}
+
+TEST_F(audit, bind)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ };
+ struct audit_records records;
+ int ruleset_fd, sock_fd;
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+
+ sock_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, sock_fd);
+ EXPECT_EQ(-EACCES, bind_variant(sock_fd, &self->srv0));
+ EXPECT_EQ(0, matches_log_tcp(self->audit_fd, "net\\.bind_tcp", "saddr",
+ variant->addr, "src"));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+
+ EXPECT_EQ(0, close(sock_fd));
+}
+
+TEST_F(audit, connect)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ };
+ struct audit_records records;
+ int ruleset_fd, sock_fd;
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+
+ sock_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, sock_fd);
+ EXPECT_EQ(-EACCES, connect_variant(sock_fd, &self->srv0));
+ EXPECT_EQ(0, matches_log_tcp(self->audit_fd, "net\\.connect_tcp",
+ "daddr", variant->addr, "dest"));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+
+ EXPECT_EQ(0, close(sock_fd));
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/landlock/ptrace_test.c b/tools/testing/selftests/landlock/ptrace_test.c
index 8f31b673ff2d..4e356334ecb7 100644
--- a/tools/testing/selftests/landlock/ptrace_test.c
+++ b/tools/testing/selftests/landlock/ptrace_test.c
@@ -4,6 +4,7 @@
*
* Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net>
* Copyright © 2019-2020 ANSSI
+ * Copyright © 2024-2025 Microsoft Corporation
*/
#define _GNU_SOURCE
@@ -17,6 +18,7 @@
#include <sys/wait.h>
#include <unistd.h>
+#include "audit.h"
#include "common.h"
/* Copied from security/yama/yama_lsm.c */
@@ -434,4 +436,142 @@ TEST_F(hierarchy, trace)
_metadata->exit_code = KSFT_FAIL;
}
+static int matches_log_ptrace(struct __test_metadata *const _metadata,
+ int audit_fd, const pid_t opid)
+{
+ static const char log_template[] = REGEX_LANDLOCK_PREFIX
+ " blockers=ptrace opid=%d ocomm=\"ptrace_test\"$";
+ char log_match[sizeof(log_template) + 10];
+ int log_match_len;
+
+ log_match_len =
+ snprintf(log_match, sizeof(log_match), log_template, opid);
+ if (log_match_len > sizeof(log_match))
+ return -E2BIG;
+
+ return audit_match_record(audit_fd, AUDIT_LANDLOCK_ACCESS, log_match,
+ NULL);
+}
+
+FIXTURE(audit)
+{
+ struct audit_filter audit_filter;
+ int audit_fd;
+};
+
+FIXTURE_SETUP(audit)
+{
+ disable_caps(_metadata);
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ self->audit_fd = audit_init_with_exe_filter(&self->audit_filter);
+ EXPECT_LE(0, self->audit_fd);
+ clear_cap(_metadata, CAP_AUDIT_CONTROL);
+}
+
+FIXTURE_TEARDOWN_PARENT(audit)
+{
+ EXPECT_EQ(0, audit_cleanup(-1, NULL));
+}
+
+/* Test PTRACE_TRACEME and PTRACE_ATTACH for parent and child. */
+TEST_F(audit, trace)
+{
+ pid_t child;
+ int status;
+ int pipe_child[2], pipe_parent[2];
+ int yama_ptrace_scope;
+ char buf_parent;
+ struct audit_records records;
+
+ /* Makes sure there is no superfluous logged records. */
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
+
+ yama_ptrace_scope = get_yama_ptrace_scope();
+ ASSERT_LE(0, yama_ptrace_scope);
+
+ if (yama_ptrace_scope > YAMA_SCOPE_DISABLED)
+ TH_LOG("Incomplete tests due to Yama restrictions (scope %d)",
+ yama_ptrace_scope);
+
+ /*
+ * Removes all effective and permitted capabilities to not interfere
+ * with cap_ptrace_access_check() in case of PTRACE_MODE_FSCREDS.
+ */
+ drop_caps(_metadata);
+
+ ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC));
+ ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
+
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ char buf_child;
+
+ ASSERT_EQ(0, close(pipe_parent[1]));
+ ASSERT_EQ(0, close(pipe_child[0]));
+
+ /* Waits for the parent to be in a domain, if any. */
+ ASSERT_EQ(1, read(pipe_parent[0], &buf_child, 1));
+
+ /* Tests child PTRACE_TRACEME. */
+ EXPECT_EQ(-1, ptrace(PTRACE_TRACEME));
+ EXPECT_EQ(EPERM, errno);
+ /* We should see the child process. */
+ EXPECT_EQ(0, matches_log_ptrace(_metadata, self->audit_fd,
+ getpid()));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ /* Checks for a domain creation. */
+ EXPECT_EQ(1, records.domain);
+
+ /*
+ * Signals that the PTRACE_ATTACH test is done and the
+ * PTRACE_TRACEME test is ongoing.
+ */
+ ASSERT_EQ(1, write(pipe_child[1], ".", 1));
+
+ /* Waits for the parent PTRACE_ATTACH test. */
+ ASSERT_EQ(1, read(pipe_parent[0], &buf_child, 1));
+ _exit(_metadata->exit_code);
+ return;
+ }
+
+ ASSERT_EQ(0, close(pipe_child[1]));
+ ASSERT_EQ(0, close(pipe_parent[0]));
+ create_domain(_metadata);
+
+ /* Signals that the parent is in a domain. */
+ ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
+
+ /*
+ * Waits for the child to test PTRACE_ATTACH on the parent and start
+ * testing PTRACE_TRACEME.
+ */
+ ASSERT_EQ(1, read(pipe_child[0], &buf_parent, 1));
+
+ /* The child should not be traced by the parent. */
+ EXPECT_EQ(-1, ptrace(PTRACE_DETACH, child, NULL, 0));
+ EXPECT_EQ(ESRCH, errno);
+
+ /* Tests PTRACE_ATTACH on the child. */
+ EXPECT_EQ(-1, ptrace(PTRACE_ATTACH, child, NULL, 0));
+ EXPECT_EQ(EPERM, errno);
+ EXPECT_EQ(0, matches_log_ptrace(_metadata, self->audit_fd, child));
+
+ /* Signals that the parent PTRACE_ATTACH test is done. */
+ ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ if (WIFSIGNALED(status) || !WIFEXITED(status) ||
+ WEXITSTATUS(status) != EXIT_SUCCESS)
+ _metadata->exit_code = KSFT_FAIL;
+
+ /* Makes sure there is no superfluous logged records. */
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/landlock/scoped_abstract_unix_test.c b/tools/testing/selftests/landlock/scoped_abstract_unix_test.c
index a6b59d2ab1b4..6825082c079c 100644
--- a/tools/testing/selftests/landlock/scoped_abstract_unix_test.c
+++ b/tools/testing/selftests/landlock/scoped_abstract_unix_test.c
@@ -20,6 +20,7 @@
#include <sys/wait.h>
#include <unistd.h>
+#include "audit.h"
#include "common.h"
#include "scoped_common.h"
@@ -267,6 +268,116 @@ TEST_F(scoped_domains, connect_to_child)
_metadata->exit_code = KSFT_FAIL;
}
+FIXTURE(scoped_audit)
+{
+ struct service_fixture dgram_address;
+ struct audit_filter audit_filter;
+ int audit_fd;
+};
+
+FIXTURE_SETUP(scoped_audit)
+{
+ disable_caps(_metadata);
+
+ memset(&self->dgram_address, 0, sizeof(self->dgram_address));
+ set_unix_address(&self->dgram_address, 1);
+
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ self->audit_fd = audit_init_with_exe_filter(&self->audit_filter);
+ EXPECT_LE(0, self->audit_fd);
+ drop_caps(_metadata);
+}
+
+FIXTURE_TEARDOWN_PARENT(scoped_audit)
+{
+ EXPECT_EQ(0, audit_cleanup(-1, NULL));
+}
+
+/* python -c 'print(b"\0selftests-landlock-abstract-unix-".hex().upper())' */
+#define ABSTRACT_SOCKET_PATH_PREFIX \
+ "0073656C6674657374732D6C616E646C6F636B2D61627374726163742D756E69782D"
+
+/*
+ * Simpler version of scoped_domains.connect_to_child, but with audit tests.
+ */
+TEST_F(scoped_audit, connect_to_child)
+{
+ pid_t child;
+ int err_dgram, status;
+ int pipe_child[2], pipe_parent[2];
+ char buf;
+ int dgram_client;
+ struct audit_records records;
+
+ /* Makes sure there is no superfluous logged records. */
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
+
+ ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC));
+ ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
+
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ int dgram_server;
+
+ EXPECT_EQ(0, close(pipe_parent[1]));
+ EXPECT_EQ(0, close(pipe_child[0]));
+
+ /* Waits for the parent to be in a domain. */
+ ASSERT_EQ(1, read(pipe_parent[0], &buf, 1));
+
+ dgram_server = socket(AF_UNIX, SOCK_DGRAM, 0);
+ ASSERT_LE(0, dgram_server);
+ ASSERT_EQ(0, bind(dgram_server, &self->dgram_address.unix_addr,
+ self->dgram_address.unix_addr_len));
+
+ /* Signals to the parent that child is listening. */
+ ASSERT_EQ(1, write(pipe_child[1], ".", 1));
+
+ /* Waits to connect. */
+ ASSERT_EQ(1, read(pipe_parent[0], &buf, 1));
+ EXPECT_EQ(0, close(dgram_server));
+ _exit(_metadata->exit_code);
+ return;
+ }
+ EXPECT_EQ(0, close(pipe_child[1]));
+ EXPECT_EQ(0, close(pipe_parent[0]));
+
+ create_scoped_domain(_metadata, LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET);
+
+ /* Signals that the parent is in a domain, if any. */
+ ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
+
+ dgram_client = socket(AF_UNIX, SOCK_DGRAM, 0);
+ ASSERT_LE(0, dgram_client);
+
+ /* Waits for the child to listen */
+ ASSERT_EQ(1, read(pipe_child[0], &buf, 1));
+ err_dgram = connect(dgram_client, &self->dgram_address.unix_addr,
+ self->dgram_address.unix_addr_len);
+ EXPECT_EQ(-1, err_dgram);
+ EXPECT_EQ(EPERM, errno);
+
+ EXPECT_EQ(
+ 0,
+ audit_match_record(
+ self->audit_fd, AUDIT_LANDLOCK_ACCESS,
+ REGEX_LANDLOCK_PREFIX
+ " blockers=scope\\.abstract_unix_socket path=" ABSTRACT_SOCKET_PATH_PREFIX
+ "[0-9A-F]\\+$",
+ NULL));
+
+ ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
+ EXPECT_EQ(0, close(dgram_client));
+
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ if (WIFSIGNALED(status) || !WIFEXITED(status) ||
+ WEXITSTATUS(status) != EXIT_SUCCESS)
+ _metadata->exit_code = KSFT_FAIL;
+}
+
FIXTURE(scoped_vs_unscoped)
{
struct service_fixture parent_stream_address, parent_dgram_address,
diff --git a/tools/testing/selftests/landlock/scoped_signal_test.c b/tools/testing/selftests/landlock/scoped_signal_test.c
index 475ee62a832d..d8bf33417619 100644
--- a/tools/testing/selftests/landlock/scoped_signal_test.c
+++ b/tools/testing/selftests/landlock/scoped_signal_test.c
@@ -249,47 +249,67 @@ TEST_F(scoped_domains, check_access_signal)
_metadata->exit_code = KSFT_FAIL;
}
-static int thread_pipe[2];
-
enum thread_return {
THREAD_INVALID = 0,
THREAD_SUCCESS = 1,
THREAD_ERROR = 2,
+ THREAD_TEST_FAILED = 3,
};
-void *thread_func(void *arg)
+static void *thread_sync(void *arg)
{
+ const int pipe_read = *(int *)arg;
char buf;
- if (read(thread_pipe[0], &buf, 1) != 1)
+ if (read(pipe_read, &buf, 1) != 1)
return (void *)THREAD_ERROR;
return (void *)THREAD_SUCCESS;
}
-TEST(signal_scoping_threads)
+TEST(signal_scoping_thread_before)
{
- pthread_t no_sandbox_thread, scoped_thread;
+ pthread_t no_sandbox_thread;
enum thread_return ret = THREAD_INVALID;
+ int thread_pipe[2];
drop_caps(_metadata);
ASSERT_EQ(0, pipe2(thread_pipe, O_CLOEXEC));
- ASSERT_EQ(0,
- pthread_create(&no_sandbox_thread, NULL, thread_func, NULL));
+ ASSERT_EQ(0, pthread_create(&no_sandbox_thread, NULL, thread_sync,
+ &thread_pipe[0]));
- /* Restricts the domain after creating the first thread. */
+ /* Enforces restriction after creating the thread. */
create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL);
- ASSERT_EQ(EPERM, pthread_kill(no_sandbox_thread, 0));
- ASSERT_EQ(1, write(thread_pipe[1], ".", 1));
-
- ASSERT_EQ(0, pthread_create(&scoped_thread, NULL, thread_func, NULL));
- ASSERT_EQ(0, pthread_kill(scoped_thread, 0));
- ASSERT_EQ(1, write(thread_pipe[1], ".", 1));
+ EXPECT_EQ(0, pthread_kill(no_sandbox_thread, 0));
+ EXPECT_EQ(1, write(thread_pipe[1], ".", 1));
EXPECT_EQ(0, pthread_join(no_sandbox_thread, (void **)&ret));
EXPECT_EQ(THREAD_SUCCESS, ret);
+
+ EXPECT_EQ(0, close(thread_pipe[0]));
+ EXPECT_EQ(0, close(thread_pipe[1]));
+}
+
+TEST(signal_scoping_thread_after)
+{
+ pthread_t scoped_thread;
+ enum thread_return ret = THREAD_INVALID;
+ int thread_pipe[2];
+
+ drop_caps(_metadata);
+ ASSERT_EQ(0, pipe2(thread_pipe, O_CLOEXEC));
+
+ /* Enforces restriction before creating the thread. */
+ create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL);
+
+ ASSERT_EQ(0, pthread_create(&scoped_thread, NULL, thread_sync,
+ &thread_pipe[0]));
+
+ EXPECT_EQ(0, pthread_kill(scoped_thread, 0));
+ EXPECT_EQ(1, write(thread_pipe[1], ".", 1));
+
EXPECT_EQ(0, pthread_join(scoped_thread, (void **)&ret));
EXPECT_EQ(THREAD_SUCCESS, ret);
@@ -297,6 +317,64 @@ TEST(signal_scoping_threads)
EXPECT_EQ(0, close(thread_pipe[1]));
}
+struct thread_setuid_args {
+ int pipe_read, new_uid;
+};
+
+void *thread_setuid(void *ptr)
+{
+ const struct thread_setuid_args *arg = ptr;
+ char buf;
+
+ if (read(arg->pipe_read, &buf, 1) != 1)
+ return (void *)THREAD_ERROR;
+
+ /* libc's setuid() should update all thread's credentials. */
+ if (getuid() != arg->new_uid)
+ return (void *)THREAD_TEST_FAILED;
+
+ return (void *)THREAD_SUCCESS;
+}
+
+TEST(signal_scoping_thread_setuid)
+{
+ struct thread_setuid_args arg;
+ pthread_t no_sandbox_thread;
+ enum thread_return ret = THREAD_INVALID;
+ int pipe_parent[2];
+ int prev_uid;
+
+ disable_caps(_metadata);
+
+ /* This test does not need to be run as root. */
+ prev_uid = getuid();
+ arg.new_uid = prev_uid + 1;
+ EXPECT_LT(0, arg.new_uid);
+
+ ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
+ arg.pipe_read = pipe_parent[0];
+
+ /* Capabilities must be set before creating a new thread. */
+ set_cap(_metadata, CAP_SETUID);
+ ASSERT_EQ(0, pthread_create(&no_sandbox_thread, NULL, thread_setuid,
+ &arg));
+
+ /* Enforces restriction after creating the thread. */
+ create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL);
+
+ EXPECT_NE(arg.new_uid, getuid());
+ EXPECT_EQ(0, setuid(arg.new_uid));
+ EXPECT_EQ(arg.new_uid, getuid());
+ EXPECT_EQ(1, write(pipe_parent[1], ".", 1));
+
+ EXPECT_EQ(0, pthread_join(no_sandbox_thread, (void **)&ret));
+ EXPECT_EQ(THREAD_SUCCESS, ret);
+
+ clear_cap(_metadata, CAP_SETUID);
+ EXPECT_EQ(0, close(pipe_parent[0]));
+ EXPECT_EQ(0, close(pipe_parent[1]));
+}
+
const short backlog = 10;
static volatile sig_atomic_t signal_received;
diff --git a/tools/testing/selftests/landlock/wait-pipe-sandbox.c b/tools/testing/selftests/landlock/wait-pipe-sandbox.c
new file mode 100644
index 000000000000..87dbc9164430
--- /dev/null
+++ b/tools/testing/selftests/landlock/wait-pipe-sandbox.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Write in a pipe, wait, sandbox itself, test sandboxing, and wait again.
+ *
+ * Used by audit_exec.flags from audit_test.c
+ *
+ * Copyright © 2024-2025 Microsoft Corporation
+ */
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <linux/landlock.h>
+#include <linux/prctl.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+
+#include "wrappers.h"
+
+static int sync_with(int pipe_child, int pipe_parent)
+{
+ char buf;
+
+ /* Signals that we are waiting. */
+ if (write(pipe_child, ".", 1) != 1) {
+ perror("Failed to write to first argument");
+ return 1;
+ }
+
+ /* Waits for the parent do its test. */
+ if (read(pipe_parent, &buf, 1) != 1) {
+ perror("Failed to write to the second argument");
+ return 1;
+ }
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ const struct landlock_ruleset_attr layer2 = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_READ_DIR,
+ };
+ const struct landlock_ruleset_attr layer3 = {
+ .scoped = LANDLOCK_SCOPE_SIGNAL,
+ };
+ int err, pipe_child, pipe_parent, ruleset_fd;
+
+ /* The first argument must be the file descriptor number of a pipe. */
+ if (argc != 3) {
+ fprintf(stderr, "Wrong number of arguments (not two)\n");
+ return 1;
+ }
+
+ pipe_child = atoi(argv[1]);
+ pipe_parent = atoi(argv[2]);
+ /* PR_SET_NO_NEW_PRIVS already set by parent. */
+
+ /* First step to test parent's layer1. */
+ err = sync_with(pipe_child, pipe_parent);
+ if (err)
+ return err;
+
+ /* Tries to send a signal, denied by layer1. */
+ if (!kill(getppid(), 0)) {
+ fprintf(stderr, "Successfully sent a signal to the parent");
+ return 1;
+ }
+
+ /* Second step to test parent's layer1 and our layer2. */
+ err = sync_with(pipe_child, pipe_parent);
+ if (err)
+ return err;
+
+ ruleset_fd = landlock_create_ruleset(&layer2, sizeof(layer2), 0);
+ if (ruleset_fd < 0) {
+ perror("Failed to create the layer2 ruleset");
+ return 1;
+ }
+
+ if (landlock_restrict_self(ruleset_fd, 0)) {
+ perror("Failed to restrict self");
+ return 1;
+ }
+ close(ruleset_fd);
+
+ /* Tries to send a signal, denied by layer1. */
+ if (!kill(getppid(), 0)) {
+ fprintf(stderr, "Successfully sent a signal to the parent");
+ return 1;
+ }
+
+ /* Tries to open ., denied by layer2. */
+ if (open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC) >= 0) {
+ fprintf(stderr, "Successfully opened /");
+ return 1;
+ }
+
+ /* Third step to test our layer2 and layer3. */
+ err = sync_with(pipe_child, pipe_parent);
+ if (err)
+ return err;
+
+ ruleset_fd = landlock_create_ruleset(&layer3, sizeof(layer3), 0);
+ if (ruleset_fd < 0) {
+ perror("Failed to create the layer3 ruleset");
+ return 1;
+ }
+
+ if (landlock_restrict_self(ruleset_fd, 0)) {
+ perror("Failed to restrict self");
+ return 1;
+ }
+ close(ruleset_fd);
+
+ /* Tries to open ., denied by layer2. */
+ if (open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC) >= 0) {
+ fprintf(stderr, "Successfully opened /");
+ return 1;
+ }
+
+ /* Tries to send a signal, denied by layer3. */
+ if (!kill(getppid(), 0)) {
+ fprintf(stderr, "Successfully sent a signal to the parent");
+ return 1;
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index d6edcfcb5be8..530390033929 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -228,4 +228,7 @@ $(OUTPUT)/%:%.S
$(LINK.S) $^ $(LDLIBS) -o $@
endif
-.PHONY: run_tests all clean install emit_tests gen_mods_dir clean_mods_dir
+headers:
+ $(Q)$(MAKE) -C $(top_srcdir) headers
+
+.PHONY: run_tests all clean install emit_tests gen_mods_dir clean_mods_dir headers
diff --git a/tools/testing/selftests/lib/Makefile b/tools/testing/selftests/lib/Makefile
index c52fe3ad8e98..f876bf4744e1 100644
--- a/tools/testing/selftests/lib/Makefile
+++ b/tools/testing/selftests/lib/Makefile
@@ -4,5 +4,5 @@
# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
all:
-TEST_PROGS := printf.sh bitmap.sh prime_numbers.sh scanf.sh
+TEST_PROGS := bitmap.sh
include ../lib.mk
diff --git a/tools/testing/selftests/lib/config b/tools/testing/selftests/lib/config
index dc15aba8d0a3..81a1f64a22e8 100644
--- a/tools/testing/selftests/lib/config
+++ b/tools/testing/selftests/lib/config
@@ -1,5 +1,2 @@
-CONFIG_TEST_PRINTF=m
-CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
-CONFIG_PRIME_NUMBERS=m
CONFIG_TEST_BITOPS=m
diff --git a/tools/testing/selftests/lib/prime_numbers.sh b/tools/testing/selftests/lib/prime_numbers.sh
deleted file mode 100755
index 370b79a9cb2e..000000000000
--- a/tools/testing/selftests/lib/prime_numbers.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-# Checks fast/slow prime_number generation for inconsistencies
-$(dirname $0)/../kselftest/module.sh "prime numbers" prime_numbers selftest=65536
diff --git a/tools/testing/selftests/lib/printf.sh b/tools/testing/selftests/lib/printf.sh
deleted file mode 100755
index 05f4544e87f9..000000000000
--- a/tools/testing/selftests/lib/printf.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-# Tests the printf infrastructure using test_printf kernel module.
-$(dirname $0)/../kselftest/module.sh "printf" test_printf
diff --git a/tools/testing/selftests/lib/scanf.sh b/tools/testing/selftests/lib/scanf.sh
deleted file mode 100755
index b59b8ba561c3..000000000000
--- a/tools/testing/selftests/lib/scanf.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-# Tests the scanf infrastructure using test_scanf kernel module.
-$(dirname $0)/../kselftest/module.sh "scanf" test_scanf
diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh
index e5d06fb40233..46991a029f7c 100644
--- a/tools/testing/selftests/livepatch/functions.sh
+++ b/tools/testing/selftests/livepatch/functions.sh
@@ -10,6 +10,7 @@ SYSFS_KERNEL_DIR="/sys/kernel"
SYSFS_KLP_DIR="$SYSFS_KERNEL_DIR/livepatch"
SYSFS_DEBUG_DIR="$SYSFS_KERNEL_DIR/debug"
SYSFS_KPROBES_DIR="$SYSFS_DEBUG_DIR/kprobes"
+SYSFS_TRACING_DIR="$SYSFS_DEBUG_DIR/tracing"
# Kselftest framework requirement - SKIP code is 4
ksft_skip=4
@@ -62,6 +63,9 @@ function push_config() {
awk -F'[: ]' '{print "file " $1 " line " $2 " " $4}')
FTRACE_ENABLED=$(sysctl --values kernel.ftrace_enabled)
KPROBE_ENABLED=$(cat "$SYSFS_KPROBES_DIR/enabled")
+ TRACING_ON=$(cat "$SYSFS_TRACING_DIR/tracing_on")
+ CURRENT_TRACER=$(cat "$SYSFS_TRACING_DIR/current_tracer")
+ FTRACE_FILTER=$(cat "$SYSFS_TRACING_DIR/set_ftrace_filter")
}
function pop_config() {
@@ -74,6 +78,17 @@ function pop_config() {
if [[ -n "$KPROBE_ENABLED" ]]; then
echo "$KPROBE_ENABLED" > "$SYSFS_KPROBES_DIR/enabled"
fi
+ if [[ -n "$TRACING_ON" ]]; then
+ echo "$TRACING_ON" > "$SYSFS_TRACING_DIR/tracing_on"
+ fi
+ if [[ -n "$CURRENT_TRACER" ]]; then
+ echo "$CURRENT_TRACER" > "$SYSFS_TRACING_DIR/current_tracer"
+ fi
+ if [[ -n "$FTRACE_FILTER" ]]; then
+ echo "$FTRACE_FILTER" \
+ | sed -e "/#### all functions enabled ####/d" \
+ > "$SYSFS_TRACING_DIR/set_ftrace_filter"
+ fi
}
function set_dynamic_debug() {
@@ -306,7 +321,8 @@ function check_result {
result=$(dmesg | awk -v last_dmesg="$LAST_DMESG" 'p; $0 == last_dmesg { p=1 }' | \
grep -e 'livepatch:' -e 'test_klp' | \
grep -v '\(tainting\|taints\) kernel' | \
- sed 's/^\[[ 0-9.]*\] //')
+ sed 's/^\[[ 0-9.]*\] //' | \
+ sed 's/^\[[ ]*[CT][0-9]*\] //')
if [[ "$expect" == "$result" ]] ; then
echo "ok"
@@ -351,3 +367,37 @@ function check_sysfs_value() {
die "Unexpected value in $path: $expected_value vs. $value"
fi
}
+
+# cleanup_tracing() - stop and clean up function tracing
+function cleanup_tracing() {
+ echo 0 > "$SYSFS_TRACING_DIR/tracing_on"
+ echo "" > "$SYSFS_TRACING_DIR/set_ftrace_filter"
+ echo "nop" > "$SYSFS_TRACING_DIR/current_tracer"
+ echo "" > "$SYSFS_TRACING_DIR/trace"
+}
+
+# trace_function(function) - start tracing of a function
+# function - to be traced function
+function trace_function() {
+ local function="$1"; shift
+
+ cleanup_tracing
+
+ echo "function" > "$SYSFS_TRACING_DIR/current_tracer"
+ echo "$function" > "$SYSFS_TRACING_DIR/set_ftrace_filter"
+ echo 1 > "$SYSFS_TRACING_DIR/tracing_on"
+}
+
+# check_traced_functions(functions...) - check whether each function appeared in the trace log
+# functions - list of functions to be checked
+function check_traced_functions() {
+ local function
+
+ for function in "$@"; do
+ if ! grep -Fwq "$function" "$SYSFS_TRACING_DIR/trace" ; then
+ die "Function ($function) did not appear in the trace"
+ fi
+ done
+
+ cleanup_tracing
+}
diff --git a/tools/testing/selftests/livepatch/test-ftrace.sh b/tools/testing/selftests/livepatch/test-ftrace.sh
index fe14f248913a..094176f1a46a 100755
--- a/tools/testing/selftests/livepatch/test-ftrace.sh
+++ b/tools/testing/selftests/livepatch/test-ftrace.sh
@@ -61,4 +61,38 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
% rmmod $MOD_LIVEPATCH"
+# - verify livepatch can load
+# - check if traces have a patched function
+# - reset trace and unload livepatch
+
+start_test "trace livepatched function and check that the live patch remains in effect"
+
+FUNCTION_NAME="livepatch_cmdline_proc_show"
+
+load_lp $MOD_LIVEPATCH
+trace_function "$FUNCTION_NAME"
+
+if [[ "$(cat /proc/cmdline)" == "$MOD_LIVEPATCH: this has been live patched" ]] ; then
+ log "livepatch: ok"
+fi
+
+check_traced_functions "$FUNCTION_NAME"
+
+disable_lp $MOD_LIVEPATCH
+unload_lp $MOD_LIVEPATCH
+
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
+livepatch: enabling patch '$MOD_LIVEPATCH'
+livepatch: '$MOD_LIVEPATCH': initializing patching transition
+livepatch: '$MOD_LIVEPATCH': starting patching transition
+livepatch: '$MOD_LIVEPATCH': completing patching transition
+livepatch: '$MOD_LIVEPATCH': patching complete
+livepatch: ok
+% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled
+livepatch: '$MOD_LIVEPATCH': initializing unpatching transition
+livepatch: '$MOD_LIVEPATCH': starting unpatching transition
+livepatch: '$MOD_LIVEPATCH': completing unpatching transition
+livepatch: '$MOD_LIVEPATCH': unpatching complete
+% rmmod $MOD_LIVEPATCH"
+
exit 0
diff --git a/tools/testing/selftests/livepatch/test-kprobe.sh b/tools/testing/selftests/livepatch/test-kprobe.sh
index 115065156016..b67dfad03d97 100755
--- a/tools/testing/selftests/livepatch/test-kprobe.sh
+++ b/tools/testing/selftests/livepatch/test-kprobe.sh
@@ -5,6 +5,8 @@
. $(dirname $0)/functions.sh
+grep -q kprobe_ftrace_ops /proc/kallsyms || skip "test-kprobe requires CONFIG_KPROBES_ON_FTRACE"
+
MOD_LIVEPATCH=test_klp_livepatch
MOD_KPROBE=test_klp_kprobe
diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c
index c0c53451a16d..5b993924cc3f 100644
--- a/tools/testing/selftests/memfd/memfd_test.c
+++ b/tools/testing/selftests/memfd/memfd_test.c
@@ -171,7 +171,7 @@ static void mfd_fail_new(const char *name, unsigned int flags)
r = sys_memfd_create(name, flags);
if (r >= 0) {
printf("memfd_create(\"%s\", %u) succeeded, but failure expected\n",
- name, flags);
+ name ? name : "NULL", flags);
close(r);
abort();
}
diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore
index 8f01f4da1c0d..121000c28c10 100644
--- a/tools/testing/selftests/mm/.gitignore
+++ b/tools/testing/selftests/mm/.gitignore
@@ -27,6 +27,7 @@ protection_keys_64
madv_populate
uffd-stress
uffd-unit-tests
+uffd-wp-mremap
mlock-intersect-test
mlock-random-test
virtual_address_range
@@ -36,6 +37,9 @@ map_fixed_noreplace
write_to_hugetlbfs
hmm-tests
memfd_secret
+hugetlb_dio
+pkey_sighandler_tests_32
+pkey_sighandler_tests_64
soft-dirty
split_huge_page_test
ksm_tests
@@ -49,7 +53,6 @@ va_high_addr_switch
hugetlb_fault_after_madv
hugetlb_madv_vs_map
mseal_test
-seal_elf
droppable
hugetlb_dio
pkey_sighandler_tests_32
diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile
index 3de23ea4663f..63ce39d024bb 100644
--- a/tools/testing/selftests/mm/Makefile
+++ b/tools/testing/selftests/mm/Makefile
@@ -33,9 +33,16 @@ endif
# LDLIBS.
MAKEFLAGS += --no-builtin-rules
-CFLAGS = -Wall -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) $(TOOLS_INCLUDES)
+CFLAGS = -Wall -O2 -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) $(TOOLS_INCLUDES)
LDLIBS = -lrt -lpthread -lm
+# Some distributions (such as Ubuntu) configure GCC so that _FORTIFY_SOURCE is
+# automatically enabled at -O1 or above. This triggers various unused-result
+# warnings where functions such as read() or write() are called and their
+# return value is not checked. Disable _FORTIFY_SOURCE to silence those
+# warnings.
+CFLAGS += -U_FORTIFY_SOURCE
+
KDIR ?= /lib/modules/$(shell uname -r)/build
ifneq (,$(wildcard $(KDIR)/Module.symvers))
ifneq (,$(wildcard $(KDIR)/include/linux/page_frag_cache.h))
@@ -75,13 +82,13 @@ TEST_GEN_FILES += mrelease_test
TEST_GEN_FILES += mremap_dontunmap
TEST_GEN_FILES += mremap_test
TEST_GEN_FILES += mseal_test
-TEST_GEN_FILES += seal_elf
TEST_GEN_FILES += on-fault-limit
TEST_GEN_FILES += pagemap_ioctl
TEST_GEN_FILES += thuge-gen
TEST_GEN_FILES += transhuge-stress
TEST_GEN_FILES += uffd-stress
TEST_GEN_FILES += uffd-unit-tests
+TEST_GEN_FILES += uffd-wp-mremap
TEST_GEN_FILES += split_huge_page_test
TEST_GEN_FILES += ksm_tests
TEST_GEN_FILES += ksm_functional_tests
@@ -152,11 +159,16 @@ $(TEST_GEN_FILES): vm_util.c thp_settings.c
$(OUTPUT)/uffd-stress: uffd-common.c
$(OUTPUT)/uffd-unit-tests: uffd-common.c
+$(OUTPUT)/uffd-wp-mremap: uffd-common.c
+$(OUTPUT)/protection_keys: pkey_util.c
+$(OUTPUT)/pkey_sighandler_tests: pkey_util.c
ifeq ($(ARCH),x86_64)
BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32))
BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64))
+$(BINARIES_32) $(BINARIES_64): pkey_util.c
+
define gen-target-rule-32
$(1) $(1)_32: $(OUTPUT)/$(1)_32
.PHONY: $(1) $(1)_32
diff --git a/tools/testing/selftests/mm/config b/tools/testing/selftests/mm/config
index 4309916f629e..a28baa536332 100644
--- a/tools/testing/selftests/mm/config
+++ b/tools/testing/selftests/mm/config
@@ -7,3 +7,4 @@ CONFIG_TEST_HMM=m
CONFIG_GUP_TEST=y
CONFIG_TRANSPARENT_HUGEPAGE=y
CONFIG_MEM_SOFT_DIRTY=y
+CONFIG_ANON_VMA_NAME=y
diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c
index 1238e1c5aae1..9446673645eb 100644
--- a/tools/testing/selftests/mm/cow.c
+++ b/tools/testing/selftests/mm/cow.c
@@ -1482,7 +1482,7 @@ static void run_with_zeropage(non_anon_test_fn fn, const char *desc)
}
smem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0);
- if (mem == MAP_FAILED) {
+ if (smem == MAP_FAILED) {
ksft_test_result_fail("mmap() failed\n");
goto munmap;
}
@@ -1583,7 +1583,7 @@ static void run_with_memfd(non_anon_test_fn fn, const char *desc)
goto close;
}
smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
- if (mem == MAP_FAILED) {
+ if (smem == MAP_FAILED) {
ksft_test_result_fail("mmap() failed\n");
goto munmap;
}
@@ -1634,7 +1634,7 @@ static void run_with_tmpfile(non_anon_test_fn fn, const char *desc)
goto close;
}
smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
- if (mem == MAP_FAILED) {
+ if (smem == MAP_FAILED) {
ksft_test_result_fail("mmap() failed\n");
goto munmap;
}
@@ -1684,7 +1684,7 @@ static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc,
goto close;
}
smem = mmap(NULL, hugetlbsize, PROT_READ, MAP_SHARED, fd, 0);
- if (mem == MAP_FAILED) {
+ if (smem == MAP_FAILED) {
ksft_test_result_fail("mmap() failed\n");
goto munmap;
}
@@ -1696,7 +1696,7 @@ static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc,
fn(mem, smem, hugetlbsize);
munmap:
munmap(mem, hugetlbsize);
- if (mem != MAP_FAILED)
+ if (smem != MAP_FAILED)
munmap(smem, hugetlbsize);
close:
close(fd);
diff --git a/tools/testing/selftests/mm/guard-pages.c b/tools/testing/selftests/mm/guard-pages.c
index 7cdf815d0d63..525c50d3ec23 100644
--- a/tools/testing/selftests/mm/guard-pages.c
+++ b/tools/testing/selftests/mm/guard-pages.c
@@ -19,6 +19,8 @@
#include <sys/uio.h>
#include <unistd.h>
+#include "../pidfd/pidfd.h"
+
/*
* Ignore the checkpatch warning, as per the C99 standard, section 7.14.1.1:
*
@@ -50,9 +52,10 @@ static void handle_fatal(int c)
siglongjmp(signal_jmp_buf, c);
}
-static int pidfd_open(pid_t pid, unsigned int flags)
+static ssize_t sys_process_madvise(int pidfd, const struct iovec *iovec,
+ size_t n, int advice, unsigned int flags)
{
- return syscall(SYS_pidfd_open, pid, flags);
+ return syscall(__NR_process_madvise, pidfd, iovec, n, advice, flags);
}
/*
@@ -364,14 +367,10 @@ TEST_F(guard_pages, multi_vma)
TEST_F(guard_pages, process_madvise)
{
const unsigned long page_size = self->page_size;
- pid_t pid = getpid();
- int pidfd = pidfd_open(pid, 0);
char *ptr_region, *ptr1, *ptr2, *ptr3;
ssize_t count;
struct iovec vec[6];
- ASSERT_NE(pidfd, -1);
-
/* Reserve region to map over. */
ptr_region = mmap(NULL, 100 * page_size, PROT_NONE,
MAP_ANON | MAP_PRIVATE, -1, 0);
@@ -419,7 +418,7 @@ TEST_F(guard_pages, process_madvise)
ASSERT_EQ(munmap(&ptr_region[99 * page_size], page_size), 0);
/* Now guard in one step. */
- count = process_madvise(pidfd, vec, 6, MADV_GUARD_INSTALL, 0);
+ count = sys_process_madvise(PIDFD_SELF, vec, 6, MADV_GUARD_INSTALL, 0);
/* OK we don't have permission to do this, skip. */
if (count == -1 && errno == EPERM)
@@ -440,7 +439,7 @@ TEST_F(guard_pages, process_madvise)
ASSERT_FALSE(try_read_write_buf(&ptr3[19 * page_size]));
/* Now do the same with unguard... */
- count = process_madvise(pidfd, vec, 6, MADV_GUARD_REMOVE, 0);
+ count = sys_process_madvise(PIDFD_SELF, vec, 6, MADV_GUARD_REMOVE, 0);
/* ...and everything should now succeed. */
@@ -457,7 +456,6 @@ TEST_F(guard_pages, process_madvise)
ASSERT_EQ(munmap(ptr1, 10 * page_size), 0);
ASSERT_EQ(munmap(ptr2, 5 * page_size), 0);
ASSERT_EQ(munmap(ptr3, 20 * page_size), 0);
- close(pidfd);
}
/* Assert that unmapping ranges does not leave guard markers behind. */
@@ -990,7 +988,7 @@ TEST_F(guard_pages, fork)
MAP_ANON | MAP_PRIVATE, -1, 0);
ASSERT_NE(ptr, MAP_FAILED);
- /* Establish guard apges in the first 5 pages. */
+ /* Establish guard pages in the first 5 pages. */
ASSERT_EQ(madvise(ptr, 5 * page_size, MADV_GUARD_INSTALL), 0);
pid = fork();
@@ -1030,6 +1028,77 @@ TEST_F(guard_pages, fork)
}
/*
+ * Assert expected behaviour after we fork populated ranges of anonymous memory
+ * and then guard and unguard the range.
+ */
+TEST_F(guard_pages, fork_cow)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ pid_t pid;
+ int i;
+
+ /* Map 10 pages. */
+ ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Populate range. */
+ for (i = 0; i < 10 * page_size; i++) {
+ char chr = 'a' + (i % 26);
+
+ ptr[i] = chr;
+ }
+
+ pid = fork();
+ ASSERT_NE(pid, -1);
+ if (!pid) {
+ /* This is the child process now. */
+
+ /* Ensure the range is as expected. */
+ for (i = 0; i < 10 * page_size; i++) {
+ char expected = 'a' + (i % 26);
+ char actual = ptr[i];
+
+ ASSERT_EQ(actual, expected);
+ }
+
+ /* Establish guard pages across the whole range. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0);
+ /* Remove it. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0);
+
+ /*
+ * By removing the guard pages, the page tables will be
+ * cleared. Assert that we are looking at the zero page now.
+ */
+ for (i = 0; i < 10 * page_size; i++) {
+ char actual = ptr[i];
+
+ ASSERT_EQ(actual, '\0');
+ }
+
+ exit(0);
+ }
+
+ /* Parent process. */
+
+ /* Parent simply waits on child. */
+ waitpid(pid, NULL, 0);
+
+ /* Ensure the range is unchanged in parent anon range. */
+ for (i = 0; i < 10 * page_size; i++) {
+ char expected = 'a' + (i % 26);
+ char actual = ptr[i];
+
+ ASSERT_EQ(actual, expected);
+ }
+
+ /* Cleanup. */
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/*
* Assert that forking a process with VMAs that do have VM_WIPEONFORK set
* behave as expected.
*/
diff --git a/tools/testing/selftests/mm/hugepage-mremap.c b/tools/testing/selftests/mm/hugepage-mremap.c
index ada9156cc497..c463d1c09c9b 100644
--- a/tools/testing/selftests/mm/hugepage-mremap.c
+++ b/tools/testing/selftests/mm/hugepage-mremap.c
@@ -15,7 +15,7 @@
#define _GNU_SOURCE
#include <stdlib.h>
#include <stdio.h>
-#include <asm-generic/unistd.h>
+#include <unistd.h>
#include <sys/mman.h>
#include <errno.h>
#include <fcntl.h> /* Definition of O_* constants */
diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c
index 66b4e111b5a2..b61803e36d1c 100644
--- a/tools/testing/selftests/mm/ksm_functional_tests.c
+++ b/tools/testing/selftests/mm/ksm_functional_tests.c
@@ -11,7 +11,7 @@
#include <string.h>
#include <stdbool.h>
#include <stdint.h>
-#include <asm-generic/unistd.h>
+#include <unistd.h>
#include <errno.h>
#include <fcntl.h>
#include <sys/mman.h>
@@ -369,6 +369,7 @@ unmap:
munmap(map, size);
}
+#ifdef __NR_userfaultfd
static void test_unmerge_uffd_wp(void)
{
struct uffdio_writeprotect uffd_writeprotect;
@@ -429,6 +430,7 @@ close_uffd:
unmap:
munmap(map, size);
}
+#endif
/* Verify that KSM can be enabled / queried with prctl. */
static void test_prctl(void)
@@ -684,7 +686,9 @@ int main(int argc, char **argv)
exit(test_child_ksm());
}
+#ifdef __NR_userfaultfd
tests++;
+#endif
ksft_print_header();
ksft_set_plan(tests);
@@ -696,7 +700,9 @@ int main(int argc, char **argv)
test_unmerge();
test_unmerge_zero_pages();
test_unmerge_discarded();
+#ifdef __NR_userfaultfd
test_unmerge_uffd_wp();
+#endif
test_prot_none();
diff --git a/tools/testing/selftests/mm/ksm_tests.c b/tools/testing/selftests/mm/ksm_tests.c
index b748c48908d9..dcdd5bb20f3d 100644
--- a/tools/testing/selftests/mm/ksm_tests.c
+++ b/tools/testing/selftests/mm/ksm_tests.c
@@ -776,7 +776,7 @@ err_out:
int main(int argc, char *argv[])
{
- int ret, opt;
+ int ret = 0, opt;
int prot = 0;
int ksm_scan_limit_sec = KSM_SCAN_LIMIT_SEC_DEFAULT;
int merge_type = KSM_MERGE_TYPE_DEFAULT;
diff --git a/tools/testing/selftests/mm/memfd_secret.c b/tools/testing/selftests/mm/memfd_secret.c
index 74c911aa3aea..9a0597310a76 100644
--- a/tools/testing/selftests/mm/memfd_secret.c
+++ b/tools/testing/selftests/mm/memfd_secret.c
@@ -17,7 +17,7 @@
#include <stdlib.h>
#include <string.h>
-#include <asm-generic/unistd.h>
+#include <unistd.h>
#include <errno.h>
#include <stdio.h>
#include <fcntl.h>
@@ -28,6 +28,8 @@
#define pass(fmt, ...) ksft_test_result_pass(fmt, ##__VA_ARGS__)
#define skip(fmt, ...) ksft_test_result_skip(fmt, ##__VA_ARGS__)
+#ifdef __NR_memfd_secret
+
#define PATTERN 0x55
static const int prot = PROT_READ | PROT_WRITE;
@@ -332,3 +334,13 @@ int main(int argc, char *argv[])
ksft_finished();
}
+
+#else /* __NR_memfd_secret */
+
+int main(int argc, char *argv[])
+{
+ printf("skip: skipping memfd_secret test (missing __NR_memfd_secret)\n");
+ return KSFT_SKIP;
+}
+
+#endif /* __NR_memfd_secret */
diff --git a/tools/testing/selftests/mm/migration.c b/tools/testing/selftests/mm/migration.c
index 64bcbb7151cf..1e3a595fbf01 100644
--- a/tools/testing/selftests/mm/migration.c
+++ b/tools/testing/selftests/mm/migration.c
@@ -204,4 +204,103 @@ TEST_F_TIMEOUT(migration, private_anon_thp, 2*RUNTIME)
ASSERT_EQ(pthread_cancel(self->threads[i]), 0);
}
+/*
+ * migration test with shared anon THP page
+ */
+
+TEST_F_TIMEOUT(migration, shared_anon_thp, 2*RUNTIME)
+{
+ pid_t pid;
+ uint64_t *ptr;
+ int i;
+
+ if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0)
+ SKIP(return, "Not enough threads or NUMA nodes available");
+
+ ptr = mmap(NULL, 2 * TWOMEG, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ ptr = (uint64_t *) ALIGN((uintptr_t) ptr, TWOMEG);
+ ASSERT_EQ(madvise(ptr, TWOMEG, MADV_HUGEPAGE), 0);
+
+ memset(ptr, 0xde, TWOMEG);
+ for (i = 0; i < self->nthreads - 1; i++) {
+ pid = fork();
+ if (!pid) {
+ prctl(PR_SET_PDEATHSIG, SIGHUP);
+ /* Parent may have died before prctl so check now. */
+ if (getppid() == 1)
+ kill(getpid(), SIGHUP);
+ access_mem(ptr);
+ } else {
+ self->pids[i] = pid;
+ }
+ }
+
+ ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0);
+ for (i = 0; i < self->nthreads - 1; i++)
+ ASSERT_EQ(kill(self->pids[i], SIGTERM), 0);
+}
+
+/*
+ * migration test with private anon hugetlb page
+ */
+TEST_F_TIMEOUT(migration, private_anon_htlb, 2*RUNTIME)
+{
+ uint64_t *ptr;
+ int i;
+
+ if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0)
+ SKIP(return, "Not enough threads or NUMA nodes available");
+
+ ptr = mmap(NULL, TWOMEG, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ memset(ptr, 0xde, TWOMEG);
+ for (i = 0; i < self->nthreads - 1; i++)
+ if (pthread_create(&self->threads[i], NULL, access_mem, ptr))
+ perror("Couldn't create thread");
+
+ ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0);
+ for (i = 0; i < self->nthreads - 1; i++)
+ ASSERT_EQ(pthread_cancel(self->threads[i]), 0);
+}
+
+/*
+ * migration test with shared anon hugetlb page
+ */
+TEST_F_TIMEOUT(migration, shared_anon_htlb, 2*RUNTIME)
+{
+ pid_t pid;
+ uint64_t *ptr;
+ int i;
+
+ if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0)
+ SKIP(return, "Not enough threads or NUMA nodes available");
+
+ ptr = mmap(NULL, TWOMEG, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ memset(ptr, 0xde, TWOMEG);
+ for (i = 0; i < self->nthreads - 1; i++) {
+ pid = fork();
+ if (!pid) {
+ prctl(PR_SET_PDEATHSIG, SIGHUP);
+ /* Parent may have died before prctl so check now. */
+ if (getppid() == 1)
+ kill(getpid(), SIGHUP);
+ access_mem(ptr);
+ } else {
+ self->pids[i] = pid;
+ }
+ }
+
+ ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0);
+ for (i = 0; i < self->nthreads - 1; i++)
+ ASSERT_EQ(kill(self->pids[i], SIGTERM), 0);
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/mm/mkdirty.c b/tools/testing/selftests/mm/mkdirty.c
index 1db134063c38..09feeb453646 100644
--- a/tools/testing/selftests/mm/mkdirty.c
+++ b/tools/testing/selftests/mm/mkdirty.c
@@ -9,7 +9,7 @@
*/
#include <fcntl.h>
#include <signal.h>
-#include <asm-generic/unistd.h>
+#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <stdlib.h>
@@ -265,6 +265,7 @@ munmap:
munmap(mmap_mem, mmap_size);
}
+#ifdef __NR_userfaultfd
static void test_uffdio_copy(void)
{
struct uffdio_register uffdio_register;
@@ -280,6 +281,7 @@ static void test_uffdio_copy(void)
dst = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE|MAP_ANON, -1, 0);
if (dst == MAP_FAILED) {
ksft_test_result_fail("mmap() failed\n");
+ free(src);
return;
}
@@ -321,6 +323,7 @@ munmap:
munmap(dst, pagesize);
free(src);
}
+#endif /* __NR_userfaultfd */
int main(void)
{
@@ -333,7 +336,9 @@ int main(void)
thpsize / 1024);
tests += 3;
}
+#ifdef __NR_userfaultfd
tests += 1;
+#endif /* __NR_userfaultfd */
ksft_print_header();
ksft_set_plan(tests);
@@ -363,7 +368,9 @@ int main(void)
if (thpsize)
test_pte_mapped_thp();
/* Placing a fresh page via userfaultfd may set the PTE dirty. */
+#ifdef __NR_userfaultfd
test_uffdio_copy();
+#endif /* __NR_userfaultfd */
err = ksft_get_fail_cnt();
if (err)
diff --git a/tools/testing/selftests/mm/mlock2.h b/tools/testing/selftests/mm/mlock2.h
index 1e5731bab499..4417eaa5cfb7 100644
--- a/tools/testing/selftests/mm/mlock2.h
+++ b/tools/testing/selftests/mm/mlock2.h
@@ -3,7 +3,6 @@
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
-#include <asm-generic/unistd.h>
static int mlock2_(void *start, size_t len, int flags)
{
diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c
index 5a3a9bcba640..bb84476a177f 100644
--- a/tools/testing/selftests/mm/mremap_test.c
+++ b/tools/testing/selftests/mm/mremap_test.c
@@ -34,7 +34,7 @@ struct config {
unsigned long long dest_alignment;
unsigned long long region_size;
int overlapping;
- int dest_preamble_size;
+ unsigned int dest_preamble_size;
};
struct test {
@@ -328,7 +328,7 @@ static void mremap_move_within_range(unsigned int pattern_seed, char *rand_addr)
{
char *test_name = "mremap mremap move within range";
void *src, *dest;
- int i, success = 1;
+ unsigned int i, success = 1;
size_t size = SIZE_MB(20);
void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
@@ -384,7 +384,7 @@ out:
static long long remap_region(struct config c, unsigned int threshold_mb,
char *rand_addr)
{
- void *addr, *src_addr, *dest_addr, *dest_preamble_addr;
+ void *addr, *src_addr, *dest_addr, *dest_preamble_addr = NULL;
unsigned long long t, d;
struct timespec t_start = {0, 0}, t_end = {0, 0};
long long start_ns, end_ns, align_mask, ret, offset;
@@ -569,7 +569,7 @@ static void mremap_move_1mb_from_start(unsigned int pattern_seed,
{
char *test_name = "mremap move 1mb from start at 1MB+256KB aligned src";
void *src = NULL, *dest = NULL;
- int i, success = 1;
+ unsigned int i, success = 1;
/* Config to reuse get_source_mapping() to do an aligned mmap. */
struct config c = {
@@ -636,7 +636,7 @@ out:
static void run_mremap_test_case(struct test test_case, int *failures,
unsigned int threshold_mb,
- unsigned int pattern_seed, char *rand_addr)
+ char *rand_addr)
{
long long remap_time = remap_region(test_case.config, threshold_mb,
rand_addr);
@@ -708,7 +708,8 @@ static int parse_args(int argc, char **argv, unsigned int *threshold_mb,
int main(int argc, char **argv)
{
int failures = 0;
- int i, run_perf_tests;
+ unsigned int i;
+ int run_perf_tests;
unsigned int threshold_mb = VALIDATION_DEFAULT_THRESHOLD;
/* hard-coded test configs */
@@ -831,7 +832,7 @@ int main(int argc, char **argv)
for (i = 0; i < ARRAY_SIZE(test_cases); i++)
run_mremap_test_case(test_cases[i], &failures, threshold_mb,
- pattern_seed, rand_addr);
+ rand_addr);
maps_fp = fopen("/proc/self/maps", "r");
@@ -853,7 +854,7 @@ int main(int argc, char **argv)
"mremap HAVE_MOVE_PMD/PUD optimization time comparison for 1GB region:");
for (i = 0; i < ARRAY_SIZE(perf_test_cases); i++)
run_mremap_test_case(perf_test_cases[i], &failures,
- threshold_mb, pattern_seed,
+ threshold_mb,
rand_addr);
}
diff --git a/tools/testing/selftests/mm/mseal_test.c b/tools/testing/selftests/mm/mseal_test.c
index 01675c412b2a..005f29c86484 100644
--- a/tools/testing/selftests/mm/mseal_test.c
+++ b/tools/testing/selftests/mm/mseal_test.c
@@ -218,7 +218,7 @@ bool seal_support(void)
bool pkey_supported(void)
{
#if defined(__i386__) || defined(__x86_64__) /* arch */
- int pkey = sys_pkey_alloc(0, 0);
+ int pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
if (pkey > 0)
return true;
@@ -802,7 +802,7 @@ static void test_seal_mprotect_partial_mprotect_tail(bool seal)
}
-static void test_seal_mprotect_two_vma_with_gap(bool seal)
+static void test_seal_mprotect_two_vma_with_gap(void)
{
void *ptr;
unsigned long page_size = getpagesize();
@@ -1671,7 +1671,7 @@ static void test_seal_discard_ro_anon_on_pkey(bool seal)
setup_single_address_rw(size, &ptr);
FAIL_TEST_IF_FALSE(ptr != (void *)-1);
- pkey = sys_pkey_alloc(0, 0);
+ pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
FAIL_TEST_IF_FALSE(pkey > 0);
ret = sys_mprotect_pkey((void *)ptr, size, PROT_READ | PROT_WRITE, pkey);
@@ -1683,7 +1683,7 @@ static void test_seal_discard_ro_anon_on_pkey(bool seal)
}
/* sealing doesn't take effect if PKRU allow write. */
- set_pkey(pkey, 0);
+ set_pkey(pkey, PKEY_UNRESTRICTED);
ret = sys_madvise(ptr, size, MADV_DONTNEED);
FAIL_TEST_IF_FALSE(!ret);
@@ -1864,7 +1864,7 @@ static void test_seal_madvise_nodiscard(bool seal)
REPORT_TEST_PASS();
}
-int main(int argc, char **argv)
+int main(void)
{
bool test_seal = seal_support();
@@ -1913,8 +1913,8 @@ int main(int argc, char **argv)
test_seal_mprotect_partial_mprotect(false);
test_seal_mprotect_partial_mprotect(true);
- test_seal_mprotect_two_vma_with_gap(false);
- test_seal_mprotect_two_vma_with_gap(true);
+ test_seal_mprotect_two_vma_with_gap();
+ test_seal_mprotect_two_vma_with_gap();
test_seal_mprotect_merge(false);
test_seal_mprotect_merge(true);
diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c
index bcc73b4e805c..57b4bba2b45f 100644
--- a/tools/testing/selftests/mm/pagemap_ioctl.c
+++ b/tools/testing/selftests/mm/pagemap_ioctl.c
@@ -34,8 +34,8 @@
#define PAGEMAP "/proc/self/pagemap"
int pagemap_fd;
int uffd;
-int page_size;
-int hpage_size;
+unsigned int page_size;
+unsigned int hpage_size;
const char *progname;
#define LEN(region) ((region.end - region.start)/page_size)
@@ -235,7 +235,9 @@ int get_reads(struct page_region *vec, int vec_size)
int sanity_tests_sd(void)
{
- int mem_size, vec_size, ret, ret2, ret3, i, num_pages = 1000, total_pages = 0;
+ unsigned long long mem_size, vec_size, i, total_pages = 0;
+ long ret, ret2, ret3;
+ int num_pages = 1000;
int total_writes, total_reads, reads, count;
struct page_region *vec, *vec2;
char *mem, *m[2];
@@ -321,9 +323,9 @@ int sanity_tests_sd(void)
ret = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0, PAGE_IS_WRITTEN, 0,
0, PAGE_IS_WRITTEN);
if (ret < 0)
- ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
- ksft_test_result(ret == mem_size/(page_size * 2),
+ ksft_test_result((unsigned long long)ret == mem_size/(page_size * 2),
"%s Repeated pattern of written and non-written pages\n", __func__);
/* 4. Repeated pattern of written and non-written pages in parts */
@@ -331,21 +333,21 @@ int sanity_tests_sd(void)
PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
num_pages/2 - 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
if (ret < 0)
- ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
ret2 = pagemap_ioctl(mem, mem_size, vec, 2, 0, 0, PAGE_IS_WRITTEN, 0, 0,
PAGE_IS_WRITTEN);
if (ret2 < 0)
- ksft_exit_fail_msg("error %d %d %s\n", ret2, errno, strerror(errno));
+ ksft_exit_fail_msg("error %ld %d %s\n", ret2, errno, strerror(errno));
ret3 = pagemap_ioctl(mem, mem_size, vec, vec_size,
PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
if (ret3 < 0)
- ksft_exit_fail_msg("error %d %d %s\n", ret3, errno, strerror(errno));
+ ksft_exit_fail_msg("error %ld %d %s\n", ret3, errno, strerror(errno));
ksft_test_result((ret + ret3) == num_pages/2 && ret2 == 2,
- "%s Repeated pattern of written and non-written pages in parts %d %d %d\n",
+ "%s Repeated pattern of written and non-written pages in parts %ld %ld %ld\n",
__func__, ret, ret3, ret2);
/* 5. Repeated pattern of written and non-written pages max_pages */
@@ -357,13 +359,13 @@ int sanity_tests_sd(void)
PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
num_pages/2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
if (ret < 0)
- ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
ret2 = pagemap_ioctl(mem, mem_size, vec, vec_size,
PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
if (ret2 < 0)
- ksft_exit_fail_msg("error %d %d %s\n", ret2, errno, strerror(errno));
+ ksft_exit_fail_msg("error %ld %d %s\n", ret2, errno, strerror(errno));
ksft_test_result(ret == num_pages/2 && ret2 == 1,
"%s Repeated pattern of written and non-written pages max_pages\n",
@@ -378,12 +380,12 @@ int sanity_tests_sd(void)
PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
if (ret < 0)
- ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
ret2 = pagemap_ioctl(mem, mem_size, vec2, vec_size, 0, 0,
PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
if (ret2 < 0)
- ksft_exit_fail_msg("error %d %d %s\n", ret2, errno, strerror(errno));
+ ksft_exit_fail_msg("error %ld %d %s\n", ret2, errno, strerror(errno));
ksft_test_result(ret == 1 && LEN(vec[0]) == 2 &&
vec[0].start == (uintptr_t)(mem + page_size) &&
@@ -416,7 +418,7 @@ int sanity_tests_sd(void)
ret = pagemap_ioctl(m[1], mem_size, vec, 1, 0, 0, PAGE_IS_WRITTEN, 0, 0,
PAGE_IS_WRITTEN);
if (ret < 0)
- ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
ksft_test_result(ret == 1 && LEN(vec[0]) == mem_size/page_size,
"%s Two regions\n", __func__);
@@ -448,7 +450,7 @@ int sanity_tests_sd(void)
PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0,
PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
if (ret < 0)
- ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
for (i = 0; i < mem_size/page_size; i += 2)
mem[i * page_size]++;
@@ -457,7 +459,7 @@ int sanity_tests_sd(void)
PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
mem_size/(page_size*5), PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
if (ret < 0)
- ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
total_pages += ret;
@@ -465,7 +467,7 @@ int sanity_tests_sd(void)
PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
mem_size/(page_size*5), PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
if (ret < 0)
- ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
total_pages += ret;
@@ -473,7 +475,7 @@ int sanity_tests_sd(void)
PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
mem_size/(page_size*5), PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
if (ret < 0)
- ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
total_pages += ret;
@@ -515,9 +517,9 @@ int sanity_tests_sd(void)
vec_size, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
if (ret < 0)
- ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
- if (ret > vec_size)
+ if ((unsigned long)ret > vec_size)
break;
reads = get_reads(vec, ret);
@@ -554,63 +556,63 @@ int sanity_tests_sd(void)
ret = pagemap_ioc(mem, 0, vec, vec_size, 0,
0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
if (ret < 0)
- ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
ksft_test_result(ret == 0 && walk_end == (long)mem,
"Walk_end: Same start and end address\n");
ret = pagemap_ioc(mem, 0, vec, vec_size, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
if (ret < 0)
- ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
ksft_test_result(ret == 0 && walk_end == (long)mem,
"Walk_end: Same start and end with WP\n");
ret = pagemap_ioc(mem, 0, vec, 0, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
if (ret < 0)
- ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
ksft_test_result(ret == 0 && walk_end == (long)mem,
"Walk_end: Same start and end with 0 output buffer\n");
ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
if (ret < 0)
- ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size),
"Walk_end: Big vec\n");
ret = pagemap_ioc(mem, mem_size, vec, 1, 0,
0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
if (ret < 0)
- ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size),
"Walk_end: vec of minimum length\n");
ret = pagemap_ioc(mem, mem_size, vec, 1, 0,
vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
if (ret < 0)
- ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size),
"Walk_end: Max pages specified\n");
ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
vec_size/2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
if (ret < 0)
- ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size/2),
"Walk_end: Half max pages\n");
ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
1, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
if (ret < 0)
- ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size),
"Walk_end: 1 max page\n");
ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
-1, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
if (ret < 0)
- ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size),
"Walk_end: max pages\n");
@@ -621,49 +623,49 @@ int sanity_tests_sd(void)
ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
if (ret < 0)
- ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
- ksft_test_result(ret == vec_size/2 && walk_end == (long)(mem + mem_size),
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result((unsigned long)ret == vec_size/2 && walk_end == (long)(mem + mem_size),
"Walk_end sparse: Big vec\n");
ret = pagemap_ioc(mem, mem_size, vec, 1, 0,
0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
if (ret < 0)
- ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size * 2),
"Walk_end sparse: vec of minimum length\n");
ret = pagemap_ioc(mem, mem_size, vec, 1, 0,
vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
if (ret < 0)
- ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size * 2),
"Walk_end sparse: Max pages specified\n");
ret = pagemap_ioc(mem, mem_size, vec, vec_size/2, 0,
vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
if (ret < 0)
- ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
- ksft_test_result(ret == vec_size/2 && walk_end == (long)(mem + mem_size),
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result((unsigned long)ret == vec_size/2 && walk_end == (long)(mem + mem_size),
"Walk_end sparse: Max pages specified\n");
ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
if (ret < 0)
- ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
- ksft_test_result(ret == vec_size/2 && walk_end == (long)(mem + mem_size),
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result((unsigned long)ret == vec_size/2 && walk_end == (long)(mem + mem_size),
"Walk_end sparse: Max pages specified\n");
ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
vec_size/2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
if (ret < 0)
- ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
- ksft_test_result(ret == vec_size/2 && walk_end == (long)(mem + mem_size),
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result((unsigned long)ret == vec_size/2 && walk_end == (long)(mem + mem_size),
"Walk_endsparse : Half max pages\n");
ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
1, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
if (ret < 0)
- ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size * 2),
"Walk_end: 1 max page\n");
@@ -674,9 +676,10 @@ int sanity_tests_sd(void)
return 0;
}
-int base_tests(char *prefix, char *mem, int mem_size, int skip)
+int base_tests(char *prefix, char *mem, unsigned long long mem_size, int skip)
{
- int vec_size, written;
+ unsigned long long vec_size;
+ int written;
struct page_region *vec, *vec2;
if (skip) {
@@ -799,8 +802,8 @@ int hpage_unit_tests(void)
char *map;
int ret, ret2;
size_t num_pages = 10;
- int map_size = hpage_size * num_pages;
- int vec_size = map_size/page_size;
+ unsigned long long map_size = hpage_size * num_pages;
+ unsigned long long vec_size = map_size/page_size;
struct page_region *vec, *vec2;
vec = malloc(sizeof(struct page_region) * vec_size);
@@ -1047,7 +1050,8 @@ static void test_simple(void)
int sanity_tests(void)
{
- int mem_size, vec_size, ret, fd, i, buf_size;
+ unsigned long long mem_size, vec_size;
+ int ret, fd, i, buf_size;
struct page_region *vec;
char *mem, *fmem;
struct stat sbuf;
@@ -1312,7 +1316,9 @@ static ssize_t get_dirty_pages_reset(char *mem, unsigned int count,
{
struct pm_scan_arg arg = {0};
struct page_region rgns[256];
- int i, j, cnt, ret;
+ unsigned long long i, j;
+ long ret;
+ int cnt;
arg.size = sizeof(struct pm_scan_arg);
arg.start = (uintptr_t)mem;
@@ -1330,7 +1336,7 @@ static ssize_t get_dirty_pages_reset(char *mem, unsigned int count,
ksft_exit_fail_msg("ioctl failed\n");
cnt = 0;
- for (i = 0; i < ret; ++i) {
+ for (i = 0; i < (unsigned long)ret; ++i) {
if (rgns[i].categories != PAGE_IS_WRITTEN)
ksft_exit_fail_msg("wrong flags\n");
@@ -1384,9 +1390,10 @@ void *thread_proc(void *mem)
static void transact_test(int page_size)
{
unsigned int i, count, extra_pages;
+ unsigned int c;
pthread_t th;
char *mem;
- int ret, c;
+ int ret;
if (pthread_barrier_init(&start_barrier, NULL, nthreads + 1))
ksft_exit_fail_msg("pthread_barrier_init\n");
@@ -1405,9 +1412,9 @@ static void transact_test(int page_size)
memset(mem, 0, 0x1000 * nthreads * pages_per_thread);
count = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1, page_size);
- ksft_test_result(count > 0, "%s count %d\n", __func__, count);
+ ksft_test_result(count > 0, "%s count %u\n", __func__, count);
count = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1, page_size);
- ksft_test_result(count == 0, "%s count %d\n", __func__, count);
+ ksft_test_result(count == 0, "%s count %u\n", __func__, count);
finish = 0;
for (i = 0; i < nthreads; ++i)
@@ -1429,7 +1436,7 @@ static void transact_test(int page_size)
ksft_exit_fail_msg("pthread_barrier_wait\n");
if (count > nthreads * access_per_thread)
- ksft_exit_fail_msg("Too big count %d expected %d, iter %d\n",
+ ksft_exit_fail_msg("Too big count %u expected %u, iter %u\n",
count, nthreads * access_per_thread, i);
c = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1, page_size);
@@ -1454,7 +1461,7 @@ static void transact_test(int page_size)
* access and application gets page fault again for the same write.
*/
if (count < nthreads * access_per_thread) {
- ksft_test_result_fail("Lost update, iter %d, %d vs %d.\n", i, count,
+ ksft_test_result_fail("Lost update, iter %u, %u vs %u.\n", i, count,
nthreads * access_per_thread);
return;
}
@@ -1467,15 +1474,16 @@ static void transact_test(int page_size)
finish = 1;
pthread_barrier_wait(&end_barrier);
- ksft_test_result_pass("%s Extra pages %u (%.1lf%%), extra thread faults %d.\n", __func__,
+ ksft_test_result_pass("%s Extra pages %u (%.1lf%%), extra thread faults %u.\n", __func__,
extra_pages,
100.0 * extra_pages / (iter_count * nthreads * access_per_thread),
extra_thread_faults);
}
-int main(int argc, char *argv[])
+int main(int __attribute__((unused)) argc, char *argv[])
{
- int mem_size, shmid, buf_size, fd, i, ret;
+ int shmid, buf_size, fd, i, ret;
+ unsigned long long mem_size;
char *mem, *map, *fmem;
struct stat sbuf;
diff --git a/tools/testing/selftests/mm/pkey-arm64.h b/tools/testing/selftests/mm/pkey-arm64.h
index d9d2100eafc0..8e9685e03c44 100644
--- a/tools/testing/selftests/mm/pkey-arm64.h
+++ b/tools/testing/selftests/mm/pkey-arm64.h
@@ -30,7 +30,7 @@
#define NR_PKEYS 8
#define NR_RESERVED_PKEYS 1 /* pkey-0 */
-#define PKEY_ALLOW_ALL 0x77777777
+#define PKEY_REG_ALLOW_ALL 0x77777777
#define PKEY_REG_ALLOW_NONE 0x0
#define PKEY_BITS_PER_PKEY 4
@@ -81,11 +81,11 @@ static inline int get_arch_reserved_keys(void)
return NR_RESERVED_PKEYS;
}
-void expect_fault_on_read_execonly_key(void *p1, int pkey)
+static inline void expect_fault_on_read_execonly_key(void *p1, int pkey)
{
}
-void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey)
+static inline void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey)
{
return PTR_ERR_ENOTSUP;
}
diff --git a/tools/testing/selftests/mm/pkey-helpers.h b/tools/testing/selftests/mm/pkey-helpers.h
index f7cfe163b0ff..ea404f80e6cb 100644
--- a/tools/testing/selftests/mm/pkey-helpers.h
+++ b/tools/testing/selftests/mm/pkey-helpers.h
@@ -13,22 +13,23 @@
#include <ucontext.h>
#include <sys/mman.h>
+#include <linux/mman.h>
+#include <linux/types.h>
+
#include "../kselftest.h"
/* Define some kernel-like types */
-#define u8 __u8
-#define u16 __u16
-#define u32 __u32
-#define u64 __u64
+typedef __u8 u8;
+typedef __u16 u16;
+typedef __u32 u32;
+typedef __u64 u64;
#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP)
#ifndef DEBUG_LEVEL
#define DEBUG_LEVEL 0
#endif
-#define DPRINT_IN_SIGNAL_BUF_SIZE 4096
extern int dprint_in_signal;
-extern char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
extern int test_nr;
extern int iteration_nr;
@@ -83,17 +84,18 @@ extern void abort_hooks(void);
#ifndef noinline
# define noinline __attribute__((noinline))
#endif
+#ifndef __maybe_unused
+# define __maybe_unused __attribute__((__unused__))
+#endif
-noinline int read_ptr(int *ptr)
-{
- /* Keep GCC from optimizing this away somehow */
- barrier();
- return *ptr;
-}
-
-void expected_pkey_fault(int pkey);
int sys_pkey_alloc(unsigned long flags, unsigned long init_val);
int sys_pkey_free(unsigned long pkey);
+int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
+ unsigned long pkey);
+
+/* For functions called from protection_keys.c only */
+noinline int read_ptr(int *ptr);
+void expected_pkey_fault(int pkey);
int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
unsigned long pkey);
void record_pkey_malloc(void *ptr, long size, int prot);
@@ -171,38 +173,6 @@ static inline void write_pkey_reg(u64 pkey_reg)
pkey_reg, __read_pkey_reg());
}
-/*
- * These are technically racy. since something could
- * change PKEY register between the read and the write.
- */
-static inline void __pkey_access_allow(int pkey, int do_allow)
-{
- u64 pkey_reg = read_pkey_reg();
- int bit = pkey * 2;
-
- if (do_allow)
- pkey_reg &= (1<<bit);
- else
- pkey_reg |= (1<<bit);
-
- dprintf4("pkey_reg now: %016llx\n", read_pkey_reg());
- write_pkey_reg(pkey_reg);
-}
-
-static inline void __pkey_write_allow(int pkey, int do_allow_write)
-{
- u64 pkey_reg = read_pkey_reg();
- int bit = pkey * 2 + 1;
-
- if (do_allow_write)
- pkey_reg &= (1<<bit);
- else
- pkey_reg |= (1<<bit);
-
- write_pkey_reg(pkey_reg);
- dprintf4("pkey_reg now: %016llx\n", read_pkey_reg());
-}
-
#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1))
#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1))
#define ALIGN_PTR_UP(p, ptr_align_to) \
@@ -224,7 +194,7 @@ static inline u32 *siginfo_get_pkey_ptr(siginfo_t *si)
static inline int kernel_has_pkeys(void)
{
/* try allocating a key and see if it succeeds */
- int ret = sys_pkey_alloc(0, 0);
+ int ret = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
if (ret <= 0) {
return 0;
}
diff --git a/tools/testing/selftests/mm/pkey-powerpc.h b/tools/testing/selftests/mm/pkey-powerpc.h
index 3d0c0bdae5bc..1bad310d282a 100644
--- a/tools/testing/selftests/mm/pkey-powerpc.h
+++ b/tools/testing/selftests/mm/pkey-powerpc.h
@@ -91,7 +91,7 @@ static inline int get_arch_reserved_keys(void)
return NR_RESERVED_PKEYS_64K_3KEYS;
}
-void expect_fault_on_read_execonly_key(void *p1, int pkey)
+static inline void expect_fault_on_read_execonly_key(void *p1, int pkey)
{
/*
* powerpc does not allow userspace to change permissions of exec-only
@@ -105,7 +105,7 @@ void expect_fault_on_read_execonly_key(void *p1, int pkey)
/* 4-byte instructions * 16384 = 64K page */
#define __page_o_noops() asm(".rept 16384 ; nop; .endr")
-void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey)
+static inline void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey)
{
void *ptr;
int ret;
diff --git a/tools/testing/selftests/mm/pkey-x86.h b/tools/testing/selftests/mm/pkey-x86.h
index ac91777c8917..f7ecd335df1e 100644
--- a/tools/testing/selftests/mm/pkey-x86.h
+++ b/tools/testing/selftests/mm/pkey-x86.h
@@ -113,7 +113,7 @@ static inline u32 pkey_bit_position(int pkey)
#define XSTATE_PKEY 0x200
#define XSTATE_BV_OFFSET 512
-int pkey_reg_xstate_offset(void)
+static inline int pkey_reg_xstate_offset(void)
{
unsigned int eax;
unsigned int ebx;
@@ -148,7 +148,7 @@ static inline int get_arch_reserved_keys(void)
return NR_RESERVED_PKEYS;
}
-void expect_fault_on_read_execonly_key(void *p1, int pkey)
+static inline void expect_fault_on_read_execonly_key(void *p1, int pkey)
{
int ptr_contents;
@@ -157,7 +157,7 @@ void expect_fault_on_read_execonly_key(void *p1, int pkey)
expected_pkey_fault(pkey);
}
-void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey)
+static inline void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey)
{
return PTR_ERR_ENOTSUP;
}
diff --git a/tools/testing/selftests/mm/pkey_sighandler_tests.c b/tools/testing/selftests/mm/pkey_sighandler_tests.c
index c593a426341c..b5e076a564c9 100644
--- a/tools/testing/selftests/mm/pkey_sighandler_tests.c
+++ b/tools/testing/selftests/mm/pkey_sighandler_tests.c
@@ -32,11 +32,9 @@
#define STACK_SIZE PTHREAD_STACK_MIN
-void expected_pkey_fault(int pkey) {}
-
-pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
-pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
-siginfo_t siginfo = {0};
+static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
+static siginfo_t siginfo = {0};
/*
* We need to use inline assembly instead of glibc's syscall because glibc's
@@ -163,7 +161,7 @@ static void *thread_segv_with_pkey0_disabled(void *ptr)
__write_pkey_reg(pkey_reg_restrictive_default());
/* Segfault (with SEGV_MAPERR) */
- *(int *) (0x1) = 1;
+ *(volatile int *)NULL = 1;
return NULL;
}
@@ -179,7 +177,6 @@ static void *thread_segv_pkuerr_stack(void *ptr)
static void *thread_segv_maperr_ptr(void *ptr)
{
stack_t *stack = ptr;
- int *bad = (int *)1;
u64 pkey_reg;
/*
@@ -195,7 +192,7 @@ static void *thread_segv_maperr_ptr(void *ptr)
__write_pkey_reg(pkey_reg);
/* Segfault */
- *bad = 1;
+ *(volatile int *)NULL = 1;
syscall_raw(SYS_exit, 0, 0, 0, 0, 0, 0);
return NULL;
}
@@ -234,7 +231,7 @@ static void test_sigsegv_handler_with_pkey0_disabled(void)
ksft_test_result(siginfo.si_signo == SIGSEGV &&
siginfo.si_code == SEGV_MAPERR &&
- siginfo.si_addr == (void *)1,
+ siginfo.si_addr == NULL,
"%s\n", __func__);
}
@@ -314,11 +311,11 @@ static void test_sigsegv_handler_with_different_pkey_for_stack(void)
__write_pkey_reg(pkey_reg);
/* Protect the new stack with MPK 1 */
- pkey = pkey_alloc(0, 0);
- pkey_mprotect(stack, STACK_SIZE, PROT_READ | PROT_WRITE, pkey);
+ pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
+ sys_mprotect_pkey(stack, STACK_SIZE, PROT_READ | PROT_WRITE, pkey);
/* Set up alternate signal stack that will use the default MPK */
- sigstack.ss_sp = mmap(0, STACK_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC,
+ sigstack.ss_sp = mmap(0, STACK_SIZE, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
sigstack.ss_flags = 0;
sigstack.ss_size = STACK_SIZE;
@@ -349,7 +346,7 @@ static void test_sigsegv_handler_with_different_pkey_for_stack(void)
ksft_test_result(siginfo.si_signo == SIGSEGV &&
siginfo.si_code == SEGV_MAPERR &&
- siginfo.si_addr == (void *)1,
+ siginfo.si_addr == NULL,
"%s\n", __func__);
}
@@ -487,11 +484,11 @@ static void test_pkru_sigreturn(void)
__write_pkey_reg(pkey_reg);
/* Protect the stack with MPK 2 */
- pkey = pkey_alloc(0, 0);
- pkey_mprotect(stack, STACK_SIZE, PROT_READ | PROT_WRITE, pkey);
+ pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
+ sys_mprotect_pkey(stack, STACK_SIZE, PROT_READ | PROT_WRITE, pkey);
/* Set up alternate signal stack that will use the default MPK */
- sigstack.ss_sp = mmap(0, STACK_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC,
+ sigstack.ss_sp = mmap(0, STACK_SIZE, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
sigstack.ss_flags = 0;
sigstack.ss_size = STACK_SIZE;
@@ -538,6 +535,9 @@ int main(int argc, char *argv[])
ksft_print_header();
ksft_set_plan(ARRAY_SIZE(pkey_tests));
+ if (!is_pkeys_supported())
+ ksft_exit_skip("pkeys not supported\n");
+
for (i = 0; i < ARRAY_SIZE(pkey_tests); i++)
(*pkey_tests[i])();
diff --git a/tools/testing/selftests/mm/pkey_util.c b/tools/testing/selftests/mm/pkey_util.c
new file mode 100644
index 000000000000..ca4ad0d44ab2
--- /dev/null
+++ b/tools/testing/selftests/mm/pkey_util.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "pkey-helpers.h"
+
+int sys_pkey_alloc(unsigned long flags, unsigned long init_val)
+{
+ int ret = syscall(SYS_pkey_alloc, flags, init_val);
+ dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n",
+ __func__, flags, init_val, ret, errno);
+ return ret;
+}
+
+int sys_pkey_free(unsigned long pkey)
+{
+ int ret = syscall(SYS_pkey_free, pkey);
+ dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret);
+ return ret;
+}
+
+int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
+ unsigned long pkey)
+{
+ int sret;
+
+ dprintf2("%s(0x%p, %zx, prot=%lx, pkey=%lx)\n", __func__,
+ ptr, size, orig_prot, pkey);
+
+ errno = 0;
+ sret = syscall(__NR_pkey_mprotect, ptr, size, orig_prot, pkey);
+ if (errno) {
+ dprintf2("SYS_mprotect_key sret: %d\n", sret);
+ dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot);
+ dprintf2("SYS_mprotect_key failed, errno: %d\n", errno);
+ if (DEBUG_LEVEL >= 2)
+ perror("SYS_mprotect_pkey");
+ }
+ return sret;
+}
diff --git a/tools/testing/selftests/mm/protection_keys.c b/tools/testing/selftests/mm/protection_keys.c
index 4990f7ab4cb7..23ebec367015 100644
--- a/tools/testing/selftests/mm/protection_keys.c
+++ b/tools/testing/selftests/mm/protection_keys.c
@@ -42,7 +42,7 @@
#include <sys/wait.h>
#include <sys/stat.h>
#include <fcntl.h>
-#include <asm-generic/unistd.h>
+#include <unistd.h>
#include <sys/ptrace.h>
#include <setjmp.h>
@@ -53,9 +53,15 @@ int test_nr;
u64 shadow_pkey_reg;
int dprint_in_signal;
-char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
-void cat_into_file(char *str, char *file)
+noinline int read_ptr(int *ptr)
+{
+ /* Keep GCC from optimizing this away somehow */
+ barrier();
+ return *ptr;
+}
+
+static void cat_into_file(char *str, char *file)
{
int fd = open(file, O_RDWR);
int ret;
@@ -82,7 +88,7 @@ void cat_into_file(char *str, char *file)
#if CONTROL_TRACING > 0
static int warned_tracing;
-int tracing_root_ok(void)
+static int tracing_root_ok(void)
{
if (geteuid() != 0) {
if (!warned_tracing)
@@ -95,7 +101,7 @@ int tracing_root_ok(void)
}
#endif
-void tracing_on(void)
+static void tracing_on(void)
{
#if CONTROL_TRACING > 0
#define TRACEDIR "/sys/kernel/tracing"
@@ -119,7 +125,7 @@ void tracing_on(void)
#endif
}
-void tracing_off(void)
+static void tracing_off(void)
{
#if CONTROL_TRACING > 0
if (!tracing_root_ok())
@@ -153,7 +159,7 @@ __attribute__((__aligned__(65536)))
#else
__attribute__((__aligned__(PAGE_SIZE)))
#endif
-void lots_o_noops_around_write(int *write_to_me)
+static void lots_o_noops_around_write(int *write_to_me)
{
dprintf3("running %s()\n", __func__);
__page_o_noops();
@@ -164,7 +170,7 @@ void lots_o_noops_around_write(int *write_to_me)
dprintf3("%s() done\n", __func__);
}
-void dump_mem(void *dumpme, int len_bytes)
+static void dump_mem(void *dumpme, int len_bytes)
{
char *c = (void *)dumpme;
int i;
@@ -207,7 +213,7 @@ static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags)
return 0;
}
-void pkey_disable_set(int pkey, int flags)
+static void pkey_disable_set(int pkey, int flags)
{
unsigned long syscall_flags = 0;
int ret;
@@ -245,7 +251,7 @@ void pkey_disable_set(int pkey, int flags)
pkey, flags);
}
-void pkey_disable_clear(int pkey, int flags)
+static void pkey_disable_clear(int pkey, int flags)
{
unsigned long syscall_flags = 0;
int ret;
@@ -271,19 +277,19 @@ void pkey_disable_clear(int pkey, int flags)
pkey, read_pkey_reg());
}
-void pkey_write_allow(int pkey)
+__maybe_unused static void pkey_write_allow(int pkey)
{
pkey_disable_clear(pkey, PKEY_DISABLE_WRITE);
}
-void pkey_write_deny(int pkey)
+__maybe_unused static void pkey_write_deny(int pkey)
{
pkey_disable_set(pkey, PKEY_DISABLE_WRITE);
}
-void pkey_access_allow(int pkey)
+__maybe_unused static void pkey_access_allow(int pkey)
{
pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS);
}
-void pkey_access_deny(int pkey)
+__maybe_unused static void pkey_access_deny(int pkey)
{
pkey_disable_set(pkey, PKEY_DISABLE_ACCESS);
}
@@ -301,9 +307,9 @@ static char *si_code_str(int si_code)
return "UNKNOWN";
}
-int pkey_faults;
-int last_si_pkey = -1;
-void signal_handler(int signum, siginfo_t *si, void *vucontext)
+static int pkey_faults;
+static int last_si_pkey = -1;
+static void signal_handler(int signum, siginfo_t *si, void *vucontext)
{
ucontext_t *uctxt = vucontext;
int trapno;
@@ -390,27 +396,21 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
/* restore access and let the faulting instruction continue */
pkey_access_allow(siginfo_pkey);
#elif defined(__aarch64__)
- aarch64_write_signal_pkey(uctxt, PKEY_ALLOW_ALL);
+ aarch64_write_signal_pkey(uctxt, PKEY_REG_ALLOW_ALL);
#endif /* arch */
pkey_faults++;
dprintf1("<<<<==================================================\n");
dprint_in_signal = 0;
}
-int wait_all_children(void)
-{
- int status;
- return waitpid(-1, &status, 0);
-}
-
-void sig_chld(int x)
+static void sig_chld(int x)
{
dprint_in_signal = 1;
dprintf2("[%d] SIGCHLD: %d\n", getpid(), x);
dprint_in_signal = 0;
}
-void setup_sigsegv_handler(void)
+static void setup_sigsegv_handler(void)
{
int r, rs;
struct sigaction newact;
@@ -436,13 +436,13 @@ void setup_sigsegv_handler(void)
pkey_assert(r == 0);
}
-void setup_handlers(void)
+static void setup_handlers(void)
{
signal(SIGCHLD, &sig_chld);
setup_sigsegv_handler();
}
-pid_t fork_lazy_child(void)
+static pid_t fork_lazy_child(void)
{
pid_t forkret;
@@ -460,38 +460,10 @@ pid_t fork_lazy_child(void)
return forkret;
}
-int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
- unsigned long pkey)
-{
- int sret;
-
- dprintf2("%s(0x%p, %zx, prot=%lx, pkey=%lx)\n", __func__,
- ptr, size, orig_prot, pkey);
-
- errno = 0;
- sret = syscall(__NR_pkey_mprotect, ptr, size, orig_prot, pkey);
- if (errno) {
- dprintf2("SYS_mprotect_key sret: %d\n", sret);
- dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot);
- dprintf2("SYS_mprotect_key failed, errno: %d\n", errno);
- if (DEBUG_LEVEL >= 2)
- perror("SYS_mprotect_pkey");
- }
- return sret;
-}
-
-int sys_pkey_alloc(unsigned long flags, unsigned long init_val)
-{
- int ret = syscall(SYS_pkey_alloc, flags, init_val);
- dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n",
- __func__, flags, init_val, ret, errno);
- return ret;
-}
-
-int alloc_pkey(void)
+static int alloc_pkey(void)
{
int ret;
- unsigned long init_val = 0x0;
+ unsigned long init_val = PKEY_UNRESTRICTED;
dprintf1("%s()::%d, pkey_reg: 0x%016llx shadow: %016llx\n",
__func__, __LINE__, __read_pkey_reg(), shadow_pkey_reg);
@@ -534,19 +506,12 @@ int alloc_pkey(void)
return ret;
}
-int sys_pkey_free(unsigned long pkey)
-{
- int ret = syscall(SYS_pkey_free, pkey);
- dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret);
- return ret;
-}
-
/*
* I had a bug where pkey bits could be set by mprotect() but
* not cleared. This ensures we get lots of random bit sets
* and clears on the vma and pte pkey bits.
*/
-int alloc_random_pkey(void)
+static int alloc_random_pkey(void)
{
int max_nr_pkey_allocs;
int ret;
@@ -629,7 +594,7 @@ struct pkey_malloc_record {
};
struct pkey_malloc_record *pkey_malloc_records;
struct pkey_malloc_record *pkey_last_malloc_record;
-long nr_pkey_malloc_records;
+static long nr_pkey_malloc_records;
void record_pkey_malloc(void *ptr, long size, int prot)
{
long i;
@@ -667,7 +632,7 @@ void record_pkey_malloc(void *ptr, long size, int prot)
nr_pkey_malloc_records++;
}
-void free_pkey_malloc(void *ptr)
+static void free_pkey_malloc(void *ptr)
{
long i;
int ret;
@@ -694,8 +659,7 @@ void free_pkey_malloc(void *ptr)
pkey_assert(false);
}
-
-void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey)
+static void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey)
{
void *ptr;
int ret;
@@ -715,7 +679,7 @@ void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey)
return ptr;
}
-void *malloc_pkey_anon_huge(long size, int prot, u16 pkey)
+static void *malloc_pkey_anon_huge(long size, int prot, u16 pkey)
{
int ret;
void *ptr;
@@ -745,10 +709,10 @@ void *malloc_pkey_anon_huge(long size, int prot, u16 pkey)
return ptr;
}
-int hugetlb_setup_ok;
+static int hugetlb_setup_ok;
#define SYSFS_FMT_NR_HUGE_PAGES "/sys/kernel/mm/hugepages/hugepages-%ldkB/nr_hugepages"
#define GET_NR_HUGE_PAGES 10
-void setup_hugetlbfs(void)
+static void setup_hugetlbfs(void)
{
int err;
int fd;
@@ -796,7 +760,7 @@ void setup_hugetlbfs(void)
hugetlb_setup_ok = 1;
}
-void *malloc_pkey_hugetlb(long size, int prot, u16 pkey)
+static void *malloc_pkey_hugetlb(long size, int prot, u16 pkey)
{
void *ptr;
int flags = MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB;
@@ -817,42 +781,15 @@ void *malloc_pkey_hugetlb(long size, int prot, u16 pkey)
return ptr;
}
-void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey)
-{
- void *ptr;
- int fd;
-
- dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
- size, prot, pkey);
- pkey_assert(pkey < NR_PKEYS);
- fd = open("/dax/foo", O_RDWR);
- pkey_assert(fd >= 0);
-
- ptr = mmap(0, size, prot, MAP_SHARED, fd, 0);
- pkey_assert(ptr != (void *)-1);
-
- mprotect_pkey(ptr, size, prot, pkey);
-
- record_pkey_malloc(ptr, size, prot);
-
- dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr);
- close(fd);
- return ptr;
-}
-
-void *(*pkey_malloc[])(long size, int prot, u16 pkey) = {
+static void *(*pkey_malloc[])(long size, int prot, u16 pkey) = {
malloc_pkey_with_mprotect,
malloc_pkey_with_mprotect_subpage,
malloc_pkey_anon_huge,
malloc_pkey_hugetlb
-/* can not do direct with the pkey_mprotect() API:
- malloc_pkey_mmap_direct,
- malloc_pkey_mmap_dax,
-*/
};
-void *malloc_pkey(long size, int prot, u16 pkey)
+static void *malloc_pkey(long size, int prot, u16 pkey)
{
void *ret;
static int malloc_type;
@@ -882,7 +819,7 @@ void *malloc_pkey(long size, int prot, u16 pkey)
return ret;
}
-int last_pkey_faults;
+static int last_pkey_faults;
#define UNKNOWN_PKEY -2
void expected_pkey_fault(int pkey)
{
@@ -905,7 +842,7 @@ void expected_pkey_fault(int pkey)
*/
if (__read_pkey_reg() != 0)
#elif defined(__aarch64__)
- if (__read_pkey_reg() != PKEY_ALLOW_ALL)
+ if (__read_pkey_reg() != PKEY_REG_ALLOW_ALL)
#else
if (__read_pkey_reg() != shadow_pkey_reg)
#endif /* arch */
@@ -924,9 +861,9 @@ void expected_pkey_fault(int pkey)
pkey_assert(last_pkey_faults == pkey_faults); \
} while (0)
-int test_fds[10] = { -1 };
-int nr_test_fds;
-void __save_test_fd(int fd)
+static int test_fds[10] = { -1 };
+static int nr_test_fds;
+static void __save_test_fd(int fd)
{
pkey_assert(fd >= 0);
pkey_assert(nr_test_fds < ARRAY_SIZE(test_fds));
@@ -934,14 +871,14 @@ void __save_test_fd(int fd)
nr_test_fds++;
}
-int get_test_read_fd(void)
+static int get_test_read_fd(void)
{
int test_fd = open("/etc/passwd", O_RDONLY);
__save_test_fd(test_fd);
return test_fd;
}
-void close_test_fds(void)
+static void close_test_fds(void)
{
int i;
@@ -954,7 +891,7 @@ void close_test_fds(void)
nr_test_fds = 0;
}
-void test_pkey_alloc_free_attach_pkey0(int *ptr, u16 pkey)
+static void test_pkey_alloc_free_attach_pkey0(int *ptr, u16 pkey)
{
int i, err;
int max_nr_pkey_allocs;
@@ -1006,7 +943,7 @@ void test_pkey_alloc_free_attach_pkey0(int *ptr, u16 pkey)
pkey_assert(!err);
}
-void test_read_of_write_disabled_region(int *ptr, u16 pkey)
+static void test_read_of_write_disabled_region(int *ptr, u16 pkey)
{
int ptr_contents;
@@ -1016,7 +953,7 @@ void test_read_of_write_disabled_region(int *ptr, u16 pkey)
dprintf1("*ptr: %d\n", ptr_contents);
dprintf1("\n");
}
-void test_read_of_access_disabled_region(int *ptr, u16 pkey)
+static void test_read_of_access_disabled_region(int *ptr, u16 pkey)
{
int ptr_contents;
@@ -1028,7 +965,7 @@ void test_read_of_access_disabled_region(int *ptr, u16 pkey)
expected_pkey_fault(pkey);
}
-void test_read_of_access_disabled_region_with_page_already_mapped(int *ptr,
+static void test_read_of_access_disabled_region_with_page_already_mapped(int *ptr,
u16 pkey)
{
int ptr_contents;
@@ -1045,7 +982,7 @@ void test_read_of_access_disabled_region_with_page_already_mapped(int *ptr,
expected_pkey_fault(pkey);
}
-void test_write_of_write_disabled_region_with_page_already_mapped(int *ptr,
+static void test_write_of_write_disabled_region_with_page_already_mapped(int *ptr,
u16 pkey)
{
*ptr = __LINE__;
@@ -1056,14 +993,14 @@ void test_write_of_write_disabled_region_with_page_already_mapped(int *ptr,
expected_pkey_fault(pkey);
}
-void test_write_of_write_disabled_region(int *ptr, u16 pkey)
+static void test_write_of_write_disabled_region(int *ptr, u16 pkey)
{
dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey);
pkey_write_deny(pkey);
*ptr = __LINE__;
expected_pkey_fault(pkey);
}
-void test_write_of_access_disabled_region(int *ptr, u16 pkey)
+static void test_write_of_access_disabled_region(int *ptr, u16 pkey)
{
dprintf1("disabling access to PKEY[%02d], doing write\n", pkey);
pkey_access_deny(pkey);
@@ -1071,7 +1008,7 @@ void test_write_of_access_disabled_region(int *ptr, u16 pkey)
expected_pkey_fault(pkey);
}
-void test_write_of_access_disabled_region_with_page_already_mapped(int *ptr,
+static void test_write_of_access_disabled_region_with_page_already_mapped(int *ptr,
u16 pkey)
{
*ptr = __LINE__;
@@ -1082,7 +1019,7 @@ void test_write_of_access_disabled_region_with_page_already_mapped(int *ptr,
expected_pkey_fault(pkey);
}
-void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey)
+static void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey)
{
int ret;
int test_fd = get_test_read_fd();
@@ -1094,7 +1031,8 @@ void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey)
dprintf1("read ret: %d\n", ret);
pkey_assert(ret);
}
-void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey)
+
+static void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey)
{
int ret;
int test_fd = get_test_read_fd();
@@ -1107,7 +1045,7 @@ void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey)
pkey_assert(ret);
}
-void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey)
+static void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey)
{
int pipe_ret, vmsplice_ret;
struct iovec iov;
@@ -1129,7 +1067,7 @@ void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey)
close(pipe_fds[1]);
}
-void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey)
+static void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey)
{
int ignored = 0xdada;
int futex_ret;
@@ -1147,7 +1085,7 @@ void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey)
}
/* Assumes that all pkeys other than 'pkey' are unallocated */
-void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey)
+static void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey)
{
int err;
int i;
@@ -1170,7 +1108,7 @@ void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey)
}
/* Assumes that all pkeys other than 'pkey' are unallocated */
-void test_pkey_syscalls_bad_args(int *ptr, u16 pkey)
+static void test_pkey_syscalls_bad_args(int *ptr, u16 pkey)
{
int err;
int bad_pkey = NR_PKEYS+99;
@@ -1180,7 +1118,7 @@ void test_pkey_syscalls_bad_args(int *ptr, u16 pkey)
pkey_assert(err);
}
-void become_child(void)
+static void become_child(void)
{
pid_t forkret;
@@ -1196,7 +1134,7 @@ void become_child(void)
}
/* Assumes that all pkeys other than 'pkey' are unallocated */
-void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
+static void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
{
int err;
int allocated_pkeys[NR_PKEYS] = {0};
@@ -1263,7 +1201,7 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
}
}
-void arch_force_pkey_reg_init(void)
+static void arch_force_pkey_reg_init(void)
{
#if defined(__i386__) || defined(__x86_64__) /* arch */
u64 *buf;
@@ -1302,7 +1240,7 @@ void arch_force_pkey_reg_init(void)
* a long-running test that continually checks the pkey
* register.
*/
-void test_pkey_init_state(int *ptr, u16 pkey)
+static void test_pkey_init_state(int *ptr, u16 pkey)
{
int err;
int allocated_pkeys[NR_PKEYS] = {0};
@@ -1340,7 +1278,7 @@ void test_pkey_init_state(int *ptr, u16 pkey)
* have to call pkey_alloc() to use it first. Make sure that it
* is usable.
*/
-void test_mprotect_with_pkey_0(int *ptr, u16 pkey)
+static void test_mprotect_with_pkey_0(int *ptr, u16 pkey)
{
long size;
int prot;
@@ -1364,7 +1302,7 @@ void test_mprotect_with_pkey_0(int *ptr, u16 pkey)
mprotect_pkey(ptr, size, prot, pkey);
}
-void test_ptrace_of_child(int *ptr, u16 pkey)
+static void test_ptrace_of_child(int *ptr, u16 pkey)
{
__attribute__((__unused__)) int peek_result;
pid_t child_pid;
@@ -1440,7 +1378,7 @@ void test_ptrace_of_child(int *ptr, u16 pkey)
free(plain_ptr_unaligned);
}
-void *get_pointer_to_instructions(void)
+static void *get_pointer_to_instructions(void)
{
void *p1;
@@ -1461,7 +1399,7 @@ void *get_pointer_to_instructions(void)
return p1;
}
-void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
+static void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
{
void *p1;
int scratch;
@@ -1493,7 +1431,7 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
pkey_assert(!ret);
}
-void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
+static void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
{
void *p1;
int scratch;
@@ -1542,7 +1480,7 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
}
#if defined(__i386__) || defined(__x86_64__)
-void test_ptrace_modifies_pkru(int *ptr, u16 pkey)
+static void test_ptrace_modifies_pkru(int *ptr, u16 pkey)
{
u32 new_pkru;
pid_t child;
@@ -1665,7 +1603,7 @@ void test_ptrace_modifies_pkru(int *ptr, u16 pkey)
#endif
#if defined(__aarch64__)
-void test_ptrace_modifies_pkru(int *ptr, u16 pkey)
+static void test_ptrace_modifies_pkru(int *ptr, u16 pkey)
{
pid_t child;
int status, ret;
@@ -1742,7 +1680,7 @@ void test_ptrace_modifies_pkru(int *ptr, u16 pkey)
}
#endif
-void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
+static void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
{
int size = PAGE_SIZE;
int sret;
@@ -1756,7 +1694,7 @@ void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
pkey_assert(sret < 0);
}
-void (*pkey_tests[])(int *ptr, u16 pkey) = {
+static void (*pkey_tests[])(int *ptr, u16 pkey) = {
test_read_of_write_disabled_region,
test_read_of_access_disabled_region,
test_read_of_access_disabled_region_with_page_already_mapped,
@@ -1782,7 +1720,7 @@ void (*pkey_tests[])(int *ptr, u16 pkey) = {
#endif
};
-void run_tests_once(void)
+static void run_tests_once(void)
{
int *ptr;
int prot = PROT_READ|PROT_WRITE;
@@ -1816,7 +1754,7 @@ void run_tests_once(void)
iteration_nr++;
}
-void pkey_setup_shadow(void)
+static void pkey_setup_shadow(void)
{
shadow_pkey_reg = __read_pkey_reg();
}
diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh
index 2fc290d9430c..7cc71d942f83 100755
--- a/tools/testing/selftests/mm/run_vmtests.sh
+++ b/tools/testing/selftests/mm/run_vmtests.sh
@@ -45,6 +45,8 @@ separated by spaces:
vmalloc smoke tests
- hmm
hmm smoke tests
+- madv_guard
+ test madvise(2) MADV_GUARD_INSTALL and MADV_GUARD_REMOVE options
- madv_populate
test memadvise(2) MADV_POPULATE_{READ,WRITE} options
- memfd_secret
@@ -218,7 +220,7 @@ run_test() {
if test_selected ${CATEGORY}; then
# On memory constrainted systems some tests can fail to allocate hugepages.
# perform some cleanup before the test for a higher success rate.
- if [ ${CATEGORY} == "thp" ] | [ ${CATEGORY} == "hugetlb" ]; then
+ if [ ${CATEGORY} == "thp" -o ${CATEGORY} == "hugetlb" ]; then
echo 3 > /proc/sys/vm/drop_caches
sleep 2
echo 1 > /proc/sys/vm/compact_memory
@@ -302,11 +304,14 @@ uffd_stress_bin=./uffd-stress
CATEGORY="userfaultfd" run_test ${uffd_stress_bin} anon 20 16
# Hugetlb tests require source and destination huge pages. Pass in half
# the size of the free pages we have, which is used for *each*.
-half_ufd_size_MB=$((freepgs / 2))
+# uffd-stress expects a region expressed in MiB, so we adjust
+# half_ufd_size_MB accordingly.
+half_ufd_size_MB=$(((freepgs * hpgsize_KB) / 1024 / 2))
CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb "$half_ufd_size_MB" 32
CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb-private "$half_ufd_size_MB" 32
CATEGORY="userfaultfd" run_test ${uffd_stress_bin} shmem 20 16
CATEGORY="userfaultfd" run_test ${uffd_stress_bin} shmem-private 20 16
+CATEGORY="userfaultfd" run_test ./uffd-wp-mremap
#cleanup
echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages
@@ -375,6 +380,9 @@ CATEGORY="mremap" run_test ./mremap_dontunmap
CATEGORY="hmm" run_test bash ./test_hmm.sh smoke
+# MADV_GUARD_INSTALL and MADV_GUARD_REMOVE tests
+CATEGORY="madv_guard" run_test ./guard-pages
+
# MADV_POPULATE_READ and MADV_POPULATE_WRITE tests
CATEGORY="madv_populate" run_test ./madv_populate
diff --git a/tools/testing/selftests/mm/seal_elf.c b/tools/testing/selftests/mm/seal_elf.c
deleted file mode 100644
index d9f8ba8d5050..000000000000
--- a/tools/testing/selftests/mm/seal_elf.c
+++ /dev/null
@@ -1,137 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#define _GNU_SOURCE
-#include <sys/mman.h>
-#include <stdint.h>
-#include <asm-generic/unistd.h>
-#include <string.h>
-#include <sys/time.h>
-#include <sys/resource.h>
-#include <stdbool.h>
-#include "../kselftest.h"
-#include <syscall.h>
-#include <errno.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <fcntl.h>
-#include <sys/ioctl.h>
-#include <sys/vfs.h>
-#include <sys/stat.h>
-#include "mseal_helpers.h"
-
-/*
- * define sys_xyx to call syscall directly.
- */
-static int sys_mseal(void *start, size_t len)
-{
- int sret;
-
- errno = 0;
- sret = syscall(__NR_mseal, start, len, 0);
- return sret;
-}
-
-static inline int sys_mprotect(void *ptr, size_t size, unsigned long prot)
-{
- int sret;
-
- errno = 0;
- sret = syscall(__NR_mprotect, ptr, size, prot);
- return sret;
-}
-
-static bool seal_support(void)
-{
- int ret;
- void *ptr;
- unsigned long page_size = getpagesize();
-
- ptr = mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
- if (ptr == (void *) -1)
- return false;
-
- ret = sys_mseal(ptr, page_size);
- if (ret < 0)
- return false;
-
- return true;
-}
-
-const char somestr[4096] = {"READONLY"};
-
-static void test_seal_elf(void)
-{
- int ret;
- FILE *maps;
- char line[512];
- uintptr_t addr_start, addr_end;
- char prot[5];
- char filename[256];
- unsigned long page_size = getpagesize();
- unsigned long long ptr = (unsigned long long) somestr;
- char *somestr2 = (char *)somestr;
-
- /*
- * Modify the protection of readonly somestr
- */
- if (((unsigned long long)ptr % page_size) != 0)
- ptr = (unsigned long long)ptr & ~(page_size - 1);
-
- ksft_print_msg("somestr = %s\n", somestr);
- ksft_print_msg("change protection to rw\n");
- ret = sys_mprotect((void *)ptr, page_size, PROT_READ|PROT_WRITE);
- FAIL_TEST_IF_FALSE(!ret);
- *somestr2 = 'A';
- ksft_print_msg("somestr is modified to: %s\n", somestr);
- ret = sys_mprotect((void *)ptr, page_size, PROT_READ);
- FAIL_TEST_IF_FALSE(!ret);
-
- maps = fopen("/proc/self/maps", "r");
- FAIL_TEST_IF_FALSE(maps);
-
- /*
- * apply sealing to elf binary
- */
- while (fgets(line, sizeof(line), maps)) {
- if (sscanf(line, "%lx-%lx %4s %*x %*x:%*x %*u %255[^\n]",
- &addr_start, &addr_end, prot, filename) == 4) {
- if (strlen(filename)) {
- /*
- * seal the mapping if read only.
- */
- if (strstr(prot, "r-")) {
- ret = sys_mseal((void *)addr_start, addr_end - addr_start);
- FAIL_TEST_IF_FALSE(!ret);
- ksft_print_msg("sealed: %lx-%lx %s %s\n",
- addr_start, addr_end, prot, filename);
- if ((uintptr_t) somestr >= addr_start &&
- (uintptr_t) somestr <= addr_end)
- ksft_print_msg("mapping for somestr found\n");
- }
- }
- }
- }
- fclose(maps);
-
- ret = sys_mprotect((void *)ptr, page_size, PROT_READ | PROT_WRITE);
- FAIL_TEST_IF_FALSE(ret < 0);
- ksft_print_msg("somestr is sealed, mprotect is rejected\n");
-
- REPORT_TEST_PASS();
-}
-
-int main(int argc, char **argv)
-{
- bool test_seal = seal_support();
-
- ksft_print_header();
- ksft_print_msg("pid=%d\n", getpid());
-
- if (!test_seal)
- ksft_exit_skip("sealing not supported, check CONFIG_64BIT\n");
-
- ksft_set_plan(1);
-
- test_seal_elf();
-
- ksft_finished();
-}
diff --git a/tools/testing/selftests/mm/soft-dirty.c b/tools/testing/selftests/mm/soft-dirty.c
index bdfa5d085f00..8e1462ce0532 100644
--- a/tools/testing/selftests/mm/soft-dirty.c
+++ b/tools/testing/selftests/mm/soft-dirty.c
@@ -128,7 +128,7 @@ static void test_mprotect(int pagemap_fd, int pagesize, bool anon)
{
const char *type[] = {"file", "anon"};
const char *fname = "./soft-dirty-test-file";
- int test_fd;
+ int test_fd = 0;
char *map;
if (anon) {
diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c
index eb6d1b9fc362..3f353f3d070f 100644
--- a/tools/testing/selftests/mm/split_huge_page_test.c
+++ b/tools/testing/selftests/mm/split_huge_page_test.c
@@ -108,38 +108,28 @@ static void verify_rss_anon_split_huge_page_all_zeroes(char *one_page, int nr_hp
unsigned long rss_anon_before, rss_anon_after;
size_t i;
- if (!check_huge_anon(one_page, 4, pmd_pagesize)) {
- printf("No THP is allocated\n");
- exit(EXIT_FAILURE);
- }
+ if (!check_huge_anon(one_page, 4, pmd_pagesize))
+ ksft_exit_fail_msg("No THP is allocated\n");
rss_anon_before = rss_anon();
- if (!rss_anon_before) {
- printf("No RssAnon is allocated before split\n");
- exit(EXIT_FAILURE);
- }
+ if (!rss_anon_before)
+ ksft_exit_fail_msg("No RssAnon is allocated before split\n");
/* split all THPs */
write_debugfs(PID_FMT, getpid(), (uint64_t)one_page,
(uint64_t)one_page + len, 0);
for (i = 0; i < len; i++)
- if (one_page[i] != (char)0) {
- printf("%ld byte corrupted\n", i);
- exit(EXIT_FAILURE);
- }
+ if (one_page[i] != (char)0)
+ ksft_exit_fail_msg("%ld byte corrupted\n", i);
- if (!check_huge_anon(one_page, 0, pmd_pagesize)) {
- printf("Still AnonHugePages not split\n");
- exit(EXIT_FAILURE);
- }
+ if (!check_huge_anon(one_page, 0, pmd_pagesize))
+ ksft_exit_fail_msg("Still AnonHugePages not split\n");
rss_anon_after = rss_anon();
- if (rss_anon_after >= rss_anon_before) {
- printf("Incorrect RssAnon value. Before: %ld After: %ld\n",
+ if (rss_anon_after >= rss_anon_before)
+ ksft_exit_fail_msg("Incorrect RssAnon value. Before: %ld After: %ld\n",
rss_anon_before, rss_anon_after);
- exit(EXIT_FAILURE);
- }
}
void split_pmd_zero_pages(void)
@@ -150,11 +140,11 @@ void split_pmd_zero_pages(void)
one_page = allocate_zero_filled_hugepage(len);
verify_rss_anon_split_huge_page_all_zeroes(one_page, nr_hpages, len);
- printf("Split zero filled huge pages successful\n");
+ ksft_test_result_pass("Split zero filled huge pages successful\n");
free(one_page);
}
-void split_pmd_thp(void)
+void split_pmd_thp_to_order(int order)
{
char *one_page;
size_t len = 4 * pmd_pagesize;
@@ -174,7 +164,7 @@ void split_pmd_thp(void)
/* split all THPs */
write_debugfs(PID_FMT, getpid(), (uint64_t)one_page,
- (uint64_t)one_page + len, 0);
+ (uint64_t)one_page + len, order);
for (i = 0; i < len; i++)
if (one_page[i] != (char)i)
@@ -184,7 +174,7 @@ void split_pmd_thp(void)
if (!check_huge_anon(one_page, 0, pmd_pagesize))
ksft_exit_fail_msg("Still AnonHugePages not split\n");
- ksft_test_result_pass("Split huge pages successful\n");
+ ksft_test_result_pass("Split huge pages to order %d successful\n", order);
free(one_page);
}
@@ -491,7 +481,7 @@ int main(int argc, char **argv)
if (argc > 1)
optional_xfs_path = argv[1];
- ksft_set_plan(3+9);
+ ksft_set_plan(1+8+2+9);
pagesize = getpagesize();
pageshift = ffs(pagesize) - 1;
@@ -502,7 +492,11 @@ int main(int argc, char **argv)
fd_size = 2 * pmd_pagesize;
split_pmd_zero_pages();
- split_pmd_thp();
+
+ for (i = 0; i < 9; i++)
+ if (i != 1)
+ split_pmd_thp_to_order(i);
+
split_pte_mapped_thp();
split_file_backed_thp();
diff --git a/tools/testing/selftests/mm/thp_settings.c b/tools/testing/selftests/mm/thp_settings.c
index 577eaab6266f..ad872af1c81a 100644
--- a/tools/testing/selftests/mm/thp_settings.c
+++ b/tools/testing/selftests/mm/thp_settings.c
@@ -87,7 +87,7 @@ int write_file(const char *path, const char *buf, size_t buflen)
return (unsigned int) numwritten;
}
-const unsigned long read_num(const char *path)
+unsigned long read_num(const char *path)
{
char buf[21];
@@ -172,7 +172,7 @@ void thp_write_string(const char *name, const char *val)
}
}
-const unsigned long thp_read_num(const char *name)
+unsigned long thp_read_num(const char *name)
{
char path[PATH_MAX];
int ret;
diff --git a/tools/testing/selftests/mm/thp_settings.h b/tools/testing/selftests/mm/thp_settings.h
index 876235a23460..fc131d23d593 100644
--- a/tools/testing/selftests/mm/thp_settings.h
+++ b/tools/testing/selftests/mm/thp_settings.h
@@ -64,12 +64,12 @@ struct thp_settings {
int read_file(const char *path, char *buf, size_t buflen);
int write_file(const char *path, const char *buf, size_t buflen);
-const unsigned long read_num(const char *path);
+unsigned long read_num(const char *path);
void write_num(const char *path, unsigned long num);
int thp_read_string(const char *name, const char * const strings[]);
void thp_write_string(const char *name, const char *val);
-const unsigned long thp_read_num(const char *name);
+unsigned long thp_read_num(const char *name);
void thp_write_num(const char *name, unsigned long num);
void thp_write_settings(struct thp_settings *settings);
diff --git a/tools/testing/selftests/mm/uffd-common.c b/tools/testing/selftests/mm/uffd-common.c
index 717539eddf98..7ad6ba660c7d 100644
--- a/tools/testing/selftests/mm/uffd-common.c
+++ b/tools/testing/selftests/mm/uffd-common.c
@@ -673,7 +673,11 @@ int uffd_open_dev(unsigned int flags)
int uffd_open_sys(unsigned int flags)
{
+#ifdef __NR_userfaultfd
return syscall(__NR_userfaultfd, flags);
+#else
+ return -1;
+#endif
}
int uffd_open(unsigned int flags)
diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c
index a4b83280998a..944d559ade21 100644
--- a/tools/testing/selftests/mm/uffd-stress.c
+++ b/tools/testing/selftests/mm/uffd-stress.c
@@ -33,10 +33,11 @@
* pthread_mutex_lock will also verify the atomicity of the memory
* transfer (UFFDIO_COPY).
*/
-#include <asm-generic/unistd.h>
+
#include "uffd-common.h"
uint64_t features;
+#ifdef __NR_userfaultfd
#define BOUNCE_RANDOM (1<<0)
#define BOUNCE_RACINGFAULTS (1<<1)
@@ -471,3 +472,15 @@ int main(int argc, char **argv)
nr_pages, nr_pages_per_cpu);
return userfaultfd_stress();
}
+
+#else /* __NR_userfaultfd */
+
+#warning "missing __NR_userfaultfd definition"
+
+int main(void)
+{
+ printf("skip: Skipping userfaultfd test (missing __NR_userfaultfd)\n");
+ return KSFT_SKIP;
+}
+
+#endif /* __NR_userfaultfd */
diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c
index a2e71b1636e7..74c8bc02b506 100644
--- a/tools/testing/selftests/mm/uffd-unit-tests.c
+++ b/tools/testing/selftests/mm/uffd-unit-tests.c
@@ -5,11 +5,12 @@
* Copyright (C) 2015-2023 Red Hat, Inc.
*/
-#include <asm-generic/unistd.h>
#include "uffd-common.h"
#include "../../../../mm/gup_test.h"
+#ifdef __NR_userfaultfd
+
/* The unit test doesn't need a large or random size, make it 32MB for now */
#define UFFD_TEST_MEM_SIZE (32UL << 20)
@@ -1122,7 +1123,7 @@ uffd_move_test_common(uffd_test_args_t *targs, unsigned long chunk_size,
char c;
unsigned long long count;
struct uffd_args args = { 0 };
- char *orig_area_src, *orig_area_dst;
+ char *orig_area_src = NULL, *orig_area_dst = NULL;
unsigned long step_size, step_count;
unsigned long src_offs = 0;
unsigned long dst_offs = 0;
@@ -1190,7 +1191,7 @@ uffd_move_test_common(uffd_test_args_t *targs, unsigned long chunk_size,
nr, count, count_verify[src_offs + nr + i]);
}
}
- if (step_size > page_size) {
+ if (chunk_size > page_size) {
area_src = orig_area_src;
area_dst = orig_area_dst;
}
@@ -1558,3 +1559,14 @@ int main(int argc, char *argv[])
return ksft_get_fail_cnt() ? KSFT_FAIL : KSFT_PASS;
}
+#else /* __NR_userfaultfd */
+
+#warning "missing __NR_userfaultfd definition"
+
+int main(void)
+{
+ printf("Skipping %s (missing __NR_userfaultfd)\n", __file__);
+ return KSFT_SKIP;
+}
+
+#endif /* __NR_userfaultfd */
diff --git a/tools/testing/selftests/mm/uffd-wp-mremap.c b/tools/testing/selftests/mm/uffd-wp-mremap.c
new file mode 100644
index 000000000000..2c4f984bd73c
--- /dev/null
+++ b/tools/testing/selftests/mm/uffd-wp-mremap.c
@@ -0,0 +1,380 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#define _GNU_SOURCE
+#include <stdbool.h>
+#include <stdint.h>
+#include <fcntl.h>
+#include <assert.h>
+#include <linux/mman.h>
+#include <sys/mman.h>
+#include "../kselftest.h"
+#include "thp_settings.h"
+#include "uffd-common.h"
+
+static int pagemap_fd;
+static size_t pagesize;
+static int nr_pagesizes = 1;
+static int nr_thpsizes;
+static size_t thpsizes[20];
+static int nr_hugetlbsizes;
+static size_t hugetlbsizes[10];
+
+static int sz2ord(size_t size)
+{
+ return __builtin_ctzll(size / pagesize);
+}
+
+static int detect_thp_sizes(size_t sizes[], int max)
+{
+ int count = 0;
+ unsigned long orders;
+ size_t kb;
+ int i;
+
+ /* thp not supported at all. */
+ if (!read_pmd_pagesize())
+ return 0;
+
+ orders = thp_supported_orders();
+
+ for (i = 0; orders && count < max; i++) {
+ if (!(orders & (1UL << i)))
+ continue;
+ orders &= ~(1UL << i);
+ kb = (pagesize >> 10) << i;
+ sizes[count++] = kb * 1024;
+ ksft_print_msg("[INFO] detected THP size: %zu KiB\n", kb);
+ }
+
+ return count;
+}
+
+static void *mmap_aligned(size_t size, int prot, int flags)
+{
+ size_t mmap_size = size * 2;
+ char *mmap_mem, *mem;
+
+ mmap_mem = mmap(NULL, mmap_size, prot, flags, -1, 0);
+ if (mmap_mem == MAP_FAILED)
+ return mmap_mem;
+
+ mem = (char *)(((uintptr_t)mmap_mem + size - 1) & ~(size - 1));
+ munmap(mmap_mem, mem - mmap_mem);
+ munmap(mem + size, mmap_mem + mmap_size - mem - size);
+
+ return mem;
+}
+
+static void *alloc_one_folio(size_t size, bool private, bool hugetlb)
+{
+ bool thp = !hugetlb && size > pagesize;
+ int flags = MAP_ANONYMOUS;
+ int prot = PROT_READ | PROT_WRITE;
+ char *mem, *addr;
+
+ assert((size & (size - 1)) == 0);
+
+ if (private)
+ flags |= MAP_PRIVATE;
+ else
+ flags |= MAP_SHARED;
+
+ /*
+ * For THP, we must explicitly enable the THP size, allocate twice the
+ * required space then manually align.
+ */
+ if (thp) {
+ struct thp_settings settings = *thp_current_settings();
+
+ if (private)
+ settings.hugepages[sz2ord(size)].enabled = THP_ALWAYS;
+ else
+ settings.shmem_hugepages[sz2ord(size)].enabled = SHMEM_ALWAYS;
+
+ thp_push_settings(&settings);
+
+ mem = mmap_aligned(size, prot, flags);
+ } else {
+ if (hugetlb) {
+ flags |= MAP_HUGETLB;
+ flags |= __builtin_ctzll(size) << MAP_HUGE_SHIFT;
+ }
+
+ mem = mmap(NULL, size, prot, flags, -1, 0);
+ }
+
+ if (mem == MAP_FAILED) {
+ mem = NULL;
+ goto out;
+ }
+
+ assert(((uintptr_t)mem & (size - 1)) == 0);
+
+ /*
+ * Populate the folio by writing the first byte and check that all pages
+ * are populated. Finally set the whole thing to non-zero data to avoid
+ * kernel from mapping it back to the zero page.
+ */
+ mem[0] = 1;
+ for (addr = mem; addr < mem + size; addr += pagesize) {
+ if (!pagemap_is_populated(pagemap_fd, addr)) {
+ munmap(mem, size);
+ mem = NULL;
+ goto out;
+ }
+ }
+ memset(mem, 1, size);
+out:
+ if (thp)
+ thp_pop_settings();
+
+ return mem;
+}
+
+static bool check_uffd_wp_state(void *mem, size_t size, bool expect)
+{
+ uint64_t pte;
+ void *addr;
+
+ for (addr = mem; addr < mem + size; addr += pagesize) {
+ pte = pagemap_get_entry(pagemap_fd, addr);
+ if (!!(pte & PM_UFFD_WP) != expect) {
+ ksft_test_result_fail("uffd-wp not %s for pte %lu!\n",
+ expect ? "set" : "clear",
+ (addr - mem) / pagesize);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static bool range_is_swapped(void *addr, size_t size)
+{
+ for (; size; addr += pagesize, size -= pagesize)
+ if (!pagemap_is_swapped(pagemap_fd, addr))
+ return false;
+ return true;
+}
+
+static void test_one_folio(size_t size, bool private, bool swapout, bool hugetlb)
+{
+ struct uffdio_writeprotect wp_prms;
+ uint64_t features = 0;
+ void *addr = NULL;
+ void *mem = NULL;
+
+ assert(!(hugetlb && swapout));
+
+ ksft_print_msg("[RUN] %s(size=%zu, private=%s, swapout=%s, hugetlb=%s)\n",
+ __func__,
+ size,
+ private ? "true" : "false",
+ swapout ? "true" : "false",
+ hugetlb ? "true" : "false");
+
+ /* Allocate a folio of required size and type. */
+ mem = alloc_one_folio(size, private, hugetlb);
+ if (!mem) {
+ ksft_test_result_fail("alloc_one_folio() failed\n");
+ goto out;
+ }
+
+ /* Register range for uffd-wp. */
+ if (userfaultfd_open(&features)) {
+ ksft_test_result_fail("userfaultfd_open() failed\n");
+ goto out;
+ }
+ if (uffd_register(uffd, mem, size, false, true, false)) {
+ ksft_test_result_fail("uffd_register() failed\n");
+ goto out;
+ }
+ wp_prms.mode = UFFDIO_WRITEPROTECT_MODE_WP;
+ wp_prms.range.start = (uintptr_t)mem;
+ wp_prms.range.len = size;
+ if (ioctl(uffd, UFFDIO_WRITEPROTECT, &wp_prms)) {
+ ksft_test_result_fail("ioctl(UFFDIO_WRITEPROTECT) failed\n");
+ goto out;
+ }
+
+ if (swapout) {
+ madvise(mem, size, MADV_PAGEOUT);
+ if (!range_is_swapped(mem, size)) {
+ ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n");
+ goto out;
+ }
+ }
+
+ /* Check that uffd-wp is set for all PTEs in range. */
+ if (!check_uffd_wp_state(mem, size, true))
+ goto out;
+
+ /*
+ * Move the mapping to a new, aligned location. Since
+ * UFFD_FEATURE_EVENT_REMAP is not set, we expect the uffd-wp bit for
+ * each PTE to be cleared in the new mapping.
+ */
+ addr = mmap_aligned(size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS);
+ if (addr == MAP_FAILED) {
+ ksft_test_result_fail("mmap_aligned() failed\n");
+ goto out;
+ }
+ if (mremap(mem, size, size, MREMAP_FIXED | MREMAP_MAYMOVE, addr) == MAP_FAILED) {
+ ksft_test_result_fail("mremap() failed\n");
+ munmap(addr, size);
+ goto out;
+ }
+ mem = addr;
+
+ /* Check that uffd-wp is cleared for all PTEs in range. */
+ if (!check_uffd_wp_state(mem, size, false))
+ goto out;
+
+ ksft_test_result_pass("%s(size=%zu, private=%s, swapout=%s, hugetlb=%s)\n",
+ __func__,
+ size,
+ private ? "true" : "false",
+ swapout ? "true" : "false",
+ hugetlb ? "true" : "false");
+out:
+ if (mem)
+ munmap(mem, size);
+ if (uffd >= 0) {
+ close(uffd);
+ uffd = -1;
+ }
+}
+
+struct testcase {
+ size_t *sizes;
+ int *nr_sizes;
+ bool private;
+ bool swapout;
+ bool hugetlb;
+};
+
+static const struct testcase testcases[] = {
+ /* base pages. */
+ {
+ .sizes = &pagesize,
+ .nr_sizes = &nr_pagesizes,
+ .private = false,
+ .swapout = false,
+ .hugetlb = false,
+ },
+ {
+ .sizes = &pagesize,
+ .nr_sizes = &nr_pagesizes,
+ .private = true,
+ .swapout = false,
+ .hugetlb = false,
+ },
+ {
+ .sizes = &pagesize,
+ .nr_sizes = &nr_pagesizes,
+ .private = false,
+ .swapout = true,
+ .hugetlb = false,
+ },
+ {
+ .sizes = &pagesize,
+ .nr_sizes = &nr_pagesizes,
+ .private = true,
+ .swapout = true,
+ .hugetlb = false,
+ },
+
+ /* thp. */
+ {
+ .sizes = thpsizes,
+ .nr_sizes = &nr_thpsizes,
+ .private = false,
+ .swapout = false,
+ .hugetlb = false,
+ },
+ {
+ .sizes = thpsizes,
+ .nr_sizes = &nr_thpsizes,
+ .private = true,
+ .swapout = false,
+ .hugetlb = false,
+ },
+ {
+ .sizes = thpsizes,
+ .nr_sizes = &nr_thpsizes,
+ .private = false,
+ .swapout = true,
+ .hugetlb = false,
+ },
+ {
+ .sizes = thpsizes,
+ .nr_sizes = &nr_thpsizes,
+ .private = true,
+ .swapout = true,
+ .hugetlb = false,
+ },
+
+ /* hugetlb. */
+ {
+ .sizes = hugetlbsizes,
+ .nr_sizes = &nr_hugetlbsizes,
+ .private = false,
+ .swapout = false,
+ .hugetlb = true,
+ },
+ {
+ .sizes = hugetlbsizes,
+ .nr_sizes = &nr_hugetlbsizes,
+ .private = true,
+ .swapout = false,
+ .hugetlb = true,
+ },
+};
+
+int main(int argc, char **argv)
+{
+ struct thp_settings settings;
+ int i, j, plan = 0;
+
+ pagesize = getpagesize();
+ nr_thpsizes = detect_thp_sizes(thpsizes, ARRAY_SIZE(thpsizes));
+ nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes,
+ ARRAY_SIZE(hugetlbsizes));
+
+ /* If THP is supported, save THP settings and initially disable THP. */
+ if (nr_thpsizes) {
+ thp_save_settings();
+ thp_read_settings(&settings);
+ for (i = 0; i < NR_ORDERS; i++) {
+ settings.hugepages[i].enabled = THP_NEVER;
+ settings.shmem_hugepages[i].enabled = SHMEM_NEVER;
+ }
+ thp_push_settings(&settings);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(testcases); i++)
+ plan += *testcases[i].nr_sizes;
+ ksft_set_plan(plan);
+
+ pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+ if (pagemap_fd < 0)
+ ksft_exit_fail_msg("opening pagemap failed\n");
+
+ for (i = 0; i < ARRAY_SIZE(testcases); i++) {
+ const struct testcase *tc = &testcases[i];
+
+ for (j = 0; j < *tc->nr_sizes; j++)
+ test_one_folio(tc->sizes[j], tc->private, tc->swapout,
+ tc->hugetlb);
+ }
+
+ /* If THP is supported, restore original THP settings. */
+ if (nr_thpsizes)
+ thp_restore_settings();
+
+ i = ksft_get_fail_cnt();
+ if (i)
+ ksft_exit_fail_msg("%d out of %d tests failed\n",
+ i, ksft_test_num());
+ ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/mm/virtual_address_range.c b/tools/testing/selftests/mm/virtual_address_range.c
index 2a2b69e91950..b380e102b22f 100644
--- a/tools/testing/selftests/mm/virtual_address_range.c
+++ b/tools/testing/selftests/mm/virtual_address_range.c
@@ -10,10 +10,12 @@
#include <string.h>
#include <unistd.h>
#include <errno.h>
+#include <sys/prctl.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <fcntl.h>
+#include "vm_util.h"
#include "../kselftest.h"
/*
@@ -82,6 +84,24 @@ static void validate_addr(char *ptr, int high_addr)
ksft_exit_fail_msg("Bad address %lx\n", addr);
}
+static void mark_range(char *ptr, size_t size)
+{
+ if (prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, ptr, size, "virtual_address_range") == -1) {
+ if (errno == EINVAL) {
+ /* Depends on CONFIG_ANON_VMA_NAME */
+ ksft_test_result_skip("prctl(PR_SET_VMA_ANON_NAME) not supported\n");
+ ksft_finished();
+ } else {
+ ksft_exit_fail_perror("prctl(PR_SET_VMA_ANON_NAME) failed\n");
+ }
+ }
+}
+
+static int is_marked_vma(const char *vma_name)
+{
+ return vma_name && !strcmp(vma_name, "[anon:virtual_address_range]\n");
+}
+
static int validate_lower_address_hint(void)
{
char *ptr;
@@ -116,12 +136,17 @@ static int validate_complete_va_space(void)
prev_end_addr = 0;
while (fgets(line, sizeof(line), file)) {
+ const char *vma_name = NULL;
+ int vma_name_start = 0;
unsigned long hop;
- if (sscanf(line, "%lx-%lx %s[rwxp-]",
- &start_addr, &end_addr, prot) != 3)
+ if (sscanf(line, "%lx-%lx %4s %*s %*s %*s %n",
+ &start_addr, &end_addr, prot, &vma_name_start) != 3)
ksft_exit_fail_msg("cannot parse /proc/self/maps\n");
+ if (vma_name_start)
+ vma_name = line + vma_name_start;
+
/* end of userspace mappings; ignore vsyscall mapping */
if (start_addr & (1UL << 63))
return 0;
@@ -135,6 +160,9 @@ static int validate_complete_va_space(void)
if (prot[0] != 'r')
continue;
+ if (check_vmflag_io((void *)start_addr))
+ continue;
+
/*
* Confirm whether MAP_CHUNK_SIZE chunk can be found or not.
* If write succeeds, no need to check MAP_CHUNK_SIZE - 1
@@ -149,6 +177,9 @@ static int validate_complete_va_space(void)
return 1;
lseek(fd, 0, SEEK_SET);
+ if (is_marked_vma(vma_name))
+ munmap((char *)(start_addr + hop), MAP_CHUNK_SIZE);
+
hop += MAP_CHUNK_SIZE;
}
}
@@ -166,7 +197,7 @@ int main(int argc, char *argv[])
ksft_set_plan(1);
for (i = 0; i < NR_CHUNKS_LOW; i++) {
- ptr[i] = mmap(NULL, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE,
+ ptr[i] = mmap(NULL, MAP_CHUNK_SIZE, PROT_READ,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (ptr[i] == MAP_FAILED) {
@@ -175,6 +206,7 @@ int main(int argc, char *argv[])
break;
}
+ mark_range(ptr[i], MAP_CHUNK_SIZE);
validate_addr(ptr[i], 0);
}
lchunks = i;
@@ -186,12 +218,13 @@ int main(int argc, char *argv[])
for (i = 0; i < NR_CHUNKS_HIGH; i++) {
hint = hint_addr();
- hptr[i] = mmap(hint, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE,
+ hptr[i] = mmap(hint, MAP_CHUNK_SIZE, PROT_READ,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (hptr[i] == MAP_FAILED)
break;
+ mark_range(ptr[i], MAP_CHUNK_SIZE);
validate_addr(hptr[i], 1);
}
hchunks = i;
diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c
index d8d0cf04bb57..a36734fb62f3 100644
--- a/tools/testing/selftests/mm/vm_util.c
+++ b/tools/testing/selftests/mm/vm_util.c
@@ -2,6 +2,7 @@
#include <string.h>
#include <fcntl.h>
#include <dirent.h>
+#include <inttypes.h>
#include <sys/ioctl.h>
#include <linux/userfaultfd.h>
#include <linux/fs.h>
@@ -138,7 +139,7 @@ void clear_softdirty(void)
ksft_exit_fail_msg("opening clear_refs failed\n");
ret = write(fd, ctrl, strlen(ctrl));
close(fd);
- if (ret != strlen(ctrl))
+ if (ret != (signed int)strlen(ctrl))
ksft_exit_fail_msg("writing clear_refs failed\n");
}
@@ -193,13 +194,11 @@ err_out:
return rss_anon;
}
-bool __check_huge(void *addr, char *pattern, int nr_hpages,
- uint64_t hpage_size)
+char *__get_smap_entry(void *addr, const char *pattern, char *buf, size_t len)
{
- uint64_t thp = -1;
int ret;
FILE *fp;
- char buffer[MAX_LINE_LENGTH];
+ char *entry = NULL;
char addr_pattern[MAX_LINE_LENGTH];
ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
@@ -211,23 +210,40 @@ bool __check_huge(void *addr, char *pattern, int nr_hpages,
if (!fp)
ksft_exit_fail_msg("%s: Failed to open file %s\n", __func__, SMAP_FILE_PATH);
- if (!check_for_pattern(fp, addr_pattern, buffer, sizeof(buffer)))
+ if (!check_for_pattern(fp, addr_pattern, buf, len))
goto err_out;
- /*
- * Fetch the pattern in the same block and check the number of
- * hugepages.
- */
- if (!check_for_pattern(fp, pattern, buffer, sizeof(buffer)))
+ /* Fetch the pattern in the same block */
+ if (!check_for_pattern(fp, pattern, buf, len))
goto err_out;
- snprintf(addr_pattern, MAX_LINE_LENGTH, "%s%%9ld kB", pattern);
+ /* Trim trailing newline */
+ entry = strchr(buf, '\n');
+ if (entry)
+ *entry = '\0';
- if (sscanf(buffer, addr_pattern, &thp) != 1)
- ksft_exit_fail_msg("Reading smap error\n");
+ entry = buf + strlen(pattern);
err_out:
fclose(fp);
+ return entry;
+}
+
+bool __check_huge(void *addr, char *pattern, int nr_hpages,
+ uint64_t hpage_size)
+{
+ char buffer[MAX_LINE_LENGTH];
+ uint64_t thp = -1;
+ char *entry;
+
+ entry = __get_smap_entry(addr, pattern, buffer, sizeof(buffer));
+ if (!entry)
+ goto err_out;
+
+ if (sscanf(entry, "%9" SCNu64 " kB", &thp) != 1)
+ ksft_exit_fail_msg("Reading smap error\n");
+
+err_out:
return thp == (nr_hpages * (hpage_size >> 10));
}
@@ -384,3 +400,27 @@ unsigned long get_free_hugepages(void)
fclose(f);
return fhp;
}
+
+bool check_vmflag_io(void *addr)
+{
+ char buffer[MAX_LINE_LENGTH];
+ const char *flags;
+ size_t flaglen;
+
+ flags = __get_smap_entry(addr, "VmFlags:", buffer, sizeof(buffer));
+ if (!flags)
+ ksft_exit_fail_msg("%s: No VmFlags for %p\n", __func__, addr);
+
+ while (true) {
+ flags += strspn(flags, " ");
+
+ flaglen = strcspn(flags, " ");
+ if (!flaglen)
+ return false;
+
+ if (flaglen == strlen("io") && !memcmp(flags, "io", flaglen))
+ return true;
+
+ flags += flaglen;
+ }
+}
diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h
index 2eaed8209925..b60ac68a9dc8 100644
--- a/tools/testing/selftests/mm/vm_util.h
+++ b/tools/testing/selftests/mm/vm_util.h
@@ -53,6 +53,7 @@ int uffd_unregister(int uffd, void *addr, uint64_t len);
int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len,
bool miss, bool wp, bool minor, uint64_t *ioctls);
unsigned long get_free_hugepages(void);
+bool check_vmflag_io(void *addr);
/*
* On ppc64 this will only work with radix 2M hugepage size
diff --git a/tools/testing/selftests/mm/write_to_hugetlbfs.c b/tools/testing/selftests/mm/write_to_hugetlbfs.c
index 1289d311efd7..34c91f7e6128 100644
--- a/tools/testing/selftests/mm/write_to_hugetlbfs.c
+++ b/tools/testing/selftests/mm/write_to_hugetlbfs.c
@@ -89,7 +89,7 @@ int main(int argc, char **argv)
size = atoi(optarg);
break;
case 'p':
- strncpy(path, optarg, sizeof(path));
+ strncpy(path, optarg, sizeof(path) - 1);
break;
case 'm':
if (atoi(optarg) >= MAX_METHOD) {
diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
index 70f65eb320a7..48a000cabc97 100644
--- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c
+++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
@@ -20,6 +20,7 @@
#include <stdarg.h>
#include <linux/mount.h>
+#include "../filesystems/overlayfs/wrappers.h"
#include "../kselftest_harness.h"
#ifndef CLONE_NEWNS
@@ -126,6 +127,26 @@
#endif
#endif
+#ifndef __NR_move_mount
+ #if defined __alpha__
+ #define __NR_move_mount 539
+ #elif defined _MIPS_SIM
+ #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
+ #define __NR_move_mount 4429
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
+ #define __NR_move_mount 6429
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
+ #define __NR_move_mount 5429
+ #endif
+ #elif defined __ia64__
+ #define __NR_move_mount (428 + 1024)
+ #else
+ #define __NR_move_mount 429
+ #endif
+#endif
+
#ifndef MOUNT_ATTR_IDMAP
#define MOUNT_ATTR_IDMAP 0x00100000
#endif
@@ -397,6 +418,10 @@ FIXTURE_SETUP(mount_setattr)
ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0);
+ ASSERT_EQ(mkdir("/tmp/target1", 0777), 0);
+
+ ASSERT_EQ(mkdir("/tmp/target2", 0777), 0);
+
ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV,
"size=100000,mode=700"), 0);
@@ -1506,4 +1531,631 @@ TEST_F(mount_setattr, mount_attr_nosymfollow)
ASSERT_EQ(close(fd), 0);
}
+TEST_F(mount_setattr, open_tree_detached)
+{
+ int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
+ struct statx stx;
+
+ fd_tree_base = sys_open_tree(-EBADF, "/mnt",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree_base, 0);
+ /*
+ * /mnt testing tmpfs
+ * |-/mnt/A testing tmpfs
+ * | `-/mnt/A/AA testing tmpfs
+ * | `-/mnt/A/AA/B testing tmpfs
+ * | `-/mnt/A/AA/B/BB testing tmpfs
+ * `-/mnt/B testing ramfs
+ */
+ ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(fd_tree_base, "A/AA", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(fd_tree_base, "A/AA/B", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(fd_tree_base, "A/AA/B/BB", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ fd_tree_subdir = sys_open_tree(fd_tree_base, "A/AA",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree_subdir, 0);
+ /*
+ * /AA testing tmpfs
+ * `-/AA/B testing tmpfs
+ * `-/AA/B/BB testing tmpfs
+ */
+ ASSERT_EQ(statx(fd_tree_subdir, "B", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(fd_tree_subdir, "B/BB", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ ASSERT_EQ(move_mount(fd_tree_subdir, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
+ /*
+ * /tmp/target1 testing tmpfs
+ * `-/tmp/target1/B testing tmpfs
+ * `-/tmp/target1/B/BB testing tmpfs
+ */
+ ASSERT_EQ(statx(-EBADF, "/tmp/target1", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(-EBADF, "/tmp/target1/B", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(-EBADF, "/tmp/target1/B/BB", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ ASSERT_EQ(move_mount(fd_tree_base, "", -EBADF, "/tmp/target2", MOVE_MOUNT_F_EMPTY_PATH), 0);
+ /*
+ * /tmp/target2 testing tmpfs
+ * |-/tmp/target2/A testing tmpfs
+ * | `-/tmp/target2/A/AA testing tmpfs
+ * | `-/tmp/target2/A/AA/B testing tmpfs
+ * | `-/tmp/target2/A/AA/B/BB testing tmpfs
+ * `-/tmp/target2/B testing ramfs
+ */
+ ASSERT_EQ(statx(-EBADF, "/tmp/target2", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(-EBADF, "/tmp/target2/A", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(-EBADF, "/tmp/target2/A/AA", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(-EBADF, "/tmp/target2/A/AA/B", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(-EBADF, "/tmp/target2/A/AA/B/BB", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(-EBADF, "/tmp/target2/B", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ EXPECT_EQ(close(fd_tree_base), 0);
+ EXPECT_EQ(close(fd_tree_subdir), 0);
+}
+
+TEST_F(mount_setattr, open_tree_detached_fail)
+{
+ int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
+ struct statx stx;
+
+ fd_tree_base = sys_open_tree(-EBADF, "/mnt",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree_base, 0);
+ /*
+ * /mnt testing tmpfs
+ * |-/mnt/A testing tmpfs
+ * | `-/mnt/A/AA testing tmpfs
+ * | `-/mnt/A/AA/B testing tmpfs
+ * | `-/mnt/A/AA/B/BB testing tmpfs
+ * `-/mnt/B testing ramfs
+ */
+ ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(fd_tree_base, "A/AA", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(fd_tree_base, "A/AA/B", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(fd_tree_base, "A/AA/B/BB", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+
+ /*
+ * The origin mount namespace of the anonymous mount namespace
+ * of @fd_tree_base doesn't match the caller's mount namespace
+ * anymore so creation of another detached mounts must fail.
+ */
+ fd_tree_subdir = sys_open_tree(fd_tree_base, "A/AA",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_LT(fd_tree_subdir, 0);
+ ASSERT_EQ(errno, EINVAL);
+}
+
+TEST_F(mount_setattr, open_tree_detached_fail2)
+{
+ int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
+ struct statx stx;
+
+ fd_tree_base = sys_open_tree(-EBADF, "/mnt",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree_base, 0);
+ /*
+ * /mnt testing tmpfs
+ * |-/mnt/A testing tmpfs
+ * | `-/mnt/A/AA testing tmpfs
+ * | `-/mnt/A/AA/B testing tmpfs
+ * | `-/mnt/A/AA/B/BB testing tmpfs
+ * `-/mnt/B testing ramfs
+ */
+ ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(fd_tree_base, "A/AA", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(fd_tree_base, "A/AA/B", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(fd_tree_base, "A/AA/B/BB", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ EXPECT_EQ(create_and_enter_userns(), 0);
+
+ /*
+ * The caller entered a new user namespace. They will have
+ * CAP_SYS_ADMIN in this user namespace. However, they're still
+ * located in a mount namespace that is owned by an ancestor
+ * user namespace in which they hold no privilege. Creating a
+ * detached mount must thus fail.
+ */
+ fd_tree_subdir = sys_open_tree(fd_tree_base, "A/AA",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_LT(fd_tree_subdir, 0);
+ ASSERT_EQ(errno, EPERM);
+}
+
+TEST_F(mount_setattr, open_tree_detached_fail3)
+{
+ int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
+ struct statx stx;
+
+ fd_tree_base = sys_open_tree(-EBADF, "/mnt",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree_base, 0);
+ /*
+ * /mnt testing tmpfs
+ * |-/mnt/A testing tmpfs
+ * | `-/mnt/A/AA testing tmpfs
+ * | `-/mnt/A/AA/B testing tmpfs
+ * | `-/mnt/A/AA/B/BB testing tmpfs
+ * `-/mnt/B testing ramfs
+ */
+ ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(fd_tree_base, "A/AA", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(fd_tree_base, "A/AA/B", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(fd_tree_base, "A/AA/B/BB", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ EXPECT_EQ(prepare_unpriv_mountns(), 0);
+
+ /*
+ * The caller entered a new mount namespace. They will have
+ * CAP_SYS_ADMIN in the owning user namespace of their mount
+ * namespace.
+ *
+ * However, the origin mount namespace of the anonymous mount
+ * namespace of @fd_tree_base doesn't match the caller's mount
+ * namespace anymore so creation of another detached mounts must
+ * fail.
+ */
+ fd_tree_subdir = sys_open_tree(fd_tree_base, "A/AA",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_LT(fd_tree_subdir, 0);
+ ASSERT_EQ(errno, EINVAL);
+}
+
+TEST_F(mount_setattr, open_tree_subfolder)
+{
+ int fd_context, fd_tmpfs, fd_tree;
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+
+ fd_tmpfs = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_tmpfs, 0);
+
+ EXPECT_EQ(close(fd_context), 0);
+
+ ASSERT_EQ(mkdirat(fd_tmpfs, "subdir", 0755), 0);
+
+ fd_tree = sys_open_tree(fd_tmpfs, "subdir",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree, 0);
+
+ EXPECT_EQ(close(fd_tmpfs), 0);
+
+ ASSERT_EQ(mkdirat(-EBADF, "/mnt/open_tree_subfolder", 0755), 0);
+
+ ASSERT_EQ(sys_move_mount(fd_tree, "", -EBADF, "/mnt/open_tree_subfolder", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+ EXPECT_EQ(close(fd_tree), 0);
+
+ ASSERT_EQ(umount2("/mnt/open_tree_subfolder", 0), 0);
+
+ EXPECT_EQ(rmdir("/mnt/open_tree_subfolder"), 0);
+}
+
+TEST_F(mount_setattr, mount_detached_mount_on_detached_mount_then_close)
+{
+ int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
+ struct statx stx;
+
+ fd_tree_base = sys_open_tree(-EBADF, "/mnt",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree_base, 0);
+ /*
+ * /mnt testing tmpfs
+ */
+ ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
+ ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ fd_tree_subdir = sys_open_tree(fd_tree_base, "",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_EMPTY_PATH | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree_subdir, 0);
+ /*
+ * /mnt testing tmpfs
+ */
+ ASSERT_EQ(statx(fd_tree_subdir, "A", 0, 0, &stx), 0);
+ ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ /*
+ * /mnt testing tmpfs
+ * `-/mnt testing tmpfs
+ */
+ ASSERT_EQ(move_mount(fd_tree_subdir, "", fd_tree_base, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
+ ASSERT_EQ(statx(fd_tree_subdir, "", AT_EMPTY_PATH, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ ASSERT_NE(move_mount(fd_tree_subdir, "", fd_tree_base, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
+
+ EXPECT_EQ(close(fd_tree_base), 0);
+ EXPECT_EQ(close(fd_tree_subdir), 0);
+}
+
+TEST_F(mount_setattr, mount_detached_mount_on_detached_mount_and_attach)
+{
+ int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
+ struct statx stx;
+ __u64 mnt_id = 0;
+
+ fd_tree_base = sys_open_tree(-EBADF, "/mnt",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree_base, 0);
+ /*
+ * /mnt testing tmpfs
+ */
+ ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
+ ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ fd_tree_subdir = sys_open_tree(fd_tree_base, "",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_EMPTY_PATH | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree_subdir, 0);
+ /*
+ * /mnt testing tmpfs
+ */
+ ASSERT_EQ(statx(fd_tree_subdir, "A", 0, 0, &stx), 0);
+ ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ /*
+ * /mnt testing tmpfs
+ * `-/mnt testing tmpfs
+ */
+ ASSERT_EQ(move_mount(fd_tree_subdir, "", fd_tree_base, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
+ ASSERT_EQ(statx(fd_tree_subdir, "", AT_EMPTY_PATH, STATX_MNT_ID_UNIQUE, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_TRUE(stx.stx_mask & STATX_MNT_ID_UNIQUE);
+ mnt_id = stx.stx_mnt_id;
+
+ ASSERT_NE(move_mount(fd_tree_subdir, "", fd_tree_base, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
+
+ ASSERT_EQ(move_mount(fd_tree_base, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
+ ASSERT_EQ(statx(-EBADF, "/tmp/target1", 0, STATX_MNT_ID_UNIQUE, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_TRUE(stx.stx_mask & STATX_MNT_ID_UNIQUE);
+ ASSERT_EQ(stx.stx_mnt_id, mnt_id);
+
+ EXPECT_EQ(close(fd_tree_base), 0);
+ EXPECT_EQ(close(fd_tree_subdir), 0);
+}
+
+TEST_F(mount_setattr, move_mount_detached_fail)
+{
+ int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
+ struct statx stx;
+
+ fd_tree_base = sys_open_tree(-EBADF, "/mnt",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree_base, 0);
+
+ /* Attach the mount to the caller's mount namespace. */
+ ASSERT_EQ(move_mount(fd_tree_base, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+ ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
+ ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ fd_tree_subdir = sys_open_tree(-EBADF, "/tmp/B",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree_subdir, 0);
+ ASSERT_EQ(statx(fd_tree_subdir, "BB", 0, 0, &stx), 0);
+ ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ /* Not allowed to move an attached mount to a detached mount. */
+ ASSERT_NE(move_mount(fd_tree_base, "", fd_tree_subdir, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
+ ASSERT_EQ(errno, EINVAL);
+
+ EXPECT_EQ(close(fd_tree_base), 0);
+ EXPECT_EQ(close(fd_tree_subdir), 0);
+}
+
+TEST_F(mount_setattr, attach_detached_mount_then_umount_then_close)
+{
+ int fd_tree = -EBADF;
+ struct statx stx;
+
+ fd_tree = sys_open_tree(-EBADF, "/mnt",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree, 0);
+
+ ASSERT_EQ(statx(fd_tree, "A", 0, 0, &stx), 0);
+ /* We copied with AT_RECURSIVE so /mnt/A must be a mountpoint. */
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ /* Attach the mount to the caller's mount namespace. */
+ ASSERT_EQ(move_mount(fd_tree, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+ ASSERT_EQ(statx(-EBADF, "/tmp/target1", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ ASSERT_EQ(umount2("/tmp/target1", MNT_DETACH), 0);
+
+ /*
+ * This tests whether dissolve_on_fput() handles a NULL mount
+ * namespace correctly, i.e., that it doesn't splat.
+ */
+ EXPECT_EQ(close(fd_tree), 0);
+}
+
+TEST_F(mount_setattr, mount_detached1_onto_detached2_then_close_detached1_then_mount_detached2_onto_attached)
+{
+ int fd_tree1 = -EBADF, fd_tree2 = -EBADF;
+
+ /*
+ * |-/mnt/A testing tmpfs
+ * `-/mnt/A/AA testing tmpfs
+ * `-/mnt/A/AA/B testing tmpfs
+ * `-/mnt/A/AA/B/BB testing tmpfs
+ */
+ fd_tree1 = sys_open_tree(-EBADF, "/mnt/A",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree1, 0);
+
+ /*
+ * `-/mnt/B testing ramfs
+ */
+ fd_tree2 = sys_open_tree(-EBADF, "/mnt/B",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_EMPTY_PATH | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree2, 0);
+
+ /*
+ * Move the source detached mount tree to the target detached
+ * mount tree. This will move all the mounts in the source mount
+ * tree from the source anonymous mount namespace to the target
+ * anonymous mount namespace.
+ *
+ * The source detached mount tree and the target detached mount
+ * tree now both refer to the same anonymous mount namespace.
+ *
+ * |-"" testing ramfs
+ * `-"" testing tmpfs
+ * `-""/AA testing tmpfs
+ * `-""/AA/B testing tmpfs
+ * `-""/AA/B/BB testing tmpfs
+ */
+ ASSERT_EQ(move_mount(fd_tree1, "", fd_tree2, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
+
+ /*
+ * The source detached mount tree @fd_tree1 is now an attached
+ * mount, i.e., it has a parent. Specifically, it now has the
+ * root mount of the mount tree of @fd_tree2 as its parent.
+ *
+ * That means we are no longer allowed to attach it as we only
+ * allow attaching the root of an anonymous mount tree, not
+ * random bits and pieces. Verify that the kernel enforces this.
+ */
+ ASSERT_NE(move_mount(fd_tree1, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+ /*
+ * Closing the source detached mount tree must not unmount and
+ * free the shared anonymous mount namespace. The kernel will
+ * quickly yell at us because the anonymous mount namespace
+ * won't be empty when it's freed.
+ */
+ EXPECT_EQ(close(fd_tree1), 0);
+
+ /*
+ * Attach the mount tree to a non-anonymous mount namespace.
+ * This can only succeed if closing fd_tree1 had proper
+ * semantics and didn't cause the anonymous mount namespace to
+ * be freed. If it did this will trigger a UAF which will be
+ * visible on any KASAN enabled kernel.
+ *
+ * |-/tmp/target1 testing ramfs
+ * `-/tmp/target1 testing tmpfs
+ * `-/tmp/target1/AA testing tmpfs
+ * `-/tmp/target1/AA/B testing tmpfs
+ * `-/tmp/target1/AA/B/BB testing tmpfs
+ */
+ ASSERT_EQ(move_mount(fd_tree2, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
+ EXPECT_EQ(close(fd_tree2), 0);
+}
+
+TEST_F(mount_setattr, two_detached_mounts_referring_to_same_anonymous_mount_namespace)
+{
+ int fd_tree1 = -EBADF, fd_tree2 = -EBADF;
+
+ /*
+ * Copy the following mount tree:
+ *
+ * |-/mnt/A testing tmpfs
+ * `-/mnt/A/AA testing tmpfs
+ * `-/mnt/A/AA/B testing tmpfs
+ * `-/mnt/A/AA/B/BB testing tmpfs
+ */
+ fd_tree1 = sys_open_tree(-EBADF, "/mnt/A",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree1, 0);
+
+ /*
+ * Create an O_PATH file descriptors with a separate struct file
+ * that refers to the same detached mount tree as @fd_tree1
+ */
+ fd_tree2 = sys_open_tree(fd_tree1, "",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_EMPTY_PATH | OPEN_TREE_CLOEXEC);
+ ASSERT_GE(fd_tree2, 0);
+
+ /*
+ * Copy the following mount tree:
+ *
+ * |-/tmp/target1 testing tmpfs
+ * `-/tmp/target1/AA testing tmpfs
+ * `-/tmp/target1/AA/B testing tmpfs
+ * `-/tmp/target1/AA/B/BB testing tmpfs
+ */
+ ASSERT_EQ(move_mount(fd_tree2, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+ /*
+ * This must fail as this would mean adding the same mount tree
+ * into the same mount tree.
+ */
+ ASSERT_NE(move_mount(fd_tree1, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
+}
+
+TEST_F(mount_setattr, two_detached_subtrees_of_same_anonymous_mount_namespace)
+{
+ int fd_tree1 = -EBADF, fd_tree2 = -EBADF;
+
+ /*
+ * Copy the following mount tree:
+ *
+ * |-/mnt/A testing tmpfs
+ * `-/mnt/A/AA testing tmpfs
+ * `-/mnt/A/AA/B testing tmpfs
+ * `-/mnt/A/AA/B/BB testing tmpfs
+ */
+ fd_tree1 = sys_open_tree(-EBADF, "/mnt/A",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree1, 0);
+
+ /*
+ * Create an O_PATH file descriptors with a separate struct file that
+ * refers to a subtree of the same detached mount tree as @fd_tree1
+ */
+ fd_tree2 = sys_open_tree(fd_tree1, "AA",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_EMPTY_PATH | OPEN_TREE_CLOEXEC);
+ ASSERT_GE(fd_tree2, 0);
+
+ /*
+ * This must fail as it is only possible to attach the root of a
+ * detached mount tree.
+ */
+ ASSERT_NE(move_mount(fd_tree2, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+ ASSERT_EQ(move_mount(fd_tree1, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
+}
+
+TEST_F(mount_setattr, detached_tree_propagation)
+{
+ int fd_tree = -EBADF;
+ struct statx stx1, stx2, stx3, stx4;
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+ ASSERT_EQ(mount(NULL, "/mnt", NULL, MS_REC | MS_SHARED, NULL), 0);
+
+ /*
+ * Copy the following mount tree:
+ *
+ * /mnt testing tmpfs
+ * |-/mnt/A testing tmpfs
+ * | `-/mnt/A/AA testing tmpfs
+ * | `-/mnt/A/AA/B testing tmpfs
+ * | `-/mnt/A/AA/B/BB testing tmpfs
+ * `-/mnt/B testing ramfs
+ */
+ fd_tree = sys_open_tree(-EBADF, "/mnt",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree, 0);
+
+ ASSERT_EQ(statx(-EBADF, "/mnt/A", 0, 0, &stx1), 0);
+ ASSERT_EQ(statx(fd_tree, "A", 0, 0, &stx2), 0);
+
+ /*
+ * Copying the mount namespace like done above doesn't alter the
+ * mounts in any way so the filesystem mounted on /mnt must be
+ * identical even though the mounts will differ. Use the device
+ * information to verify that. Note that tmpfs will have a 0
+ * major number so comparing the major number is misleading.
+ */
+ ASSERT_EQ(stx1.stx_dev_minor, stx2.stx_dev_minor);
+
+ /* Mount a tmpfs filesystem over /mnt/A. */
+ ASSERT_EQ(mount(NULL, "/mnt/A", "tmpfs", 0, NULL), 0);
+
+
+ ASSERT_EQ(statx(-EBADF, "/mnt/A", 0, 0, &stx3), 0);
+ ASSERT_EQ(statx(fd_tree, "A", 0, 0, &stx4), 0);
+
+ /*
+ * A new filesystem has been mounted on top of /mnt/A which
+ * means that the device information will be different for any
+ * statx() that was taken from /mnt/A before the mount compared
+ * to one after the mount.
+ *
+ * Since we already now that the device information between the
+ * stx1 and stx2 samples are identical we also now that stx2 and
+ * stx3 device information will necessarily differ.
+ */
+ ASSERT_NE(stx1.stx_dev_minor, stx3.stx_dev_minor);
+
+ /*
+ * If mount propagation worked correctly then the tmpfs mount
+ * that was created after the mount namespace was unshared will
+ * have propagated onto /mnt/A in the detached mount tree.
+ *
+ * Verify that the device information for stx3 and stx4 are
+ * identical. It is already established that stx3 is different
+ * from both stx1 and stx2 sampled before the tmpfs mount was
+ * done so if stx3 and stx4 are identical the proof is done.
+ */
+ ASSERT_EQ(stx3.stx_dev_minor, stx4.stx_dev_minor);
+
+ EXPECT_EQ(close(fd_tree), 0);
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 28a715a8ef2b..679542f565a4 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -21,6 +21,7 @@ msg_oob
msg_zerocopy
netlink-dumps
nettest
+proc_net_pktgen
psock_fanout
psock_snd
psock_tpacket
@@ -42,6 +43,7 @@ socket
so_incoming_cpu
so_netns_cookie
so_txtime
+so_rcv_listener
stress_reuseport_listen
tap
tcp_fastopen_backup_key
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 73ee88d6b043..6d718b478ed8 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -7,7 +7,7 @@ CFLAGS += -I../../../../usr/include/ $(KHDR_INCLUDES)
CFLAGS += -I../
TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh \
- rtnetlink.sh xfrm_policy.sh test_blackhole_dev.sh
+ rtnetlink.sh xfrm_policy.sh
TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh
TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh
TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh
@@ -33,9 +33,12 @@ TEST_PROGS += gro.sh
TEST_PROGS += gre_gso.sh
TEST_PROGS += cmsg_so_mark.sh
TEST_PROGS += cmsg_so_priority.sh
-TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh
+TEST_PROGS += test_so_rcv.sh
+TEST_PROGS += cmsg_time.sh cmsg_ip.sh
TEST_PROGS += netns-name.sh
+TEST_PROGS += link_netns.py
TEST_PROGS += nl_netdev.py
+TEST_PROGS += rtnetlink.py
TEST_PROGS += srv6_end_dt46_l3vpn_test.sh
TEST_PROGS += srv6_end_dt4_l3vpn_test.sh
TEST_PROGS += srv6_end_dt6_l3vpn_test.sh
@@ -75,6 +78,7 @@ TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls tun tap epoll_busy_
TEST_GEN_FILES += toeplitz
TEST_GEN_FILES += cmsg_sender
TEST_GEN_FILES += stress_reuseport_listen
+TEST_GEN_FILES += so_rcv_listener
TEST_PROGS += test_vxlan_vnifiltering.sh
TEST_GEN_FILES += io_uring_zerocopy_tx
TEST_PROGS += io_uring_zerocopy_tx.sh
@@ -100,6 +104,8 @@ TEST_PROGS += vlan_bridge_binding.sh
TEST_PROGS += bpf_offload.py
TEST_PROGS += ipv6_route_update_soft_lockup.sh
TEST_PROGS += busy_poll_test.sh
+TEST_GEN_PROGS += proc_net_pktgen
+TEST_PROGS += lwt_dst_cache_ref_loop.sh
# YNL files, must be before "include ..lib.mk"
YNL_GEN_FILES := busy_poller netlink-dumps
diff --git a/tools/testing/selftests/net/bpf_offload.py b/tools/testing/selftests/net/bpf_offload.py
index d10f420e4ef6..b2c271b79240 100755
--- a/tools/testing/selftests/net/bpf_offload.py
+++ b/tools/testing/selftests/net/bpf_offload.py
@@ -207,20 +207,24 @@ def bpftool_prog_list_wait(expected=0, n_retry=20):
raise Exception("Time out waiting for program counts to stabilize want %d, have %d" % (expected, nprogs))
def bpftool_map_list_wait(expected=0, n_retry=20, ns=""):
+ nmaps = None
for i in range(n_retry):
maps = bpftool_map_list(ns=ns)
- if len(maps) == expected:
+ nmaps = len(maps)
+ if nmaps == expected:
return maps
time.sleep(0.05)
raise Exception("Time out waiting for map counts to stabilize want %d, have %d" % (expected, nmaps))
def bpftool_prog_load(sample, file_name, maps=[], prog_type="xdp", dev=None,
- fail=True, include_stderr=False):
+ fail=True, include_stderr=False, dev_bind=None):
args = "prog load %s %s" % (os.path.join(bpf_test_dir, sample), file_name)
if prog_type is not None:
args += " type " + prog_type
if dev is not None:
args += " dev " + dev
+ elif dev_bind is not None:
+ args += " xdpmeta_dev " + dev_bind
if len(maps):
args += " map " + " map ".join(maps)
@@ -708,6 +712,7 @@ _, base_maps = bpftool("map")
base_map_names = [
'pid_iter.rodata', # created on each bpftool invocation
'libbpf_det_bind', # created on each bpftool invocation
+ 'libbpf_global',
]
# Check netdevsim
@@ -980,6 +985,16 @@ try:
rm("/sys/fs/bpf/offload")
sim.wait_for_flush()
+ bpftool_prog_load("sample_ret0.bpf.o", "/sys/fs/bpf/devbound",
+ dev_bind=sim['ifname'])
+ devbound = bpf_pinned("/sys/fs/bpf/devbound")
+ start_test("Test dev-bound program in generic mode...")
+ ret, _, err = sim.set_xdp(devbound, "generic", fail=False, include_stderr=True)
+ fail(ret == 0, "devbound program in generic mode allowed")
+ check_extack(err, "Can't attach device-bound programs in generic mode.", args)
+ rm("/sys/fs/bpf/devbound")
+ sim.wait_for_flush()
+
start_test("Test XDP load failure...")
sim.dfs["dev/bpf_bind_verifier_accept"] = 0
ret, _, err = bpftool_prog_load("sample_ret0.bpf.o", "/sys/fs/bpf/offload",
diff --git a/tools/testing/selftests/net/cmsg_ip.sh b/tools/testing/selftests/net/cmsg_ip.sh
new file mode 100755
index 000000000000..b55680e081ad
--- /dev/null
+++ b/tools/testing/selftests/net/cmsg_ip.sh
@@ -0,0 +1,187 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+IP4=172.16.0.1/24
+TGT4=172.16.0.2
+IP6=2001:db8:1::1/64
+TGT6=2001:db8:1::2
+TMPF=$(mktemp --suffix ".pcap")
+
+cleanup()
+{
+ rm -f $TMPF
+ cleanup_ns $NS
+}
+
+trap cleanup EXIT
+
+tcpdump -h | grep immediate-mode >> /dev/null
+if [ $? -ne 0 ]; then
+ echo "SKIP - tcpdump with --immediate-mode option required"
+ exit $ksft_skip
+fi
+
+# Namespaces
+setup_ns NS
+NSEXE="ip netns exec $NS"
+
+$NSEXE sysctl -w net.ipv4.ping_group_range='0 2147483647' > /dev/null
+
+# Connectivity
+ip -netns $NS link add type dummy
+ip -netns $NS link set dev dummy0 up
+ip -netns $NS addr add $IP4 dev dummy0
+ip -netns $NS addr add $IP6 dev dummy0
+
+# Test
+BAD=0
+TOTAL=0
+
+check_result() {
+ ((TOTAL++))
+ if [ $1 -ne $2 ]; then
+ echo " Case $3 returned $1, expected $2"
+ ((BAD++))
+ fi
+}
+
+# IPV6_DONTFRAG
+for ovr in setsock cmsg both diff; do
+ for df in 0 1; do
+ for p in u U i r; do
+ [ $p == "u" ] && prot=UDP
+ [ $p == "U" ] && prot=UDP
+ [ $p == "i" ] && prot=ICMP
+ [ $p == "r" ] && prot=RAW
+
+ [ $ovr == "setsock" ] && m="-F $df"
+ [ $ovr == "cmsg" ] && m="-f $df"
+ [ $ovr == "both" ] && m="-F $df -f $df"
+ [ $ovr == "diff" ] && m="-F $((1 - df)) -f $df"
+
+ $NSEXE ./cmsg_sender -s -S 2000 -6 -p $p $m $TGT6 1234
+ check_result $? $df "DONTFRAG $prot $ovr"
+ done
+ done
+done
+
+# IP_TOS + IPV6_TCLASS
+
+test_dscp() {
+ local -r IPVER=$1
+ local -r TGT=$2
+ local -r MATCH=$3
+
+ local -r TOS=0x10
+ local -r TOS2=0x20
+ local -r ECN=0x3
+
+ ip $IPVER -netns $NS rule add tos $TOS lookup 300
+ ip $IPVER -netns $NS route add table 300 prohibit any
+
+ for ovr in setsock cmsg both diff; do
+ for p in u U i r; do
+ [ $p == "u" ] && prot=UDP
+ [ $p == "U" ] && prot=UDP
+ [ $p == "i" ] && prot=ICMP
+ [ $p == "r" ] && prot=RAW
+
+ [ $ovr == "setsock" ] && m="-C"
+ [ $ovr == "cmsg" ] && m="-c"
+ [ $ovr == "both" ] && m="-C $((TOS2)) -c"
+ [ $ovr == "diff" ] && m="-C $((TOS )) -c"
+
+ $NSEXE nohup tcpdump --immediate-mode -p -ni dummy0 -w $TMPF -c 4 2> /dev/null &
+ BG=$!
+ sleep 0.05
+
+ $NSEXE ./cmsg_sender $IPVER -p $p $m $((TOS2)) $TGT 1234
+ check_result $? 0 "$MATCH $prot $ovr - pass"
+
+ while [ -d /proc/$BG ]; do
+ $NSEXE ./cmsg_sender $IPVER -p $p $m $((TOS2)) $TGT 1234
+ done
+
+ tcpdump -r $TMPF -v 2>&1 | grep "$MATCH $TOS2" >> /dev/null
+ check_result $? 0 "$MATCH $prot $ovr - packet data"
+ rm $TMPF
+
+ [ $ovr == "both" ] && m="-C $((TOS )) -c"
+ [ $ovr == "diff" ] && m="-C $((TOS2)) -c"
+
+ # Match prohibit rule: expect failure
+ $NSEXE ./cmsg_sender $IPVER -p $p $m $((TOS)) -s $TGT 1234
+ check_result $? 1 "$MATCH $prot $ovr - rejection"
+
+ # Match prohibit rule: IPv4 masks ECN: expect failure
+ if [[ "$IPVER" == "-4" ]]; then
+ $NSEXE ./cmsg_sender $IPVER -p $p $m "$((TOS | ECN))" -s $TGT 1234
+ check_result $? 1 "$MATCH $prot $ovr - rejection (ECN)"
+ fi
+ done
+ done
+}
+
+test_dscp -4 $TGT4 tos
+test_dscp -6 $TGT6 class
+
+# IP_TTL + IPV6_HOPLIMIT
+test_ttl_hoplimit() {
+ local -r IPVER=$1
+ local -r TGT=$2
+ local -r MATCH=$3
+
+ local -r LIM=4
+
+ for ovr in setsock cmsg both diff; do
+ for p in u U i r; do
+ [ $p == "u" ] && prot=UDP
+ [ $p == "U" ] && prot=UDP
+ [ $p == "i" ] && prot=ICMP
+ [ $p == "r" ] && prot=RAW
+
+ [ $ovr == "setsock" ] && m="-L"
+ [ $ovr == "cmsg" ] && m="-l"
+ [ $ovr == "both" ] && m="-L $LIM -l"
+ [ $ovr == "diff" ] && m="-L $((LIM + 1)) -l"
+
+ $NSEXE nohup tcpdump --immediate-mode -p -ni dummy0 -w $TMPF -c 4 2> /dev/null &
+ BG=$!
+ sleep 0.05
+
+ $NSEXE ./cmsg_sender $IPVER -p $p $m $LIM $TGT 1234
+ check_result $? 0 "$MATCH $prot $ovr - pass"
+
+ while [ -d /proc/$BG ]; do
+ $NSEXE ./cmsg_sender $IPVER -p $p $m $LIM $TGT 1234
+ done
+
+ tcpdump -r $TMPF -v 2>&1 | grep "$MATCH $LIM[^0-9]" >> /dev/null
+ check_result $? 0 "$MATCH $prot $ovr - packet data"
+ rm $TMPF
+ done
+ done
+}
+
+test_ttl_hoplimit -4 $TGT4 ttl
+test_ttl_hoplimit -6 $TGT6 hlim
+
+# IPV6 exthdr
+for p in u U i r; do
+ # Very basic "does it crash" test
+ for h in h d r; do
+ $NSEXE ./cmsg_sender -p $p -6 -H $h $TGT6 1234
+ check_result $? 0 "ExtHdr $prot $ovr - pass"
+ done
+done
+
+# Summary
+if [ $BAD -ne 0 ]; then
+ echo "FAIL - $BAD/$TOTAL cases failed"
+ exit 1
+else
+ echo "OK"
+ exit 0
+fi
diff --git a/tools/testing/selftests/net/cmsg_ipv6.sh b/tools/testing/selftests/net/cmsg_ipv6.sh
deleted file mode 100755
index 8bc23fb4c82b..000000000000
--- a/tools/testing/selftests/net/cmsg_ipv6.sh
+++ /dev/null
@@ -1,154 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-source lib.sh
-
-IP6=2001:db8:1::1/64
-TGT6=2001:db8:1::2
-TMPF=$(mktemp --suffix ".pcap")
-
-cleanup()
-{
- rm -f $TMPF
- cleanup_ns $NS
-}
-
-trap cleanup EXIT
-
-tcpdump -h | grep immediate-mode >> /dev/null
-if [ $? -ne 0 ]; then
- echo "SKIP - tcpdump with --immediate-mode option required"
- exit $ksft_skip
-fi
-
-# Namespaces
-setup_ns NS
-NSEXE="ip netns exec $NS"
-
-$NSEXE sysctl -w net.ipv4.ping_group_range='0 2147483647' > /dev/null
-
-# Connectivity
-ip -netns $NS link add type dummy
-ip -netns $NS link set dev dummy0 up
-ip -netns $NS addr add $IP6 dev dummy0
-
-# Test
-BAD=0
-TOTAL=0
-
-check_result() {
- ((TOTAL++))
- if [ $1 -ne $2 ]; then
- echo " Case $3 returned $1, expected $2"
- ((BAD++))
- fi
-}
-
-# IPV6_DONTFRAG
-for ovr in setsock cmsg both diff; do
- for df in 0 1; do
- for p in u i r; do
- [ $p == "u" ] && prot=UDP
- [ $p == "i" ] && prot=ICMP
- [ $p == "r" ] && prot=RAW
-
- [ $ovr == "setsock" ] && m="-F $df"
- [ $ovr == "cmsg" ] && m="-f $df"
- [ $ovr == "both" ] && m="-F $df -f $df"
- [ $ovr == "diff" ] && m="-F $((1 - df)) -f $df"
-
- $NSEXE ./cmsg_sender -s -S 2000 -6 -p $p $m $TGT6 1234
- check_result $? $df "DONTFRAG $prot $ovr"
- done
- done
-done
-
-# IPV6_TCLASS
-TOS=0x10
-TOS2=0x20
-
-ip -6 -netns $NS rule add tos $TOS lookup 300
-ip -6 -netns $NS route add table 300 prohibit any
-
-for ovr in setsock cmsg both diff; do
- for p in u i r; do
- [ $p == "u" ] && prot=UDP
- [ $p == "i" ] && prot=ICMP
- [ $p == "r" ] && prot=RAW
-
- [ $ovr == "setsock" ] && m="-C"
- [ $ovr == "cmsg" ] && m="-c"
- [ $ovr == "both" ] && m="-C $((TOS2)) -c"
- [ $ovr == "diff" ] && m="-C $((TOS )) -c"
-
- $NSEXE nohup tcpdump --immediate-mode -p -ni dummy0 -w $TMPF -c 4 2> /dev/null &
- BG=$!
- sleep 0.05
-
- $NSEXE ./cmsg_sender -6 -p $p $m $((TOS2)) $TGT6 1234
- check_result $? 0 "TCLASS $prot $ovr - pass"
-
- while [ -d /proc/$BG ]; do
- $NSEXE ./cmsg_sender -6 -p $p $m $((TOS2)) $TGT6 1234
- done
-
- tcpdump -r $TMPF -v 2>&1 | grep "class $TOS2" >> /dev/null
- check_result $? 0 "TCLASS $prot $ovr - packet data"
- rm $TMPF
-
- [ $ovr == "both" ] && m="-C $((TOS )) -c"
- [ $ovr == "diff" ] && m="-C $((TOS2)) -c"
-
- $NSEXE ./cmsg_sender -6 -p $p $m $((TOS)) -s $TGT6 1234
- check_result $? 1 "TCLASS $prot $ovr - rejection"
- done
-done
-
-# IPV6_HOPLIMIT
-LIM=4
-
-for ovr in setsock cmsg both diff; do
- for p in u i r; do
- [ $p == "u" ] && prot=UDP
- [ $p == "i" ] && prot=ICMP
- [ $p == "r" ] && prot=RAW
-
- [ $ovr == "setsock" ] && m="-L"
- [ $ovr == "cmsg" ] && m="-l"
- [ $ovr == "both" ] && m="-L $LIM -l"
- [ $ovr == "diff" ] && m="-L $((LIM + 1)) -l"
-
- $NSEXE nohup tcpdump --immediate-mode -p -ni dummy0 -w $TMPF -c 4 2> /dev/null &
- BG=$!
- sleep 0.05
-
- $NSEXE ./cmsg_sender -6 -p $p $m $LIM $TGT6 1234
- check_result $? 0 "HOPLIMIT $prot $ovr - pass"
-
- while [ -d /proc/$BG ]; do
- $NSEXE ./cmsg_sender -6 -p $p $m $LIM $TGT6 1234
- done
-
- tcpdump -r $TMPF -v 2>&1 | grep "hlim $LIM[^0-9]" >> /dev/null
- check_result $? 0 "HOPLIMIT $prot $ovr - packet data"
- rm $TMPF
- done
-done
-
-# IPV6 exthdr
-for p in u i r; do
- # Very basic "does it crash" test
- for h in h d r; do
- $NSEXE ./cmsg_sender -p $p -6 -H $h $TGT6 1234
- check_result $? 0 "ExtHdr $prot $ovr - pass"
- done
-done
-
-# Summary
-if [ $BAD -ne 0 ]; then
- echo "FAIL - $BAD/$TOTAL cases failed"
- exit 1
-else
- echo "OK"
- exit 0
-fi
diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c
index bc314382e4e1..a825e628aee7 100644
--- a/tools/testing/selftests/net/cmsg_sender.c
+++ b/tools/testing/selftests/net/cmsg_sender.c
@@ -33,6 +33,7 @@ enum {
ERN_RECVERR,
ERN_CMSG_RD,
ERN_CMSG_RCV,
+ ERN_SEND_MORE,
};
struct option_cmsg_u32 {
@@ -46,6 +47,7 @@ struct options {
const char *service;
unsigned int size;
unsigned int num_pkt;
+ bool msg_more;
struct {
unsigned int mark;
unsigned int dontfrag;
@@ -72,7 +74,7 @@ struct options {
struct option_cmsg_u32 tclass;
struct option_cmsg_u32 hlimit;
struct option_cmsg_u32 exthdr;
- } v6;
+ } cmsg;
} opt = {
.size = 13,
.num_pkt = 1,
@@ -94,7 +96,8 @@ static void __attribute__((noreturn)) cs_usage(const char *bin)
"\t\t-S send() size\n"
"\t\t-4/-6 Force IPv4 / IPv6 only\n"
"\t\t-p prot Socket protocol\n"
- "\t\t (u = UDP (default); i = ICMP; r = RAW)\n"
+ "\t\t (u = UDP (default); i = ICMP; r = RAW;\n"
+ "\t\t U = UDP with MSG_MORE)\n"
"\n"
"\t\t-m val Set SO_MARK with given value\n"
"\t\t-M val Set SO_MARK via setsockopt\n"
@@ -104,13 +107,13 @@ static void __attribute__((noreturn)) cs_usage(const char *bin)
"\t\t-t Enable time stamp reporting\n"
"\t\t-f val Set don't fragment via cmsg\n"
"\t\t-F val Set don't fragment via setsockopt\n"
- "\t\t-c val Set TCLASS via cmsg\n"
- "\t\t-C val Set TCLASS via setsockopt\n"
- "\t\t-l val Set HOPLIMIT via cmsg\n"
- "\t\t-L val Set HOPLIMIT via setsockopt\n"
+ "\t\t-c val Set TOS/TCLASS via cmsg\n"
+ "\t\t-C val Set TOS/TCLASS via setsockopt\n"
+ "\t\t-l val Set TTL/HOPLIMIT via cmsg\n"
+ "\t\t-L val Set TTL/HOPLIMIT via setsockopt\n"
"\t\t-H type Add an IPv6 header option\n"
- "\t\t (h = HOP; d = DST; r = RTDST)"
- "");
+ "\t\t (h = HOP; d = DST; r = RTDST)\n"
+ "\n");
exit(ERN_HELP);
}
@@ -133,8 +136,11 @@ static void cs_parse_args(int argc, char *argv[])
opt.sock.family = AF_INET6;
break;
case 'p':
- if (*optarg == 'u' || *optarg == 'U') {
+ if (*optarg == 'u') {
opt.sock.proto = IPPROTO_UDP;
+ } else if (*optarg == 'U') {
+ opt.sock.proto = IPPROTO_UDP;
+ opt.msg_more = true;
} else if (*optarg == 'i' || *optarg == 'I') {
opt.sock.proto = IPPROTO_ICMP;
} else if (*optarg == 'r') {
@@ -169,37 +175,37 @@ static void cs_parse_args(int argc, char *argv[])
opt.ts.ena = true;
break;
case 'f':
- opt.v6.dontfrag.ena = true;
- opt.v6.dontfrag.val = atoi(optarg);
+ opt.cmsg.dontfrag.ena = true;
+ opt.cmsg.dontfrag.val = atoi(optarg);
break;
case 'F':
opt.sockopt.dontfrag = atoi(optarg);
break;
case 'c':
- opt.v6.tclass.ena = true;
- opt.v6.tclass.val = atoi(optarg);
+ opt.cmsg.tclass.ena = true;
+ opt.cmsg.tclass.val = atoi(optarg);
break;
case 'C':
opt.sockopt.tclass = atoi(optarg);
break;
case 'l':
- opt.v6.hlimit.ena = true;
- opt.v6.hlimit.val = atoi(optarg);
+ opt.cmsg.hlimit.ena = true;
+ opt.cmsg.hlimit.val = atoi(optarg);
break;
case 'L':
opt.sockopt.hlimit = atoi(optarg);
break;
case 'H':
- opt.v6.exthdr.ena = true;
+ opt.cmsg.exthdr.ena = true;
switch (optarg[0]) {
case 'h':
- opt.v6.exthdr.val = IPV6_HOPOPTS;
+ opt.cmsg.exthdr.val = IPV6_HOPOPTS;
break;
case 'd':
- opt.v6.exthdr.val = IPV6_DSTOPTS;
+ opt.cmsg.exthdr.val = IPV6_DSTOPTS;
break;
case 'r':
- opt.v6.exthdr.val = IPV6_RTHDRDSTOPTS;
+ opt.cmsg.exthdr.val = IPV6_RTHDRDSTOPTS;
break;
default:
printf("Error: hdr type: %s\n", optarg);
@@ -261,12 +267,20 @@ cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz)
SOL_SOCKET, SO_MARK, &opt.mark);
ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
SOL_SOCKET, SO_PRIORITY, &opt.priority);
- ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
- SOL_IPV6, IPV6_DONTFRAG, &opt.v6.dontfrag);
- ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
- SOL_IPV6, IPV6_TCLASS, &opt.v6.tclass);
- ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
- SOL_IPV6, IPV6_HOPLIMIT, &opt.v6.hlimit);
+
+ if (opt.sock.family == AF_INET) {
+ ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
+ SOL_IP, IP_TOS, &opt.cmsg.tclass);
+ ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
+ SOL_IP, IP_TTL, &opt.cmsg.hlimit);
+ } else {
+ ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
+ SOL_IPV6, IPV6_DONTFRAG, &opt.cmsg.dontfrag);
+ ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
+ SOL_IPV6, IPV6_TCLASS, &opt.cmsg.tclass);
+ ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
+ SOL_IPV6, IPV6_HOPLIMIT, &opt.cmsg.hlimit);
+ }
if (opt.txtime.ena) {
__u64 txtime;
@@ -297,14 +311,14 @@ cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz)
*(__u32 *)CMSG_DATA(cmsg) = SOF_TIMESTAMPING_TX_SCHED |
SOF_TIMESTAMPING_TX_SOFTWARE;
}
- if (opt.v6.exthdr.ena) {
+ if (opt.cmsg.exthdr.ena) {
cmsg = (struct cmsghdr *)(cbuf + cmsg_len);
cmsg_len += CMSG_SPACE(8);
if (cbuf_sz < cmsg_len)
error(ERN_CMSG_WR, EFAULT, "cmsg buffer too small");
cmsg->cmsg_level = SOL_IPV6;
- cmsg->cmsg_type = opt.v6.exthdr.val;
+ cmsg->cmsg_type = opt.cmsg.exthdr.val;
cmsg->cmsg_len = CMSG_LEN(8);
*(__u64 *)CMSG_DATA(cmsg) = 0;
}
@@ -405,23 +419,35 @@ static void ca_set_sockopts(int fd)
setsockopt(fd, SOL_SOCKET, SO_MARK,
&opt.sockopt.mark, sizeof(opt.sockopt.mark)))
error(ERN_SOCKOPT, errno, "setsockopt SO_MARK");
- if (opt.sockopt.dontfrag &&
- setsockopt(fd, SOL_IPV6, IPV6_DONTFRAG,
- &opt.sockopt.dontfrag, sizeof(opt.sockopt.dontfrag)))
- error(ERN_SOCKOPT, errno, "setsockopt IPV6_DONTFRAG");
- if (opt.sockopt.tclass &&
- setsockopt(fd, SOL_IPV6, IPV6_TCLASS,
- &opt.sockopt.tclass, sizeof(opt.sockopt.tclass)))
- error(ERN_SOCKOPT, errno, "setsockopt IPV6_TCLASS");
- if (opt.sockopt.hlimit &&
- setsockopt(fd, SOL_IPV6, IPV6_UNICAST_HOPS,
- &opt.sockopt.hlimit, sizeof(opt.sockopt.hlimit)))
- error(ERN_SOCKOPT, errno, "setsockopt IPV6_HOPLIMIT");
if (opt.sockopt.priority &&
setsockopt(fd, SOL_SOCKET, SO_PRIORITY,
&opt.sockopt.priority, sizeof(opt.sockopt.priority)))
error(ERN_SOCKOPT, errno, "setsockopt SO_PRIORITY");
+ if (opt.sock.family == AF_INET) {
+ if (opt.sockopt.tclass &&
+ setsockopt(fd, SOL_IP, IP_TOS,
+ &opt.sockopt.tclass, sizeof(opt.sockopt.tclass)))
+ error(ERN_SOCKOPT, errno, "setsockopt IP_TOS");
+ if (opt.sockopt.hlimit &&
+ setsockopt(fd, SOL_IP, IP_TTL,
+ &opt.sockopt.hlimit, sizeof(opt.sockopt.hlimit)))
+ error(ERN_SOCKOPT, errno, "setsockopt IP_TTL");
+ } else {
+ if (opt.sockopt.dontfrag &&
+ setsockopt(fd, SOL_IPV6, IPV6_DONTFRAG,
+ &opt.sockopt.dontfrag, sizeof(opt.sockopt.dontfrag)))
+ error(ERN_SOCKOPT, errno, "setsockopt IPV6_DONTFRAG");
+ if (opt.sockopt.tclass &&
+ setsockopt(fd, SOL_IPV6, IPV6_TCLASS,
+ &opt.sockopt.tclass, sizeof(opt.sockopt.tclass)))
+ error(ERN_SOCKOPT, errno, "setsockopt IPV6_TCLASS");
+ if (opt.sockopt.hlimit &&
+ setsockopt(fd, SOL_IPV6, IPV6_UNICAST_HOPS,
+ &opt.sockopt.hlimit, sizeof(opt.sockopt.hlimit)))
+ error(ERN_SOCKOPT, errno, "setsockopt IPV6_HOPLIMIT");
+ }
+
if (opt.txtime.ena) {
struct sock_txtime so_txtime = {
.clockid = CLOCK_MONOTONIC,
@@ -511,7 +537,7 @@ int main(int argc, char *argv[])
cs_write_cmsg(fd, &msg, cbuf, sizeof(cbuf));
for (i = 0; i < opt.num_pkt; i++) {
- err = sendmsg(fd, &msg, 0);
+ err = sendmsg(fd, &msg, opt.msg_more ? MSG_MORE : 0);
if (err < 0) {
if (!opt.silent_send)
fprintf(stderr, "send failed: %s\n", strerror(errno));
@@ -522,6 +548,14 @@ int main(int argc, char *argv[])
err = ERN_SEND_SHORT;
goto err_out;
}
+ if (opt.msg_more) {
+ err = write(fd, NULL, 0);
+ if (err < 0) {
+ fprintf(stderr, "send more: %s\n", strerror(errno));
+ err = ERN_SEND_MORE;
+ goto err_out;
+ }
+ }
}
err = ERN_SUCCESS;
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 5b9baf708950..130d532b7e67 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -18,6 +18,8 @@ CONFIG_DUMMY=y
CONFIG_BRIDGE_VLAN_FILTERING=y
CONFIG_BRIDGE=y
CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_DEBUG_INFO_BTF=y
+CONFIG_DEBUG_INFO_BTF_MODULES=n
CONFIG_VLAN_8021Q=y
CONFIG_GENEVE=m
CONFIG_IFB=y
@@ -107,3 +109,11 @@ CONFIG_XFRM_INTERFACE=m
CONFIG_XFRM_USER=m
CONFIG_IP_NF_MATCH_RPFILTER=m
CONFIG_IP6_NF_MATCH_RPFILTER=m
+CONFIG_IPVLAN=m
+CONFIG_CAN=m
+CONFIG_CAN_DEV=m
+CONFIG_CAN_VXCAN=m
+CONFIG_NETKIT=y
+CONFIG_NET_PKTGEN=m
+CONFIG_IPV6_ILA=m
+CONFIG_IPV6_RPL_LWTUNNEL=y
diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh
index 899dbad0104b..4fcc38907e48 100755
--- a/tools/testing/selftests/net/fcnal-test.sh
+++ b/tools/testing/selftests/net/fcnal-test.sh
@@ -3667,7 +3667,7 @@ ipv6_addr_bind_novrf()
# when it really should not
a=${NSA_LO_IP6}
log_start
- show_hint "Tecnically should fail since address is not on device but kernel allows"
+ show_hint "Technically should fail since address is not on device but kernel allows"
run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b
log_test_addr ${a} $? 0 "TCP socket bind to out of scope local address"
}
@@ -3724,7 +3724,7 @@ ipv6_addr_bind_vrf()
# passes when it really should not
a=${VRF_IP6}
log_start
- show_hint "Tecnically should fail since address is not on device but kernel allows"
+ show_hint "Technically should fail since address is not on device but kernel allows"
run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b
log_test_addr ${a} $? 0 "TCP socket bind to VRF address with device bind"
diff --git a/tools/testing/selftests/net/fdb_flush.sh b/tools/testing/selftests/net/fdb_flush.sh
index d5e3abb8658c..9931a1e36e3d 100755
--- a/tools/testing/selftests/net/fdb_flush.sh
+++ b/tools/testing/selftests/net/fdb_flush.sh
@@ -583,7 +583,7 @@ vxlan_test_flush_by_remote_attributes()
$IP link del dev vx10
$IP link add name vx10 type vxlan dstport "$VXPORT" external
- # For multicat FDB entries, the VXLAN driver stores a linked list of
+ # For multicast FDB entries, the VXLAN driver stores a linked list of
# remotes for a given key. Verify that only the expected remotes are
# flushed.
multicast_fdb_entries_add
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index 77c83d9508d3..b39f748c2572 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -76,11 +76,13 @@ log_test()
printf "TEST: %-60s [ OK ]\n" "${msg}"
nsuccess=$((nsuccess+1))
else
- ret=1
- nfail=$((nfail+1))
if [[ $rc -eq $ksft_skip ]]; then
+ [[ $ret -eq 0 ]] && ret=$ksft_skip
+ nskip=$((nskip+1))
printf "TEST: %-60s [SKIP]\n" "${msg}"
else
+ ret=1
+ nfail=$((nfail+1))
printf "TEST: %-60s [FAIL]\n" "${msg}"
fi
@@ -741,7 +743,7 @@ ipv6_fcnal()
run_cmd "$IP nexthop add id 52 via 2001:db8:92::3"
log_test $? 2 "Create nexthop - gw only"
- # gw is not reachable throught given dev
+ # gw is not reachable through given dev
run_cmd "$IP nexthop add id 53 via 2001:db8:3::3 dev veth1"
log_test $? 2 "Create nexthop - invalid gw+dev combination"
@@ -2528,6 +2530,7 @@ done
if [ "$TESTS" != "none" ]; then
printf "\nTests passed: %3d\n" ${nsuccess}
printf "Tests failed: %3d\n" ${nfail}
+ printf "Tests skipped: %2d\n" ${nskip}
fi
exit $ret
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
index 847936363a12..b866bab1d92a 100755
--- a/tools/testing/selftests/net/fib_rule_tests.sh
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -256,6 +256,24 @@ fib_rule6_test()
fib_rule6_test_match_n_redirect "$match" "$match" \
"$getnomatch" "sport and dport redirect to table" \
"sport and dport no redirect to table"
+
+ match="sport 100-200 dport 300-400"
+ getmatch="sport 100 dport 400"
+ getnomatch="sport 100 dport 401"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" \
+ "sport and dport range redirect to table" \
+ "sport and dport range no redirect to table"
+ fi
+
+ ip rule help 2>&1 | grep sport | grep -q MASK
+ if [ $? -eq 0 ]; then
+ match="sport 0x0f00/0xff00 dport 0x000f/0x00ff"
+ getmatch="sport 0x0f11 dport 0x220f"
+ getnomatch="sport 0x1f11 dport 0x221f"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "sport and dport masked redirect to table" \
+ "sport and dport masked no redirect to table"
fi
fib_check_iproute_support "ipproto" "ipproto"
@@ -292,6 +310,25 @@ fib_rule6_test()
"iif dscp no redirect to table"
fi
+ ip rule help 2>&1 | grep -q "DSCP\[/MASK\]"
+ if [ $? -eq 0 ]; then
+ match="dscp 0x0f/0x0f"
+ tosmatch=$(printf 0x"%x" $((0x1f << 2)))
+ tosnomatch=$(printf 0x"%x" $((0x1e << 2)))
+ getmatch="tos $tosmatch"
+ getnomatch="tos $tosnomatch"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "dscp masked redirect to table" \
+ "dscp masked no redirect to table"
+
+ match="dscp 0x0f/0x0f"
+ getmatch="from $SRC_IP6 iif $DEV tos $tosmatch"
+ getnomatch="from $SRC_IP6 iif $DEV tos $tosnomatch"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "iif dscp masked redirect to table" \
+ "iif dscp masked no redirect to table"
+ fi
+
fib_check_iproute_support "flowlabel" "flowlabel"
if [ $? -eq 0 ]; then
match="flowlabel 0xfffff"
@@ -525,6 +562,24 @@ fib_rule4_test()
fib_rule4_test_match_n_redirect "$match" "$match" \
"$getnomatch" "sport and dport redirect to table" \
"sport and dport no redirect to table"
+
+ match="sport 100-200 dport 300-400"
+ getmatch="sport 100 dport 400"
+ getnomatch="sport 100 dport 401"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" \
+ "sport and dport range redirect to table" \
+ "sport and dport range no redirect to table"
+ fi
+
+ ip rule help 2>&1 | grep sport | grep -q MASK
+ if [ $? -eq 0 ]; then
+ match="sport 0x0f00/0xff00 dport 0x000f/0x00ff"
+ getmatch="sport 0x0f11 dport 0x220f"
+ getnomatch="sport 0x1f11 dport 0x221f"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "sport and dport masked redirect to table" \
+ "sport and dport masked no redirect to table"
fi
fib_check_iproute_support "ipproto" "ipproto"
@@ -561,6 +616,25 @@ fib_rule4_test()
"$getnomatch" "iif dscp redirect to table" \
"iif dscp no redirect to table"
fi
+
+ ip rule help 2>&1 | grep -q "DSCP\[/MASK\]"
+ if [ $? -eq 0 ]; then
+ match="dscp 0x0f/0x0f"
+ tosmatch=$(printf 0x"%x" $((0x1f << 2)))
+ tosnomatch=$(printf 0x"%x" $((0x1e << 2)))
+ getmatch="tos $tosmatch"
+ getnomatch="tos $tosnomatch"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "dscp masked redirect to table" \
+ "dscp masked no redirect to table"
+
+ match="dscp 0x0f/0x0f"
+ getmatch="from $SRC_IP iif $DEV tos $tosmatch"
+ getnomatch="from $SRC_IP iif $DEV tos $tosnomatch"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "iif dscp masked redirect to table" \
+ "iif dscp masked no redirect to table"
+ fi
}
fib_rule4_vrf_test()
diff --git a/tools/testing/selftests/net/forwarding/README b/tools/testing/selftests/net/forwarding/README
index a652429bfd53..7b41cff993ad 100644
--- a/tools/testing/selftests/net/forwarding/README
+++ b/tools/testing/selftests/net/forwarding/README
@@ -6,7 +6,7 @@ to easily create and test complex environments.
Unfortunately, these namespaces can not be used with actual switching
ASICs, as their ports can not be migrated to other network namespaces
-(dev->netns_local) and most of them probably do not support the
+(dev->netns_immutable) and most of them probably do not support the
L1-separation provided by namespaces.
However, a similar kind of flexibility can be achieved by using VRFs and
diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
index d9d587454d20..8c1597ebc2d3 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
@@ -149,7 +149,7 @@ cfg_test_host_common()
check_err $? "Failed to add $name host entry"
bridge mdb replace dev br0 port br0 grp $grp $state vid 10 &> /dev/null
- check_fail $? "Managed to replace $name host entry"
+ check_err $? "Failed to replace $name host entry"
bridge mdb del dev br0 port br0 grp $grp $state vid 10
bridge mdb get dev br0 grp $grp vid 10 &> /dev/null
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 8de80acf249e..508f3c700d71 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -291,16 +291,6 @@ if [[ "$CHECK_TC" = "yes" ]]; then
check_tc_version
fi
-require_command()
-{
- local cmd=$1; shift
-
- if [[ ! -x "$(command -v "$cmd")" ]]; then
- echo "SKIP: $cmd not installed"
- exit $ksft_skip
- fi
-}
-
# IPv6 support was added in v3.0
check_mtools_version()
{
diff --git a/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh b/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh
index 3885a2a91f7d..baed5e380dae 100755
--- a/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh
+++ b/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh
@@ -20,6 +20,7 @@ ALL_TESTS="
test_port_range_ipv4_tcp
test_port_range_ipv6_udp
test_port_range_ipv6_tcp
+ test_port_range_ipv4_udp_drop
"
NUM_NETIFS=4
@@ -194,6 +195,51 @@ test_port_range_ipv6_tcp()
__test_port_range $proto $ip_proto $sip $dip $mode "$name"
}
+test_port_range_ipv4_udp_drop()
+{
+ local proto=ipv4
+ local ip_proto=udp
+ local sip=192.0.2.1
+ local dip=192.0.2.2
+ local mode="-4"
+ local name="IPv4 UDP Drop"
+ local dmac=$(mac_get $h2)
+ local smac=$(mac_get $h1)
+ local sport_min=2000
+ local sport_max=3000
+ local sport_mid=$((sport_min + (sport_max - sport_min) / 2))
+ local dport=5000
+
+ RET=0
+
+ tc filter add dev $swp1 ingress protocol $proto handle 101 pref 1 \
+ flower src_ip $sip dst_ip $dip ip_proto $ip_proto \
+ src_port $sport_min-$sport_max \
+ dst_port $dport \
+ action drop
+
+ # Test ports outside range - should pass
+ $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+ -t $ip_proto "sp=$((sport_min - 1)),dp=$dport"
+ $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+ -t $ip_proto "sp=$((sport_max + 1)),dp=$dport"
+
+ # Test ports inside range - should be dropped
+ $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+ -t $ip_proto "sp=$sport_min,dp=$dport"
+ $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+ -t $ip_proto "sp=$sport_mid,dp=$dport"
+ $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+ -t $ip_proto "sp=$sport_max,dp=$dport"
+
+ tc_check_packets "dev $swp1 ingress" 101 3
+ check_err $? "Filter did not drop the expected number of packets"
+
+ tc filter del dev $swp1 ingress protocol $proto pref 1 handle 101 flower
+
+ log_test "Port range matching - $name"
+}
+
setup_prepare()
{
h1=${NETIFS[p1]}
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
index 3f9d50f1ef9e..b43816dd998c 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
@@ -428,6 +428,14 @@ __test_flood()
test_flood()
{
__test_flood de:ad:be:ef:13:37 192.0.2.100 "flood"
+
+ # Add an entry with arbitrary destination IP. Verify that packets are
+ # not duplicated (this can happen if hardware floods the packets, and
+ # then traps them due to misconfiguration, so software data path repeats
+ # flooding and resends packets).
+ bridge fdb append dev vx1 00:00:00:00:00:00 dst 198.51.100.1 self
+ __test_flood de:ad:be:ef:13:37 192.0.2.100 "flood, unresolved FDB entry"
+ bridge fdb del dev vx1 00:00:00:00:00:00 dst 198.51.100.1 self
}
vxlan_fdb_add_del()
@@ -740,6 +748,8 @@ test_learning()
vxlan_flood_test $mac $dst 0 10 0
+ # The entry should age out when it only forwards traffic
+ $MZ $h1 -c 50 -d 1sec -p 64 -b $mac -B $dst -t icmp -q &
sleep 60
bridge fdb show brport vx1 | grep $mac | grep -q self
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh
index fb9a34cb50c6..afc65647f673 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh
@@ -539,6 +539,21 @@ test_flood()
10 10 0 10 0
__test_flood ca:fe:be:ef:13:37 198.51.100.100 20 "flood vlan 20" \
10 0 10 0 10
+
+ # Add entries with arbitrary destination IP. Verify that packets are
+ # not duplicated (this can happen if hardware floods the packets, and
+ # then traps them due to misconfiguration, so software data path repeats
+ # flooding and resends packets).
+ bridge fdb append dev vx10 00:00:00:00:00:00 dst 203.0.113.1 self
+ bridge fdb append dev vx20 00:00:00:00:00:00 dst 203.0.113.2 self
+
+ __test_flood de:ad:be:ef:13:37 192.0.2.100 10 \
+ "flood vlan 10, unresolved FDB entry" 10 10 0 10 0
+ __test_flood ca:fe:be:ef:13:37 198.51.100.100 20 \
+ "flood vlan 20, unresolved FDB entry" 10 0 10 0 10
+
+ bridge fdb del dev vx20 00:00:00:00:00:00 dst 203.0.113.2 self
+ bridge fdb del dev vx10 00:00:00:00:00:00 dst 203.0.113.1 self
}
vxlan_fdb_add_del()
diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c
index b2184847e388..d5824eadea10 100644
--- a/tools/testing/selftests/net/gro.c
+++ b/tools/testing/selftests/net/gro.c
@@ -1318,11 +1318,13 @@ int main(int argc, char **argv)
read_MAC(src_mac, smac);
read_MAC(dst_mac, dmac);
- if (tx_socket)
+ if (tx_socket) {
gro_sender();
- else
+ } else {
+ /* Only the receiver exit status determines test success. */
gro_receiver();
+ fprintf(stderr, "Gro::%s test passed.\n", testname);
+ }
- fprintf(stderr, "Gro::%s test passed.\n", testname);
return 0;
}
diff --git a/tools/testing/selftests/net/gro.sh b/tools/testing/selftests/net/gro.sh
index 02c21ff4ca81..9e3f186bc2a1 100755
--- a/tools/testing/selftests/net/gro.sh
+++ b/tools/testing/selftests/net/gro.sh
@@ -18,10 +18,10 @@ run_test() {
"--smac" "${CLIENT_MAC}" "--test" "${test}" "--verbose" )
setup_ns
- # Each test is run 3 times to deflake, because given the receive timing,
+ # Each test is run 6 times to deflake, because given the receive timing,
# not all packets that should coalesce will be considered in the same flow
# on every try.
- for tries in {1..3}; do
+ for tries in {1..6}; do
# Actual test starts here
ip netns exec $server_ns ./gro "${ARGS[@]}" "--rx" "--iface" "server" \
1>>log.txt &
@@ -100,5 +100,6 @@ trap cleanup EXIT
if [[ "${test}" == "all" ]]; then
run_all_tests
else
- run_test "${proto}" "${test}"
+ exit_code=$(run_test "${proto}" "${test}")
+ exit $exit_code
fi;
diff --git a/tools/testing/selftests/net/ip_local_port_range.sh b/tools/testing/selftests/net/ip_local_port_range.sh
index 6c6ad346eaa0..4ff746db1256 100755
--- a/tools/testing/selftests/net/ip_local_port_range.sh
+++ b/tools/testing/selftests/net/ip_local_port_range.sh
@@ -2,4 +2,6 @@
# SPDX-License-Identifier: GPL-2.0
./in_netns.sh \
- sh -c 'sysctl -q -w net.ipv4.ip_local_port_range="40000 49999" && ./ip_local_port_range'
+ sh -c 'sysctl -q -w net.mptcp.enabled=1 && \
+ sysctl -q -w net.ipv4.ip_local_port_range="40000 49999" && \
+ ./ip_local_port_range'
diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh
index 0bd9a038a1f0..975be4fdbcdb 100644
--- a/tools/testing/selftests/net/lib.sh
+++ b/tools/testing/selftests/net/lib.sh
@@ -450,6 +450,25 @@ kill_process()
{ kill $pid && wait $pid; } 2>/dev/null
}
+check_command()
+{
+ local cmd=$1; shift
+
+ if [[ ! -x "$(command -v "$cmd")" ]]; then
+ log_test_skip "$cmd not installed"
+ return $EXIT_STATUS
+ fi
+}
+
+require_command()
+{
+ local cmd=$1; shift
+
+ if ! check_command "$cmd"; then
+ exit $EXIT_STATUS
+ fi
+}
+
ip_link_add()
{
local name=$1; shift
diff --git a/tools/testing/selftests/net/lib/Makefile b/tools/testing/selftests/net/lib/Makefile
index 18b9443454a9..c22623b9a2a5 100644
--- a/tools/testing/selftests/net/lib/Makefile
+++ b/tools/testing/selftests/net/lib/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-CFLAGS = -Wall -Wl,--no-as-needed -O2 -g
+CFLAGS += -Wall -Wl,--no-as-needed -O2 -g
CFLAGS += -I../../../../../usr/include/ $(KHDR_INCLUDES)
# Additional include paths needed by kselftest.h
CFLAGS += -I../../
@@ -9,7 +9,10 @@ TEST_FILES := ../../../../../Documentation/netlink/specs
TEST_FILES += ../../../../net/ynl
TEST_GEN_FILES += csum
+TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c))
TEST_INCLUDES := $(wildcard py/*.py sh/*.sh)
include ../../lib.mk
+
+include ../bpf.mk
diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py
index 54d8f5eba810..8697bd27dc30 100644
--- a/tools/testing/selftests/net/lib/py/__init__.py
+++ b/tools/testing/selftests/net/lib/py/__init__.py
@@ -2,8 +2,8 @@
from .consts import KSRC
from .ksft import *
-from .netns import NetNS
+from .netns import NetNS, NetNSEnter
from .nsim import *
from .utils import *
-from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily
+from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily, RtnlAddrFamily
from .ynl import NetshaperFamily
diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py
index 3efe005436cd..3cfad0fd4570 100644
--- a/tools/testing/selftests/net/lib/py/ksft.py
+++ b/tools/testing/selftests/net/lib/py/ksft.py
@@ -71,6 +71,11 @@ def ksft_in(a, b, comment=""):
_fail("Check failed", a, "not in", b, comment)
+def ksft_not_in(a, b, comment=""):
+ if a in b:
+ _fail("Check failed", a, "in", b, comment)
+
+
def ksft_is(a, b, comment=""):
if a is not b:
_fail("Check failed", a, "is not", b, comment)
@@ -202,7 +207,7 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
totals = {"pass": 0, "fail": 0, "skip": 0, "xfail": 0}
- print("KTAP version 1")
+ print("TAP version 13")
print("1.." + str(len(cases)))
global KSFT_RESULT
diff --git a/tools/testing/selftests/net/lib/py/netns.py b/tools/testing/selftests/net/lib/py/netns.py
index ecff85f9074f..8e9317044eef 100644
--- a/tools/testing/selftests/net/lib/py/netns.py
+++ b/tools/testing/selftests/net/lib/py/netns.py
@@ -1,9 +1,12 @@
# SPDX-License-Identifier: GPL-2.0
from .utils import ip
+import ctypes
import random
import string
+libc = ctypes.cdll.LoadLibrary('libc.so.6')
+
class NetNS:
def __init__(self, name=None):
@@ -29,3 +32,18 @@ class NetNS:
def __repr__(self):
return f"NetNS({self.name})"
+
+
+class NetNSEnter:
+ def __init__(self, ns_name):
+ self.ns_path = f"/run/netns/{ns_name}"
+
+ def __enter__(self):
+ self.saved = open("/proc/thread-self/ns/net")
+ with open(self.ns_path) as ns_file:
+ libc.setns(ns_file.fileno(), 0)
+ return self
+
+ def __exit__(self, exc_type, exc_value, traceback):
+ libc.setns(self.saved.fileno(), 0)
+ self.saved.close()
diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py
index 9e3bcddcf3e8..34470d65d871 100644
--- a/tools/testing/selftests/net/lib/py/utils.py
+++ b/tools/testing/selftests/net/lib/py/utils.py
@@ -2,8 +2,10 @@
import errno
import json as _json
+import os
import random
import re
+import select
import socket
import subprocess
import time
@@ -15,21 +17,56 @@ class CmdExitFailure(Exception):
self.cmd = cmd_obj
+def fd_read_timeout(fd, timeout):
+ rlist, _, _ = select.select([fd], [], [], timeout)
+ if rlist:
+ return os.read(fd, 1024)
+ else:
+ raise TimeoutError("Timeout waiting for fd read")
+
+
class cmd:
- def __init__(self, comm, shell=True, fail=True, ns=None, background=False, host=None, timeout=5):
+ """
+ Execute a command on local or remote host.
+
+ Use bkg() instead to run a command in the background.
+ """
+ def __init__(self, comm, shell=True, fail=True, ns=None, background=False,
+ host=None, timeout=5, ksft_wait=None):
if ns:
comm = f'ip netns exec {ns} ' + comm
self.stdout = None
self.stderr = None
self.ret = None
+ self.ksft_term_fd = None
self.comm = comm
if host:
self.proc = host.cmd(comm)
else:
+ # ksft_wait lets us wait for the background process to fully start,
+ # we pass an FD to the child process, and wait for it to write back.
+ # Similarly term_fd tells child it's time to exit.
+ pass_fds = ()
+ env = os.environ.copy()
+ if ksft_wait is not None:
+ rfd, ready_fd = os.pipe()
+ wait_fd, self.ksft_term_fd = os.pipe()
+ pass_fds = (ready_fd, wait_fd, )
+ env["KSFT_READY_FD"] = str(ready_fd)
+ env["KSFT_WAIT_FD"] = str(wait_fd)
+
self.proc = subprocess.Popen(comm, shell=shell, stdout=subprocess.PIPE,
- stderr=subprocess.PIPE)
+ stderr=subprocess.PIPE, pass_fds=pass_fds,
+ env=env)
+ if ksft_wait is not None:
+ os.close(ready_fd)
+ os.close(wait_fd)
+ msg = fd_read_timeout(rfd, ksft_wait)
+ os.close(rfd)
+ if not msg:
+ raise Exception("Did not receive ready message")
if not background:
self.process(terminate=False, fail=fail, timeout=timeout)
@@ -37,6 +74,8 @@ class cmd:
if fail is None:
fail = not terminate
+ if self.ksft_term_fd:
+ os.write(self.ksft_term_fd, b"1")
if terminate:
self.proc.terminate()
stdout, stderr = self.proc.communicate(timeout)
@@ -54,13 +93,36 @@ class cmd:
class bkg(cmd):
+ """
+ Run a command in the background.
+
+ Examples usage:
+
+ Run a command on remote host, and wait for it to finish.
+ This is usually paired with wait_port_listen() to make sure
+ the command has initialized:
+
+ with bkg("socat ...", exit_wait=True, host=cfg.remote) as nc:
+ ...
+
+ Run a command and expect it to let us know that it's ready
+ by writing to a special file descriptor passed via KSFT_READY_FD.
+ Command will be terminated when we exit the context manager:
+
+ with bkg("my_binary", ksft_wait=5):
+ """
def __init__(self, comm, shell=True, fail=None, ns=None, host=None,
- exit_wait=False):
+ exit_wait=False, ksft_wait=None):
super().__init__(comm, background=True,
- shell=shell, fail=fail, ns=ns, host=host)
- self.terminate = not exit_wait
+ shell=shell, fail=fail, ns=ns, host=host,
+ ksft_wait=ksft_wait)
+ self.terminate = not exit_wait and not ksft_wait
self.check_fail = fail
+ if shell and self.terminate:
+ print("# Warning: combining shell and terminate is risky!")
+ print("# SIGTERM may not reach the child on zsh/ksh!")
+
def __enter__(self):
return self
@@ -123,20 +185,13 @@ def ethtool(args, json=None, ns=None, host=None):
return tool('ethtool', args, json=json, ns=ns, host=host)
-def rand_port():
+def rand_port(type=socket.SOCK_STREAM):
"""
- Get a random unprivileged port, try to make sure it's not already used.
+ Get a random unprivileged port.
"""
- for _ in range(1000):
- port = random.randint(10000, 65535)
- try:
- with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s:
- s.bind(("", port))
- return port
- except OSError as e:
- if e.errno != errno.EADDRINUSE:
- raise
- raise Exception("Can't find any free unprivileged port")
+ with socket.socket(socket.AF_INET6, type) as s:
+ s.bind(("", 0))
+ return s.getsockname()[1]
def wait_port_listen(port, proto="tcp", ns=None, host=None, sleep=0.005, deadline=5):
diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py
index ad1e36baee2a..8986c584cb37 100644
--- a/tools/testing/selftests/net/lib/py/ynl.py
+++ b/tools/testing/selftests/net/lib/py/ynl.py
@@ -42,6 +42,10 @@ class RtnlFamily(YnlFamily):
super().__init__((SPEC_PATH / Path('rt_link.yaml')).as_posix(),
schema='', recv_size=recv_size)
+class RtnlAddrFamily(YnlFamily):
+ def __init__(self, recv_size=0):
+ super().__init__((SPEC_PATH / Path('rt_addr.yaml')).as_posix(),
+ schema='', recv_size=recv_size)
class NetdevFamily(YnlFamily):
def __init__(self, recv_size=0):
diff --git a/tools/testing/selftests/net/lib/xdp_dummy.bpf.c b/tools/testing/selftests/net/lib/xdp_dummy.bpf.c
new file mode 100644
index 000000000000..e73fab3edd9f
--- /dev/null
+++ b/tools/testing/selftests/net/lib/xdp_dummy.bpf.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define KBUILD_MODNAME "xdp_dummy"
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+SEC("xdp")
+int xdp_dummy_prog(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+SEC("xdp.frags")
+int xdp_dummy_prog_frags(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/net/link_netns.py b/tools/testing/selftests/net/link_netns.py
new file mode 100755
index 000000000000..aab043c59d69
--- /dev/null
+++ b/tools/testing/selftests/net/link_netns.py
@@ -0,0 +1,141 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import time
+
+from lib.py import ksft_run, ksft_exit, ksft_true
+from lib.py import ip
+from lib.py import NetNS, NetNSEnter
+from lib.py import RtnlFamily
+
+
+LINK_NETNSID = 100
+
+
+def test_event() -> None:
+ with NetNS() as ns1, NetNS() as ns2:
+ with NetNSEnter(str(ns2)):
+ rtnl = RtnlFamily()
+
+ rtnl.ntf_subscribe("rtnlgrp-link")
+
+ ip(f"netns set {ns2} {LINK_NETNSID}", ns=str(ns1))
+ ip(f"link add netns {ns1} link-netnsid {LINK_NETNSID} dummy1 type dummy")
+ ip(f"link add netns {ns1} dummy2 type dummy", ns=str(ns2))
+
+ ip("link del dummy1", ns=str(ns1))
+ ip("link del dummy2", ns=str(ns1))
+
+ time.sleep(1)
+ rtnl.check_ntf()
+ ksft_true(rtnl.async_msg_queue.empty(),
+ "Received unexpected link notification")
+
+
+def validate_link_netns(netns, ifname, link_netnsid) -> bool:
+ link_info = ip(f"-d link show dev {ifname}", ns=netns, json=True)
+ if not link_info:
+ return False
+ return link_info[0].get("link_netnsid") == link_netnsid
+
+
+def test_link_net() -> None:
+ configs = [
+ # type, common args, type args, fallback to dev_net
+ ("ipvlan", "link dummy1", "", False),
+ ("macsec", "link dummy1", "", False),
+ ("macvlan", "link dummy1", "", False),
+ ("macvtap", "link dummy1", "", False),
+ ("vlan", "link dummy1", "id 100", False),
+ ("gre", "", "local 192.0.2.1", True),
+ ("vti", "", "local 192.0.2.1", True),
+ ("ipip", "", "local 192.0.2.1", True),
+ ("ip6gre", "", "local 2001:db8::1", True),
+ ("ip6tnl", "", "local 2001:db8::1", True),
+ ("vti6", "", "local 2001:db8::1", True),
+ ("sit", "", "local 192.0.2.1", True),
+ ("xfrm", "", "if_id 1", True),
+ ]
+
+ with NetNS() as ns1, NetNS() as ns2, NetNS() as ns3:
+ net1, net2, net3 = str(ns1), str(ns2), str(ns3)
+
+ # prepare link netnsid and a dummy link needed by certain drivers
+ ip(f"netns set {net3} {LINK_NETNSID}", ns=str(net2))
+ ip("link add dummy1 type dummy", ns=net3)
+
+ cases = [
+ # source, "netns", "link-netns", expected link-netns
+ (net3, None, None, None, None),
+ (net3, net2, None, None, LINK_NETNSID),
+ (net2, None, net3, LINK_NETNSID, LINK_NETNSID),
+ (net1, net2, net3, LINK_NETNSID, LINK_NETNSID),
+ ]
+
+ for src_net, netns, link_netns, exp1, exp2 in cases:
+ tgt_net = netns or src_net
+ for typ, cargs, targs, fb_dev_net in configs:
+ cmd = "link add"
+ if netns:
+ cmd += f" netns {netns}"
+ if link_netns:
+ cmd += f" link-netns {link_netns}"
+ cmd += f" {cargs} foo type {typ} {targs}"
+ ip(cmd, ns=src_net)
+ if fb_dev_net:
+ ksft_true(validate_link_netns(tgt_net, "foo", exp1),
+ f"{typ} link_netns validation failed")
+ else:
+ ksft_true(validate_link_netns(tgt_net, "foo", exp2),
+ f"{typ} link_netns validation failed")
+ ip(f"link del foo", ns=tgt_net)
+
+
+def test_peer_net() -> None:
+ types = [
+ "vxcan",
+ "netkit",
+ "veth",
+ ]
+
+ with NetNS() as ns1, NetNS() as ns2, NetNS() as ns3, NetNS() as ns4:
+ net1, net2, net3, net4 = str(ns1), str(ns2), str(ns3), str(ns4)
+
+ ip(f"netns set {net3} {LINK_NETNSID}", ns=str(net2))
+
+ cases = [
+ # source, "netns", "link-netns", "peer netns", expected
+ (net1, None, None, None, None),
+ (net1, net2, None, None, None),
+ (net2, None, net3, None, LINK_NETNSID),
+ (net1, net2, net3, None, None),
+ (net2, None, None, net3, LINK_NETNSID),
+ (net1, net2, None, net3, LINK_NETNSID),
+ (net2, None, net2, net3, LINK_NETNSID),
+ (net1, net2, net4, net3, LINK_NETNSID),
+ ]
+
+ for src_net, netns, link_netns, peer_netns, exp in cases:
+ tgt_net = netns or src_net
+ for typ in types:
+ cmd = "link add"
+ if netns:
+ cmd += f" netns {netns}"
+ if link_netns:
+ cmd += f" link-netns {link_netns}"
+ cmd += f" foo type {typ}"
+ if peer_netns:
+ cmd += f" peer netns {peer_netns}"
+ ip(cmd, ns=src_net)
+ ksft_true(validate_link_netns(tgt_net, "foo", exp),
+ f"{typ} peer_netns validation failed")
+ ip(f"link del foo", ns=tgt_net)
+
+
+def main() -> None:
+ ksft_run([test_event, test_link_net, test_peer_net])
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/net/lwt_dst_cache_ref_loop.sh b/tools/testing/selftests/net/lwt_dst_cache_ref_loop.sh
new file mode 100755
index 000000000000..881eb399798f
--- /dev/null
+++ b/tools/testing/selftests/net/lwt_dst_cache_ref_loop.sh
@@ -0,0 +1,246 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Author: Justin Iurman <justin.iurman@uliege.be>
+#
+# WARNING
+# -------
+# This is just a dummy script that triggers encap cases with possible dst cache
+# reference loops in affected lwt users (see list below). Some cases are
+# pathological configurations for simplicity, others are valid. Overall, we
+# don't want this issue to happen, no matter what. In order to catch any
+# reference loops, kmemleak MUST be used. The results alone are always blindly
+# successful, don't rely on them. Note that the following tests may crash the
+# kernel if the fix to prevent lwtunnel_{input|output|xmit}() reentry loops is
+# not present.
+#
+# Affected lwt users so far (please update accordingly if needed):
+# - ila_lwt (output only)
+# - ioam6_iptunnel (output only)
+# - rpl_iptunnel (both input and output)
+# - seg6_iptunnel (both input and output)
+
+source lib.sh
+
+check_compatibility()
+{
+ setup_ns tmp_node &>/dev/null
+ if [ $? != 0 ]; then
+ echo "SKIP: Cannot create netns."
+ exit $ksft_skip
+ fi
+
+ ip link add name veth0 netns $tmp_node type veth \
+ peer name veth1 netns $tmp_node &>/dev/null
+ local ret=$?
+
+ ip -netns $tmp_node link set veth0 up &>/dev/null
+ ret=$((ret + $?))
+
+ ip -netns $tmp_node link set veth1 up &>/dev/null
+ ret=$((ret + $?))
+
+ if [ $ret != 0 ]; then
+ echo "SKIP: Cannot configure links."
+ cleanup_ns $tmp_node
+ exit $ksft_skip
+ fi
+
+ lsmod 2>/dev/null | grep -q "ila"
+ ila_lsmod=$?
+ [ $ila_lsmod != 0 ] && modprobe ila &>/dev/null
+
+ ip -netns $tmp_node route add 2001:db8:1::/64 \
+ encap ila 1:2:3:4 csum-mode no-action ident-type luid \
+ hook-type output \
+ dev veth0 &>/dev/null
+
+ ip -netns $tmp_node route add 2001:db8:2::/64 \
+ encap ioam6 trace prealloc type 0x800000 ns 0 size 4 \
+ dev veth0 &>/dev/null
+
+ ip -netns $tmp_node route add 2001:db8:3::/64 \
+ encap rpl segs 2001:db8:3::1 dev veth0 &>/dev/null
+
+ ip -netns $tmp_node route add 2001:db8:4::/64 \
+ encap seg6 mode inline segs 2001:db8:4::1 dev veth0 &>/dev/null
+
+ ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap ila"
+ skip_ila=$?
+
+ ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap ioam6"
+ skip_ioam6=$?
+
+ ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap rpl"
+ skip_rpl=$?
+
+ ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap seg6"
+ skip_seg6=$?
+
+ cleanup_ns $tmp_node
+}
+
+setup()
+{
+ setup_ns alpha beta gamma &>/dev/null
+
+ ip link add name veth-alpha netns $alpha type veth \
+ peer name veth-betaL netns $beta &>/dev/null
+
+ ip link add name veth-betaR netns $beta type veth \
+ peer name veth-gamma netns $gamma &>/dev/null
+
+ ip -netns $alpha link set veth-alpha name veth0 &>/dev/null
+ ip -netns $beta link set veth-betaL name veth0 &>/dev/null
+ ip -netns $beta link set veth-betaR name veth1 &>/dev/null
+ ip -netns $gamma link set veth-gamma name veth0 &>/dev/null
+
+ ip -netns $alpha addr add 2001:db8:1::2/64 dev veth0 &>/dev/null
+ ip -netns $alpha link set veth0 up &>/dev/null
+ ip -netns $alpha link set lo up &>/dev/null
+ ip -netns $alpha route add 2001:db8:2::/64 \
+ via 2001:db8:1::1 dev veth0 &>/dev/null
+
+ ip -netns $beta addr add 2001:db8:1::1/64 dev veth0 &>/dev/null
+ ip -netns $beta addr add 2001:db8:2::1/64 dev veth1 &>/dev/null
+ ip -netns $beta link set veth0 up &>/dev/null
+ ip -netns $beta link set veth1 up &>/dev/null
+ ip -netns $beta link set lo up &>/dev/null
+ ip -netns $beta route del 2001:db8:2::/64
+ ip -netns $beta route add 2001:db8:2::/64 dev veth1
+ ip netns exec $beta \
+ sysctl -wq net.ipv6.conf.all.forwarding=1 &>/dev/null
+
+ ip -netns $gamma addr add 2001:db8:2::2/64 dev veth0 &>/dev/null
+ ip -netns $gamma link set veth0 up &>/dev/null
+ ip -netns $gamma link set lo up &>/dev/null
+ ip -netns $gamma route add 2001:db8:1::/64 \
+ via 2001:db8:2::1 dev veth0 &>/dev/null
+
+ sleep 1
+
+ ip netns exec $alpha ping6 -c 5 -W 1 2001:db8:2::2 &>/dev/null
+ if [ $? != 0 ]; then
+ echo "SKIP: Setup failed."
+ exit $ksft_skip
+ fi
+
+ sleep 1
+}
+
+cleanup()
+{
+ cleanup_ns $alpha $beta $gamma
+ [ $ila_lsmod != 0 ] && modprobe -r ila &>/dev/null
+}
+
+run_ila()
+{
+ if [ $skip_ila != 0 ]; then
+ echo "SKIP: ila (output)"
+ return
+ fi
+
+ ip -netns $beta route del 2001:db8:2::/64
+ ip -netns $beta route add 2001:db8:2:0:0:0:0:2/128 \
+ encap ila 2001:db8:2:0 csum-mode no-action ident-type luid \
+ hook-type output \
+ dev veth1 &>/dev/null
+ sleep 1
+
+ echo "TEST: ila (output)"
+ ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null
+ sleep 1
+
+ ip -netns $beta route del 2001:db8:2:0:0:0:0:2/128
+ ip -netns $beta route add 2001:db8:2::/64 dev veth1
+ sleep 1
+}
+
+run_ioam6()
+{
+ if [ $skip_ioam6 != 0 ]; then
+ echo "SKIP: ioam6 (output)"
+ return
+ fi
+
+ ip -netns $beta route change 2001:db8:2::/64 \
+ encap ioam6 trace prealloc type 0x800000 ns 1 size 4 \
+ dev veth1 &>/dev/null
+ sleep 1
+
+ echo "TEST: ioam6 (output)"
+ ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null
+ sleep 1
+}
+
+run_rpl()
+{
+ if [ $skip_rpl != 0 ]; then
+ echo "SKIP: rpl (input)"
+ echo "SKIP: rpl (output)"
+ return
+ fi
+
+ ip -netns $beta route change 2001:db8:2::/64 \
+ encap rpl segs 2001:db8:2::2 \
+ dev veth1 &>/dev/null
+ sleep 1
+
+ echo "TEST: rpl (input)"
+ ip netns exec $alpha ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null
+ sleep 1
+
+ echo "TEST: rpl (output)"
+ ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null
+ sleep 1
+}
+
+run_seg6()
+{
+ if [ $skip_seg6 != 0 ]; then
+ echo "SKIP: seg6 (input)"
+ echo "SKIP: seg6 (output)"
+ return
+ fi
+
+ ip -netns $beta route change 2001:db8:2::/64 \
+ encap seg6 mode inline segs 2001:db8:2::2 \
+ dev veth1 &>/dev/null
+ sleep 1
+
+ echo "TEST: seg6 (input)"
+ ip netns exec $alpha ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null
+ sleep 1
+
+ echo "TEST: seg6 (output)"
+ ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null
+ sleep 1
+}
+
+run()
+{
+ run_ila
+ run_ioam6
+ run_rpl
+ run_seg6
+}
+
+if [ "$(id -u)" -ne 0 ]; then
+ echo "SKIP: Need root privileges."
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool."
+ exit $ksft_skip
+fi
+
+check_compatibility
+
+trap cleanup EXIT
+
+setup
+run
+
+exit $ksft_pass
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index 8e3fc05a5397..340e1a777e16 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -2,12 +2,12 @@
top_srcdir = ../../../../..
-CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES)
+CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES)
TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \
simult_flows.sh mptcp_sockopt.sh userspace_pm.sh
-TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq
+TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq mptcp_diag
TEST_FILES := mptcp_lib.sh settings
diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
index 2bd0c1eb70c5..4f55477ffe08 100755
--- a/tools/testing/selftests/net/mptcp/diag.sh
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -200,6 +200,32 @@ chk_msk_cestab()
"${expected}" "${msg}" ""
}
+chk_dump_one()
+{
+ local ss_token
+ local token
+ local msg
+
+ ss_token="$(ss -inmHMN $ns | grep 'token:' |\
+ head -n 1 |\
+ sed 's/.*token:\([0-9a-f]*\).*/\1/')"
+
+ token="$(ip netns exec $ns ./mptcp_diag -t $ss_token |\
+ awk -F':[ \t]+' '/^token/ {print $2}')"
+
+ msg="....chk dump_one"
+
+ mptcp_lib_print_title "$msg"
+ if [ -n "$ss_token" ] && [ "$ss_token" = "$token" ]; then
+ mptcp_lib_pr_ok
+ mptcp_lib_result_pass "${msg}"
+ else
+ mptcp_lib_pr_fail "expected $ss_token found $token"
+ mptcp_lib_result_fail "${msg}"
+ ret=${KSFT_FAIL}
+ fi
+}
+
msk_info_get_value()
{
local port="${1}"
@@ -290,6 +316,7 @@ chk_msk_remote_key_nr 2 "....chk remote_key"
chk_msk_fallback_nr 0 "....chk no fallback"
chk_msk_inuse 2
chk_msk_cestab 2
+chk_dump_one
flush_pids
chk_msk_inuse 0 "2->0"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index 414addef9a45..d240d02fa443 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -1302,7 +1302,7 @@ again:
return ret;
if (cfg_truncate > 0) {
- xdisconnect(fd);
+ shutdown(fd, SHUT_WR);
} else if (--cfg_repeat > 0) {
xdisconnect(fd);
diff --git a/tools/testing/selftests/net/mptcp/mptcp_diag.c b/tools/testing/selftests/net/mptcp/mptcp_diag.c
new file mode 100644
index 000000000000..284286c524cf
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_diag.c
@@ -0,0 +1,272 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025, Kylin Software */
+
+#include <linux/sock_diag.h>
+#include <linux/rtnetlink.h>
+#include <linux/inet_diag.h>
+#include <linux/netlink.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <linux/tcp.h>
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <stdio.h>
+
+#ifndef IPPROTO_MPTCP
+#define IPPROTO_MPTCP 262
+#endif
+
+struct mptcp_info {
+ __u8 mptcpi_subflows;
+ __u8 mptcpi_add_addr_signal;
+ __u8 mptcpi_add_addr_accepted;
+ __u8 mptcpi_subflows_max;
+ __u8 mptcpi_add_addr_signal_max;
+ __u8 mptcpi_add_addr_accepted_max;
+ __u32 mptcpi_flags;
+ __u32 mptcpi_token;
+ __u64 mptcpi_write_seq;
+ __u64 mptcpi_snd_una;
+ __u64 mptcpi_rcv_nxt;
+ __u8 mptcpi_local_addr_used;
+ __u8 mptcpi_local_addr_max;
+ __u8 mptcpi_csum_enabled;
+ __u32 mptcpi_retransmits;
+ __u64 mptcpi_bytes_retrans;
+ __u64 mptcpi_bytes_sent;
+ __u64 mptcpi_bytes_received;
+ __u64 mptcpi_bytes_acked;
+ __u8 mptcpi_subflows_total;
+ __u8 reserved[3];
+ __u32 mptcpi_last_data_sent;
+ __u32 mptcpi_last_data_recv;
+ __u32 mptcpi_last_ack_recv;
+};
+
+static void die_perror(const char *msg)
+{
+ perror(msg);
+ exit(1);
+}
+
+static void die_usage(int r)
+{
+ fprintf(stderr, "Usage: mptcp_diag -t\n");
+ exit(r);
+}
+
+static void send_query(int fd, __u32 token)
+{
+ struct sockaddr_nl nladdr = {
+ .nl_family = AF_NETLINK
+ };
+ struct {
+ struct nlmsghdr nlh;
+ struct inet_diag_req_v2 r;
+ } req = {
+ .nlh = {
+ .nlmsg_len = sizeof(req),
+ .nlmsg_type = SOCK_DIAG_BY_FAMILY,
+ .nlmsg_flags = NLM_F_REQUEST
+ },
+ .r = {
+ .sdiag_family = AF_INET,
+ /* Real proto is set via INET_DIAG_REQ_PROTOCOL */
+ .sdiag_protocol = IPPROTO_TCP,
+ .id.idiag_cookie[0] = token,
+ }
+ };
+ struct rtattr rta_proto;
+ struct iovec iov[6];
+ int iovlen = 1;
+ __u32 proto;
+
+ req.r.idiag_ext |= (1 << (INET_DIAG_INFO - 1));
+ proto = IPPROTO_MPTCP;
+ rta_proto.rta_type = INET_DIAG_REQ_PROTOCOL;
+ rta_proto.rta_len = RTA_LENGTH(sizeof(proto));
+
+ iov[0] = (struct iovec) {
+ .iov_base = &req,
+ .iov_len = sizeof(req)
+ };
+ iov[iovlen] = (struct iovec){ &rta_proto, sizeof(rta_proto)};
+ iov[iovlen + 1] = (struct iovec){ &proto, sizeof(proto)};
+ req.nlh.nlmsg_len += RTA_LENGTH(sizeof(proto));
+ iovlen += 2;
+ struct msghdr msg = {
+ .msg_name = &nladdr,
+ .msg_namelen = sizeof(nladdr),
+ .msg_iov = iov,
+ .msg_iovlen = iovlen
+ };
+
+ for (;;) {
+ if (sendmsg(fd, &msg, 0) < 0) {
+ if (errno == EINTR)
+ continue;
+ die_perror("sendmsg");
+ }
+ break;
+ }
+}
+
+static void parse_rtattr_flags(struct rtattr *tb[], int max, struct rtattr *rta,
+ int len, unsigned short flags)
+{
+ unsigned short type;
+
+ memset(tb, 0, sizeof(struct rtattr *) * (max + 1));
+ while (RTA_OK(rta, len)) {
+ type = rta->rta_type & ~flags;
+ if (type <= max && !tb[type])
+ tb[type] = rta;
+ rta = RTA_NEXT(rta, len);
+ }
+}
+
+static void print_info_msg(struct mptcp_info *info)
+{
+ printf("Token & Flags\n");
+ printf("token: %x\n", info->mptcpi_token);
+ printf("flags: %x\n", info->mptcpi_flags);
+ printf("csum_enabled: %u\n", info->mptcpi_csum_enabled);
+
+ printf("\nBasic Info\n");
+ printf("subflows: %u\n", info->mptcpi_subflows);
+ printf("subflows_max: %u\n", info->mptcpi_subflows_max);
+ printf("subflows_total: %u\n", info->mptcpi_subflows_total);
+ printf("local_addr_used: %u\n", info->mptcpi_local_addr_used);
+ printf("local_addr_max: %u\n", info->mptcpi_local_addr_max);
+ printf("add_addr_signal: %u\n", info->mptcpi_add_addr_signal);
+ printf("add_addr_accepted: %u\n", info->mptcpi_add_addr_accepted);
+ printf("add_addr_signal_max: %u\n", info->mptcpi_add_addr_signal_max);
+ printf("add_addr_accepted_max: %u\n", info->mptcpi_add_addr_accepted_max);
+
+ printf("\nTransmission Info\n");
+ printf("write_seq: %llu\n", info->mptcpi_write_seq);
+ printf("snd_una: %llu\n", info->mptcpi_snd_una);
+ printf("rcv_nxt: %llu\n", info->mptcpi_rcv_nxt);
+ printf("last_data_sent: %u\n", info->mptcpi_last_data_sent);
+ printf("last_data_recv: %u\n", info->mptcpi_last_data_recv);
+ printf("last_ack_recv: %u\n", info->mptcpi_last_ack_recv);
+ printf("retransmits: %u\n", info->mptcpi_retransmits);
+ printf("retransmit bytes: %llu\n", info->mptcpi_bytes_retrans);
+ printf("bytes_sent: %llu\n", info->mptcpi_bytes_sent);
+ printf("bytes_received: %llu\n", info->mptcpi_bytes_received);
+ printf("bytes_acked: %llu\n", info->mptcpi_bytes_acked);
+}
+
+static void parse_nlmsg(struct nlmsghdr *nlh)
+{
+ struct inet_diag_msg *r = NLMSG_DATA(nlh);
+ struct rtattr *tb[INET_DIAG_MAX + 1];
+
+ parse_rtattr_flags(tb, INET_DIAG_MAX, (struct rtattr *)(r + 1),
+ nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r)),
+ NLA_F_NESTED);
+
+ if (tb[INET_DIAG_INFO]) {
+ int len = RTA_PAYLOAD(tb[INET_DIAG_INFO]);
+ struct mptcp_info *info;
+
+ /* workaround fort older kernels with less fields */
+ if (len < sizeof(*info)) {
+ info = alloca(sizeof(*info));
+ memcpy(info, RTA_DATA(tb[INET_DIAG_INFO]), len);
+ memset((char *)info + len, 0, sizeof(*info) - len);
+ } else {
+ info = RTA_DATA(tb[INET_DIAG_INFO]);
+ }
+ print_info_msg(info);
+ }
+}
+
+static void recv_nlmsg(int fd, struct nlmsghdr *nlh)
+{
+ char rcv_buff[8192];
+ struct sockaddr_nl rcv_nladdr = {
+ .nl_family = AF_NETLINK
+ };
+ struct iovec rcv_iov = {
+ .iov_base = rcv_buff,
+ .iov_len = sizeof(rcv_buff)
+ };
+ struct msghdr rcv_msg = {
+ .msg_name = &rcv_nladdr,
+ .msg_namelen = sizeof(rcv_nladdr),
+ .msg_iov = &rcv_iov,
+ .msg_iovlen = 1
+ };
+ int len;
+
+ len = recvmsg(fd, &rcv_msg, 0);
+ nlh = (struct nlmsghdr *)rcv_buff;
+
+ while (NLMSG_OK(nlh, len)) {
+ if (nlh->nlmsg_type == NLMSG_DONE) {
+ printf("NLMSG_DONE\n");
+ break;
+ } else if (nlh->nlmsg_type == NLMSG_ERROR) {
+ struct nlmsgerr *err;
+
+ err = (struct nlmsgerr *)NLMSG_DATA(nlh);
+ printf("Error %d:%s\n",
+ -(err->error), strerror(-(err->error)));
+ break;
+ }
+ parse_nlmsg(nlh);
+ nlh = NLMSG_NEXT(nlh, len);
+ }
+}
+
+static void get_mptcpinfo(__u32 token)
+{
+ struct nlmsghdr *nlh = NULL;
+ int fd;
+
+ fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
+ if (fd < 0)
+ die_perror("Netlink socket");
+
+ send_query(fd, token);
+ recv_nlmsg(fd, nlh);
+
+ close(fd);
+}
+
+static void parse_opts(int argc, char **argv, __u32 *target_token)
+{
+ int c;
+
+ if (argc < 2)
+ die_usage(1);
+
+ while ((c = getopt(argc, argv, "ht:")) != -1) {
+ switch (c) {
+ case 'h':
+ die_usage(0);
+ break;
+ case 't':
+ sscanf(optarg, "%x", target_token);
+ break;
+ default:
+ die_usage(1);
+ break;
+ }
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ __u32 target_token;
+
+ parse_opts(argc, argv, &target_token);
+ get_mptcpinfo(target_token);
+
+ return 0;
+}
+
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index 9c2a415976cb..2329c2f8519b 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -28,7 +28,7 @@ size=0
usage() {
echo "Usage: $0 [ -b ] [ -c ] [ -d ] [ -i]"
- echo -e "\t-b: bail out after first error, otherwise runs al testcases"
+ echo -e "\t-b: bail out after first error, otherwise runs all testcases"
echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)"
echo -e "\t-d: debug this script"
echo -e "\t-i: use 'ip mptcp' instead of 'pm_nl_ctl'"
diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh
index 3651f73451cf..333064b0b5ac 100755
--- a/tools/testing/selftests/net/mptcp/userspace_pm.sh
+++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh
@@ -117,7 +117,36 @@ cleanup()
trap cleanup EXIT
# Create and configure network namespaces for testing
+print_title "Init"
mptcp_lib_ns_init ns1 ns2
+
+# check path_manager and pm_type sysctl mapping
+if [ -f /proc/sys/net/mptcp/path_manager ]; then
+ ip netns exec "$ns1" sysctl -q net.mptcp.path_manager=userspace
+ pm_type="$(ip netns exec "$ns1" sysctl -n net.mptcp.pm_type)"
+ if [ "${pm_type}" != "1" ]; then
+ test_fail "unexpected pm_type: ${pm_type}"
+ mptcp_lib_result_print_all_tap
+ exit ${KSFT_FAIL}
+ fi
+
+ ip netns exec "$ns1" sysctl -q net.mptcp.path_manager=error 2>/dev/null
+ pm_type="$(ip netns exec "$ns1" sysctl -n net.mptcp.pm_type)"
+ if [ "${pm_type}" != "1" ]; then
+ test_fail "unexpected pm_type after error: ${pm_type}"
+ mptcp_lib_result_print_all_tap
+ exit ${KSFT_FAIL}
+ fi
+
+ ip netns exec "$ns1" sysctl -q net.mptcp.pm_type=0
+ pm_name="$(ip netns exec "$ns1" sysctl -n net.mptcp.path_manager)"
+ if [ "${pm_name}" != "kernel" ]; then
+ test_fail "unexpected path-manager: ${pm_name}"
+ mptcp_lib_result_print_all_tap
+ exit ${KSFT_FAIL}
+ fi
+fi
+
for i in "$ns1" "$ns2" ;do
ip netns exec "$i" sysctl -q net.mptcp.pm_type=1
done
@@ -152,7 +181,6 @@ mptcp_lib_events "${ns1}" "${server_evts}" server_evts_pid
sleep 0.5
mptcp_lib_subtests_last_ts_reset
-print_title "Init"
print_test "Created network namespaces ns1, ns2"
test_pass
diff --git a/tools/testing/selftests/net/netfilter/br_netfilter.sh b/tools/testing/selftests/net/netfilter/br_netfilter.sh
index c28379a965d8..1559ba275105 100755
--- a/tools/testing/selftests/net/netfilter/br_netfilter.sh
+++ b/tools/testing/selftests/net/netfilter/br_netfilter.sh
@@ -13,6 +13,12 @@ source lib.sh
checktool "nft --version" "run test without nft tool"
+read t < /proc/sys/kernel/tainted
+if [ "$t" -ne 0 ];then
+ echo SKIP: kernel is tainted
+ exit $ksft_skip
+fi
+
cleanup() {
cleanup_all_ns
}
@@ -165,6 +171,7 @@ if [ "$t" -eq 0 ];then
echo PASS: kernel not tainted
else
echo ERROR: kernel is tainted
+ dmesg
ret=1
fi
diff --git a/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh
index 6a764d70ab06..4788641717d9 100755
--- a/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh
+++ b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh
@@ -4,6 +4,12 @@ source lib.sh
checktool "nft --version" "run test without nft tool"
+read t < /proc/sys/kernel/tainted
+if [ "$t" -ne 0 ];then
+ echo SKIP: kernel is tainted
+ exit $ksft_skip
+fi
+
cleanup() {
cleanup_all_ns
}
@@ -72,6 +78,7 @@ if [ "$t" -eq 0 ];then
echo PASS: kernel not tainted
else
echo ERROR: kernel is tainted
+ dmesg
exit 1
fi
diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh
index 785e3875a6da..784d1b46912b 100755
--- a/tools/testing/selftests/net/netfilter/nft_queue.sh
+++ b/tools/testing/selftests/net/netfilter/nft_queue.sh
@@ -593,6 +593,7 @@ EOF
echo "PASS: queue program exiting while packets queued"
else
echo "TAINT: queue program exiting while packets queued"
+ dmesg
ret=1
fi
}
diff --git a/tools/testing/selftests/net/netns-name.sh b/tools/testing/selftests/net/netns-name.sh
index 6974474c26f3..0be1905d1f2f 100755
--- a/tools/testing/selftests/net/netns-name.sh
+++ b/tools/testing/selftests/net/netns-name.sh
@@ -78,6 +78,16 @@ ip -netns $NS link show dev $ALT_NAME 2> /dev/null &&
fail "Can still find alt-name after move"
ip -netns $test_ns link del $DEV || fail
+#
+# Test no conflict of the same name/ifindex in different netns
+#
+ip -netns $NS link add name $DEV index 100 type dummy || fail
+ip -netns $NS link add netns $test_ns name $DEV index 100 type dummy ||
+ fail "Can create in netns without moving"
+ip -netns $test_ns link show dev $DEV >> /dev/null || fail "Device not found"
+ip -netns $NS link del $DEV || fail
+ip -netns $test_ns link del $DEV || fail
+
echo -ne "$(basename $0) \t\t\t\t"
if [ $RET_CODE -eq 0 ]; then
echo "[ OK ]"
diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py
index 93e8cb671c3d..beaee5e4e2aa 100755
--- a/tools/testing/selftests/net/nl_netdev.py
+++ b/tools/testing/selftests/net/nl_netdev.py
@@ -35,6 +35,21 @@ def napi_list_check(nf) -> None:
comment=f"queue count after reset queue {q} mode {i}")
+def nsim_rxq_reset_down(nf) -> None:
+ """
+ Test that the queue API supports resetting a queue
+ while the interface is down. We should convert this
+ test to testing real HW once more devices support
+ queue API.
+ """
+ with NetdevSimDev(queue_count=4) as nsimdev:
+ nsim = nsimdev.nsims[0]
+
+ ip(f"link set dev {nsim.ifname} down")
+ for i in [0, 2, 3]:
+ nsim.dfs_write("queue_reset", f"1 {i}")
+
+
def page_pool_check(nf) -> None:
with NetdevSimDev() as nsimdev:
nsim = nsimdev.nsims[0]
@@ -106,7 +121,8 @@ def page_pool_check(nf) -> None:
def main() -> None:
nf = NetdevFamily()
- ksft_run([empty_check, lo_check, page_pool_check, napi_list_check],
+ ksft_run([empty_check, lo_check, page_pool_check, napi_list_check,
+ nsim_rxq_reset_down],
args=(nf, ))
ksft_exit()
diff --git a/tools/testing/selftests/net/openvswitch/Makefile b/tools/testing/selftests/net/openvswitch/Makefile
index 2f1508abc826..3fd1da2ec07d 100644
--- a/tools/testing/selftests/net/openvswitch/Makefile
+++ b/tools/testing/selftests/net/openvswitch/Makefile
@@ -2,7 +2,7 @@
top_srcdir = ../../../../..
-CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES)
+CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES)
TEST_PROGS := openvswitch.sh
diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh
index 960e1ab4dd04..3c8d3455d8e7 100755
--- a/tools/testing/selftests/net/openvswitch/openvswitch.sh
+++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh
@@ -330,6 +330,11 @@ test_psample() {
# - drop packets and verify the right drop reason is reported
test_drop_reason() {
which perf >/dev/null 2>&1 || return $ksft_skip
+ which pahole >/dev/null 2>&1 || return $ksft_skip
+
+ ovs_drop_subsys=$(pahole -C skb_drop_reason_subsys |
+ awk '/OPENVSWITCH/ { print $3; }' |
+ tr -d ,)
sbx_add "test_drop_reason" || return $?
@@ -373,7 +378,7 @@ test_drop_reason() {
"in_port(2),eth(),eth_type(0x0800),ipv4(src=172.31.110.20,proto=1),icmp()" 'drop'
ovs_drop_record_and_run "test_drop_reason" ip netns exec client ping -c 2 172.31.110.20
- ovs_drop_reason_count 0x30001 # OVS_DROP_FLOW_ACTION
+ ovs_drop_reason_count 0x${ovs_drop_subsys}0001 # OVS_DROP_FLOW_ACTION
if [[ "$?" -ne "2" ]]; then
info "Did not detect expected drops: $?"
return 1
@@ -390,7 +395,7 @@ test_drop_reason() {
ovs_drop_record_and_run \
"test_drop_reason" ip netns exec client nc -i 1 -zuv 172.31.110.20 6000
- ovs_drop_reason_count 0x30004 # OVS_DROP_EXPLICIT_ACTION_ERROR
+ ovs_drop_reason_count 0x${ovs_drop_subsys}0004 # OVS_DROP_EXPLICIT_ACTION_ERROR
if [[ "$?" -ne "1" ]]; then
info "Did not detect expected explicit error drops: $?"
return 1
@@ -398,7 +403,7 @@ test_drop_reason() {
ovs_drop_record_and_run \
"test_drop_reason" ip netns exec client nc -i 1 -zuv 172.31.110.20 7000
- ovs_drop_reason_count 0x30003 # OVS_DROP_EXPLICIT_ACTION
+ ovs_drop_reason_count 0x${ovs_drop_subsys}0003 # OVS_DROP_EXPLICIT_ACTION
if [[ "$?" -ne "1" ]]; then
info "Did not detect expected explicit drops: $?"
return 1
diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh
index e15c43b7359b..ef8b25a606d8 100755
--- a/tools/testing/selftests/net/packetdrill/ksft_runner.sh
+++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh
@@ -39,11 +39,13 @@ if [[ -n "${KSFT_MACHINE_SLOW}" ]]; then
# xfail tests that are known flaky with dbg config, not fixable.
# still run them for coverage (and expect 100% pass without dbg).
declare -ar xfail_list=(
+ "tcp_eor_no-coalesce-retrans.pkt"
"tcp_fast_recovery_prr-ss.*.pkt"
+ "tcp_slow_start_slow-start-after-win-update.pkt"
"tcp_timestamping.*.pkt"
"tcp_user_timeout_user-timeout-probe.pkt"
"tcp_zerocopy_epoll_.*.pkt"
- "tcp_tcp_info_tcp-info-*-limited.pkt"
+ "tcp_tcp_info_tcp-info-.*-limited.pkt"
)
readonly xfail_regex="^($(printf '%s|' "${xfail_list[@]}"))$"
[[ "$script" =~ ${xfail_regex} ]] && failfunc=ktap_test_xfail
diff --git a/tools/testing/selftests/net/proc_net_pktgen.c b/tools/testing/selftests/net/proc_net_pktgen.c
new file mode 100644
index 000000000000..69444fb29577
--- /dev/null
+++ b/tools/testing/selftests/net/proc_net_pktgen.c
@@ -0,0 +1,690 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * proc_net_pktgen: kselftest for /proc/net/pktgen interface
+ *
+ * Copyright (c) 2025 Peter Seiderer <ps.report@gmx.net>
+ *
+ */
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "../kselftest_harness.h"
+
+static const char ctrl_cmd_stop[] = "stop";
+static const char ctrl_cmd_start[] = "start";
+static const char ctrl_cmd_reset[] = "reset";
+
+static const char wrong_ctrl_cmd[] = "0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789";
+
+static const char thr_cmd_add_loopback_0[] = "add_device lo@0";
+static const char thr_cmd_rm_loopback_0[] = "rem_device_all";
+
+static const char wrong_thr_cmd[] = "forsureawrongcommand";
+static const char legacy_thr_cmd[] = "max_before_softirq";
+
+static const char wrong_dev_cmd[] = "forsurewrongcommand";
+static const char dev_cmd_min_pkt_size_0[] = "min_pkt_size";
+static const char dev_cmd_min_pkt_size_1[] = "min_pkt_size ";
+static const char dev_cmd_min_pkt_size_2[] = "min_pkt_size 0";
+static const char dev_cmd_min_pkt_size_3[] = "min_pkt_size 1";
+static const char dev_cmd_min_pkt_size_4[] = "min_pkt_size 100";
+static const char dev_cmd_min_pkt_size_5[] = "min_pkt_size=1001";
+static const char dev_cmd_min_pkt_size_6[] = "min_pkt_size =2002";
+static const char dev_cmd_min_pkt_size_7[] = "min_pkt_size= 3003";
+static const char dev_cmd_min_pkt_size_8[] = "min_pkt_size = 4004";
+static const char dev_cmd_max_pkt_size_0[] = "max_pkt_size 200";
+static const char dev_cmd_pkt_size_0[] = "pkt_size 300";
+static const char dev_cmd_imix_weights_0[] = "imix_weights 0,7 576,4 1500,1";
+static const char dev_cmd_imix_weights_1[] = "imix_weights 101,1 102,2 103,3 104,4 105,5 106,6 107,7 108,8 109,9 110,10 111,11 112,12 113,13 114,14 115,15 116,16 117,17 118,18 119,19 120,20";
+static const char dev_cmd_imix_weights_2[] = "imix_weights 100,1 102,2 103,3 104,4 105,5 106,6 107,7 108,8 109,9 110,10 111,11 112,12 113,13 114,14 115,15 116,16 117,17 118,18 119,19 120,20 121,21";
+static const char dev_cmd_imix_weights_3[] = "imix_weights";
+static const char dev_cmd_imix_weights_4[] = "imix_weights ";
+static const char dev_cmd_imix_weights_5[] = "imix_weights 0";
+static const char dev_cmd_imix_weights_6[] = "imix_weights 0,";
+static const char dev_cmd_debug_0[] = "debug 1";
+static const char dev_cmd_debug_1[] = "debug 0";
+static const char dev_cmd_frags_0[] = "frags 100";
+static const char dev_cmd_delay_0[] = "delay 100";
+static const char dev_cmd_delay_1[] = "delay 2147483647";
+static const char dev_cmd_rate_0[] = "rate 0";
+static const char dev_cmd_rate_1[] = "rate 100";
+static const char dev_cmd_ratep_0[] = "ratep 0";
+static const char dev_cmd_ratep_1[] = "ratep 200";
+static const char dev_cmd_udp_src_min_0[] = "udp_src_min 1";
+static const char dev_cmd_udp_dst_min_0[] = "udp_dst_min 2";
+static const char dev_cmd_udp_src_max_0[] = "udp_src_max 3";
+static const char dev_cmd_udp_dst_max_0[] = "udp_dst_max 4";
+static const char dev_cmd_clone_skb_0[] = "clone_skb 1";
+static const char dev_cmd_clone_skb_1[] = "clone_skb 0";
+static const char dev_cmd_count_0[] = "count 100";
+static const char dev_cmd_src_mac_count_0[] = "src_mac_count 100";
+static const char dev_cmd_dst_mac_count_0[] = "dst_mac_count 100";
+static const char dev_cmd_burst_0[] = "burst 0";
+static const char dev_cmd_node_0[] = "node 100";
+static const char dev_cmd_xmit_mode_0[] = "xmit_mode start_xmit";
+static const char dev_cmd_xmit_mode_1[] = "xmit_mode netif_receive";
+static const char dev_cmd_xmit_mode_2[] = "xmit_mode queue_xmit";
+static const char dev_cmd_xmit_mode_3[] = "xmit_mode nonsense";
+static const char dev_cmd_flag_0[] = "flag UDPCSUM";
+static const char dev_cmd_flag_1[] = "flag !UDPCSUM";
+static const char dev_cmd_flag_2[] = "flag nonsense";
+static const char dev_cmd_dst_min_0[] = "dst_min 101.102.103.104";
+static const char dev_cmd_dst_0[] = "dst 101.102.103.104";
+static const char dev_cmd_dst_max_0[] = "dst_max 201.202.203.204";
+static const char dev_cmd_dst6_0[] = "dst6 2001:db38:1234:0000:0000:0000:0000:0000";
+static const char dev_cmd_dst6_min_0[] = "dst6_min 2001:db8:1234:0000:0000:0000:0000:0000";
+static const char dev_cmd_dst6_max_0[] = "dst6_max 2001:db8:1234:0000:0000:0000:0000:0000";
+static const char dev_cmd_src6_0[] = "src6 2001:db38:1234:0000:0000:0000:0000:0000";
+static const char dev_cmd_src_min_0[] = "src_min 101.102.103.104";
+static const char dev_cmd_src_max_0[] = "src_max 201.202.203.204";
+static const char dev_cmd_dst_mac_0[] = "dst_mac 01:02:03:04:05:06";
+static const char dev_cmd_src_mac_0[] = "src_mac 11:12:13:14:15:16";
+static const char dev_cmd_clear_counters_0[] = "clear_counters";
+static const char dev_cmd_flows_0[] = "flows 100";
+static const char dev_cmd_spi_0[] = "spi 100";
+static const char dev_cmd_flowlen_0[] = "flowlen 100";
+static const char dev_cmd_queue_map_min_0[] = "queue_map_min 1";
+static const char dev_cmd_queue_map_max_0[] = "queue_map_max 2";
+static const char dev_cmd_mpls_0[] = "mpls 00000001";
+static const char dev_cmd_mpls_1[] = "mpls 00000001,000000f2";
+static const char dev_cmd_mpls_2[] = "mpls 00000f00,00000f01,00000f02,00000f03,00000f04,00000f05,00000f06,00000f07,00000f08,00000f09,00000f0a,00000f0b,00000f0c,00000f0d,00000f0e,00000f0f";
+static const char dev_cmd_mpls_3[] = "mpls 00000f00,00000f01,00000f02,00000f03,00000f04,00000f05,00000f06,00000f07,00000f08,00000f09,00000f0a,00000f0b,00000f0c,00000f0d,00000f0e,00000f0f,00000f10";
+static const char dev_cmd_vlan_id_0[] = "vlan_id 1";
+static const char dev_cmd_vlan_p_0[] = "vlan_p 1";
+static const char dev_cmd_vlan_cfi_0[] = "vlan_cfi 1";
+static const char dev_cmd_vlan_id_1[] = "vlan_id 4096";
+static const char dev_cmd_svlan_id_0[] = "svlan_id 1";
+static const char dev_cmd_svlan_p_0[] = "svlan_p 1";
+static const char dev_cmd_svlan_cfi_0[] = "svlan_cfi 1";
+static const char dev_cmd_svlan_id_1[] = "svlan_id 4096";
+static const char dev_cmd_tos_0[] = "tos 0";
+static const char dev_cmd_tos_1[] = "tos 0f";
+static const char dev_cmd_tos_2[] = "tos 0ff";
+static const char dev_cmd_traffic_class_0[] = "traffic_class f0";
+static const char dev_cmd_skb_priority_0[] = "skb_priority 999";
+
+FIXTURE(proc_net_pktgen) {
+ int ctrl_fd;
+ int thr_fd;
+ int dev_fd;
+};
+
+FIXTURE_SETUP(proc_net_pktgen) {
+ int r;
+ ssize_t len;
+
+ r = system("modprobe pktgen");
+ ASSERT_EQ(r, 0) TH_LOG("CONFIG_NET_PKTGEN not enabled, module pktgen not loaded?");
+
+ self->ctrl_fd = open("/proc/net/pktgen/pgctrl", O_RDWR);
+ ASSERT_GE(self->ctrl_fd, 0) TH_LOG("CONFIG_NET_PKTGEN not enabled, module pktgen not loaded?");
+
+ self->thr_fd = open("/proc/net/pktgen/kpktgend_0", O_RDWR);
+ ASSERT_GE(self->thr_fd, 0) TH_LOG("CONFIG_NET_PKTGEN not enabled, module pktgen not loaded?");
+
+ len = write(self->thr_fd, thr_cmd_add_loopback_0, sizeof(thr_cmd_add_loopback_0));
+ ASSERT_EQ(len, sizeof(thr_cmd_add_loopback_0)) TH_LOG("device lo@0 already registered?");
+
+ self->dev_fd = open("/proc/net/pktgen/lo@0", O_RDWR);
+ ASSERT_GE(self->dev_fd, 0) TH_LOG("device entry for lo@0 missing?");
+}
+
+FIXTURE_TEARDOWN(proc_net_pktgen) {
+ int ret;
+ ssize_t len;
+
+ ret = close(self->dev_fd);
+ EXPECT_EQ(ret, 0);
+
+ len = write(self->thr_fd, thr_cmd_rm_loopback_0, sizeof(thr_cmd_rm_loopback_0));
+ EXPECT_EQ(len, sizeof(thr_cmd_rm_loopback_0));
+
+ ret = close(self->thr_fd);
+ EXPECT_EQ(ret, 0);
+
+ ret = close(self->ctrl_fd);
+ EXPECT_EQ(ret, 0);
+}
+
+TEST_F(proc_net_pktgen, wrong_ctrl_cmd) {
+ for (int i = 0; i <= sizeof(wrong_ctrl_cmd); i++) {
+ ssize_t len;
+
+ len = write(self->ctrl_fd, wrong_ctrl_cmd, i);
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EINVAL);
+ }
+}
+
+TEST_F(proc_net_pktgen, ctrl_cmd) {
+ ssize_t len;
+
+ len = write(self->ctrl_fd, ctrl_cmd_stop, sizeof(ctrl_cmd_stop));
+ EXPECT_EQ(len, sizeof(ctrl_cmd_stop));
+
+ len = write(self->ctrl_fd, ctrl_cmd_stop, sizeof(ctrl_cmd_stop) - 1);
+ EXPECT_EQ(len, sizeof(ctrl_cmd_stop) - 1);
+
+ len = write(self->ctrl_fd, ctrl_cmd_start, sizeof(ctrl_cmd_start));
+ EXPECT_EQ(len, sizeof(ctrl_cmd_start));
+
+ len = write(self->ctrl_fd, ctrl_cmd_start, sizeof(ctrl_cmd_start) - 1);
+ EXPECT_EQ(len, sizeof(ctrl_cmd_start) - 1);
+
+ len = write(self->ctrl_fd, ctrl_cmd_reset, sizeof(ctrl_cmd_reset));
+ EXPECT_EQ(len, sizeof(ctrl_cmd_reset));
+
+ len = write(self->ctrl_fd, ctrl_cmd_reset, sizeof(ctrl_cmd_reset) - 1);
+ EXPECT_EQ(len, sizeof(ctrl_cmd_reset) - 1);
+}
+
+TEST_F(proc_net_pktgen, wrong_thr_cmd) {
+ for (int i = 0; i <= sizeof(wrong_thr_cmd); i++) {
+ ssize_t len;
+
+ len = write(self->thr_fd, wrong_thr_cmd, i);
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EINVAL);
+ }
+}
+
+TEST_F(proc_net_pktgen, legacy_thr_cmd) {
+ for (int i = 0; i <= sizeof(legacy_thr_cmd); i++) {
+ ssize_t len;
+
+ len = write(self->thr_fd, legacy_thr_cmd, i);
+ if (i < (sizeof(legacy_thr_cmd) - 1)) {
+ /* incomplete command string */
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EINVAL);
+ } else {
+ /* complete command string without/with trailing '\0' */
+ EXPECT_EQ(len, i);
+ }
+ }
+}
+
+TEST_F(proc_net_pktgen, wrong_dev_cmd) {
+ for (int i = 0; i <= sizeof(wrong_dev_cmd); i++) {
+ ssize_t len;
+
+ len = write(self->dev_fd, wrong_dev_cmd, i);
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EINVAL);
+ }
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_min_pkt_size) {
+ ssize_t len;
+
+ /* with trailing '\0' */
+ len = write(self->dev_fd, dev_cmd_min_pkt_size_0, sizeof(dev_cmd_min_pkt_size_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_0));
+
+ /* without trailing '\0' */
+ len = write(self->dev_fd, dev_cmd_min_pkt_size_0, sizeof(dev_cmd_min_pkt_size_0) - 1);
+ EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_0) - 1);
+
+ /* with trailing '\0' */
+ len = write(self->dev_fd, dev_cmd_min_pkt_size_1, sizeof(dev_cmd_min_pkt_size_1));
+ EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_1));
+
+ /* without trailing '\0' */
+ len = write(self->dev_fd, dev_cmd_min_pkt_size_1, sizeof(dev_cmd_min_pkt_size_1) - 1);
+ EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_1) - 1);
+
+ /* with trailing '\0' */
+ len = write(self->dev_fd, dev_cmd_min_pkt_size_2, sizeof(dev_cmd_min_pkt_size_2));
+ EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_2));
+
+ /* without trailing '\0' */
+ len = write(self->dev_fd, dev_cmd_min_pkt_size_2, sizeof(dev_cmd_min_pkt_size_2) - 1);
+ EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_2) - 1);
+
+ len = write(self->dev_fd, dev_cmd_min_pkt_size_3, sizeof(dev_cmd_min_pkt_size_3));
+ EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_3));
+
+ len = write(self->dev_fd, dev_cmd_min_pkt_size_4, sizeof(dev_cmd_min_pkt_size_4));
+ EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_4));
+
+ len = write(self->dev_fd, dev_cmd_min_pkt_size_5, sizeof(dev_cmd_min_pkt_size_5));
+ EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_5));
+
+ len = write(self->dev_fd, dev_cmd_min_pkt_size_6, sizeof(dev_cmd_min_pkt_size_6));
+ EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_6));
+
+ len = write(self->dev_fd, dev_cmd_min_pkt_size_7, sizeof(dev_cmd_min_pkt_size_7));
+ EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_7));
+
+ len = write(self->dev_fd, dev_cmd_min_pkt_size_8, sizeof(dev_cmd_min_pkt_size_8));
+ EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_8));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_max_pkt_size) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_max_pkt_size_0, sizeof(dev_cmd_max_pkt_size_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_max_pkt_size_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_pkt_size) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_pkt_size_0, sizeof(dev_cmd_pkt_size_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_pkt_size_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_imix_weights) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_imix_weights_0, sizeof(dev_cmd_imix_weights_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_imix_weights_0));
+
+ len = write(self->dev_fd, dev_cmd_imix_weights_1, sizeof(dev_cmd_imix_weights_1));
+ EXPECT_EQ(len, sizeof(dev_cmd_imix_weights_1));
+
+ len = write(self->dev_fd, dev_cmd_imix_weights_2, sizeof(dev_cmd_imix_weights_2));
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, E2BIG);
+
+ /* with trailing '\0' */
+ len = write(self->dev_fd, dev_cmd_imix_weights_3, sizeof(dev_cmd_imix_weights_3));
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ /* without trailing '\0' */
+ len = write(self->dev_fd, dev_cmd_imix_weights_3, sizeof(dev_cmd_imix_weights_3) - 1);
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ /* with trailing '\0' */
+ len = write(self->dev_fd, dev_cmd_imix_weights_4, sizeof(dev_cmd_imix_weights_4));
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ /* without trailing '\0' */
+ len = write(self->dev_fd, dev_cmd_imix_weights_4, sizeof(dev_cmd_imix_weights_4) - 1);
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ /* with trailing '\0' */
+ len = write(self->dev_fd, dev_cmd_imix_weights_5, sizeof(dev_cmd_imix_weights_5));
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ /* without trailing '\0' */
+ len = write(self->dev_fd, dev_cmd_imix_weights_5, sizeof(dev_cmd_imix_weights_5) - 1);
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ /* with trailing '\0' */
+ len = write(self->dev_fd, dev_cmd_imix_weights_6, sizeof(dev_cmd_imix_weights_6));
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ /* without trailing '\0' */
+ len = write(self->dev_fd, dev_cmd_imix_weights_6, sizeof(dev_cmd_imix_weights_6) - 1);
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EINVAL);
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_debug) {
+ ssize_t len;
+
+ /* debug on */
+ len = write(self->dev_fd, dev_cmd_debug_0, sizeof(dev_cmd_debug_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_debug_0));
+
+ /* debug off */
+ len = write(self->dev_fd, dev_cmd_debug_1, sizeof(dev_cmd_debug_1));
+ EXPECT_EQ(len, sizeof(dev_cmd_debug_1));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_frags) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_frags_0, sizeof(dev_cmd_frags_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_frags_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_delay) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_delay_0, sizeof(dev_cmd_delay_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_delay_0));
+
+ len = write(self->dev_fd, dev_cmd_delay_1, sizeof(dev_cmd_delay_1));
+ EXPECT_EQ(len, sizeof(dev_cmd_delay_1));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_rate) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_rate_0, sizeof(dev_cmd_rate_0));
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ len = write(self->dev_fd, dev_cmd_rate_1, sizeof(dev_cmd_rate_1));
+ EXPECT_EQ(len, sizeof(dev_cmd_rate_1));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_ratep) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_ratep_0, sizeof(dev_cmd_ratep_0));
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ len = write(self->dev_fd, dev_cmd_ratep_1, sizeof(dev_cmd_ratep_1));
+ EXPECT_EQ(len, sizeof(dev_cmd_ratep_1));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_udp_src_min) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_udp_src_min_0, sizeof(dev_cmd_udp_src_min_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_udp_src_min_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_udp_dst_min) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_udp_dst_min_0, sizeof(dev_cmd_udp_dst_min_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_udp_dst_min_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_udp_src_max) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_udp_src_max_0, sizeof(dev_cmd_udp_src_max_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_udp_src_max_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_udp_dst_max) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_udp_dst_max_0, sizeof(dev_cmd_udp_dst_max_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_udp_dst_max_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_clone_skb) {
+ ssize_t len;
+
+ /* clone_skb on (gives EOPNOTSUPP on lo device) */
+ len = write(self->dev_fd, dev_cmd_clone_skb_0, sizeof(dev_cmd_clone_skb_0));
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EOPNOTSUPP);
+
+ /* clone_skb off */
+ len = write(self->dev_fd, dev_cmd_clone_skb_1, sizeof(dev_cmd_clone_skb_1));
+ EXPECT_EQ(len, sizeof(dev_cmd_clone_skb_1));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_count) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_count_0, sizeof(dev_cmd_count_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_count_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_src_mac_count) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_src_mac_count_0, sizeof(dev_cmd_src_mac_count_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_src_mac_count_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_dst_mac_count) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_dst_mac_count_0, sizeof(dev_cmd_dst_mac_count_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_dst_mac_count_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_burst) {
+ ssize_t len;
+
+ /* burst off */
+ len = write(self->dev_fd, dev_cmd_burst_0, sizeof(dev_cmd_burst_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_burst_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_node) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_node_0, sizeof(dev_cmd_node_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_node_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_xmit_mode) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_xmit_mode_0, sizeof(dev_cmd_xmit_mode_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_xmit_mode_0));
+
+ len = write(self->dev_fd, dev_cmd_xmit_mode_1, sizeof(dev_cmd_xmit_mode_1));
+ EXPECT_EQ(len, sizeof(dev_cmd_xmit_mode_1));
+
+ len = write(self->dev_fd, dev_cmd_xmit_mode_2, sizeof(dev_cmd_xmit_mode_2));
+ EXPECT_EQ(len, sizeof(dev_cmd_xmit_mode_2));
+
+ len = write(self->dev_fd, dev_cmd_xmit_mode_3, sizeof(dev_cmd_xmit_mode_3));
+ EXPECT_EQ(len, sizeof(dev_cmd_xmit_mode_3));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_flag) {
+ ssize_t len;
+
+ /* flag UDPCSUM on */
+ len = write(self->dev_fd, dev_cmd_flag_0, sizeof(dev_cmd_flag_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_flag_0));
+
+ /* flag UDPCSUM off */
+ len = write(self->dev_fd, dev_cmd_flag_1, sizeof(dev_cmd_flag_1));
+ EXPECT_EQ(len, sizeof(dev_cmd_flag_1));
+
+ /* flag invalid */
+ len = write(self->dev_fd, dev_cmd_flag_2, sizeof(dev_cmd_flag_2));
+ EXPECT_EQ(len, sizeof(dev_cmd_flag_2));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_dst_min) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_dst_min_0, sizeof(dev_cmd_dst_min_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_dst_min_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_dst) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_dst_0, sizeof(dev_cmd_dst_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_dst_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_dst_max) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_dst_max_0, sizeof(dev_cmd_dst_max_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_dst_max_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_dst6) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_dst6_0, sizeof(dev_cmd_dst6_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_dst6_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_dst6_min) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_dst6_min_0, sizeof(dev_cmd_dst6_min_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_dst6_min_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_dst6_max) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_dst6_max_0, sizeof(dev_cmd_dst6_max_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_dst6_max_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_src6) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_src6_0, sizeof(dev_cmd_src6_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_src6_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_src_min) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_src_min_0, sizeof(dev_cmd_src_min_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_src_min_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_src_max) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_src_max_0, sizeof(dev_cmd_src_max_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_src_max_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_dst_mac) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_dst_mac_0, sizeof(dev_cmd_dst_mac_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_dst_mac_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_src_mac) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_src_mac_0, sizeof(dev_cmd_src_mac_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_src_mac_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_clear_counters) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_clear_counters_0, sizeof(dev_cmd_clear_counters_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_clear_counters_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_flows) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_flows_0, sizeof(dev_cmd_flows_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_flows_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_spi) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_spi_0, sizeof(dev_cmd_spi_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_spi_0)) TH_LOG("CONFIG_XFRM not enabled?");
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_flowlen) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_flowlen_0, sizeof(dev_cmd_flowlen_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_flowlen_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_queue_map_min) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_queue_map_min_0, sizeof(dev_cmd_queue_map_min_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_queue_map_min_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_queue_map_max) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_queue_map_max_0, sizeof(dev_cmd_queue_map_max_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_queue_map_max_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_mpls) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_mpls_0, sizeof(dev_cmd_mpls_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_mpls_0));
+
+ len = write(self->dev_fd, dev_cmd_mpls_1, sizeof(dev_cmd_mpls_1));
+ EXPECT_EQ(len, sizeof(dev_cmd_mpls_1));
+
+ len = write(self->dev_fd, dev_cmd_mpls_2, sizeof(dev_cmd_mpls_2));
+ EXPECT_EQ(len, sizeof(dev_cmd_mpls_2));
+
+ len = write(self->dev_fd, dev_cmd_mpls_3, sizeof(dev_cmd_mpls_3));
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, E2BIG);
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_vlan_id) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_vlan_id_0, sizeof(dev_cmd_vlan_id_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_vlan_id_0));
+
+ len = write(self->dev_fd, dev_cmd_vlan_p_0, sizeof(dev_cmd_vlan_p_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_vlan_p_0));
+
+ len = write(self->dev_fd, dev_cmd_vlan_cfi_0, sizeof(dev_cmd_vlan_cfi_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_vlan_cfi_0));
+
+ len = write(self->dev_fd, dev_cmd_vlan_id_1, sizeof(dev_cmd_vlan_id_1));
+ EXPECT_EQ(len, sizeof(dev_cmd_vlan_id_1));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_svlan_id) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_svlan_id_0, sizeof(dev_cmd_svlan_id_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_svlan_id_0));
+
+ len = write(self->dev_fd, dev_cmd_svlan_p_0, sizeof(dev_cmd_svlan_p_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_svlan_p_0));
+
+ len = write(self->dev_fd, dev_cmd_svlan_cfi_0, sizeof(dev_cmd_svlan_cfi_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_svlan_cfi_0));
+
+ len = write(self->dev_fd, dev_cmd_svlan_id_1, sizeof(dev_cmd_svlan_id_1));
+ EXPECT_EQ(len, sizeof(dev_cmd_svlan_id_1));
+}
+
+
+TEST_F(proc_net_pktgen, dev_cmd_tos) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_tos_0, sizeof(dev_cmd_tos_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_tos_0));
+
+ len = write(self->dev_fd, dev_cmd_tos_1, sizeof(dev_cmd_tos_1));
+ EXPECT_EQ(len, sizeof(dev_cmd_tos_1));
+
+ len = write(self->dev_fd, dev_cmd_tos_2, sizeof(dev_cmd_tos_2));
+ EXPECT_EQ(len, sizeof(dev_cmd_tos_2));
+}
+
+
+TEST_F(proc_net_pktgen, dev_cmd_traffic_class) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_traffic_class_0, sizeof(dev_cmd_traffic_class_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_traffic_class_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_skb_priority) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_skb_priority_0, sizeof(dev_cmd_skb_priority_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_skb_priority_0));
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/psock_tpacket.c b/tools/testing/selftests/net/psock_tpacket.c
index 404a2ce759ab..221270cee3ea 100644
--- a/tools/testing/selftests/net/psock_tpacket.c
+++ b/tools/testing/selftests/net/psock_tpacket.c
@@ -12,7 +12,7 @@
*
* Datapath:
* Open a pair of packet sockets and send resp. receive an a priori known
- * packet pattern accross the sockets and check if it was received resp.
+ * packet pattern across the sockets and check if it was received resp.
* sent correctly. Fanout in combination with RX_RING is currently not
* tested here.
*
diff --git a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c
index 066efd30e294..7b9bf8a7bbe1 100644
--- a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c
+++ b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c
@@ -112,7 +112,7 @@ TEST(reuseaddr_ports_exhausted_reusable_same_euid)
ASSERT_NE(-1, fd[0]) TH_LOG("failed to bind.");
if (opts->reuseport[0] && opts->reuseport[1]) {
- EXPECT_EQ(-1, fd[1]) TH_LOG("should fail to bind because both sockets succeed to be listened.");
+ EXPECT_EQ(-1, fd[1]) TH_LOG("should fail to bind because both sockets successfully listened.");
} else {
EXPECT_NE(-1, fd[1]) TH_LOG("should succeed to bind to connect to different destinations.");
}
diff --git a/tools/testing/selftests/net/rtnetlink.py b/tools/testing/selftests/net/rtnetlink.py
new file mode 100755
index 000000000000..80950888800b
--- /dev/null
+++ b/tools/testing/selftests/net/rtnetlink.py
@@ -0,0 +1,30 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+from lib.py import ksft_exit, ksft_run, ksft_ge, RtnlAddrFamily
+import socket
+
+IPV4_ALL_HOSTS_MULTICAST = b'\xe0\x00\x00\x01'
+
+def dump_mcaddr_check(rtnl: RtnlAddrFamily) -> None:
+ """
+ Verify that at least one interface has the IPv4 all-hosts multicast address.
+ At least the loopback interface should have this address.
+ """
+
+ addresses = rtnl.getmaddrs({"ifa-family": socket.AF_INET}, dump=True)
+
+ all_host_multicasts = [
+ addr for addr in addresses if addr['ifa-multicast'] == IPV4_ALL_HOSTS_MULTICAST
+ ]
+
+ ksft_ge(len(all_host_multicasts), 1,
+ "No interface found with the IPv4 all-hosts multicast address")
+
+def main() -> None:
+ rtnl = RtnlAddrFamily()
+ ksft_run([dump_mcaddr_check], args=(rtnl, ))
+ ksft_exit()
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/net/setup_veth.sh b/tools/testing/selftests/net/setup_veth.sh
index 1f78a87f6f37..152bf4c65747 100644
--- a/tools/testing/selftests/net/setup_veth.sh
+++ b/tools/testing/selftests/net/setup_veth.sh
@@ -11,7 +11,8 @@ setup_veth_ns() {
local -r ns_mac="$4"
[[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}"
- echo 1000000 > "/sys/class/net/${ns_dev}/gro_flush_timeout"
+ echo 200000 > "/sys/class/net/${ns_dev}/gro_flush_timeout"
+ echo 1 > "/sys/class/net/${ns_dev}/napi_defer_hard_irqs"
ip link set dev "${ns_dev}" netns "${ns_name}" mtu 65535
ip -netns "${ns_name}" link set dev "${ns_dev}" up
diff --git a/tools/testing/selftests/net/so_rcv_listener.c b/tools/testing/selftests/net/so_rcv_listener.c
new file mode 100644
index 000000000000..bc5841192aa6
--- /dev/null
+++ b/tools/testing/selftests/net/so_rcv_listener.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <errno.h>
+#include <netdb.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <linux/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+
+#ifndef SO_RCVPRIORITY
+#define SO_RCVPRIORITY 82
+#endif
+
+struct options {
+ __u32 val;
+ int name;
+ int rcvname;
+ const char *host;
+ const char *service;
+} opt;
+
+static void __attribute__((noreturn)) usage(const char *bin)
+{
+ printf("Usage: %s [opts] <dst host> <dst port / service>\n", bin);
+ printf("Options:\n"
+ "\t\t-M val Test SO_RCVMARK\n"
+ "\t\t-P val Test SO_RCVPRIORITY\n"
+ "");
+ exit(EXIT_FAILURE);
+}
+
+static void parse_args(int argc, char *argv[])
+{
+ int o;
+
+ while ((o = getopt(argc, argv, "M:P:")) != -1) {
+ switch (o) {
+ case 'M':
+ opt.val = atoi(optarg);
+ opt.name = SO_MARK;
+ opt.rcvname = SO_RCVMARK;
+ break;
+ case 'P':
+ opt.val = atoi(optarg);
+ opt.name = SO_PRIORITY;
+ opt.rcvname = SO_RCVPRIORITY;
+ break;
+ default:
+ usage(argv[0]);
+ break;
+ }
+ }
+
+ if (optind != argc - 2)
+ usage(argv[0]);
+
+ opt.host = argv[optind];
+ opt.service = argv[optind + 1];
+}
+
+int main(int argc, char *argv[])
+{
+ int err = 0;
+ int recv_fd = -1;
+ int ret_value = 0;
+ __u32 recv_val;
+ struct cmsghdr *cmsg;
+ char cbuf[CMSG_SPACE(sizeof(__u32))];
+ char recv_buf[CMSG_SPACE(sizeof(__u32))];
+ struct iovec iov[1];
+ struct msghdr msg;
+ struct sockaddr_in recv_addr4;
+ struct sockaddr_in6 recv_addr6;
+
+ parse_args(argc, argv);
+
+ int family = strchr(opt.host, ':') ? AF_INET6 : AF_INET;
+
+ recv_fd = socket(family, SOCK_DGRAM, IPPROTO_UDP);
+ if (recv_fd < 0) {
+ perror("Can't open recv socket");
+ ret_value = -errno;
+ goto cleanup;
+ }
+
+ err = setsockopt(recv_fd, SOL_SOCKET, opt.rcvname, &opt.val, sizeof(opt.val));
+ if (err < 0) {
+ perror("Recv setsockopt error");
+ ret_value = -errno;
+ goto cleanup;
+ }
+
+ if (family == AF_INET) {
+ memset(&recv_addr4, 0, sizeof(recv_addr4));
+ recv_addr4.sin_family = family;
+ recv_addr4.sin_port = htons(atoi(opt.service));
+
+ if (inet_pton(family, opt.host, &recv_addr4.sin_addr) <= 0) {
+ perror("Invalid IPV4 address");
+ ret_value = -errno;
+ goto cleanup;
+ }
+
+ err = bind(recv_fd, (struct sockaddr *)&recv_addr4, sizeof(recv_addr4));
+ } else {
+ memset(&recv_addr6, 0, sizeof(recv_addr6));
+ recv_addr6.sin6_family = family;
+ recv_addr6.sin6_port = htons(atoi(opt.service));
+
+ if (inet_pton(family, opt.host, &recv_addr6.sin6_addr) <= 0) {
+ perror("Invalid IPV6 address");
+ ret_value = -errno;
+ goto cleanup;
+ }
+
+ err = bind(recv_fd, (struct sockaddr *)&recv_addr6, sizeof(recv_addr6));
+ }
+
+ if (err < 0) {
+ perror("Recv bind error");
+ ret_value = -errno;
+ goto cleanup;
+ }
+
+ iov[0].iov_base = recv_buf;
+ iov[0].iov_len = sizeof(recv_buf);
+
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_iov = iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cbuf;
+ msg.msg_controllen = sizeof(cbuf);
+
+ err = recvmsg(recv_fd, &msg, 0);
+ if (err < 0) {
+ perror("Message receive error");
+ ret_value = -errno;
+ goto cleanup;
+ }
+
+ for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+ if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == opt.name) {
+ recv_val = *(__u32 *)CMSG_DATA(cmsg);
+ printf("Received value: %u\n", recv_val);
+
+ if (recv_val != opt.val) {
+ fprintf(stderr, "Error: expected value: %u, got: %u\n",
+ opt.val, recv_val);
+ ret_value = -EINVAL;
+ }
+ goto cleanup;
+ }
+ }
+
+ fprintf(stderr, "Error: No matching cmsg received\n");
+ ret_value = -ENOMSG;
+
+cleanup:
+ if (recv_fd >= 0)
+ close(recv_fd);
+
+ return ret_value;
+}
diff --git a/tools/testing/selftests/net/tcp_ao/connect-deny.c b/tools/testing/selftests/net/tcp_ao/connect-deny.c
index d418162d335f..93b61e9a36f1 100644
--- a/tools/testing/selftests/net/tcp_ao/connect-deny.c
+++ b/tools/testing/selftests/net/tcp_ao/connect-deny.c
@@ -4,6 +4,7 @@
#include "aolib.h"
#define fault(type) (inj == FAULT_ ## type)
+static volatile int sk_pair;
static inline int test_add_key_maclen(int sk, const char *key, uint8_t maclen,
union tcp_addr in_addr, uint8_t prefix,
@@ -34,10 +35,10 @@ static void try_accept(const char *tst_name, unsigned int port, const char *pwd,
const char *cnt_name, test_cnt cnt_expected,
fault_t inj)
{
- struct tcp_ao_counters ao_cnt1, ao_cnt2;
+ struct tcp_counters cnt1, cnt2;
uint64_t before_cnt = 0, after_cnt = 0; /* silence GCC */
+ test_cnt poll_cnt = (cnt_expected == TEST_CNT_GOOD) ? 0 : cnt_expected;
int lsk, err, sk = 0;
- time_t timeout;
lsk = test_listen_socket(this_ip_addr, port, 1);
@@ -46,21 +47,24 @@ static void try_accept(const char *tst_name, unsigned int port, const char *pwd,
if (cnt_name)
before_cnt = netstat_get_one(cnt_name, NULL);
- if (pwd && test_get_tcp_ao_counters(lsk, &ao_cnt1))
- test_error("test_get_tcp_ao_counters()");
+ if (pwd && test_get_tcp_counters(lsk, &cnt1))
+ test_error("test_get_tcp_counters()");
synchronize_threads(); /* preparations done */
- timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
- err = test_wait_fd(lsk, timeout, 0);
+ err = test_skpair_wait_poll(lsk, 0, poll_cnt, &sk_pair);
if (err == -ETIMEDOUT) {
+ sk_pair = err;
if (!fault(TIMEOUT))
- test_fail("timed out for accept()");
+ test_fail("%s: timed out for accept()", tst_name);
+ } else if (err == -EKEYREJECTED) {
+ if (!fault(KEYREJECT))
+ test_fail("%s: key was rejected", tst_name);
} else if (err < 0) {
- test_error("test_wait_fd()");
+ test_error("test_skpair_wait_poll()");
} else {
if (fault(TIMEOUT))
- test_fail("ready to accept");
+ test_fail("%s: ready to accept", tst_name);
sk = accept(lsk, NULL, NULL);
if (sk < 0) {
@@ -72,13 +76,13 @@ static void try_accept(const char *tst_name, unsigned int port, const char *pwd,
}
synchronize_threads(); /* before counter checks */
- if (pwd && test_get_tcp_ao_counters(lsk, &ao_cnt2))
- test_error("test_get_tcp_ao_counters()");
+ if (pwd && test_get_tcp_counters(lsk, &cnt2))
+ test_error("test_get_tcp_counters()");
close(lsk);
if (pwd)
- test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected);
+ test_assert_counters(tst_name, &cnt1, &cnt2, cnt_expected);
if (!cnt_name)
goto out;
@@ -109,7 +113,7 @@ static void *server_fn(void *arg)
try_accept("Non-AO server + AO client", port++, NULL,
this_ip_dest, -1, 100, 100, 0,
- "TCPAOKeyNotFound", 0, FAULT_TIMEOUT);
+ "TCPAOKeyNotFound", TEST_CNT_NS_KEY_NOT_FOUND, FAULT_TIMEOUT);
try_accept("AO server + Non-AO client", port++, DEFAULT_TEST_PASSWORD,
this_ip_dest, -1, 100, 100, 0,
@@ -135,8 +139,9 @@ static void *server_fn(void *arg)
wrong_addr, -1, 100, 100, 0,
"TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND, FAULT_TIMEOUT);
+ /* Key rejected by the other side, failing short through skpair */
try_accept("Client: Wrong addr", port++, NULL,
- this_ip_dest, -1, 100, 100, 0, NULL, 0, FAULT_TIMEOUT);
+ this_ip_dest, -1, 100, 100, 0, NULL, 0, FAULT_KEYREJECT);
try_accept("rcv id != snd id", port++, DEFAULT_TEST_PASSWORD,
this_ip_dest, -1, 200, 100, 0,
@@ -163,8 +168,7 @@ static void try_connect(const char *tst_name, unsigned int port,
uint8_t sndid, uint8_t rcvid,
test_cnt cnt_expected, fault_t inj)
{
- struct tcp_ao_counters ao_cnt1, ao_cnt2;
- time_t timeout;
+ struct tcp_counters cnt1, cnt2;
int sk, ret;
sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
@@ -174,16 +178,15 @@ static void try_connect(const char *tst_name, unsigned int port,
if (pwd && test_add_key(sk, pwd, addr, prefix, sndid, rcvid))
test_error("setsockopt(TCP_AO_ADD_KEY)");
- if (pwd && test_get_tcp_ao_counters(sk, &ao_cnt1))
- test_error("test_get_tcp_ao_counters()");
+ if (pwd && test_get_tcp_counters(sk, &cnt1))
+ test_error("test_get_tcp_counters()");
synchronize_threads(); /* preparations done */
- timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
- ret = _test_connect_socket(sk, this_ip_dest, port, timeout);
-
+ ret = test_skpair_connect_poll(sk, this_ip_dest, port, cnt_expected, &sk_pair);
synchronize_threads(); /* before counter checks */
if (ret < 0) {
+ sk_pair = ret;
if (fault(KEYREJECT) && ret == -EKEYREJECTED) {
test_ok("%s: connect() was prevented", tst_name);
} else if (ret == -ETIMEDOUT && fault(TIMEOUT)) {
@@ -202,9 +205,11 @@ static void try_connect(const char *tst_name, unsigned int port,
else
test_ok("%s: connected", tst_name);
if (pwd && ret > 0) {
- if (test_get_tcp_ao_counters(sk, &ao_cnt2))
- test_error("test_get_tcp_ao_counters()");
- test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected);
+ if (test_get_tcp_counters(sk, &cnt2))
+ test_error("test_get_tcp_counters()");
+ test_assert_counters(tst_name, &cnt1, &cnt2, cnt_expected);
+ } else if (pwd) {
+ test_tcp_counters_free(&cnt1);
}
out:
synchronize_threads(); /* close() */
@@ -241,6 +246,11 @@ static void *client_fn(void *arg)
try_connect("Wrong rcv id", port++, DEFAULT_TEST_PASSWORD,
this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
+ /*
+ * XXX: The test doesn't increase any counters, see tcp_make_synack().
+ * Potentially, it can be speed up by setting sk_pair = -ETIMEDOUT
+ * but the price would be increased complexity of the tracer thread.
+ */
trace_ao_event_sk_expect(TCP_AO_SYNACK_NO_KEY, this_ip_dest, addr_any,
port, 0, 100, 100);
try_connect("Wrong snd id", port++, DEFAULT_TEST_PASSWORD,
diff --git a/tools/testing/selftests/net/tcp_ao/connect.c b/tools/testing/selftests/net/tcp_ao/connect.c
index f1d8d29e393f..340f00e979ea 100644
--- a/tools/testing/selftests/net/tcp_ao/connect.c
+++ b/tools/testing/selftests/net/tcp_ao/connect.c
@@ -35,7 +35,7 @@ static void *client_fn(void *arg)
uint64_t before_aogood, after_aogood;
const size_t nr_packets = 20;
struct netstat *ns_before, *ns_after;
- struct tcp_ao_counters ao1, ao2;
+ struct tcp_counters ao1, ao2;
if (sk < 0)
test_error("socket()");
@@ -50,18 +50,18 @@ static void *client_fn(void *arg)
ns_before = netstat_read();
before_aogood = netstat_get(ns_before, "TCPAOGood", NULL);
- if (test_get_tcp_ao_counters(sk, &ao1))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &ao1))
+ test_error("test_get_tcp_counters()");
- if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) {
+ if (test_client_verify(sk, 100, nr_packets)) {
test_fail("verify failed");
return NULL;
}
ns_after = netstat_read();
after_aogood = netstat_get(ns_after, "TCPAOGood", NULL);
- if (test_get_tcp_ao_counters(sk, &ao2))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &ao2))
+ test_error("test_get_tcp_counters()");
netstat_print_diff(ns_before, ns_after);
netstat_free(ns_before);
netstat_free(ns_after);
@@ -71,14 +71,14 @@ static void *client_fn(void *arg)
nr_packets, after_aogood, before_aogood);
return NULL;
}
- if (test_tcp_ao_counters_cmp("connect", &ao1, &ao2, TEST_CNT_GOOD))
+ if (test_assert_counters("connect", &ao1, &ao2, TEST_CNT_GOOD))
return NULL;
test_ok("connect TCPAOGood %" PRIu64 "/%" PRIu64 "/%" PRIu64 " => %" PRIu64 "/%" PRIu64 "/%" PRIu64 ", sent %zu",
- before_aogood, ao1.ao_info_pkt_good,
- ao1.key_cnts[0].pkt_good,
- after_aogood, ao2.ao_info_pkt_good,
- ao2.key_cnts[0].pkt_good,
+ before_aogood, ao1.ao.ao_info_pkt_good,
+ ao1.ao.key_cnts[0].pkt_good,
+ after_aogood, ao2.ao.ao_info_pkt_good,
+ ao2.ao.key_cnts[0].pkt_good,
nr_packets);
return NULL;
}
diff --git a/tools/testing/selftests/net/tcp_ao/icmps-discard.c b/tools/testing/selftests/net/tcp_ao/icmps-discard.c
index a1614f0d8c44..85c1a1e958c6 100644
--- a/tools/testing/selftests/net/tcp_ao/icmps-discard.c
+++ b/tools/testing/selftests/net/tcp_ao/icmps-discard.c
@@ -53,7 +53,7 @@ static void serve_interfered(int sk)
ssize_t test_quota = packet_size * packets_nr * 10;
uint64_t dest_unreach_a, dest_unreach_b;
uint64_t icmp_ignored_a, icmp_ignored_b;
- struct tcp_ao_counters ao_cnt1, ao_cnt2;
+ struct tcp_counters cnt1, cnt2;
bool counter_not_found;
struct netstat *ns_after, *ns_before;
ssize_t bytes;
@@ -61,16 +61,16 @@ static void serve_interfered(int sk)
ns_before = netstat_read();
dest_unreach_a = netstat_get(ns_before, dst_unreach, NULL);
icmp_ignored_a = netstat_get(ns_before, tcpao_icmps, NULL);
- if (test_get_tcp_ao_counters(sk, &ao_cnt1))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &cnt1))
+ test_error("test_get_tcp_counters()");
bytes = test_server_run(sk, test_quota, 0);
ns_after = netstat_read();
netstat_print_diff(ns_before, ns_after);
dest_unreach_b = netstat_get(ns_after, dst_unreach, NULL);
icmp_ignored_b = netstat_get(ns_after, tcpao_icmps,
&counter_not_found);
- if (test_get_tcp_ao_counters(sk, &ao_cnt2))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &cnt2))
+ test_error("test_get_tcp_counters()");
netstat_free(ns_before);
netstat_free(ns_after);
@@ -91,9 +91,9 @@ static void serve_interfered(int sk)
return;
}
#ifdef TEST_ICMPS_ACCEPT
- test_tcp_ao_counters_cmp(NULL, &ao_cnt1, &ao_cnt2, TEST_CNT_GOOD);
+ test_assert_counters(NULL, &cnt1, &cnt2, TEST_CNT_GOOD);
#else
- test_tcp_ao_counters_cmp(NULL, &ao_cnt1, &ao_cnt2, TEST_CNT_GOOD | TEST_CNT_AO_DROPPED_ICMP);
+ test_assert_counters(NULL, &cnt1, &cnt2, TEST_CNT_GOOD | TEST_CNT_AO_DROPPED_ICMP);
#endif
if (icmp_ignored_a >= icmp_ignored_b) {
test_icmps_fail("%s counter didn't change: %" PRIu64 " >= %" PRIu64,
@@ -395,7 +395,6 @@ static void icmp_interfere(const size_t nr, uint32_t rcv_nxt, void *src, void *d
static void send_interfered(int sk)
{
- const unsigned int timeout = TEST_TIMEOUT_SEC;
struct sockaddr_in6 src, dst;
socklen_t addr_sz;
@@ -409,7 +408,7 @@ static void send_interfered(int sk)
while (1) {
uint32_t rcv_nxt;
- if (test_client_verify(sk, packet_size, packets_nr, timeout)) {
+ if (test_client_verify(sk, packet_size, packets_nr)) {
test_fail("client: connection is broken");
return;
}
diff --git a/tools/testing/selftests/net/tcp_ao/key-management.c b/tools/testing/selftests/net/tcp_ao/key-management.c
index d4385b52c10b..69d9a7a05d5c 100644
--- a/tools/testing/selftests/net/tcp_ao/key-management.c
+++ b/tools/testing/selftests/net/tcp_ao/key-management.c
@@ -629,11 +629,11 @@ static int key_collection_socket(bool server, unsigned int port)
}
static void verify_counters(const char *tst_name, bool is_listen_sk, bool server,
- struct tcp_ao_counters *a, struct tcp_ao_counters *b)
+ struct tcp_counters *a, struct tcp_counters *b)
{
unsigned int i;
- __test_tcp_ao_counters_cmp(tst_name, a, b, TEST_CNT_GOOD);
+ test_assert_counters_sk(tst_name, a, b, TEST_CNT_GOOD);
for (i = 0; i < collection.nr_keys; i++) {
struct test_key *key = &collection.keys[i];
@@ -652,12 +652,12 @@ static void verify_counters(const char *tst_name, bool is_listen_sk, bool server
rx_cnt_expected = key->used_on_server_tx;
}
- test_tcp_ao_key_counters_cmp(tst_name, a, b,
- rx_cnt_expected ? TEST_CNT_KEY_GOOD : 0,
- sndid, rcvid);
+ test_assert_counters_key(tst_name, &a->ao, &b->ao,
+ rx_cnt_expected ? TEST_CNT_KEY_GOOD : 0,
+ sndid, rcvid);
}
- test_tcp_ao_counters_free(a);
- test_tcp_ao_counters_free(b);
+ test_tcp_counters_free(a);
+ test_tcp_counters_free(b);
test_ok("%s: passed counters checks", tst_name);
}
@@ -791,17 +791,17 @@ out:
}
static int start_server(const char *tst_name, unsigned int port, size_t quota,
- struct tcp_ao_counters *begin,
+ struct tcp_counters *begin,
unsigned int current_index, unsigned int rnext_index)
{
- struct tcp_ao_counters lsk_c1, lsk_c2;
+ struct tcp_counters lsk_c1, lsk_c2;
ssize_t bytes;
int sk, lsk;
synchronize_threads(); /* 1: key collection initialized */
lsk = key_collection_socket(true, port);
- if (test_get_tcp_ao_counters(lsk, &lsk_c1))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(lsk, &lsk_c1))
+ test_error("test_get_tcp_counters()");
synchronize_threads(); /* 2: MKTs added => connect() */
if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0))
test_error("test_wait_fd()");
@@ -809,12 +809,12 @@ static int start_server(const char *tst_name, unsigned int port, size_t quota,
sk = accept(lsk, NULL, NULL);
if (sk < 0)
test_error("accept()");
- if (test_get_tcp_ao_counters(sk, begin))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, begin))
+ test_error("test_get_tcp_counters()");
synchronize_threads(); /* 3: accepted => send data */
- if (test_get_tcp_ao_counters(lsk, &lsk_c2))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(lsk, &lsk_c2))
+ test_error("test_get_tcp_counters()");
verify_keys(tst_name, lsk, true, true);
close(lsk);
@@ -830,12 +830,12 @@ static int start_server(const char *tst_name, unsigned int port, size_t quota,
}
static void end_server(const char *tst_name, int sk,
- struct tcp_ao_counters *begin)
+ struct tcp_counters *begin)
{
- struct tcp_ao_counters end;
+ struct tcp_counters end;
- if (test_get_tcp_ao_counters(sk, &end))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &end))
+ test_error("test_get_tcp_counters()");
verify_keys(tst_name, sk, false, true);
synchronize_threads(); /* 4: verified => closed */
@@ -848,7 +848,7 @@ static void end_server(const char *tst_name, int sk,
static void try_server_run(const char *tst_name, unsigned int port, size_t quota,
unsigned int current_index, unsigned int rnext_index)
{
- struct tcp_ao_counters tmp;
+ struct tcp_counters tmp;
int sk;
sk = start_server(tst_name, port, quota, &tmp,
@@ -860,7 +860,7 @@ static void server_rotations(const char *tst_name, unsigned int port,
size_t quota, unsigned int rotations,
unsigned int current_index, unsigned int rnext_index)
{
- struct tcp_ao_counters tmp;
+ struct tcp_counters tmp;
unsigned int i;
int sk;
@@ -886,7 +886,7 @@ static void server_rotations(const char *tst_name, unsigned int port,
static int run_client(const char *tst_name, unsigned int port,
unsigned int nr_keys, int current_index, int rnext_index,
- struct tcp_ao_counters *before,
+ struct tcp_counters *before,
const size_t msg_sz, const size_t msg_nr)
{
int sk;
@@ -904,8 +904,8 @@ static int run_client(const char *tst_name, unsigned int port,
if (test_set_key(sk, sndid, rcvid))
test_error("failed to set current/rnext keys");
}
- if (before && test_get_tcp_ao_counters(sk, before))
- test_error("test_get_tcp_ao_counters()");
+ if (before && test_get_tcp_counters(sk, before))
+ test_error("test_get_tcp_counters()");
synchronize_threads(); /* 2: MKTs added => connect() */
if (test_connect_socket(sk, this_ip_dest, port++) <= 0)
@@ -918,11 +918,11 @@ static int run_client(const char *tst_name, unsigned int port,
collection.keys[rnext_index].used_on_server_tx = 1;
synchronize_threads(); /* 3: accepted => send data */
- if (test_client_verify(sk, msg_sz, msg_nr, TEST_TIMEOUT_SEC)) {
+ if (test_client_verify(sk, msg_sz, msg_nr)) {
test_fail("verify failed");
close(sk);
if (before)
- test_tcp_ao_counters_free(before);
+ test_tcp_counters_free(before);
return -1;
}
@@ -931,7 +931,7 @@ static int run_client(const char *tst_name, unsigned int port,
static int start_client(const char *tst_name, unsigned int port,
unsigned int nr_keys, int current_index, int rnext_index,
- struct tcp_ao_counters *before,
+ struct tcp_counters *before,
const size_t msg_sz, const size_t msg_nr)
{
if (init_default_key_collection(nr_keys, true))
@@ -943,9 +943,9 @@ static int start_client(const char *tst_name, unsigned int port,
static void end_client(const char *tst_name, int sk, unsigned int nr_keys,
int current_index, int rnext_index,
- struct tcp_ao_counters *start)
+ struct tcp_counters *start)
{
- struct tcp_ao_counters end;
+ struct tcp_counters end;
/* Some application may become dependent on this kernel choice */
if (current_index < 0)
@@ -955,8 +955,8 @@ static void end_client(const char *tst_name, int sk, unsigned int nr_keys,
verify_current_rnext(tst_name, sk,
collection.keys[current_index].client_keyid,
collection.keys[rnext_index].server_keyid);
- if (start && test_get_tcp_ao_counters(sk, &end))
- test_error("test_get_tcp_ao_counters()");
+ if (start && test_get_tcp_counters(sk, &end))
+ test_error("test_get_tcp_counters()");
verify_keys(tst_name, sk, false, false);
synchronize_threads(); /* 4: verify => closed */
close(sk);
@@ -1016,7 +1016,7 @@ static void try_unmatched_keys(int sk, int *rnext_index, unsigned int port)
trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, this_ip_addr, this_ip_dest,
-1, port, 0, -1, -1, -1, -1, -1,
-1, key->server_keyid, -1);
- if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC))
+ if (test_client_verify(sk, msg_len, nr_packets))
test_fail("verify failed");
*rnext_index = i;
}
@@ -1048,7 +1048,7 @@ static void check_current_back(const char *tst_name, unsigned int port,
unsigned int current_index, unsigned int rnext_index,
unsigned int rotate_to_index)
{
- struct tcp_ao_counters tmp;
+ struct tcp_counters tmp;
int sk;
sk = start_client(tst_name, port, nr_keys, current_index, rnext_index,
@@ -1061,7 +1061,7 @@ static void check_current_back(const char *tst_name, unsigned int port,
port, -1, 0, -1, -1, -1, -1, -1,
collection.keys[rotate_to_index].client_keyid,
collection.keys[current_index].client_keyid, -1);
- if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC))
+ if (test_client_verify(sk, msg_len, nr_packets))
test_fail("verify failed");
/* There is a race here: between setting the current_key with
* setsockopt(TCP_AO_INFO) and starting to send some data - there
@@ -1081,7 +1081,7 @@ static void roll_over_keys(const char *tst_name, unsigned int port,
unsigned int nr_keys, unsigned int rotations,
unsigned int current_index, unsigned int rnext_index)
{
- struct tcp_ao_counters tmp;
+ struct tcp_counters tmp;
unsigned int i;
int sk;
@@ -1099,10 +1099,10 @@ static void roll_over_keys(const char *tst_name, unsigned int port,
collection.keys[i].server_keyid, -1);
if (test_set_key(sk, -1, collection.keys[i].server_keyid))
test_error("Can't change the Rnext key");
- if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) {
+ if (test_client_verify(sk, msg_len, nr_packets)) {
test_fail("verify failed");
close(sk);
- test_tcp_ao_counters_free(&tmp);
+ test_tcp_counters_free(&tmp);
return;
}
verify_current_rnext(tst_name, sk, -1,
@@ -1116,7 +1116,7 @@ static void roll_over_keys(const char *tst_name, unsigned int port,
static void try_client_run(const char *tst_name, unsigned int port,
unsigned int nr_keys, int current_index, int rnext_index)
{
- struct tcp_ao_counters tmp;
+ struct tcp_counters tmp;
int sk;
sk = start_client(tst_name, port, nr_keys, current_index, rnext_index,
diff --git a/tools/testing/selftests/net/tcp_ao/lib/aolib.h b/tools/testing/selftests/net/tcp_ao/lib/aolib.h
index 5db2f65cddc4..ebb2899c12fe 100644
--- a/tools/testing/selftests/net/tcp_ao/lib/aolib.h
+++ b/tools/testing/selftests/net/tcp_ao/lib/aolib.h
@@ -289,7 +289,7 @@ extern int link_set_up(const char *intf);
extern const unsigned int test_server_port;
extern int test_wait_fd(int sk, time_t sec, bool write);
extern int __test_connect_socket(int sk, const char *device,
- void *addr, size_t addr_sz, time_t timeout);
+ void *addr, size_t addr_sz, bool async);
extern int __test_listen_socket(int backlog, void *addr, size_t addr_sz);
static inline int test_listen_socket(const union tcp_addr taddr,
@@ -331,25 +331,26 @@ static inline int test_listen_socket(const union tcp_addr taddr,
* If set to 0 - kernel will try to retransmit SYN number of times, set in
* /proc/sys/net/ipv4/tcp_syn_retries
* By default set to 1 to make tests pass faster on non-busy machine.
+ * [in process of removal, don't use in new tests]
*/
#ifndef TEST_RETRANSMIT_SEC
#define TEST_RETRANSMIT_SEC 1
#endif
static inline int _test_connect_socket(int sk, const union tcp_addr taddr,
- unsigned int port, time_t timeout)
+ unsigned int port, bool async)
{
sockaddr_af addr;
tcp_addr_to_sockaddr_in(&addr, &taddr, htons(port));
return __test_connect_socket(sk, veth_name,
- (void *)&addr, sizeof(addr), timeout);
+ (void *)&addr, sizeof(addr), async);
}
static inline int test_connect_socket(int sk, const union tcp_addr taddr,
unsigned int port)
{
- return _test_connect_socket(sk, taddr, port, TEST_TIMEOUT_SEC);
+ return _test_connect_socket(sk, taddr, port, false);
}
extern int __test_set_md5(int sk, void *addr, size_t addr_sz,
@@ -483,10 +484,7 @@ static inline int test_set_ao_flags(int sk, bool ao_required, bool accept_icmps)
}
extern ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec);
-extern ssize_t test_client_loop(int sk, char *buf, size_t buf_sz,
- const size_t msg_len, time_t timeout_sec);
-extern int test_client_verify(int sk, const size_t msg_len, const size_t nr,
- time_t timeout_sec);
+extern int test_client_verify(int sk, const size_t msg_len, const size_t nr);
struct tcp_ao_key_counters {
uint8_t sndid;
@@ -512,7 +510,15 @@ struct tcp_ao_counters {
size_t nr_keys;
struct tcp_ao_key_counters *key_cnts;
};
-extern int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out);
+
+struct tcp_counters {
+ struct tcp_ao_counters ao;
+ uint64_t netns_md5_notfound;
+ uint64_t netns_md5_unexpected;
+ uint64_t netns_md5_failure;
+};
+
+extern int test_get_tcp_counters(int sk, struct tcp_counters *out);
#define TEST_CNT_KEY_GOOD BIT(0)
#define TEST_CNT_KEY_BAD BIT(1)
@@ -526,8 +532,31 @@ extern int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out);
#define TEST_CNT_NS_KEY_NOT_FOUND BIT(9)
#define TEST_CNT_NS_AO_REQUIRED BIT(10)
#define TEST_CNT_NS_DROPPED_ICMP BIT(11)
+#define TEST_CNT_NS_MD5_NOT_FOUND BIT(12)
+#define TEST_CNT_NS_MD5_UNEXPECTED BIT(13)
+#define TEST_CNT_NS_MD5_FAILURE BIT(14)
typedef uint16_t test_cnt;
+#define _for_each_counter(f) \
+do { \
+ /* per-netns */ \
+ f(ao.netns_ao_good, TEST_CNT_NS_GOOD); \
+ f(ao.netns_ao_bad, TEST_CNT_NS_BAD); \
+ f(ao.netns_ao_key_not_found, TEST_CNT_NS_KEY_NOT_FOUND); \
+ f(ao.netns_ao_required, TEST_CNT_NS_AO_REQUIRED); \
+ f(ao.netns_ao_dropped_icmp, TEST_CNT_NS_DROPPED_ICMP); \
+ /* per-socket */ \
+ f(ao.ao_info_pkt_good, TEST_CNT_SOCK_GOOD); \
+ f(ao.ao_info_pkt_bad, TEST_CNT_SOCK_BAD); \
+ f(ao.ao_info_pkt_key_not_found, TEST_CNT_SOCK_KEY_NOT_FOUND); \
+ f(ao.ao_info_pkt_ao_required, TEST_CNT_SOCK_AO_REQUIRED); \
+ f(ao.ao_info_pkt_dropped_icmp, TEST_CNT_SOCK_DROPPED_ICMP); \
+ /* non-AO */ \
+ f(netns_md5_notfound, TEST_CNT_NS_MD5_NOT_FOUND); \
+ f(netns_md5_unexpected, TEST_CNT_NS_MD5_UNEXPECTED); \
+ f(netns_md5_failure, TEST_CNT_NS_MD5_FAILURE); \
+} while (0)
+
#define TEST_CNT_AO_GOOD (TEST_CNT_SOCK_GOOD | TEST_CNT_NS_GOOD)
#define TEST_CNT_AO_BAD (TEST_CNT_SOCK_BAD | TEST_CNT_NS_BAD)
#define TEST_CNT_AO_KEY_NOT_FOUND (TEST_CNT_SOCK_KEY_NOT_FOUND | \
@@ -539,34 +568,71 @@ typedef uint16_t test_cnt;
#define TEST_CNT_GOOD (TEST_CNT_KEY_GOOD | TEST_CNT_AO_GOOD)
#define TEST_CNT_BAD (TEST_CNT_KEY_BAD | TEST_CNT_AO_BAD)
-extern int __test_tcp_ao_counters_cmp(const char *tst_name,
- struct tcp_ao_counters *before, struct tcp_ao_counters *after,
+extern test_cnt test_cmp_counters(struct tcp_counters *before,
+ struct tcp_counters *after);
+extern int test_assert_counters_sk(const char *tst_name,
+ struct tcp_counters *before, struct tcp_counters *after,
test_cnt expected);
-extern int test_tcp_ao_key_counters_cmp(const char *tst_name,
+extern int test_assert_counters_key(const char *tst_name,
struct tcp_ao_counters *before, struct tcp_ao_counters *after,
test_cnt expected, int sndid, int rcvid);
-extern void test_tcp_ao_counters_free(struct tcp_ao_counters *cnts);
+extern void test_tcp_counters_free(struct tcp_counters *cnts);
+
+/*
+ * Polling for netns and socket counters during select()/connect() and also
+ * client/server messaging. Instead of constant timeout on underlying select(),
+ * check the counters and return early. This allows to pass the tests where
+ * timeout is expected without waiting for that fixing timeout (tests speed-up).
+ * Previously shorter timeouts were used for tests expecting to time out,
+ * but that leaded to sporadic false positives on counter checks failures,
+ * as one second timeouts aren't enough for TCP retransmit.
+ *
+ * Two sides of the socketpair (client/server) should synchronize failures
+ * using a shared variable *err, so that they can detect the other side's
+ * failure.
+ */
+extern int test_skpair_wait_poll(int sk, bool write, test_cnt cond,
+ volatile int *err);
+extern int _test_skpair_connect_poll(int sk, const char *device,
+ void *addr, size_t addr_sz,
+ test_cnt cond, volatile int *err);
+static inline int test_skpair_connect_poll(int sk, const union tcp_addr taddr,
+ unsigned int port,
+ test_cnt cond, volatile int *err)
+{
+ sockaddr_af addr;
+
+ tcp_addr_to_sockaddr_in(&addr, &taddr, htons(port));
+ return _test_skpair_connect_poll(sk, veth_name,
+ (void *)&addr, sizeof(addr), cond, err);
+}
+
+extern int test_skpair_client(int sk, const size_t msg_len, const size_t nr,
+ test_cnt cond, volatile int *err);
+extern int test_skpair_server(int sk, ssize_t quota,
+ test_cnt cond, volatile int *err);
+
/*
- * Frees buffers allocated in test_get_tcp_ao_counters().
+ * Frees buffers allocated in test_get_tcp_counters().
* The function doesn't expect new keys or keys removed between calls
- * to test_get_tcp_ao_counters(). Check key counters manually if they
+ * to test_get_tcp_counters(). Check key counters manually if they
* may change.
*/
-static inline int test_tcp_ao_counters_cmp(const char *tst_name,
- struct tcp_ao_counters *before,
- struct tcp_ao_counters *after,
- test_cnt expected)
+static inline int test_assert_counters(const char *tst_name,
+ struct tcp_counters *before,
+ struct tcp_counters *after,
+ test_cnt expected)
{
int ret;
- ret = __test_tcp_ao_counters_cmp(tst_name, before, after, expected);
+ ret = test_assert_counters_sk(tst_name, before, after, expected);
if (ret)
goto out;
- ret = test_tcp_ao_key_counters_cmp(tst_name, before, after,
- expected, -1, -1);
+ ret = test_assert_counters_key(tst_name, &before->ao, &after->ao,
+ expected, -1, -1);
out:
- test_tcp_ao_counters_free(before);
- test_tcp_ao_counters_free(after);
+ test_tcp_counters_free(before);
+ test_tcp_counters_free(after);
return ret;
}
diff --git a/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c b/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c
index 24380c68fec6..27403f875054 100644
--- a/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c
+++ b/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c
@@ -427,11 +427,8 @@ static void dump_trace_event(struct expected_trace_point *e)
test_print("trace event filter %s [%s:%d => %s:%d, L3index %d, flags: %s%s%s%s%s, keyid: %d, rnext: %d, maclen: %d, sne: %d] = %zu",
trace_event_names[e->type],
src, e->src_port, dst, e->dst_port, e->L3index,
- (e->fin > 0) ? "F" : (e->fin == 0) ? "!F" : "",
- (e->syn > 0) ? "S" : (e->syn == 0) ? "!S" : "",
- (e->rst > 0) ? "R" : (e->rst == 0) ? "!R" : "",
- (e->psh > 0) ? "P" : (e->psh == 0) ? "!P" : "",
- (e->ack > 0) ? "." : (e->ack == 0) ? "!." : "",
+ e->fin ? "F" : "", e->syn ? "S" : "", e->rst ? "R" : "",
+ e->psh ? "P" : "", e->ack ? "." : "",
e->keyid, e->rnext, e->maclen, e->sne, e->matched);
}
diff --git a/tools/testing/selftests/net/tcp_ao/lib/sock.c b/tools/testing/selftests/net/tcp_ao/lib/sock.c
index 0ffda966c677..ef8e9031d47a 100644
--- a/tools/testing/selftests/net/tcp_ao/lib/sock.c
+++ b/tools/testing/selftests/net/tcp_ao/lib/sock.c
@@ -34,10 +34,8 @@ int __test_listen_socket(int backlog, void *addr, size_t addr_sz)
return sk;
}
-int test_wait_fd(int sk, time_t sec, bool write)
+static int __test_wait_fd(int sk, struct timeval *tv, bool write)
{
- struct timeval tv = { .tv_sec = sec };
- struct timeval *ptv = NULL;
fd_set fds, efds;
int ret;
socklen_t slen = sizeof(ret);
@@ -47,14 +45,11 @@ int test_wait_fd(int sk, time_t sec, bool write)
FD_ZERO(&efds);
FD_SET(sk, &efds);
- if (sec)
- ptv = &tv;
-
errno = 0;
if (write)
- ret = select(sk + 1, NULL, &fds, &efds, ptv);
+ ret = select(sk + 1, NULL, &fds, &efds, tv);
else
- ret = select(sk + 1, &fds, NULL, &efds, ptv);
+ ret = select(sk + 1, &fds, NULL, &efds, tv);
if (ret < 0)
return -errno;
if (ret == 0) {
@@ -69,8 +64,54 @@ int test_wait_fd(int sk, time_t sec, bool write)
return 0;
}
+int test_wait_fd(int sk, time_t sec, bool write)
+{
+ struct timeval tv = { .tv_sec = sec, };
+
+ return __test_wait_fd(sk, sec ? &tv : NULL, write);
+}
+
+static bool __skpair_poll_should_stop(int sk, struct tcp_counters *c,
+ test_cnt condition)
+{
+ struct tcp_counters c2;
+ test_cnt diff;
+
+ if (test_get_tcp_counters(sk, &c2))
+ test_error("test_get_tcp_counters()");
+
+ diff = test_cmp_counters(c, &c2);
+ test_tcp_counters_free(&c2);
+ return (diff & condition) == condition;
+}
+
+/* How often wake up and check netns counters & paired (*err) */
+#define POLL_USEC 150
+static int __test_skpair_poll(int sk, bool write, uint64_t timeout,
+ struct tcp_counters *c, test_cnt cond,
+ volatile int *err)
+{
+ uint64_t t;
+
+ for (t = 0; t <= timeout * 1000000; t += POLL_USEC) {
+ struct timeval tv = { .tv_usec = POLL_USEC, };
+ int ret;
+
+ ret = __test_wait_fd(sk, &tv, write);
+ if (ret != -ETIMEDOUT)
+ return ret;
+ if (c && cond && __skpair_poll_should_stop(sk, c, cond))
+ break;
+ if (err && *err)
+ return *err;
+ }
+ if (err)
+ *err = -ETIMEDOUT;
+ return -ETIMEDOUT;
+}
+
int __test_connect_socket(int sk, const char *device,
- void *addr, size_t addr_sz, time_t timeout)
+ void *addr, size_t addr_sz, bool async)
{
long flags;
int err;
@@ -82,15 +123,6 @@ int __test_connect_socket(int sk, const char *device,
test_error("setsockopt(SO_BINDTODEVICE, %s)", device);
}
- if (!timeout) {
- err = connect(sk, addr, addr_sz);
- if (err) {
- err = -errno;
- goto out;
- }
- return 0;
- }
-
flags = fcntl(sk, F_GETFL);
if ((flags < 0) || (fcntl(sk, F_SETFL, flags | O_NONBLOCK) < 0))
test_error("fcntl()");
@@ -100,9 +132,9 @@ int __test_connect_socket(int sk, const char *device,
err = -errno;
goto out;
}
- if (timeout < 0)
+ if (async)
return sk;
- err = test_wait_fd(sk, timeout, 1);
+ err = test_wait_fd(sk, TEST_TIMEOUT_SEC, 1);
if (err)
goto out;
}
@@ -113,6 +145,45 @@ out:
return err;
}
+int test_skpair_wait_poll(int sk, bool write,
+ test_cnt cond, volatile int *err)
+{
+ struct tcp_counters c;
+ int ret;
+
+ *err = 0;
+ if (test_get_tcp_counters(sk, &c))
+ test_error("test_get_tcp_counters()");
+ synchronize_threads(); /* 1: init skpair & read nscounters */
+
+ ret = __test_skpair_poll(sk, write, TEST_TIMEOUT_SEC, &c, cond, err);
+ test_tcp_counters_free(&c);
+ return ret;
+}
+
+int _test_skpair_connect_poll(int sk, const char *device,
+ void *addr, size_t addr_sz,
+ test_cnt condition, volatile int *err)
+{
+ struct tcp_counters c;
+ int ret;
+
+ *err = 0;
+ if (test_get_tcp_counters(sk, &c))
+ test_error("test_get_tcp_counters()");
+ synchronize_threads(); /* 1: init skpair & read nscounters */
+ ret = __test_connect_socket(sk, device, addr, addr_sz, true);
+ if (ret < 0) {
+ test_tcp_counters_free(&c);
+ return (*err = ret);
+ }
+ ret = __test_skpair_poll(sk, 1, TEST_TIMEOUT_SEC, &c, condition, err);
+ if (ret < 0)
+ close(sk);
+ test_tcp_counters_free(&c);
+ return ret;
+}
+
int __test_set_md5(int sk, void *addr, size_t addr_sz, uint8_t prefix,
int vrf, const char *password)
{
@@ -333,12 +404,12 @@ do { \
return 0;
}
-int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out)
+int test_get_tcp_counters(int sk, struct tcp_counters *out)
{
struct tcp_ao_getsockopt *key_dump;
socklen_t key_dump_sz = sizeof(*key_dump);
struct tcp_ao_info_opt info = {};
- bool c1, c2, c3, c4, c5;
+ bool c1, c2, c3, c4, c5, c6, c7, c8;
struct netstat *ns;
int err, nr_keys;
@@ -346,25 +417,30 @@ int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out)
/* per-netns */
ns = netstat_read();
- out->netns_ao_good = netstat_get(ns, "TCPAOGood", &c1);
- out->netns_ao_bad = netstat_get(ns, "TCPAOBad", &c2);
- out->netns_ao_key_not_found = netstat_get(ns, "TCPAOKeyNotFound", &c3);
- out->netns_ao_required = netstat_get(ns, "TCPAORequired", &c4);
- out->netns_ao_dropped_icmp = netstat_get(ns, "TCPAODroppedIcmps", &c5);
+ out->ao.netns_ao_good = netstat_get(ns, "TCPAOGood", &c1);
+ out->ao.netns_ao_bad = netstat_get(ns, "TCPAOBad", &c2);
+ out->ao.netns_ao_key_not_found = netstat_get(ns, "TCPAOKeyNotFound", &c3);
+ out->ao.netns_ao_required = netstat_get(ns, "TCPAORequired", &c4);
+ out->ao.netns_ao_dropped_icmp = netstat_get(ns, "TCPAODroppedIcmps", &c5);
+ out->netns_md5_notfound = netstat_get(ns, "TCPMD5NotFound", &c6);
+ out->netns_md5_unexpected = netstat_get(ns, "TCPMD5Unexpected", &c7);
+ out->netns_md5_failure = netstat_get(ns, "TCPMD5Failure", &c8);
netstat_free(ns);
- if (c1 || c2 || c3 || c4 || c5)
+ if (c1 || c2 || c3 || c4 || c5 || c6 || c7 || c8)
return -EOPNOTSUPP;
err = test_get_ao_info(sk, &info);
+ if (err == -ENOENT)
+ return 0;
if (err)
return err;
/* per-socket */
- out->ao_info_pkt_good = info.pkt_good;
- out->ao_info_pkt_bad = info.pkt_bad;
- out->ao_info_pkt_key_not_found = info.pkt_key_not_found;
- out->ao_info_pkt_ao_required = info.pkt_ao_required;
- out->ao_info_pkt_dropped_icmp = info.pkt_dropped_icmp;
+ out->ao.ao_info_pkt_good = info.pkt_good;
+ out->ao.ao_info_pkt_bad = info.pkt_bad;
+ out->ao.ao_info_pkt_key_not_found = info.pkt_key_not_found;
+ out->ao.ao_info_pkt_ao_required = info.pkt_ao_required;
+ out->ao.ao_info_pkt_dropped_icmp = info.pkt_dropped_icmp;
/* per-key */
nr_keys = test_get_ao_keys_nr(sk);
@@ -372,7 +448,7 @@ int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out)
return nr_keys;
if (nr_keys == 0)
test_error("test_get_ao_keys_nr() == 0");
- out->nr_keys = (size_t)nr_keys;
+ out->ao.nr_keys = (size_t)nr_keys;
key_dump = calloc(nr_keys, key_dump_sz);
if (!key_dump)
return -errno;
@@ -386,72 +462,84 @@ int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out)
return -errno;
}
- out->key_cnts = calloc(nr_keys, sizeof(out->key_cnts[0]));
- if (!out->key_cnts) {
+ out->ao.key_cnts = calloc(nr_keys, sizeof(out->ao.key_cnts[0]));
+ if (!out->ao.key_cnts) {
free(key_dump);
return -errno;
}
while (nr_keys--) {
- out->key_cnts[nr_keys].sndid = key_dump[nr_keys].sndid;
- out->key_cnts[nr_keys].rcvid = key_dump[nr_keys].rcvid;
- out->key_cnts[nr_keys].pkt_good = key_dump[nr_keys].pkt_good;
- out->key_cnts[nr_keys].pkt_bad = key_dump[nr_keys].pkt_bad;
+ out->ao.key_cnts[nr_keys].sndid = key_dump[nr_keys].sndid;
+ out->ao.key_cnts[nr_keys].rcvid = key_dump[nr_keys].rcvid;
+ out->ao.key_cnts[nr_keys].pkt_good = key_dump[nr_keys].pkt_good;
+ out->ao.key_cnts[nr_keys].pkt_bad = key_dump[nr_keys].pkt_bad;
}
free(key_dump);
return 0;
}
-int __test_tcp_ao_counters_cmp(const char *tst_name,
- struct tcp_ao_counters *before,
- struct tcp_ao_counters *after,
- test_cnt expected)
+test_cnt test_cmp_counters(struct tcp_counters *before,
+ struct tcp_counters *after)
+{
+#define __cmp(cnt, e_cnt) \
+do { \
+ if (before->cnt > after->cnt) \
+ test_error("counter " __stringify(cnt) " decreased"); \
+ if (before->cnt != after->cnt) \
+ ret |= e_cnt; \
+} while (0)
+
+ test_cnt ret = 0;
+ size_t i;
+
+ if (before->ao.nr_keys != after->ao.nr_keys)
+ test_error("the number of keys has changed");
+
+ _for_each_counter(__cmp);
+
+ i = before->ao.nr_keys;
+ while (i--) {
+ __cmp(ao.key_cnts[i].pkt_good, TEST_CNT_KEY_GOOD);
+ __cmp(ao.key_cnts[i].pkt_bad, TEST_CNT_KEY_BAD);
+ }
+#undef __cmp
+ return ret;
+}
+
+int test_assert_counters_sk(const char *tst_name,
+ struct tcp_counters *before,
+ struct tcp_counters *after,
+ test_cnt expected)
{
-#define __cmp_ao(cnt, expecting_inc) \
+#define __cmp_ao(cnt, e_cnt) \
do { \
if (before->cnt > after->cnt) { \
test_fail("%s: Decreased counter " __stringify(cnt) " %" PRIu64 " > %" PRIu64, \
- tst_name ?: "", before->cnt, after->cnt); \
+ tst_name ?: "", before->cnt, after->cnt); \
return -1; \
} \
- if ((before->cnt != after->cnt) != (expecting_inc)) { \
+ if ((before->cnt != after->cnt) != !!(expected & e_cnt)) { \
test_fail("%s: Counter " __stringify(cnt) " was %sexpected to increase %" PRIu64 " => %" PRIu64, \
- tst_name ?: "", (expecting_inc) ? "" : "not ", \
+ tst_name ?: "", (expected & e_cnt) ? "" : "not ", \
before->cnt, after->cnt); \
return -1; \
} \
-} while(0)
+} while (0)
errno = 0;
- /* per-netns */
- __cmp_ao(netns_ao_good, !!(expected & TEST_CNT_NS_GOOD));
- __cmp_ao(netns_ao_bad, !!(expected & TEST_CNT_NS_BAD));
- __cmp_ao(netns_ao_key_not_found,
- !!(expected & TEST_CNT_NS_KEY_NOT_FOUND));
- __cmp_ao(netns_ao_required, !!(expected & TEST_CNT_NS_AO_REQUIRED));
- __cmp_ao(netns_ao_dropped_icmp,
- !!(expected & TEST_CNT_NS_DROPPED_ICMP));
- /* per-socket */
- __cmp_ao(ao_info_pkt_good, !!(expected & TEST_CNT_SOCK_GOOD));
- __cmp_ao(ao_info_pkt_bad, !!(expected & TEST_CNT_SOCK_BAD));
- __cmp_ao(ao_info_pkt_key_not_found,
- !!(expected & TEST_CNT_SOCK_KEY_NOT_FOUND));
- __cmp_ao(ao_info_pkt_ao_required, !!(expected & TEST_CNT_SOCK_AO_REQUIRED));
- __cmp_ao(ao_info_pkt_dropped_icmp,
- !!(expected & TEST_CNT_SOCK_DROPPED_ICMP));
+ _for_each_counter(__cmp_ao);
return 0;
#undef __cmp_ao
}
-int test_tcp_ao_key_counters_cmp(const char *tst_name,
- struct tcp_ao_counters *before,
- struct tcp_ao_counters *after,
- test_cnt expected,
- int sndid, int rcvid)
+int test_assert_counters_key(const char *tst_name,
+ struct tcp_ao_counters *before,
+ struct tcp_ao_counters *after,
+ test_cnt expected, int sndid, int rcvid)
{
size_t i;
-#define __cmp_ao(i, cnt, expecting_inc) \
+#define __cmp_ao(i, cnt, e_cnt) \
do { \
if (before->key_cnts[i].cnt > after->key_cnts[i].cnt) { \
test_fail("%s: Decreased counter " __stringify(cnt) " %" PRIu64 " > %" PRIu64 " for key %u:%u", \
@@ -461,16 +549,16 @@ do { \
before->key_cnts[i].rcvid); \
return -1; \
} \
- if ((before->key_cnts[i].cnt != after->key_cnts[i].cnt) != (expecting_inc)) { \
+ if ((before->key_cnts[i].cnt != after->key_cnts[i].cnt) != !!(expected & e_cnt)) { \
test_fail("%s: Counter " __stringify(cnt) " was %sexpected to increase %" PRIu64 " => %" PRIu64 " for key %u:%u", \
- tst_name ?: "", (expecting_inc) ? "" : "not ",\
+ tst_name ?: "", (expected & e_cnt) ? "" : "not ",\
before->key_cnts[i].cnt, \
after->key_cnts[i].cnt, \
before->key_cnts[i].sndid, \
before->key_cnts[i].rcvid); \
return -1; \
} \
-} while(0)
+} while (0)
if (before->nr_keys != after->nr_keys) {
test_fail("%s: Keys changed on the socket %zu != %zu",
@@ -485,20 +573,22 @@ do { \
continue;
if (rcvid >= 0 && before->key_cnts[i].rcvid != rcvid)
continue;
- __cmp_ao(i, pkt_good, !!(expected & TEST_CNT_KEY_GOOD));
- __cmp_ao(i, pkt_bad, !!(expected & TEST_CNT_KEY_BAD));
+ __cmp_ao(i, pkt_good, TEST_CNT_KEY_GOOD);
+ __cmp_ao(i, pkt_bad, TEST_CNT_KEY_BAD);
}
return 0;
#undef __cmp_ao
}
-void test_tcp_ao_counters_free(struct tcp_ao_counters *cnts)
+void test_tcp_counters_free(struct tcp_counters *cnts)
{
- free(cnts->key_cnts);
+ free(cnts->ao.key_cnts);
}
#define TEST_BUF_SIZE 4096
-ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec)
+static ssize_t _test_server_run(int sk, ssize_t quota, struct tcp_counters *c,
+ test_cnt cond, volatile int *err,
+ time_t timeout_sec)
{
ssize_t total = 0;
@@ -507,7 +597,7 @@ ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec)
ssize_t bytes, sent;
int ret;
- ret = test_wait_fd(sk, timeout_sec, 0);
+ ret = __test_skpair_poll(sk, 0, timeout_sec, c, cond, err);
if (ret)
return ret;
@@ -518,7 +608,7 @@ ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec)
if (bytes == 0)
break;
- ret = test_wait_fd(sk, timeout_sec, 1);
+ ret = __test_skpair_poll(sk, 1, timeout_sec, c, cond, err);
if (ret)
return ret;
@@ -533,13 +623,41 @@ ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec)
return total;
}
-ssize_t test_client_loop(int sk, char *buf, size_t buf_sz,
- const size_t msg_len, time_t timeout_sec)
+ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec)
+{
+ return _test_server_run(sk, quota, NULL, 0, NULL,
+ timeout_sec ?: TEST_TIMEOUT_SEC);
+}
+
+int test_skpair_server(int sk, ssize_t quota, test_cnt cond, volatile int *err)
+{
+ struct tcp_counters c;
+ ssize_t ret;
+
+ *err = 0;
+ if (test_get_tcp_counters(sk, &c))
+ test_error("test_get_tcp_counters()");
+ synchronize_threads(); /* 1: init skpair & read nscounters */
+
+ ret = _test_server_run(sk, quota, &c, cond, err, TEST_TIMEOUT_SEC);
+ test_tcp_counters_free(&c);
+ return ret;
+}
+
+static ssize_t test_client_loop(int sk, size_t buf_sz, const size_t msg_len,
+ struct tcp_counters *c, test_cnt cond,
+ volatile int *err)
{
char msg[msg_len];
int nodelay = 1;
+ char *buf;
size_t i;
+ buf = alloca(buf_sz);
+ if (!buf)
+ return -ENOMEM;
+ randomize_buffer(buf, buf_sz);
+
if (setsockopt(sk, IPPROTO_TCP, TCP_NODELAY, &nodelay, sizeof(nodelay)))
test_error("setsockopt(TCP_NODELAY)");
@@ -547,7 +665,7 @@ ssize_t test_client_loop(int sk, char *buf, size_t buf_sz,
size_t sent, bytes = min(msg_len, buf_sz - i);
int ret;
- ret = test_wait_fd(sk, timeout_sec, 1);
+ ret = __test_skpair_poll(sk, 1, TEST_TIMEOUT_SEC, c, cond, err);
if (ret)
return ret;
@@ -561,7 +679,8 @@ ssize_t test_client_loop(int sk, char *buf, size_t buf_sz,
do {
ssize_t got;
- ret = test_wait_fd(sk, timeout_sec, 0);
+ ret = __test_skpair_poll(sk, 0, TEST_TIMEOUT_SEC,
+ c, cond, err);
if (ret)
return ret;
@@ -580,15 +699,31 @@ ssize_t test_client_loop(int sk, char *buf, size_t buf_sz,
return i;
}
-int test_client_verify(int sk, const size_t msg_len, const size_t nr,
- time_t timeout_sec)
+int test_client_verify(int sk, const size_t msg_len, const size_t nr)
{
size_t buf_sz = msg_len * nr;
- char *buf = alloca(buf_sz);
ssize_t ret;
- randomize_buffer(buf, buf_sz);
- ret = test_client_loop(sk, buf, buf_sz, msg_len, timeout_sec);
+ ret = test_client_loop(sk, buf_sz, msg_len, NULL, 0, NULL);
+ if (ret < 0)
+ return (int)ret;
+ return ret != buf_sz ? -1 : 0;
+}
+
+int test_skpair_client(int sk, const size_t msg_len, const size_t nr,
+ test_cnt cond, volatile int *err)
+{
+ struct tcp_counters c;
+ size_t buf_sz = msg_len * nr;
+ ssize_t ret;
+
+ *err = 0;
+ if (test_get_tcp_counters(sk, &c))
+ test_error("test_get_tcp_counters()");
+ synchronize_threads(); /* 1: init skpair & read nscounters */
+
+ ret = test_client_loop(sk, buf_sz, msg_len, &c, cond, err);
+ test_tcp_counters_free(&c);
if (ret < 0)
return (int)ret;
return ret != buf_sz ? -1 : 0;
diff --git a/tools/testing/selftests/net/tcp_ao/restore.c b/tools/testing/selftests/net/tcp_ao/restore.c
index ecc6f1e3a414..9a059b6c4523 100644
--- a/tools/testing/selftests/net/tcp_ao/restore.c
+++ b/tools/testing/selftests/net/tcp_ao/restore.c
@@ -16,11 +16,11 @@ const size_t quota = nr_packets * msg_len;
static void try_server_run(const char *tst_name, unsigned int port,
fault_t inj, test_cnt cnt_expected)
{
+ test_cnt poll_cnt = (cnt_expected == TEST_CNT_GOOD) ? 0 : cnt_expected;
const char *cnt_name = "TCPAOGood";
- struct tcp_ao_counters ao1, ao2;
+ struct tcp_counters cnt1, cnt2;
uint64_t before_cnt, after_cnt;
- int sk, lsk;
- time_t timeout;
+ int sk, lsk, dummy;
ssize_t bytes;
if (fault(TIMEOUT))
@@ -48,11 +48,10 @@ static void try_server_run(const char *tst_name, unsigned int port,
}
before_cnt = netstat_get_one(cnt_name, NULL);
- if (test_get_tcp_ao_counters(sk, &ao1))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &cnt1))
+ test_error("test_get_tcp_counters()");
- timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
- bytes = test_server_run(sk, quota, timeout);
+ bytes = test_skpair_server(sk, quota, poll_cnt, &dummy);
if (fault(TIMEOUT)) {
if (bytes > 0)
test_fail("%s: server served: %zd", tst_name, bytes);
@@ -65,17 +64,17 @@ static void try_server_run(const char *tst_name, unsigned int port,
test_ok("%s: server alive", tst_name);
}
synchronize_threads(); /* 3: counters checks */
- if (test_get_tcp_ao_counters(sk, &ao2))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &cnt2))
+ test_error("test_get_tcp_counters()");
after_cnt = netstat_get_one(cnt_name, NULL);
- test_tcp_ao_counters_cmp(tst_name, &ao1, &ao2, cnt_expected);
+ test_assert_counters(tst_name, &cnt1, &cnt2, cnt_expected);
if (after_cnt <= before_cnt) {
- test_fail("%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64,
- tst_name, cnt_name, after_cnt, before_cnt);
+ test_fail("%s(server): %s counter did not increase: %" PRIu64 " <= %" PRIu64,
+ tst_name, cnt_name, after_cnt, before_cnt);
} else {
- test_ok("%s: counter %s increased %" PRIu64 " => %" PRIu64,
+ test_ok("%s(server): counter %s increased %" PRIu64 " => %" PRIu64,
tst_name, cnt_name, before_cnt, after_cnt);
}
@@ -92,16 +91,16 @@ static void *server_fn(void *arg)
{
unsigned int port = test_server_port;
- try_server_run("TCP-AO migrate to another socket", port++,
+ try_server_run("TCP-AO migrate to another socket (server)", port++,
0, TEST_CNT_GOOD);
- try_server_run("TCP-AO with wrong send ISN", port++,
+ try_server_run("TCP-AO with wrong send ISN (server)", port++,
FAULT_TIMEOUT, TEST_CNT_BAD);
- try_server_run("TCP-AO with wrong receive ISN", port++,
+ try_server_run("TCP-AO with wrong receive ISN (server)", port++,
FAULT_TIMEOUT, TEST_CNT_BAD);
- try_server_run("TCP-AO with wrong send SEQ ext number", port++,
+ try_server_run("TCP-AO with wrong send SEQ ext number (server)", port++,
FAULT_TIMEOUT, TEST_CNT_BAD);
- try_server_run("TCP-AO with wrong receive SEQ ext number", port++,
- FAULT_TIMEOUT, TEST_CNT_NS_BAD | TEST_CNT_GOOD);
+ try_server_run("TCP-AO with wrong receive SEQ ext number (server)",
+ port++, FAULT_TIMEOUT, TEST_CNT_NS_BAD | TEST_CNT_GOOD);
synchronize_threads(); /* don't race to exit: client exits */
return NULL;
@@ -125,7 +124,7 @@ static void test_get_sk_checkpoint(unsigned int server_port, sockaddr_af *saddr,
test_error("failed to connect()");
synchronize_threads(); /* 2: accepted => send data */
- if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC))
+ if (test_client_verify(sk, msg_len, nr_packets))
test_fail("pre-migrate verify failed");
test_enable_repair(sk);
@@ -139,11 +138,11 @@ static void test_sk_restore(const char *tst_name, unsigned int server_port,
struct tcp_ao_repair *ao_img,
fault_t inj, test_cnt cnt_expected)
{
+ test_cnt poll_cnt = (cnt_expected == TEST_CNT_GOOD) ? 0 : cnt_expected;
const char *cnt_name = "TCPAOGood";
- struct tcp_ao_counters ao1, ao2;
+ struct tcp_counters cnt1, cnt2;
uint64_t before_cnt, after_cnt;
- time_t timeout;
- int sk;
+ int sk, dummy;
if (fault(TIMEOUT))
cnt_name = "TCPAOBad";
@@ -159,30 +158,30 @@ static void test_sk_restore(const char *tst_name, unsigned int server_port,
test_error("setsockopt(TCP_AO_ADD_KEY)");
test_ao_restore(sk, ao_img);
- if (test_get_tcp_ao_counters(sk, &ao1))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &cnt1))
+ test_error("test_get_tcp_counters()");
test_disable_repair(sk);
test_sock_state_free(img);
- timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
- if (test_client_verify(sk, msg_len, nr_packets, timeout)) {
+ if (test_skpair_client(sk, msg_len, nr_packets, poll_cnt, &dummy)) {
if (fault(TIMEOUT))
test_ok("%s: post-migrate connection is broken", tst_name);
else
test_fail("%s: post-migrate connection is working", tst_name);
} else {
if (fault(TIMEOUT))
- test_fail("%s: post-migrate connection still working", tst_name);
+ test_fail("%s: post-migrate connection is working", tst_name);
else
test_ok("%s: post-migrate connection is alive", tst_name);
}
+
synchronize_threads(); /* 3: counters checks */
- if (test_get_tcp_ao_counters(sk, &ao2))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &cnt2))
+ test_error("test_get_tcp_counters()");
after_cnt = netstat_get_one(cnt_name, NULL);
- test_tcp_ao_counters_cmp(tst_name, &ao1, &ao2, cnt_expected);
+ test_assert_counters(tst_name, &cnt1, &cnt2, cnt_expected);
if (after_cnt <= before_cnt) {
test_fail("%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64,
@@ -203,7 +202,7 @@ static void *client_fn(void *arg)
sockaddr_af saddr;
test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
- test_sk_restore("TCP-AO migrate to another socket", port++,
+ test_sk_restore("TCP-AO migrate to another socket (client)", port++,
&saddr, &tcp_img, &ao_img, 0, TEST_CNT_GOOD);
test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
@@ -212,7 +211,7 @@ static void *client_fn(void *arg)
-1, port, 0, -1, -1, -1, -1, -1, 100, 100, -1);
trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_dest, this_ip_addr,
port, -1, 0, -1, -1, -1, -1, -1, 100, 100, -1);
- test_sk_restore("TCP-AO with wrong send ISN", port++,
+ test_sk_restore("TCP-AO with wrong send ISN (client)", port++,
&saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_BAD);
test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
@@ -221,7 +220,7 @@ static void *client_fn(void *arg)
-1, port, 0, -1, -1, -1, -1, -1, 100, 100, -1);
trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_dest, this_ip_addr,
port, -1, 0, -1, -1, -1, -1, -1, 100, 100, -1);
- test_sk_restore("TCP-AO with wrong receive ISN", port++,
+ test_sk_restore("TCP-AO with wrong receive ISN (client)", port++,
&saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_BAD);
test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
@@ -229,8 +228,8 @@ static void *client_fn(void *arg)
trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_addr, this_ip_dest,
-1, port, 0, -1, -1, -1, -1, -1, 100, 100, -1);
/* not expecting server => client mismatches as only snd sne is broken */
- test_sk_restore("TCP-AO with wrong send SEQ ext number", port++,
- &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT,
+ test_sk_restore("TCP-AO with wrong send SEQ ext number (client)",
+ port++, &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT,
TEST_CNT_NS_BAD | TEST_CNT_GOOD);
test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
@@ -238,8 +237,8 @@ static void *client_fn(void *arg)
/* not expecting client => server mismatches as only rcv sne is broken */
trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_dest, this_ip_addr,
port, -1, 0, -1, -1, -1, -1, -1, 100, 100, -1);
- test_sk_restore("TCP-AO with wrong receive SEQ ext number", port++,
- &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT,
+ test_sk_restore("TCP-AO with wrong receive SEQ ext number (client)",
+ port++, &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT,
TEST_CNT_NS_GOOD | TEST_CNT_BAD);
return NULL;
diff --git a/tools/testing/selftests/net/tcp_ao/rst.c b/tools/testing/selftests/net/tcp_ao/rst.c
index 6364facaa63e..883cddf377cf 100644
--- a/tools/testing/selftests/net/tcp_ao/rst.c
+++ b/tools/testing/selftests/net/tcp_ao/rst.c
@@ -84,15 +84,15 @@ static void close_forced(int sk)
static void test_server_active_rst(unsigned int port)
{
- struct tcp_ao_counters cnt1, cnt2;
+ struct tcp_counters cnt1, cnt2;
ssize_t bytes;
int sk, lsk;
lsk = test_listen_socket(this_ip_addr, port, backlog);
if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100))
test_error("setsockopt(TCP_AO_ADD_KEY)");
- if (test_get_tcp_ao_counters(lsk, &cnt1))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(lsk, &cnt1))
+ test_error("test_get_tcp_counters()");
synchronize_threads(); /* 1: MKT added */
if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0))
@@ -103,8 +103,8 @@ static void test_server_active_rst(unsigned int port)
test_error("accept()");
synchronize_threads(); /* 2: connection accept()ed, another queued */
- if (test_get_tcp_ao_counters(lsk, &cnt2))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(lsk, &cnt2))
+ test_error("test_get_tcp_counters()");
synchronize_threads(); /* 3: close listen socket */
close(lsk);
@@ -120,7 +120,7 @@ static void test_server_active_rst(unsigned int port)
synchronize_threads(); /* 5: closed active sk */
synchronize_threads(); /* 6: counters checks */
- if (test_tcp_ao_counters_cmp("active RST server", &cnt1, &cnt2, TEST_CNT_GOOD))
+ if (test_assert_counters("active RST server", &cnt1, &cnt2, TEST_CNT_GOOD))
test_fail("MKT counters (server) have not only good packets");
else
test_ok("MKT counters are good on server");
@@ -128,7 +128,7 @@ static void test_server_active_rst(unsigned int port)
static void test_server_passive_rst(unsigned int port)
{
- struct tcp_ao_counters ao1, ao2;
+ struct tcp_counters cnt1, cnt2;
int sk, lsk;
ssize_t bytes;
@@ -147,8 +147,8 @@ static void test_server_passive_rst(unsigned int port)
synchronize_threads(); /* 2: accepted => send data */
close(lsk);
- if (test_get_tcp_ao_counters(sk, &ao1))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &cnt1))
+ test_error("test_get_tcp_counters()");
bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC);
if (bytes != quota) {
@@ -160,12 +160,12 @@ static void test_server_passive_rst(unsigned int port)
synchronize_threads(); /* 3: checkpoint the client */
synchronize_threads(); /* 4: close the server, creating twsk */
- if (test_get_tcp_ao_counters(sk, &ao2))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &cnt2))
+ test_error("test_get_tcp_counters()");
close(sk);
synchronize_threads(); /* 5: restore the socket, send more data */
- test_tcp_ao_counters_cmp("passive RST server", &ao1, &ao2, TEST_CNT_GOOD);
+ test_assert_counters("passive RST server", &cnt1, &cnt2, TEST_CNT_GOOD);
synchronize_threads(); /* 6: server exits */
}
@@ -271,8 +271,7 @@ static void test_client_active_rst(unsigned int port)
synchronize_threads(); /* 1: MKT added */
for (i = 0; i < last; i++) {
- err = _test_connect_socket(sk[i], this_ip_dest, port,
- (i == 0) ? TEST_TIMEOUT_SEC : -1);
+ err = _test_connect_socket(sk[i], this_ip_dest, port, i != 0);
if (err < 0)
test_error("failed to connect()");
}
@@ -283,12 +282,12 @@ static void test_client_active_rst(unsigned int port)
test_error("test_wait_fds(): %d", err);
/* async connect() with third sk to get into request_sock_queue */
- err = _test_connect_socket(sk[last], this_ip_dest, port, -1);
+ err = _test_connect_socket(sk[last], this_ip_dest, port, 1);
if (err < 0)
test_error("failed to connect()");
synchronize_threads(); /* 3: close listen socket */
- if (test_client_verify(sk[0], packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC))
+ if (test_client_verify(sk[0], packet_sz, quota / packet_sz))
test_fail("Failed to send data on connected socket");
else
test_ok("Verified established tcp connection");
@@ -323,7 +322,7 @@ static void test_client_active_rst(unsigned int port)
static void test_client_passive_rst(unsigned int port)
{
- struct tcp_ao_counters ao1, ao2;
+ struct tcp_counters cnt1, cnt2;
struct tcp_ao_repair ao_img;
struct tcp_sock_state img;
sockaddr_af saddr;
@@ -341,7 +340,7 @@ static void test_client_passive_rst(unsigned int port)
test_error("failed to connect()");
synchronize_threads(); /* 2: accepted => send data */
- if (test_client_verify(sk, packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC))
+ if (test_client_verify(sk, packet_sz, quota / packet_sz))
test_fail("Failed to send data on connected socket");
else
test_ok("Verified established tcp connection");
@@ -397,8 +396,8 @@ static void test_client_passive_rst(unsigned int port)
test_error("setsockopt(TCP_AO_ADD_KEY)");
test_ao_restore(sk, &ao_img);
- if (test_get_tcp_ao_counters(sk, &ao1))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &cnt1))
+ test_error("test_get_tcp_counters()");
test_disable_repair(sk);
test_sock_state_free(&img);
@@ -417,7 +416,7 @@ static void test_client_passive_rst(unsigned int port)
* IP 10.0.254.1.7011 > 10.0.1.1.59772: Flags [R], seq 3215596252, win 0,
* options [tcp-ao keyid 100 rnextkeyid 100 mac 0x0bcfbbf497bce844312304b2], length 0
*/
- err = test_client_verify(sk, packet_sz, quota / packet_sz, 2 * TEST_TIMEOUT_SEC);
+ err = test_client_verify(sk, packet_sz, quota / packet_sz);
/* Make sure that the connection was reset, not timeouted */
if (err && err == -ECONNRESET)
test_ok("client sock was passively reset post-seq-adjust");
@@ -426,12 +425,12 @@ static void test_client_passive_rst(unsigned int port)
else
test_fail("client sock is yet connected post-seq-adjust");
- if (test_get_tcp_ao_counters(sk, &ao2))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &cnt2))
+ test_error("test_get_tcp_counters()");
synchronize_threads(); /* 6: server exits */
close(sk);
- test_tcp_ao_counters_cmp("client passive RST", &ao1, &ao2, TEST_CNT_GOOD);
+ test_assert_counters("client passive RST", &cnt1, &cnt2, TEST_CNT_GOOD);
}
static void *client_fn(void *arg)
diff --git a/tools/testing/selftests/net/tcp_ao/self-connect.c b/tools/testing/selftests/net/tcp_ao/self-connect.c
index 3ecd2b58de6a..73b2f2276f3f 100644
--- a/tools/testing/selftests/net/tcp_ao/self-connect.c
+++ b/tools/testing/selftests/net/tcp_ao/self-connect.c
@@ -30,7 +30,7 @@ static void setup_lo_intf(const char *lo_intf)
static void tcp_self_connect(const char *tst, unsigned int port,
bool different_keyids, bool check_restore)
{
- struct tcp_ao_counters before_ao, after_ao;
+ struct tcp_counters before, after;
uint64_t before_aogood, after_aogood;
struct netstat *ns_before, *ns_after;
const size_t nr_packets = 20;
@@ -60,17 +60,17 @@ static void tcp_self_connect(const char *tst, unsigned int port,
ns_before = netstat_read();
before_aogood = netstat_get(ns_before, "TCPAOGood", NULL);
- if (test_get_tcp_ao_counters(sk, &before_ao))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &before))
+ test_error("test_get_tcp_counters()");
if (__test_connect_socket(sk, "lo", (struct sockaddr *)&addr,
- sizeof(addr), TEST_TIMEOUT_SEC) < 0) {
+ sizeof(addr), 0) < 0) {
ns_after = netstat_read();
netstat_print_diff(ns_before, ns_after);
test_error("failed to connect()");
}
- if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) {
+ if (test_client_verify(sk, 100, nr_packets)) {
test_fail("%s: tcp connection verify failed", tst);
close(sk);
return;
@@ -78,8 +78,8 @@ static void tcp_self_connect(const char *tst, unsigned int port,
ns_after = netstat_read();
after_aogood = netstat_get(ns_after, "TCPAOGood", NULL);
- if (test_get_tcp_ao_counters(sk, &after_ao))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &after))
+ test_error("test_get_tcp_counters()");
if (!check_restore) {
/* to debug: netstat_print_diff(ns_before, ns_after); */
netstat_free(ns_before);
@@ -93,7 +93,7 @@ static void tcp_self_connect(const char *tst, unsigned int port,
return;
}
- if (test_tcp_ao_counters_cmp(tst, &before_ao, &after_ao, TEST_CNT_GOOD)) {
+ if (test_assert_counters(tst, &before, &after, TEST_CNT_GOOD)) {
close(sk);
return;
}
@@ -136,7 +136,7 @@ static void tcp_self_connect(const char *tst, unsigned int port,
test_ao_restore(sk, &ao_img);
test_disable_repair(sk);
test_sock_state_free(&img);
- if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) {
+ if (test_client_verify(sk, 100, nr_packets)) {
test_fail("%s: tcp connection verify failed", tst);
close(sk);
return;
diff --git a/tools/testing/selftests/net/tcp_ao/seq-ext.c b/tools/testing/selftests/net/tcp_ao/seq-ext.c
index 8901a6785dc8..f00245263b20 100644
--- a/tools/testing/selftests/net/tcp_ao/seq-ext.c
+++ b/tools/testing/selftests/net/tcp_ao/seq-ext.c
@@ -40,7 +40,7 @@ static void test_adjust_seqs(struct tcp_sock_state *img,
static int test_sk_restore(struct tcp_sock_state *img,
struct tcp_ao_repair *ao_img, sockaddr_af *saddr,
const union tcp_addr daddr, unsigned int dport,
- struct tcp_ao_counters *cnt)
+ struct tcp_counters *cnt)
{
int sk;
@@ -54,8 +54,8 @@ static int test_sk_restore(struct tcp_sock_state *img,
test_error("setsockopt(TCP_AO_ADD_KEY)");
test_ao_restore(sk, ao_img);
- if (test_get_tcp_ao_counters(sk, cnt))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, cnt))
+ test_error("test_get_tcp_counters()");
test_disable_repair(sk);
test_sock_state_free(img);
@@ -65,7 +65,7 @@ static int test_sk_restore(struct tcp_sock_state *img,
static void *server_fn(void *arg)
{
uint64_t before_good, after_good, after_bad;
- struct tcp_ao_counters ao1, ao2;
+ struct tcp_counters cnt1, cnt2;
struct tcp_sock_state img;
struct tcp_ao_repair ao_img;
sockaddr_af saddr;
@@ -114,7 +114,7 @@ static void *server_fn(void *arg)
test_adjust_seqs(&img, &ao_img, true);
synchronize_threads(); /* 4: dump finished */
sk = test_sk_restore(&img, &ao_img, &saddr, this_ip_dest,
- client_new_port, &ao1);
+ client_new_port, &cnt1);
trace_ao_event_sne_expect(TCP_AO_SND_SNE_UPDATE, this_ip_addr,
this_ip_dest, test_server_port + 1, client_new_port, 1);
@@ -136,11 +136,11 @@ static void *server_fn(void *arg)
}
synchronize_threads(); /* 6: verify counters after SEQ-number rollover */
- if (test_get_tcp_ao_counters(sk, &ao2))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &cnt2))
+ test_error("test_get_tcp_counters()");
after_good = netstat_get_one("TCPAOGood", NULL);
- test_tcp_ao_counters_cmp(NULL, &ao1, &ao2, TEST_CNT_GOOD);
+ test_assert_counters(NULL, &cnt1, &cnt2, TEST_CNT_GOOD);
if (after_good <= before_good) {
test_fail("TCPAOGood counter did not increase: %" PRIu64 " <= %" PRIu64,
@@ -173,7 +173,7 @@ out:
static void *client_fn(void *arg)
{
uint64_t before_good, after_good, after_bad;
- struct tcp_ao_counters ao1, ao2;
+ struct tcp_counters cnt1, cnt2;
struct tcp_sock_state img;
struct tcp_ao_repair ao_img;
sockaddr_af saddr;
@@ -191,7 +191,7 @@ static void *client_fn(void *arg)
test_error("failed to connect()");
synchronize_threads(); /* 2: accepted => send data */
- if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) {
+ if (test_client_verify(sk, msg_len, nr_packets)) {
test_fail("pre-migrate verify failed");
return NULL;
}
@@ -213,20 +213,20 @@ static void *client_fn(void *arg)
test_adjust_seqs(&img, &ao_img, false);
synchronize_threads(); /* 4: dump finished */
sk = test_sk_restore(&img, &ao_img, &saddr, this_ip_dest,
- test_server_port + 1, &ao1);
+ test_server_port + 1, &cnt1);
synchronize_threads(); /* 5: verify the connection during SEQ-number rollover */
- if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC))
+ if (test_client_verify(sk, msg_len, nr_packets))
test_fail("post-migrate verify failed");
else
test_ok("post-migrate connection alive");
synchronize_threads(); /* 5: verify counters after SEQ-number rollover */
- if (test_get_tcp_ao_counters(sk, &ao2))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &cnt2))
+ test_error("test_get_tcp_counters()");
after_good = netstat_get_one("TCPAOGood", NULL);
- test_tcp_ao_counters_cmp(NULL, &ao1, &ao2, TEST_CNT_GOOD);
+ test_assert_counters(NULL, &cnt1, &cnt2, TEST_CNT_GOOD);
if (after_good <= before_good) {
test_fail("TCPAOGood counter did not increase: %" PRIu64 " <= %" PRIu64,
diff --git a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c
index f779e5892bc1..a1467b64390a 100644
--- a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c
+++ b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c
@@ -6,6 +6,7 @@
#define fault(type) (inj == FAULT_ ## type)
static const char *md5_password = "Some evil genius, enemy to mankind, must have been the first contriver.";
static const char *ao_password = DEFAULT_TEST_PASSWORD;
+static volatile int sk_pair;
static union tcp_addr client2;
static union tcp_addr client3;
@@ -41,10 +42,10 @@ static void try_accept(const char *tst_name, unsigned int port,
const char *cnt_name, test_cnt cnt_expected,
int needs_tcp_md5, fault_t inj)
{
- struct tcp_ao_counters ao_cnt1, ao_cnt2;
+ struct tcp_counters cnt1, cnt2;
uint64_t before_cnt = 0, after_cnt = 0; /* silence GCC */
- int lsk, err, sk = 0;
- time_t timeout;
+ test_cnt poll_cnt = (cnt_expected == TEST_CNT_GOOD) ? 0 : cnt_expected;
+ int lsk, err, sk = -1;
if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5))
return;
@@ -63,22 +64,25 @@ static void try_accept(const char *tst_name, unsigned int port,
if (cnt_name)
before_cnt = netstat_get_one(cnt_name, NULL);
- if (ao_addr && test_get_tcp_ao_counters(lsk, &ao_cnt1))
- test_error("test_get_tcp_ao_counters()");
+ if (ao_addr && test_get_tcp_counters(lsk, &cnt1))
+ test_error("test_get_tcp_counters()");
synchronize_threads(); /* preparations done */
- timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
- err = test_wait_fd(lsk, timeout, 0);
+ err = test_skpair_wait_poll(lsk, 0, poll_cnt, &sk_pair);
synchronize_threads(); /* connect()/accept() timeouts */
if (err == -ETIMEDOUT) {
+ sk_pair = err;
if (!fault(TIMEOUT))
- test_fail("timed out for accept()");
+ test_fail("%s: timed out for accept()", tst_name);
+ } else if (err == -EKEYREJECTED) {
+ if (!fault(KEYREJECT))
+ test_fail("%s: key was rejected", tst_name);
} else if (err < 0) {
- test_error("test_wait_fd()");
+ test_error("test_skpair_wait_poll()");
} else {
if (fault(TIMEOUT))
- test_fail("ready to accept");
+ test_fail("%s: ready to accept", tst_name);
sk = accept(lsk, NULL, NULL);
if (sk < 0) {
@@ -89,8 +93,8 @@ static void try_accept(const char *tst_name, unsigned int port,
}
}
- if (ao_addr && test_get_tcp_ao_counters(lsk, &ao_cnt2))
- test_error("test_get_tcp_ao_counters()");
+ if (ao_addr && test_get_tcp_counters(lsk, &cnt2))
+ test_error("test_get_tcp_counters()");
close(lsk);
if (!cnt_name) {
@@ -108,11 +112,11 @@ static void try_accept(const char *tst_name, unsigned int port,
tst_name, cnt_name, before_cnt, after_cnt);
}
if (ao_addr)
- test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected);
+ test_assert_counters(tst_name, &cnt1, &cnt2, cnt_expected);
out:
synchronize_threads(); /* test_kill_sk() */
- if (sk > 0)
+ if (sk >= 0)
test_kill_sk(sk);
}
@@ -153,78 +157,82 @@ static void *server_fn(void *arg)
server_add_routes();
- try_accept("AO server (INADDR_ANY): AO client", port++, NULL, 0,
+ try_accept("[server] AO server (INADDR_ANY): AO client", port++, NULL, 0,
&addr_any, 0, 0, 100, 100, 0, "TCPAOGood",
TEST_CNT_GOOD, 0, 0);
- try_accept("AO server (INADDR_ANY): MD5 client", port++, NULL, 0,
+ try_accept("[server] AO server (INADDR_ANY): MD5 client", port++, NULL, 0,
&addr_any, 0, 0, 100, 100, 0, "TCPMD5Unexpected",
- 0, 1, FAULT_TIMEOUT);
- try_accept("AO server (INADDR_ANY): no sign client", port++, NULL, 0,
+ TEST_CNT_NS_MD5_UNEXPECTED, 1, FAULT_TIMEOUT);
+ try_accept("[server] AO server (INADDR_ANY): no sign client", port++, NULL, 0,
&addr_any, 0, 0, 100, 100, 0, "TCPAORequired",
TEST_CNT_AO_REQUIRED, 0, FAULT_TIMEOUT);
- try_accept("AO server (AO_REQUIRED): AO client", port++, NULL, 0,
+ try_accept("[server] AO server (AO_REQUIRED): AO client", port++, NULL, 0,
&this_ip_dest, TEST_PREFIX, true,
100, 100, 0, "TCPAOGood", TEST_CNT_GOOD, 0, 0);
- try_accept("AO server (AO_REQUIRED): unsigned client", port++, NULL, 0,
+ try_accept("[server] AO server (AO_REQUIRED): unsigned client", port++, NULL, 0,
&this_ip_dest, TEST_PREFIX, true,
100, 100, 0, "TCPAORequired",
TEST_CNT_AO_REQUIRED, 0, FAULT_TIMEOUT);
- try_accept("MD5 server (INADDR_ANY): AO client", port++, &addr_any, 0,
+ try_accept("[server] MD5 server (INADDR_ANY): AO client", port++, &addr_any, 0,
NULL, 0, 0, 0, 0, 0, "TCPAOKeyNotFound",
- 0, 1, FAULT_TIMEOUT);
- try_accept("MD5 server (INADDR_ANY): MD5 client", port++, &addr_any, 0,
+ TEST_CNT_NS_KEY_NOT_FOUND, 1, FAULT_TIMEOUT);
+ try_accept("[server] MD5 server (INADDR_ANY): MD5 client", port++, &addr_any, 0,
NULL, 0, 0, 0, 0, 0, NULL, 0, 1, 0);
- try_accept("MD5 server (INADDR_ANY): no sign client", port++, &addr_any,
+ try_accept("[server] MD5 server (INADDR_ANY): no sign client", port++, &addr_any,
0, NULL, 0, 0, 0, 0, 0, "TCPMD5NotFound",
- 0, 1, FAULT_TIMEOUT);
+ TEST_CNT_NS_MD5_NOT_FOUND, 1, FAULT_TIMEOUT);
- try_accept("no sign server: AO client", port++, NULL, 0,
+ try_accept("[server] no sign server: AO client", port++, NULL, 0,
NULL, 0, 0, 0, 0, 0, "TCPAOKeyNotFound",
- TEST_CNT_AO_KEY_NOT_FOUND, 0, FAULT_TIMEOUT);
- try_accept("no sign server: MD5 client", port++, NULL, 0,
+ TEST_CNT_NS_KEY_NOT_FOUND, 0, FAULT_TIMEOUT);
+ try_accept("[server] no sign server: MD5 client", port++, NULL, 0,
NULL, 0, 0, 0, 0, 0, "TCPMD5Unexpected",
- 0, 1, FAULT_TIMEOUT);
- try_accept("no sign server: no sign client", port++, NULL, 0,
+ TEST_CNT_NS_MD5_UNEXPECTED, 1, FAULT_TIMEOUT);
+ try_accept("[server] no sign server: no sign client", port++, NULL, 0,
NULL, 0, 0, 0, 0, 0, "CurrEstab", 0, 0, 0);
- try_accept("AO+MD5 server: AO client (matching)", port++,
+ try_accept("[server] AO+MD5 server: AO client (matching)", port++,
&this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
100, 100, 0, "TCPAOGood", TEST_CNT_GOOD, 1, 0);
- try_accept("AO+MD5 server: AO client (misconfig, matching MD5)", port++,
+ try_accept("[server] AO+MD5 server: AO client (misconfig, matching MD5)", port++,
&this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
100, 100, 0, "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND,
1, FAULT_TIMEOUT);
- try_accept("AO+MD5 server: AO client (misconfig, non-matching)", port++,
+ try_accept("[server] AO+MD5 server: AO client (misconfig, non-matching)", port++,
&this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
100, 100, 0, "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND,
1, FAULT_TIMEOUT);
- try_accept("AO+MD5 server: MD5 client (matching)", port++,
+ try_accept("[server] AO+MD5 server: MD5 client (matching)", port++,
&this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
100, 100, 0, NULL, 0, 1, 0);
- try_accept("AO+MD5 server: MD5 client (misconfig, matching AO)", port++,
+ try_accept("[server] AO+MD5 server: MD5 client (misconfig, matching AO)", port++,
&this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
- 100, 100, 0, "TCPMD5Unexpected", 0, 1, FAULT_TIMEOUT);
- try_accept("AO+MD5 server: MD5 client (misconfig, non-matching)", port++,
+ 100, 100, 0, "TCPMD5Unexpected",
+ TEST_CNT_NS_MD5_UNEXPECTED, 1, FAULT_TIMEOUT);
+ try_accept("[server] AO+MD5 server: MD5 client (misconfig, non-matching)", port++,
&this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
- 100, 100, 0, "TCPMD5Unexpected", 0, 1, FAULT_TIMEOUT);
- try_accept("AO+MD5 server: no sign client (unmatched)", port++,
+ 100, 100, 0, "TCPMD5Unexpected",
+ TEST_CNT_NS_MD5_UNEXPECTED, 1, FAULT_TIMEOUT);
+ try_accept("[server] AO+MD5 server: no sign client (unmatched)", port++,
&this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
100, 100, 0, "CurrEstab", 0, 1, 0);
- try_accept("AO+MD5 server: no sign client (misconfig, matching AO)",
+ try_accept("[server] AO+MD5 server: no sign client (misconfig, matching AO)",
port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
100, 100, 0, "TCPAORequired",
TEST_CNT_AO_REQUIRED, 1, FAULT_TIMEOUT);
- try_accept("AO+MD5 server: no sign client (misconfig, matching MD5)",
+ try_accept("[server] AO+MD5 server: no sign client (misconfig, matching MD5)",
port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
- 100, 100, 0, "TCPMD5NotFound", 0, 1, FAULT_TIMEOUT);
+ 100, 100, 0, "TCPMD5NotFound",
+ TEST_CNT_NS_MD5_NOT_FOUND, 1, FAULT_TIMEOUT);
- try_accept("AO+MD5 server: client with both [TCP-MD5] and TCP-AO keys",
+ /* Key rejected by the other side, failing short through skpair */
+ try_accept("[server] AO+MD5 server: client with both [TCP-MD5] and TCP-AO keys",
port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
- 100, 100, 0, NULL, 0, 1, FAULT_TIMEOUT);
- try_accept("AO+MD5 server: client with both TCP-MD5 and [TCP-AO] keys",
+ 100, 100, 0, NULL, 0, 1, FAULT_KEYREJECT);
+ try_accept("[server] AO+MD5 server: client with both TCP-MD5 and [TCP-AO] keys",
port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
- 100, 100, 0, NULL, 0, 1, FAULT_TIMEOUT);
+ 100, 100, 0, NULL, 0, 1, FAULT_KEYREJECT);
server_add_fail_tests(&port);
@@ -259,7 +267,6 @@ static void try_connect(const char *tst_name, unsigned int port,
uint8_t sndid, uint8_t rcvid, uint8_t vrf,
fault_t inj, int needs_tcp_md5, union tcp_addr *bind_addr)
{
- time_t timeout;
int sk, ret;
if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5))
@@ -281,11 +288,10 @@ static void try_connect(const char *tst_name, unsigned int port,
synchronize_threads(); /* preparations done */
- timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
- ret = _test_connect_socket(sk, this_ip_dest, port, timeout);
-
+ ret = test_skpair_connect_poll(sk, this_ip_dest, port, 0, &sk_pair);
synchronize_threads(); /* connect()/accept() timeouts */
if (ret < 0) {
+ sk_pair = ret;
if (fault(KEYREJECT) && ret == -EKEYREJECTED)
test_ok("%s: connect() was prevented", tst_name);
else if (ret == -ETIMEDOUT && fault(TIMEOUT))
@@ -305,8 +311,7 @@ static void try_connect(const char *tst_name, unsigned int port,
out:
synchronize_threads(); /* test_kill_sk() */
- /* _test_connect_socket() cleans up on failure */
- if (ret > 0)
+ if (ret > 0) /* test_skpair_connect_poll() cleans up on failure */
test_kill_sk(sk);
}
@@ -437,7 +442,6 @@ static void try_to_add(const char *tst_name, unsigned int port,
int ao_vrf, uint8_t sndid, uint8_t rcvid,
int needs_tcp_md5, fault_t inj)
{
- time_t timeout;
int sk, ret;
if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5))
@@ -450,11 +454,10 @@ static void try_to_add(const char *tst_name, unsigned int port,
synchronize_threads(); /* preparations done */
- timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
- ret = _test_connect_socket(sk, this_ip_dest, port, timeout);
+ ret = test_skpair_connect_poll(sk, this_ip_dest, port, 0, &sk_pair);
synchronize_threads(); /* connect()/accept() timeouts */
- if (ret <= 0) {
+ if (ret < 0) {
test_error("%s: connect() returned %d", tst_name, ret);
goto out;
}
@@ -490,8 +493,7 @@ static void try_to_add(const char *tst_name, unsigned int port,
out:
synchronize_threads(); /* test_kill_sk() */
- /* _test_connect_socket() cleans up on failure */
- if (ret > 0)
+ if (ret > 0) /* test_skpair_connect_poll() cleans up on failure */
test_kill_sk(sk);
}
diff --git a/tools/testing/selftests/net/test_blackhole_dev.sh b/tools/testing/selftests/net/test_blackhole_dev.sh
deleted file mode 100755
index 3119b80e711f..000000000000
--- a/tools/testing/selftests/net/test_blackhole_dev.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-# Runs blackhole-dev test using blackhole-dev kernel module
-
-if /sbin/modprobe -q test_blackhole_dev ; then
- /sbin/modprobe -q -r test_blackhole_dev;
- echo "test_blackhole_dev: ok";
-else
- echo "test_blackhole_dev: [FAIL]";
- exit 1;
-fi
diff --git a/tools/testing/selftests/net/test_so_rcv.sh b/tools/testing/selftests/net/test_so_rcv.sh
new file mode 100755
index 000000000000..d8aa4362879d
--- /dev/null
+++ b/tools/testing/selftests/net/test_so_rcv.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+HOSTS=("127.0.0.1" "::1")
+PORT=1234
+TOTAL_TESTS=0
+FAILED_TESTS=0
+
+declare -A TESTS=(
+ ["SO_RCVPRIORITY"]="-P 2"
+ ["SO_RCVMARK"]="-M 3"
+)
+
+check_result() {
+ ((TOTAL_TESTS++))
+ if [ "$1" -ne 0 ]; then
+ ((FAILED_TESTS++))
+ fi
+}
+
+cleanup()
+{
+ cleanup_ns $NS
+}
+
+trap cleanup EXIT
+
+setup_ns NS
+
+for HOST in "${HOSTS[@]}"; do
+ PROTOCOL="IPv4"
+ if [[ "$HOST" == "::1" ]]; then
+ PROTOCOL="IPv6"
+ fi
+ for test_name in "${!TESTS[@]}"; do
+ echo "Running $test_name test, $PROTOCOL"
+ arg=${TESTS[$test_name]}
+
+ ip netns exec $NS ./so_rcv_listener $arg $HOST $PORT &
+ LISTENER_PID=$!
+
+ sleep 0.5
+
+ if ! ip netns exec $NS ./cmsg_sender $arg $HOST $PORT; then
+ echo "Sender failed for $test_name, $PROTOCOL"
+ kill "$LISTENER_PID" 2>/dev/null
+ wait "$LISTENER_PID"
+ check_result 1
+ continue
+ fi
+
+ wait "$LISTENER_PID"
+ LISTENER_EXIT_CODE=$?
+
+ if [ "$LISTENER_EXIT_CODE" -eq 0 ]; then
+ echo "Rcv test OK for $test_name, $PROTOCOL"
+ check_result 0
+ else
+ echo "Rcv test FAILED for $test_name, $PROTOCOL"
+ check_result 1
+ fi
+ done
+done
+
+if [ "$FAILED_TESTS" -ne 0 ]; then
+ echo "FAIL - $FAILED_TESTS/$TOTAL_TESTS tests failed"
+ exit ${KSFT_FAIL}
+else
+ echo "OK - All $TOTAL_TESTS tests passed"
+ exit ${KSFT_PASS}
+fi
diff --git a/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh b/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh
index 2d442cdab11e..062f957950af 100755
--- a/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh
+++ b/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh
@@ -1,29 +1,114 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-# Check FDB default-remote handling across "ip link set".
+ALL_TESTS="
+ test_set_remote
+ test_change_mc_remote
+"
+source lib.sh
check_remotes()
{
local what=$1; shift
local N=$(bridge fdb sh dev vx | grep 00:00:00:00:00:00 | wc -l)
- echo -ne "expected two remotes after $what\t"
- if [[ $N != 2 ]]; then
- echo "[FAIL]"
- EXIT_STATUS=1
+ ((N == 2))
+ check_err $? "expected 2 remotes after $what, got $N"
+}
+
+# Check FDB default-remote handling across "ip link set".
+test_set_remote()
+{
+ RET=0
+
+ ip_link_add vx up type vxlan id 2000 dstport 4789
+ bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.20 self permanent
+ bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.30 self permanent
+ check_remotes "fdb append"
+
+ ip link set dev vx type vxlan remote 192.0.2.30
+ check_remotes "link set"
+
+ log_test 'FDB default-remote handling across "ip link set"'
+}
+
+fmt_remote()
+{
+ local addr=$1; shift
+
+ if [[ $addr == 224.* ]]; then
+ echo "group $addr"
else
- echo "[ OK ]"
+ echo "remote $addr"
fi
}
-ip link add name vx up type vxlan id 2000 dstport 4789
-bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.20 self permanent
-bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.30 self permanent
-check_remotes "fdb append"
+change_remote()
+{
+ local remote=$1; shift
+
+ ip link set dev vx type vxlan $(fmt_remote $remote) dev v1
+}
+
+check_membership()
+{
+ local check_vec=("$@")
+
+ local memberships
+ memberships=$(
+ netstat -n --groups |
+ sed -n '/^v1\b/p' |
+ grep -o '[^ ]*$'
+ )
+ check_err $? "Couldn't obtain group memberships"
+
+ local item
+ for item in "${check_vec[@]}"; do
+ eval "local $item"
+ echo "$memberships" | grep -q "\b$group\b"
+ check_err_fail $fail $? "$group is_ex reported in IGMP query response"
+ done
+}
+
+test_change_mc_remote()
+{
+ check_command netstat || return
+
+ ip_link_add v1 up type veth peer name v2
+ ip_link_set_up v2
+
+ RET=0
+
+ ip_link_add vx up type vxlan dstport 4789 \
+ local 192.0.2.1 $(fmt_remote 224.1.1.1) dev v1 vni 1000
+
+ check_membership "group=224.1.1.1 fail=0" \
+ "group=224.1.1.2 fail=1" \
+ "group=224.1.1.3 fail=1"
+
+ log_test "MC group report after VXLAN creation"
+
+ RET=0
+
+ change_remote 224.1.1.2
+ check_membership "group=224.1.1.1 fail=1" \
+ "group=224.1.1.2 fail=0" \
+ "group=224.1.1.3 fail=1"
+
+ log_test "MC group report after changing VXLAN remote MC->MC"
+
+ RET=0
+
+ change_remote 192.0.2.2
+ check_membership "group=224.1.1.1 fail=1" \
+ "group=224.1.1.2 fail=1" \
+ "group=224.1.1.3 fail=1"
+
+ log_test "MC group report after changing VXLAN remote MC->UC"
+}
+
+trap defer_scopes_cleanup EXIT
-ip link set dev vx type vxlan remote 192.0.2.30
-check_remotes "link set"
+tests_run
-ip link del dev vx
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c
index 3f2fca02fec5..36ff28af4b19 100644
--- a/tools/testing/selftests/net/udpgso.c
+++ b/tools/testing/selftests/net/udpgso.c
@@ -103,6 +103,19 @@ struct testcase testcases_v4[] = {
.r_num_mss = 1,
},
{
+ /* datalen <= MSS < gso_len: will fall back to no GSO */
+ .tlen = CONST_MSS_V4,
+ .gso_len = CONST_MSS_V4 + 1,
+ .r_num_mss = 0,
+ .r_len_last = CONST_MSS_V4,
+ },
+ {
+ /* MSS < datalen < gso_len: fail */
+ .tlen = CONST_MSS_V4 + 1,
+ .gso_len = CONST_MSS_V4 + 2,
+ .tfail = true,
+ },
+ {
/* send a single MSS + 1B */
.tlen = CONST_MSS_V4 + 1,
.gso_len = CONST_MSS_V4,
@@ -206,6 +219,19 @@ struct testcase testcases_v6[] = {
.r_num_mss = 1,
},
{
+ /* datalen <= MSS < gso_len: will fall back to no GSO */
+ .tlen = CONST_MSS_V6,
+ .gso_len = CONST_MSS_V6 + 1,
+ .r_num_mss = 0,
+ .r_len_last = CONST_MSS_V6,
+ },
+ {
+ /* MSS < datalen < gso_len: fail */
+ .tlen = CONST_MSS_V6 + 1,
+ .gso_len = CONST_MSS_V6 + 2,
+ .tfail = true
+ },
+ {
/* send a single MSS + 1B */
.tlen = CONST_MSS_V6 + 1,
.gso_len = CONST_MSS_V6,
diff --git a/tools/testing/selftests/net/ynl.mk b/tools/testing/selftests/net/ynl.mk
index 12e7cae251be..e907c2751956 100644
--- a/tools/testing/selftests/net/ynl.mk
+++ b/tools/testing/selftests/net/ynl.mk
@@ -27,7 +27,8 @@ $(OUTPUT)/.libynl-$(YNL_GENS_HASH).sig:
$(OUTPUT)/libynl.a: $(YNL_SPECS) $(OUTPUT)/.libynl-$(YNL_GENS_HASH).sig
$(Q)rm -f $(top_srcdir)/tools/net/ynl/libynl.a
- $(Q)$(MAKE) -C $(top_srcdir)/tools/net/ynl GENS="$(YNL_GENS)" libynl.a
+ $(Q)$(MAKE) -C $(top_srcdir)/tools/net/ynl \
+ GENS="$(YNL_GENS)" RSTS="" libynl.a
$(Q)cp $(top_srcdir)/tools/net/ynl/libynl.a $(OUTPUT)/libynl.a
EXTRA_CLEAN += \
diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile
index 7d14a7c0cb62..58bcbbd029bc 100644
--- a/tools/testing/selftests/nolibc/Makefile
+++ b/tools/testing/selftests/nolibc/Makefile
@@ -47,6 +47,7 @@ XARCH_riscv = riscv64
XARCH = $(or $(XARCH_$(ARCH)),$(ARCH))
# map from user input variants to their kernel supported architectures
+ARCH_armthumb = arm
ARCH_ppc = powerpc
ARCH_ppc64 = powerpc
ARCH_ppc64le = powerpc
@@ -54,6 +55,7 @@ ARCH_mips32le = mips
ARCH_mips32be = mips
ARCH_riscv32 = riscv
ARCH_riscv64 = riscv
+ARCH_s390x = s390
ARCH := $(or $(ARCH_$(XARCH)),$(XARCH))
# kernel image names by architecture
@@ -62,6 +64,7 @@ IMAGE_x86_64 = arch/x86/boot/bzImage
IMAGE_x86 = arch/x86/boot/bzImage
IMAGE_arm64 = arch/arm64/boot/Image
IMAGE_arm = arch/arm/boot/zImage
+IMAGE_armthumb = arch/arm/boot/zImage
IMAGE_mips32le = vmlinuz
IMAGE_mips32be = vmlinuz
IMAGE_ppc = vmlinux
@@ -70,6 +73,7 @@ IMAGE_ppc64le = arch/powerpc/boot/zImage
IMAGE_riscv = arch/riscv/boot/Image
IMAGE_riscv32 = arch/riscv/boot/Image
IMAGE_riscv64 = arch/riscv/boot/Image
+IMAGE_s390x = arch/s390/boot/bzImage
IMAGE_s390 = arch/s390/boot/bzImage
IMAGE_loongarch = arch/loongarch/boot/vmlinuz.efi
IMAGE = $(objtree)/$(IMAGE_$(XARCH))
@@ -81,19 +85,20 @@ DEFCONFIG_x86_64 = defconfig
DEFCONFIG_x86 = defconfig
DEFCONFIG_arm64 = defconfig
DEFCONFIG_arm = multi_v7_defconfig
+DEFCONFIG_armthumb = multi_v7_defconfig
DEFCONFIG_mips32le = malta_defconfig
-DEFCONFIG_mips32be = malta_defconfig
+DEFCONFIG_mips32be = malta_defconfig generic/eb.config
DEFCONFIG_ppc = pmac32_defconfig
DEFCONFIG_ppc64 = powernv_be_defconfig
DEFCONFIG_ppc64le = powernv_defconfig
DEFCONFIG_riscv = defconfig
DEFCONFIG_riscv32 = rv32_defconfig
DEFCONFIG_riscv64 = defconfig
-DEFCONFIG_s390 = defconfig
+DEFCONFIG_s390x = defconfig
+DEFCONFIG_s390 = defconfig compat.config
DEFCONFIG_loongarch = defconfig
DEFCONFIG = $(DEFCONFIG_$(XARCH))
-EXTRACONFIG_mips32be = -d CONFIG_CPU_LITTLE_ENDIAN -e CONFIG_CPU_BIG_ENDIAN
EXTRACONFIG = $(EXTRACONFIG_$(XARCH))
# optional tests to run (default = all)
@@ -105,6 +110,7 @@ QEMU_ARCH_x86_64 = x86_64
QEMU_ARCH_x86 = x86_64
QEMU_ARCH_arm64 = aarch64
QEMU_ARCH_arm = arm
+QEMU_ARCH_armthumb = arm
QEMU_ARCH_mips32le = mipsel # works with malta_defconfig
QEMU_ARCH_mips32be = mips
QEMU_ARCH_ppc = ppc
@@ -113,6 +119,7 @@ QEMU_ARCH_ppc64le = ppc64
QEMU_ARCH_riscv = riscv64
QEMU_ARCH_riscv32 = riscv32
QEMU_ARCH_riscv64 = riscv64
+QEMU_ARCH_s390x = s390x
QEMU_ARCH_s390 = s390x
QEMU_ARCH_loongarch = loongarch64
QEMU_ARCH = $(QEMU_ARCH_$(XARCH))
@@ -133,6 +140,7 @@ QEMU_ARGS_x86_64 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(
QEMU_ARGS_x86 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)"
QEMU_ARGS_arm64 = -M virt -cpu cortex-a53 -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
QEMU_ARGS_arm = -M virt -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_armthumb = -M virt -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
QEMU_ARGS_mips32le = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
QEMU_ARGS_mips32be = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
QEMU_ARGS_ppc = -M g3beige -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
@@ -141,6 +149,7 @@ QEMU_ARGS_ppc64le = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC
QEMU_ARGS_riscv = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
QEMU_ARGS_riscv32 = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
QEMU_ARGS_riscv64 = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_s390x = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
QEMU_ARGS_s390 = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
QEMU_ARGS_loongarch = -M virt -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
QEMU_ARGS = -m 1G $(QEMU_ARGS_$(XARCH)) $(QEMU_ARGS_BIOS) $(QEMU_ARGS_EXTRA)
@@ -156,15 +165,18 @@ Q=@
endif
CFLAGS_i386 = $(call cc-option,-m32)
+CFLAGS_arm = -marm
+CFLAGS_armthumb = -mthumb -march=armv6t2
CFLAGS_ppc = -m32 -mbig-endian -mno-vsx $(call cc-option,-mmultiple)
CFLAGS_ppc64 = -m64 -mbig-endian -mno-vsx $(call cc-option,-mmultiple)
CFLAGS_ppc64le = -m64 -mlittle-endian -mno-vsx $(call cc-option,-mabi=elfv2)
-CFLAGS_s390 = -m64
+CFLAGS_s390x = -m64
+CFLAGS_s390 = -m31
CFLAGS_mips32le = -EL -mabi=32 -fPIC
CFLAGS_mips32be = -EB -mabi=32
CFLAGS_STACKPROTECTOR ?= $(call cc-option,-mstack-protector-guard=global $(call cc-option,-fstack-protector-all))
CFLAGS ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 -W -Wall -Wextra \
- $(call cc-option,-fno-stack-protector) \
+ $(call cc-option,-fno-stack-protector) $(call cc-option,-Wmissing-prototypes) \
$(CFLAGS_$(XARCH)) $(CFLAGS_STACKPROTECTOR) $(CFLAGS_EXTRA)
LDFLAGS :=
@@ -220,7 +232,7 @@ all: run
sysroot: sysroot/$(ARCH)/include
-sysroot/$(ARCH)/include:
+sysroot/$(ARCH)/include: | defconfig
$(Q)rm -rf sysroot/$(ARCH) sysroot/sysroot
$(QUIET_MKDIR)mkdir -p sysroot
$(Q)$(MAKE) -C $(srctree) outputmakefile
@@ -264,16 +276,16 @@ initramfs: nolibc-test
$(Q)cp nolibc-test initramfs/init
defconfig:
- $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) mrproper $(DEFCONFIG) prepare
+ $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(DEFCONFIG)
$(Q)if [ -n "$(EXTRACONFIG)" ]; then \
$(srctree)/scripts/config --file $(objtree)/.config $(EXTRACONFIG); \
$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) olddefconfig < /dev/null; \
fi
-kernel:
+kernel: | defconfig
$(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) < /dev/null
-kernel-standalone: initramfs
+kernel-standalone: initramfs | defconfig
$(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) CONFIG_INITRAMFS_SOURCE=$(CURDIR)/initramfs < /dev/null
# run the tests after building the kernel
diff --git a/tools/testing/selftests/nolibc/nolibc-test-linkage.c b/tools/testing/selftests/nolibc/nolibc-test-linkage.c
index 5ff4c8a1db2a..a7ca8325863f 100644
--- a/tools/testing/selftests/nolibc/nolibc-test-linkage.c
+++ b/tools/testing/selftests/nolibc/nolibc-test-linkage.c
@@ -11,16 +11,16 @@ void *linkage_test_errno_addr(void)
return &errno;
}
-int linkage_test_constructor_test_value;
+int linkage_test_constructor_test_value = 0;
__attribute__((constructor))
static void constructor1(void)
{
- linkage_test_constructor_test_value = 2;
+ linkage_test_constructor_test_value |= 1 << 0;
}
__attribute__((constructor))
static void constructor2(void)
{
- linkage_test_constructor_test_value *= 3;
+ linkage_test_constructor_test_value |= 1 << 1;
}
diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index 0e0e3b48a8c3..5884a891c491 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -43,6 +43,8 @@
#endif
#endif
+#pragma GCC diagnostic ignored "-Wmissing-prototypes"
+
#include "nolibc-test-linkage.h"
/* for the type of int_fast16_t and int_fast32_t, musl differs from glibc and nolibc */
@@ -690,14 +692,14 @@ int expect_strtox(int llen, void *func, const char *input, int base, intmax_t ex
__attribute__((constructor))
static void constructor1(void)
{
- constructor_test_value = 1;
+ constructor_test_value |= 1 << 0;
}
__attribute__((constructor))
static void constructor2(int argc, char **argv, char **envp)
{
if (argc && argv && envp)
- constructor_test_value *= 2;
+ constructor_test_value |= 1 << 1;
}
int run_startup(int min, int max)
@@ -736,9 +738,9 @@ int run_startup(int min, int max)
CASE_TEST(environ_HOME); EXPECT_PTRNZ(1, getenv("HOME")); break;
CASE_TEST(auxv_addr); EXPECT_PTRGT(test_auxv != (void *)-1, test_auxv, brk); break;
CASE_TEST(auxv_AT_UID); EXPECT_EQ(1, getauxval(AT_UID), getuid()); break;
- CASE_TEST(constructor); EXPECT_EQ(1, constructor_test_value, 2); break;
+ CASE_TEST(constructor); EXPECT_EQ(is_nolibc, constructor_test_value, 0x3); break;
CASE_TEST(linkage_errno); EXPECT_PTREQ(1, linkage_test_errno_addr(), &errno); break;
- CASE_TEST(linkage_constr); EXPECT_EQ(1, linkage_test_constructor_test_value, 6); break;
+ CASE_TEST(linkage_constr); EXPECT_EQ(1, linkage_test_constructor_test_value, 0x3); break;
case __LINE__:
return ret; /* must be last */
/* note: do not set any defaults so as to permit holes above */
@@ -767,6 +769,44 @@ int test_getdents64(const char *dir)
return ret;
}
+static int test_dirent(void)
+{
+ int comm = 0, cmdline = 0;
+ struct dirent dirent, *result;
+ DIR *dir;
+ int ret;
+
+ dir = opendir("/proc/self");
+ if (!dir)
+ return 1;
+
+ while (1) {
+ errno = 0;
+ ret = readdir_r(dir, &dirent, &result);
+ if (ret != 0)
+ return 1;
+ if (!result)
+ break;
+
+ if (strcmp(dirent.d_name, "comm") == 0)
+ comm++;
+ else if (strcmp(dirent.d_name, "cmdline") == 0)
+ cmdline++;
+ }
+
+ if (errno)
+ return 1;
+
+ ret = closedir(dir);
+ if (ret)
+ return 1;
+
+ if (comm != 1 || cmdline != 1)
+ return 1;
+
+ return 0;
+}
+
int test_getpagesize(void)
{
int x = getpagesize();
@@ -988,6 +1028,22 @@ int test_rlimit(void)
return 0;
}
+int test_openat(void)
+{
+ int dev, null;
+
+ dev = openat(AT_FDCWD, "/dev", O_DIRECTORY);
+ if (dev < 0)
+ return -1;
+
+ null = openat(dev, "null", O_RDONLY);
+ close(dev);
+ if (null < 0)
+ return -1;
+
+ close(null);
+ return 0;
+}
/* Run syscall tests between IDs <min> and <max>.
* Return 0 on success, non-zero on failure.
@@ -1059,6 +1115,7 @@ int run_syscall(int min, int max)
CASE_TEST(fork); EXPECT_SYSZR(1, test_fork()); break;
CASE_TEST(getdents64_root); EXPECT_SYSNE(1, test_getdents64("/"), -1); break;
CASE_TEST(getdents64_null); EXPECT_SYSER(1, test_getdents64("/dev/null"), -1, ENOTDIR); break;
+ CASE_TEST(directories); EXPECT_SYSZR(proc, test_dirent()); break;
CASE_TEST(gettimeofday_tv); EXPECT_SYSZR(1, gettimeofday(&tv, NULL)); break;
CASE_TEST(gettimeofday_tv_tz);EXPECT_SYSZR(1, gettimeofday(&tv, &tz)); break;
CASE_TEST(getpagesize); EXPECT_SYSZR(1, test_getpagesize()); break;
@@ -1073,8 +1130,9 @@ int run_syscall(int min, int max)
CASE_TEST(mmap_bad); EXPECT_PTRER(1, mmap(NULL, 0, PROT_READ, MAP_PRIVATE, 0, 0), MAP_FAILED, EINVAL); break;
CASE_TEST(munmap_bad); EXPECT_SYSER(1, munmap(NULL, 0), -1, EINVAL); break;
CASE_TEST(mmap_munmap_good); EXPECT_SYSZR(1, test_mmap_munmap()); break;
- CASE_TEST(open_tty); EXPECT_SYSNE(1, tmp = open("/dev/null", 0), -1); if (tmp != -1) close(tmp); break;
- CASE_TEST(open_blah); EXPECT_SYSER(1, tmp = open("/proc/self/blah", 0), -1, ENOENT); if (tmp != -1) close(tmp); break;
+ CASE_TEST(open_tty); EXPECT_SYSNE(1, tmp = open("/dev/null", O_RDONLY), -1); if (tmp != -1) close(tmp); break;
+ CASE_TEST(open_blah); EXPECT_SYSER(1, tmp = open("/proc/self/blah", O_RDONLY), -1, ENOENT); if (tmp != -1) close(tmp); break;
+ CASE_TEST(openat_dir); EXPECT_SYSZR(1, test_openat()); break;
CASE_TEST(pipe); EXPECT_SYSZR(1, test_pipe()); break;
CASE_TEST(poll_null); EXPECT_SYSZR(1, poll(NULL, 0, 0)); break;
CASE_TEST(poll_stdout); EXPECT_SYSNE(1, ({ struct pollfd fds = { 1, POLLOUT, 0}; poll(&fds, 1, 0); }), -1); break;
@@ -1284,6 +1342,73 @@ static int expect_vfprintf(int llen, int c, const char *expected, const char *fm
return ret;
}
+static int test_scanf(void)
+{
+ unsigned long long ull;
+ unsigned long ul;
+ unsigned int u;
+ long long ll;
+ long l;
+ void *p;
+ int i;
+
+ /* return __LINE__ to point to the specific failure */
+
+ /* test EOF */
+ if (sscanf("", "foo") != EOF)
+ return __LINE__;
+
+ /* test simple literal without placeholder */
+ if (sscanf("foo", "foo") != 0)
+ return __LINE__;
+
+ /* test single placeholder */
+ if (sscanf("123", "%d", &i) != 1)
+ return __LINE__;
+
+ if (i != 123)
+ return __LINE__;
+
+ /* test multiple place holders and separators */
+ if (sscanf("a123b456c0x90", "a%db%uc%p", &i, &u, &p) != 3)
+ return __LINE__;
+
+ if (i != 123)
+ return __LINE__;
+
+ if (u != 456)
+ return __LINE__;
+
+ if (p != (void *)0x90)
+ return __LINE__;
+
+ /* test space handling */
+ if (sscanf("a b1", "a b%d", &i) != 1)
+ return __LINE__;
+
+ if (i != 1)
+ return __LINE__;
+
+ /* test literal percent */
+ if (sscanf("a%1", "a%%%d", &i) != 1)
+ return __LINE__;
+
+ if (i != 1)
+ return __LINE__;
+
+ /* test stdint.h types */
+ if (sscanf("1|2|3|4|5|6",
+ "%d|%ld|%lld|%u|%lu|%llu",
+ &i, &l, &ll, &u, &ul, &ull) != 6)
+ return __LINE__;
+
+ if (i != 1 || l != 2 || ll != 3 ||
+ u != 4 || ul != 5 || ull != 6)
+ return __LINE__;
+
+ return 0;
+}
+
static int run_vfprintf(int min, int max)
{
int test;
@@ -1305,6 +1430,7 @@ static int run_vfprintf(int min, int max)
CASE_TEST(char); EXPECT_VFPRINTF(1, "c", "%c", 'c'); break;
CASE_TEST(hex); EXPECT_VFPRINTF(1, "f", "%x", 0xf); break;
CASE_TEST(pointer); EXPECT_VFPRINTF(3, "0x1", "%p", (void *) 0x1); break;
+ CASE_TEST(scanf); EXPECT_ZR(1, test_scanf()); break;
case __LINE__:
return ret; /* must be last */
/* note: do not set any defaults so as to permit holes above */
diff --git a/tools/testing/selftests/nolibc/run-tests.sh b/tools/testing/selftests/nolibc/run-tests.sh
index 9c5160c53881..0299a0912d40 100755
--- a/tools/testing/selftests/nolibc/run-tests.sh
+++ b/tools/testing/selftests/nolibc/run-tests.sh
@@ -17,7 +17,16 @@ perform_download=0
test_mode=system
werror=1
llvm=
-archs="i386 x86_64 arm64 arm mips32le mips32be ppc ppc64 ppc64le riscv32 riscv64 s390 loongarch"
+all_archs=(
+ i386 x86_64
+ arm64 arm armthumb
+ mips32le mips32be
+ ppc ppc64 ppc64le
+ riscv32 riscv64
+ s390x s390
+ loongarch
+)
+archs="${all_archs[@]}"
TEMP=$(getopt -o 'j:d:c:b:a:m:pelh' -n "$0" -- "$@")
@@ -94,19 +103,21 @@ fi
crosstool_arch() {
case "$1" in
arm64) echo aarch64;;
+ armthumb) echo arm;;
ppc) echo powerpc;;
ppc64) echo powerpc64;;
ppc64le) echo powerpc64;;
riscv) echo riscv64;;
loongarch) echo loongarch64;;
mips*) echo mips;;
+ s390*) echo s390;;
*) echo "$1";;
esac
}
crosstool_abi() {
case "$1" in
- arm) echo linux-gnueabi;;
+ arm | armthumb) echo linux-gnueabi;;
*) echo linux;;
esac
}
@@ -157,10 +168,6 @@ test_arch() {
fi
MAKE=(make -j"${nproc}" XARCH="${arch}" CROSS_COMPILE="${cross_compile}" LLVM="${llvm}" O="${build_dir}")
- mkdir -p "$build_dir"
- if [ "$test_mode" = "system" ] && [ ! -f "${build_dir}/.config" ]; then
- swallow_output "${MAKE[@]}" defconfig
- fi
case "$test_mode" in
'system')
test_target=run
@@ -173,6 +180,13 @@ test_arch() {
exit 1
esac
printf '%-15s' "$arch:"
+ if [ "$arch" = "s390" ] && ([ "$llvm" = "1" ] || [ "$test_mode" = "user" ]); then
+ echo "Unsupported configuration"
+ return
+ fi
+
+ mkdir -p "$build_dir"
+ swallow_output "${MAKE[@]}" defconfig
swallow_output "${MAKE[@]}" CFLAGS_EXTRA="$CFLAGS_EXTRA" "$test_target" V=1
cp run.out run.out."${arch}"
"${MAKE[@]}" report | grep passed
diff --git a/tools/testing/selftests/pci_endpoint/.gitignore b/tools/testing/selftests/pci_endpoint/.gitignore
new file mode 100644
index 000000000000..6a4837a3e034
--- /dev/null
+++ b/tools/testing/selftests/pci_endpoint/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+pci_endpoint_test
diff --git a/tools/testing/selftests/pci_endpoint/Makefile b/tools/testing/selftests/pci_endpoint/Makefile
new file mode 100644
index 000000000000..bf21ebf20b4a
--- /dev/null
+++ b/tools/testing/selftests/pci_endpoint/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -O2 -Wl,-no-as-needed -Wall $(KHDR_INCLUDES)
+LDFLAGS += -lrt -lpthread -lm
+
+TEST_GEN_PROGS = pci_endpoint_test
+
+include ../lib.mk
diff --git a/tools/testing/selftests/pci_endpoint/config b/tools/testing/selftests/pci_endpoint/config
new file mode 100644
index 000000000000..7cdcf117db8d
--- /dev/null
+++ b/tools/testing/selftests/pci_endpoint/config
@@ -0,0 +1,4 @@
+CONFIG_PCI_ENDPOINT=y
+CONFIG_PCI_ENDPOINT_CONFIGFS=y
+CONFIG_PCI_EPF_TEST=m
+CONFIG_PCI_ENDPOINT_TEST=m
diff --git a/tools/testing/selftests/pci_endpoint/pci_endpoint_test.c b/tools/testing/selftests/pci_endpoint/pci_endpoint_test.c
new file mode 100644
index 000000000000..ac26481d29d9
--- /dev/null
+++ b/tools/testing/selftests/pci_endpoint/pci_endpoint_test.c
@@ -0,0 +1,232 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Kselftest for PCI Endpoint Subsystem
+ *
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd.
+ * https://www.samsung.com
+ * Author: Aman Gupta <aman1.gupta@samsung.com>
+ *
+ * Copyright (c) 2024, Linaro Ltd.
+ * Author: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+#include "../../../../include/uapi/linux/pcitest.h"
+
+#include "../kselftest_harness.h"
+
+#define pci_ep_ioctl(cmd, arg) \
+({ \
+ ret = ioctl(self->fd, cmd, arg); \
+ ret = ret < 0 ? -errno : ret; \
+})
+
+static const char *test_device = "/dev/pci-endpoint-test.0";
+static const unsigned long test_size[5] = { 1, 1024, 1025, 1024000, 1024001 };
+
+FIXTURE(pci_ep_bar)
+{
+ int fd;
+};
+
+FIXTURE_SETUP(pci_ep_bar)
+{
+ self->fd = open(test_device, O_RDWR);
+
+ ASSERT_NE(-1, self->fd) TH_LOG("Can't open PCI Endpoint Test device");
+}
+
+FIXTURE_TEARDOWN(pci_ep_bar)
+{
+ close(self->fd);
+}
+
+FIXTURE_VARIANT(pci_ep_bar)
+{
+ int barno;
+};
+
+FIXTURE_VARIANT_ADD(pci_ep_bar, BAR0) { .barno = 0 };
+FIXTURE_VARIANT_ADD(pci_ep_bar, BAR1) { .barno = 1 };
+FIXTURE_VARIANT_ADD(pci_ep_bar, BAR2) { .barno = 2 };
+FIXTURE_VARIANT_ADD(pci_ep_bar, BAR3) { .barno = 3 };
+FIXTURE_VARIANT_ADD(pci_ep_bar, BAR4) { .barno = 4 };
+FIXTURE_VARIANT_ADD(pci_ep_bar, BAR5) { .barno = 5 };
+
+TEST_F(pci_ep_bar, BAR_TEST)
+{
+ int ret;
+
+ pci_ep_ioctl(PCITEST_BAR, variant->barno);
+ if (ret == -ENODATA)
+ SKIP(return, "BAR is disabled");
+ EXPECT_FALSE(ret) TH_LOG("Test failed for BAR%d", variant->barno);
+}
+
+FIXTURE(pci_ep_basic)
+{
+ int fd;
+};
+
+FIXTURE_SETUP(pci_ep_basic)
+{
+ self->fd = open(test_device, O_RDWR);
+
+ ASSERT_NE(-1, self->fd) TH_LOG("Can't open PCI Endpoint Test device");
+}
+
+FIXTURE_TEARDOWN(pci_ep_basic)
+{
+ close(self->fd);
+}
+
+TEST_F(pci_ep_basic, CONSECUTIVE_BAR_TEST)
+{
+ int ret;
+
+ pci_ep_ioctl(PCITEST_BARS, 0);
+ EXPECT_FALSE(ret) TH_LOG("Consecutive BAR test failed");
+}
+
+TEST_F(pci_ep_basic, LEGACY_IRQ_TEST)
+{
+ int ret;
+
+ pci_ep_ioctl(PCITEST_SET_IRQTYPE, PCITEST_IRQ_TYPE_INTX);
+ ASSERT_EQ(0, ret) TH_LOG("Can't set Legacy IRQ type");
+
+ pci_ep_ioctl(PCITEST_GET_IRQTYPE, 0);
+ ASSERT_EQ(PCITEST_IRQ_TYPE_INTX, ret) TH_LOG("Can't get Legacy IRQ type");
+
+ pci_ep_ioctl(PCITEST_LEGACY_IRQ, 0);
+ EXPECT_FALSE(ret) TH_LOG("Test failed for Legacy IRQ");
+}
+
+TEST_F(pci_ep_basic, MSI_TEST)
+{
+ int ret, i;
+
+ pci_ep_ioctl(PCITEST_SET_IRQTYPE, PCITEST_IRQ_TYPE_MSI);
+ ASSERT_EQ(0, ret) TH_LOG("Can't set MSI IRQ type");
+
+ pci_ep_ioctl(PCITEST_GET_IRQTYPE, 0);
+ ASSERT_EQ(PCITEST_IRQ_TYPE_MSI, ret) TH_LOG("Can't get MSI IRQ type");
+
+ for (i = 1; i <= 32; i++) {
+ pci_ep_ioctl(PCITEST_MSI, i);
+ EXPECT_FALSE(ret) TH_LOG("Test failed for MSI%d", i);
+ }
+}
+
+TEST_F(pci_ep_basic, MSIX_TEST)
+{
+ int ret, i;
+
+ pci_ep_ioctl(PCITEST_SET_IRQTYPE, PCITEST_IRQ_TYPE_MSIX);
+ ASSERT_EQ(0, ret) TH_LOG("Can't set MSI-X IRQ type");
+
+ pci_ep_ioctl(PCITEST_GET_IRQTYPE, 0);
+ ASSERT_EQ(PCITEST_IRQ_TYPE_MSIX, ret) TH_LOG("Can't get MSI-X IRQ type");
+
+ for (i = 1; i <= 2048; i++) {
+ pci_ep_ioctl(PCITEST_MSIX, i);
+ EXPECT_FALSE(ret) TH_LOG("Test failed for MSI-X%d", i);
+ }
+}
+
+FIXTURE(pci_ep_data_transfer)
+{
+ int fd;
+};
+
+FIXTURE_SETUP(pci_ep_data_transfer)
+{
+ self->fd = open(test_device, O_RDWR);
+
+ ASSERT_NE(-1, self->fd) TH_LOG("Can't open PCI Endpoint Test device");
+}
+
+FIXTURE_TEARDOWN(pci_ep_data_transfer)
+{
+ close(self->fd);
+}
+
+FIXTURE_VARIANT(pci_ep_data_transfer)
+{
+ bool use_dma;
+};
+
+FIXTURE_VARIANT_ADD(pci_ep_data_transfer, memcpy)
+{
+ .use_dma = false,
+};
+
+FIXTURE_VARIANT_ADD(pci_ep_data_transfer, dma)
+{
+ .use_dma = true,
+};
+
+TEST_F(pci_ep_data_transfer, READ_TEST)
+{
+ struct pci_endpoint_test_xfer_param param = {};
+ int ret, i;
+
+ if (variant->use_dma)
+ param.flags = PCITEST_FLAGS_USE_DMA;
+
+ pci_ep_ioctl(PCITEST_SET_IRQTYPE, PCITEST_IRQ_TYPE_AUTO);
+ ASSERT_EQ(0, ret) TH_LOG("Can't set AUTO IRQ type");
+
+ for (i = 0; i < ARRAY_SIZE(test_size); i++) {
+ param.size = test_size[i];
+ pci_ep_ioctl(PCITEST_READ, &param);
+ EXPECT_FALSE(ret) TH_LOG("Test failed for size (%ld)",
+ test_size[i]);
+ }
+}
+
+TEST_F(pci_ep_data_transfer, WRITE_TEST)
+{
+ struct pci_endpoint_test_xfer_param param = {};
+ int ret, i;
+
+ if (variant->use_dma)
+ param.flags = PCITEST_FLAGS_USE_DMA;
+
+ pci_ep_ioctl(PCITEST_SET_IRQTYPE, PCITEST_IRQ_TYPE_AUTO);
+ ASSERT_EQ(0, ret) TH_LOG("Can't set AUTO IRQ type");
+
+ for (i = 0; i < ARRAY_SIZE(test_size); i++) {
+ param.size = test_size[i];
+ pci_ep_ioctl(PCITEST_WRITE, &param);
+ EXPECT_FALSE(ret) TH_LOG("Test failed for size (%ld)",
+ test_size[i]);
+ }
+}
+
+TEST_F(pci_ep_data_transfer, COPY_TEST)
+{
+ struct pci_endpoint_test_xfer_param param = {};
+ int ret, i;
+
+ if (variant->use_dma)
+ param.flags = PCITEST_FLAGS_USE_DMA;
+
+ pci_ep_ioctl(PCITEST_SET_IRQTYPE, PCITEST_IRQ_TYPE_AUTO);
+ ASSERT_EQ(0, ret) TH_LOG("Can't set AUTO IRQ type");
+
+ for (i = 0; i < ARRAY_SIZE(test_size); i++) {
+ param.size = test_size[i];
+ pci_ep_ioctl(PCITEST_COPY, &param);
+ EXPECT_FALSE(ret) TH_LOG("Test failed for size (%ld)",
+ test_size[i]);
+ }
+}
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/pcie_bwctrl/Makefile b/tools/testing/selftests/pcie_bwctrl/Makefile
index 3e84e26341d1..48ec048f47af 100644
--- a/tools/testing/selftests/pcie_bwctrl/Makefile
+++ b/tools/testing/selftests/pcie_bwctrl/Makefile
@@ -1,2 +1,2 @@
-TEST_PROGS = set_pcie_cooling_state.sh
+TEST_PROGS = set_pcie_cooling_state.sh set_pcie_speed.sh
include ../lib.mk
diff --git a/tools/testing/selftests/pidfd/.gitignore b/tools/testing/selftests/pidfd/.gitignore
index bf92481f925c..0406a065deb4 100644
--- a/tools/testing/selftests/pidfd/.gitignore
+++ b/tools/testing/selftests/pidfd/.gitignore
@@ -8,3 +8,5 @@ pidfd_getfd_test
pidfd_setns_test
pidfd_file_handle_test
pidfd_bind_mount
+pidfd_info_test
+pidfd_exec_helper
diff --git a/tools/testing/selftests/pidfd/Makefile b/tools/testing/selftests/pidfd/Makefile
index 301343a11b62..fcbefc0d77f6 100644
--- a/tools/testing/selftests/pidfd/Makefile
+++ b/tools/testing/selftests/pidfd/Makefile
@@ -3,7 +3,9 @@ CFLAGS += -g $(KHDR_INCLUDES) -pthread -Wall
TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test \
pidfd_poll_test pidfd_wait pidfd_getfd_test pidfd_setns_test \
- pidfd_file_handle_test pidfd_bind_mount
+ pidfd_file_handle_test pidfd_bind_mount pidfd_info_test
+
+TEST_GEN_PROGS_EXTENDED := pidfd_exec_helper
include ../lib.mk
diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h
index 0b96ac4b8ce5..cec22aa11cdf 100644
--- a/tools/testing/selftests/pidfd/pidfd.h
+++ b/tools/testing/selftests/pidfd/pidfd.h
@@ -12,6 +12,7 @@
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
+#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/wait.h>
@@ -50,6 +51,107 @@
#define PIDFD_NONBLOCK O_NONBLOCK
#endif
+#ifndef PIDFD_SELF_THREAD
+#define PIDFD_SELF_THREAD -10000 /* Current thread. */
+#endif
+
+#ifndef PIDFD_SELF_THREAD_GROUP
+#define PIDFD_SELF_THREAD_GROUP -20000 /* Current thread group leader. */
+#endif
+
+#ifndef PIDFD_SELF
+#define PIDFD_SELF PIDFD_SELF_THREAD
+#endif
+
+#ifndef PIDFD_SELF_PROCESS
+#define PIDFD_SELF_PROCESS PIDFD_SELF_THREAD_GROUP
+#endif
+
+#ifndef PIDFS_IOCTL_MAGIC
+#define PIDFS_IOCTL_MAGIC 0xFF
+#endif
+
+#ifndef PIDFD_GET_CGROUP_NAMESPACE
+#define PIDFD_GET_CGROUP_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 1)
+#endif
+
+#ifndef PIDFD_GET_IPC_NAMESPACE
+#define PIDFD_GET_IPC_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 2)
+#endif
+
+#ifndef PIDFD_GET_MNT_NAMESPACE
+#define PIDFD_GET_MNT_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 3)
+#endif
+
+#ifndef PIDFD_GET_NET_NAMESPACE
+#define PIDFD_GET_NET_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 4)
+#endif
+
+#ifndef PIDFD_GET_PID_NAMESPACE
+#define PIDFD_GET_PID_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 5)
+#endif
+
+#ifndef PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE
+#define PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 6)
+#endif
+
+#ifndef PIDFD_GET_TIME_NAMESPACE
+#define PIDFD_GET_TIME_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 7)
+#endif
+
+#ifndef PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE
+#define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8)
+#endif
+
+#ifndef PIDFD_GET_USER_NAMESPACE
+#define PIDFD_GET_USER_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 9)
+#endif
+
+#ifndef PIDFD_GET_UTS_NAMESPACE
+#define PIDFD_GET_UTS_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 10)
+#endif
+
+#ifndef PIDFD_GET_INFO
+#define PIDFD_GET_INFO _IOWR(PIDFS_IOCTL_MAGIC, 11, struct pidfd_info)
+#endif
+
+#ifndef PIDFD_INFO_PID
+#define PIDFD_INFO_PID (1UL << 0) /* Always returned, even if not requested */
+#endif
+
+#ifndef PIDFD_INFO_CREDS
+#define PIDFD_INFO_CREDS (1UL << 1) /* Always returned, even if not requested */
+#endif
+
+#ifndef PIDFD_INFO_CGROUPID
+#define PIDFD_INFO_CGROUPID (1UL << 2) /* Always returned if available, even if not requested */
+#endif
+
+#ifndef PIDFD_INFO_EXIT
+#define PIDFD_INFO_EXIT (1UL << 3) /* Always returned if available, even if not requested */
+#endif
+
+#ifndef PIDFD_THREAD
+#define PIDFD_THREAD O_EXCL
+#endif
+
+struct pidfd_info {
+ __u64 mask;
+ __u64 cgroupid;
+ __u32 pid;
+ __u32 tgid;
+ __u32 ppid;
+ __u32 ruid;
+ __u32 rgid;
+ __u32 euid;
+ __u32 egid;
+ __u32 suid;
+ __u32 sgid;
+ __u32 fsuid;
+ __u32 fsgid;
+ __s32 exit_code;
+};
+
/*
* The kernel reserves 300 pids via RESERVED_PIDS in kernel/pid.c
* That means, when it wraps around any pid < 300 will be skipped.
@@ -152,4 +254,11 @@ static inline ssize_t write_nointr(int fd, const void *buf, size_t count)
return ret;
}
+static inline int sys_execveat(int dirfd, const char *pathname,
+ char *const argv[], char *const envp[],
+ int flags)
+{
+ return syscall(__NR_execveat, dirfd, pathname, argv, envp, flags);
+}
+
#endif /* __PIDFD_H */
diff --git a/tools/testing/selftests/pidfd/pidfd_exec_helper.c b/tools/testing/selftests/pidfd/pidfd_exec_helper.c
new file mode 100644
index 000000000000..5516808c95f2
--- /dev/null
+++ b/tools/testing/selftests/pidfd/pidfd_exec_helper.c
@@ -0,0 +1,12 @@
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+int main(int argc, char *argv[])
+{
+ if (pause())
+ _exit(EXIT_FAILURE);
+
+ _exit(EXIT_SUCCESS);
+}
diff --git a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c
index f062a986e382..f718aac75068 100644
--- a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c
@@ -13,6 +13,7 @@
#include <syscall.h>
#include <sys/wait.h>
#include <sys/mman.h>
+#include <sys/mount.h>
#include "pidfd.h"
#include "../kselftest.h"
diff --git a/tools/testing/selftests/pidfd/pidfd_info_test.c b/tools/testing/selftests/pidfd/pidfd_info_test.c
new file mode 100644
index 000000000000..1758a1b0457b
--- /dev/null
+++ b/tools/testing/selftests/pidfd/pidfd_info_test.c
@@ -0,0 +1,692 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/types.h>
+#include <poll.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syscall.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <linux/kcmp.h>
+#include <sys/stat.h>
+
+#include "pidfd.h"
+#include "../kselftest_harness.h"
+
+FIXTURE(pidfd_info)
+{
+ pid_t child_pid1;
+ int child_pidfd1;
+
+ pid_t child_pid2;
+ int child_pidfd2;
+
+ pid_t child_pid3;
+ int child_pidfd3;
+
+ pid_t child_pid4;
+ int child_pidfd4;
+};
+
+FIXTURE_SETUP(pidfd_info)
+{
+ int ret;
+ int ipc_sockets[2];
+ char c;
+
+ ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ EXPECT_EQ(ret, 0);
+
+ self->child_pid1 = create_child(&self->child_pidfd1, 0);
+ EXPECT_GE(self->child_pid1, 0);
+
+ if (self->child_pid1 == 0) {
+ close(ipc_sockets[0]);
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ _exit(EXIT_FAILURE);
+
+ close(ipc_sockets[1]);
+
+ pause();
+ _exit(EXIT_SUCCESS);
+ }
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ /* SIGKILL but don't reap. */
+ EXPECT_EQ(sys_pidfd_send_signal(self->child_pidfd1, SIGKILL, NULL, 0), 0);
+
+ ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ EXPECT_EQ(ret, 0);
+
+ self->child_pid2 = create_child(&self->child_pidfd2, 0);
+ EXPECT_GE(self->child_pid2, 0);
+
+ if (self->child_pid2 == 0) {
+ close(ipc_sockets[0]);
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ _exit(EXIT_FAILURE);
+
+ close(ipc_sockets[1]);
+
+ pause();
+ _exit(EXIT_SUCCESS);
+ }
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ /* SIGKILL and reap. */
+ EXPECT_EQ(sys_pidfd_send_signal(self->child_pidfd2, SIGKILL, NULL, 0), 0);
+ EXPECT_EQ(sys_waitid(P_PID, self->child_pid2, NULL, WEXITED), 0);
+
+ self->child_pid3 = create_child(&self->child_pidfd3, CLONE_NEWUSER | CLONE_NEWPID);
+ EXPECT_GE(self->child_pid3, 0);
+
+ if (self->child_pid3 == 0)
+ _exit(EXIT_SUCCESS);
+
+ self->child_pid4 = create_child(&self->child_pidfd4, CLONE_NEWUSER | CLONE_NEWPID);
+ EXPECT_GE(self->child_pid4, 0);
+
+ if (self->child_pid4 == 0)
+ _exit(EXIT_SUCCESS);
+
+ EXPECT_EQ(sys_waitid(P_PID, self->child_pid4, NULL, WEXITED), 0);
+}
+
+FIXTURE_TEARDOWN(pidfd_info)
+{
+ sys_pidfd_send_signal(self->child_pidfd1, SIGKILL, NULL, 0);
+ if (self->child_pidfd1 >= 0)
+ EXPECT_EQ(0, close(self->child_pidfd1));
+
+ sys_waitid(P_PID, self->child_pid1, NULL, WEXITED);
+
+ sys_pidfd_send_signal(self->child_pidfd2, SIGKILL, NULL, 0);
+ if (self->child_pidfd2 >= 0)
+ EXPECT_EQ(0, close(self->child_pidfd2));
+
+ sys_waitid(P_PID, self->child_pid2, NULL, WEXITED);
+ sys_waitid(P_PID, self->child_pid3, NULL, WEXITED);
+ sys_waitid(P_PID, self->child_pid4, NULL, WEXITED);
+}
+
+TEST_F(pidfd_info, sigkill_exit)
+{
+ struct pidfd_info info = {
+ .mask = PIDFD_INFO_CGROUPID,
+ };
+
+ /* Process has exited but not been reaped so this must work. */
+ ASSERT_EQ(ioctl(self->child_pidfd1, PIDFD_GET_INFO, &info), 0);
+
+ info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
+ ASSERT_EQ(ioctl(self->child_pidfd1, PIDFD_GET_INFO, &info), 0);
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_CREDS));
+ /* Process has exited but not been reaped, so no PIDFD_INFO_EXIT information yet. */
+ ASSERT_FALSE(!!(info.mask & PIDFD_INFO_EXIT));
+}
+
+TEST_F(pidfd_info, sigkill_reaped)
+{
+ struct pidfd_info info = {
+ .mask = PIDFD_INFO_CGROUPID,
+ };
+
+ /* Process has already been reaped and PIDFD_INFO_EXIT hasn't been set. */
+ ASSERT_NE(ioctl(self->child_pidfd2, PIDFD_GET_INFO, &info), 0);
+ ASSERT_EQ(errno, ESRCH);
+
+ info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
+ ASSERT_EQ(ioctl(self->child_pidfd2, PIDFD_GET_INFO, &info), 0);
+ ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT));
+ ASSERT_TRUE(WIFSIGNALED(info.exit_code));
+ ASSERT_EQ(WTERMSIG(info.exit_code), SIGKILL);
+}
+
+TEST_F(pidfd_info, success_exit)
+{
+ struct pidfd_info info = {
+ .mask = PIDFD_INFO_CGROUPID,
+ };
+
+ /* Process has exited but not been reaped so this must work. */
+ ASSERT_EQ(ioctl(self->child_pidfd3, PIDFD_GET_INFO, &info), 0);
+
+ info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
+ ASSERT_EQ(ioctl(self->child_pidfd3, PIDFD_GET_INFO, &info), 0);
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_CREDS));
+ /* Process has exited but not been reaped, so no PIDFD_INFO_EXIT information yet. */
+ ASSERT_FALSE(!!(info.mask & PIDFD_INFO_EXIT));
+}
+
+TEST_F(pidfd_info, success_reaped)
+{
+ struct pidfd_info info = {
+ .mask = PIDFD_INFO_CGROUPID,
+ };
+
+ /* Process has already been reaped and PIDFD_INFO_EXIT hasn't been set. */
+ ASSERT_NE(ioctl(self->child_pidfd4, PIDFD_GET_INFO, &info), 0);
+ ASSERT_EQ(errno, ESRCH);
+
+ info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
+ ASSERT_EQ(ioctl(self->child_pidfd4, PIDFD_GET_INFO, &info), 0);
+ ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT));
+ ASSERT_TRUE(WIFEXITED(info.exit_code));
+ ASSERT_EQ(WEXITSTATUS(info.exit_code), 0);
+}
+
+TEST_F(pidfd_info, success_reaped_poll)
+{
+ struct pidfd_info info = {
+ .mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT,
+ };
+ struct pollfd fds = {};
+ int nevents;
+
+ fds.events = POLLIN;
+ fds.fd = self->child_pidfd2;
+
+ nevents = poll(&fds, 1, -1);
+ ASSERT_EQ(nevents, 1);
+ ASSERT_TRUE(!!(fds.revents & POLLIN));
+ ASSERT_TRUE(!!(fds.revents & POLLHUP));
+
+ ASSERT_EQ(ioctl(self->child_pidfd2, PIDFD_GET_INFO, &info), 0);
+ ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT));
+ ASSERT_TRUE(WIFSIGNALED(info.exit_code));
+ ASSERT_EQ(WTERMSIG(info.exit_code), SIGKILL);
+}
+
+static void *pidfd_info_pause_thread(void *arg)
+{
+ pid_t pid_thread = gettid();
+ int ipc_socket = *(int *)arg;
+
+ /* Inform the grand-parent what the tid of this thread is. */
+ if (write_nointr(ipc_socket, &pid_thread, sizeof(pid_thread)) != sizeof(pid_thread))
+ return NULL;
+
+ close(ipc_socket);
+
+ /* Sleep untill we're killed. */
+ pause();
+ return NULL;
+}
+
+TEST_F(pidfd_info, thread_group)
+{
+ pid_t pid_leader, pid_poller, pid_thread;
+ pthread_t thread;
+ int nevents, pidfd_leader, pidfd_thread, pidfd_leader_thread, ret;
+ int ipc_sockets[2];
+ struct pollfd fds = {};
+ struct pidfd_info info = {
+ .mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT,
+ }, info2;
+
+ ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ EXPECT_EQ(ret, 0);
+
+ pid_leader = create_child(&pidfd_leader, 0);
+ EXPECT_GE(pid_leader, 0);
+
+ if (pid_leader == 0) {
+ close(ipc_sockets[0]);
+
+ /* The thread will outlive the thread-group leader. */
+ if (pthread_create(&thread, NULL, pidfd_info_pause_thread, &ipc_sockets[1]))
+ syscall(__NR_exit, EXIT_FAILURE);
+
+ /* Make the thread-group leader exit prematurely. */
+ syscall(__NR_exit, EXIT_SUCCESS);
+ }
+
+ /*
+ * Opening a PIDFD_THREAD aka thread-specific pidfd based on a
+ * thread-group leader must succeed.
+ */
+ pidfd_leader_thread = sys_pidfd_open(pid_leader, PIDFD_THREAD);
+ ASSERT_GE(pidfd_leader_thread, 0);
+
+ pid_poller = fork();
+ ASSERT_GE(pid_poller, 0);
+ if (pid_poller == 0) {
+ /*
+ * We can't poll and wait for the old thread-group
+ * leader to exit using a thread-specific pidfd. The
+ * thread-group leader exited prematurely and
+ * notification is delayed until all subthreads have
+ * exited.
+ */
+ fds.events = POLLIN;
+ fds.fd = pidfd_leader_thread;
+ nevents = poll(&fds, 1, 10000 /* wait 5 seconds */);
+ if (nevents != 0)
+ _exit(EXIT_FAILURE);
+ if (fds.revents & POLLIN)
+ _exit(EXIT_FAILURE);
+ if (fds.revents & POLLHUP)
+ _exit(EXIT_FAILURE);
+ _exit(EXIT_SUCCESS);
+ }
+
+ /* Retrieve the tid of the thread. */
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &pid_thread, sizeof(pid_thread)), sizeof(pid_thread));
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ /* Opening a thread as a thread-group leader must fail. */
+ pidfd_thread = sys_pidfd_open(pid_thread, 0);
+ ASSERT_LT(pidfd_thread, 0);
+
+ /* Opening a thread as a PIDFD_THREAD must succeed. */
+ pidfd_thread = sys_pidfd_open(pid_thread, PIDFD_THREAD);
+ ASSERT_GE(pidfd_thread, 0);
+
+ ASSERT_EQ(wait_for_pid(pid_poller), 0);
+
+ /*
+ * Note that pidfd_leader is a thread-group pidfd, so polling on it
+ * would only notify us once all thread in the thread-group have
+ * exited. So we can't poll before we have taken down the whole
+ * thread-group.
+ */
+
+ /* Get PIDFD_GET_INFO using the thread-group leader pidfd. */
+ ASSERT_EQ(ioctl(pidfd_leader, PIDFD_GET_INFO, &info), 0);
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_CREDS));
+ /* Process has exited but not been reaped, so no PIDFD_INFO_EXIT information yet. */
+ ASSERT_FALSE(!!(info.mask & PIDFD_INFO_EXIT));
+ ASSERT_EQ(info.pid, pid_leader);
+
+ /*
+ * Now retrieve the same info using the thread specific pidfd
+ * for the thread-group leader.
+ */
+ info2.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
+ ASSERT_EQ(ioctl(pidfd_leader_thread, PIDFD_GET_INFO, &info2), 0);
+ ASSERT_TRUE(!!(info2.mask & PIDFD_INFO_CREDS));
+ /* Process has exited but not been reaped, so no PIDFD_INFO_EXIT information yet. */
+ ASSERT_FALSE(!!(info2.mask & PIDFD_INFO_EXIT));
+ ASSERT_EQ(info2.pid, pid_leader);
+
+ /* Now try the thread-specific pidfd. */
+ ASSERT_EQ(ioctl(pidfd_thread, PIDFD_GET_INFO, &info), 0);
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_CREDS));
+ /* The thread hasn't exited, so no PIDFD_INFO_EXIT information yet. */
+ ASSERT_FALSE(!!(info.mask & PIDFD_INFO_EXIT));
+ ASSERT_EQ(info.pid, pid_thread);
+
+ /*
+ * Take down the whole thread-group. The thread-group leader
+ * exited successfully but the thread will now be SIGKILLed.
+ * This must be reflected in the recorded exit information.
+ */
+ EXPECT_EQ(sys_pidfd_send_signal(pidfd_leader, SIGKILL, NULL, 0), 0);
+ EXPECT_EQ(sys_waitid(P_PIDFD, pidfd_leader, NULL, WEXITED), 0);
+
+ fds.events = POLLIN;
+ fds.fd = pidfd_leader;
+ nevents = poll(&fds, 1, -1);
+ ASSERT_EQ(nevents, 1);
+ ASSERT_TRUE(!!(fds.revents & POLLIN));
+ /* The thread-group leader has been reaped. */
+ ASSERT_TRUE(!!(fds.revents & POLLHUP));
+
+ /*
+ * Retrieve exit information for the thread-group leader via the
+ * thread-group leader pidfd.
+ */
+ info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
+ ASSERT_EQ(ioctl(pidfd_leader, PIDFD_GET_INFO, &info), 0);
+ ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT));
+ /* The thread-group leader exited successfully. Only the specific thread was SIGKILLed. */
+ ASSERT_TRUE(WIFEXITED(info.exit_code));
+ ASSERT_EQ(WEXITSTATUS(info.exit_code), 0);
+
+ /*
+ * Retrieve exit information for the thread-group leader via the
+ * thread-specific pidfd.
+ */
+ info2.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
+ ASSERT_EQ(ioctl(pidfd_leader_thread, PIDFD_GET_INFO, &info2), 0);
+ ASSERT_FALSE(!!(info2.mask & PIDFD_INFO_CREDS));
+ ASSERT_TRUE(!!(info2.mask & PIDFD_INFO_EXIT));
+
+ /* The thread-group leader exited successfully. Only the specific thread was SIGKILLed. */
+ ASSERT_TRUE(WIFEXITED(info2.exit_code));
+ ASSERT_EQ(WEXITSTATUS(info2.exit_code), 0);
+
+ /* Retrieve exit information for the thread. */
+ info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
+ ASSERT_EQ(ioctl(pidfd_thread, PIDFD_GET_INFO, &info), 0);
+ ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT));
+
+ /* The thread got SIGKILLed. */
+ ASSERT_TRUE(WIFSIGNALED(info.exit_code));
+ ASSERT_EQ(WTERMSIG(info.exit_code), SIGKILL);
+
+ EXPECT_EQ(close(pidfd_leader), 0);
+ EXPECT_EQ(close(pidfd_thread), 0);
+}
+
+static void *pidfd_info_thread_exec(void *arg)
+{
+ pid_t pid_thread = gettid();
+ int ipc_socket = *(int *)arg;
+
+ /* Inform the grand-parent what the tid of this thread is. */
+ if (write_nointr(ipc_socket, &pid_thread, sizeof(pid_thread)) != sizeof(pid_thread))
+ return NULL;
+
+ if (read_nointr(ipc_socket, &pid_thread, sizeof(pid_thread)) != sizeof(pid_thread))
+ return NULL;
+
+ close(ipc_socket);
+
+ sys_execveat(AT_FDCWD, "pidfd_exec_helper", NULL, NULL, 0);
+ return NULL;
+}
+
+TEST_F(pidfd_info, thread_group_exec)
+{
+ pid_t pid_leader, pid_poller, pid_thread;
+ pthread_t thread;
+ int nevents, pidfd_leader, pidfd_leader_thread, pidfd_thread, ret;
+ int ipc_sockets[2];
+ struct pollfd fds = {};
+ struct pidfd_info info = {
+ .mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT,
+ };
+
+ ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ EXPECT_EQ(ret, 0);
+
+ pid_leader = create_child(&pidfd_leader, 0);
+ EXPECT_GE(pid_leader, 0);
+
+ if (pid_leader == 0) {
+ close(ipc_sockets[0]);
+
+ /* The thread will outlive the thread-group leader. */
+ if (pthread_create(&thread, NULL, pidfd_info_thread_exec, &ipc_sockets[1]))
+ syscall(__NR_exit, EXIT_FAILURE);
+
+ /* Make the thread-group leader exit prematurely. */
+ syscall(__NR_exit, EXIT_SUCCESS);
+ }
+
+ /* Open a thread-specific pidfd for the thread-group leader. */
+ pidfd_leader_thread = sys_pidfd_open(pid_leader, PIDFD_THREAD);
+ ASSERT_GE(pidfd_leader_thread, 0);
+
+ pid_poller = fork();
+ ASSERT_GE(pid_poller, 0);
+ if (pid_poller == 0) {
+ /*
+ * We can't poll and wait for the old thread-group
+ * leader to exit using a thread-specific pidfd. The
+ * thread-group leader exited prematurely and
+ * notification is delayed until all subthreads have
+ * exited.
+ *
+ * When the thread has execed it will taken over the old
+ * thread-group leaders struct pid. Calling poll after
+ * the thread execed will thus block again because a new
+ * thread-group has started.
+ */
+ fds.events = POLLIN;
+ fds.fd = pidfd_leader_thread;
+ nevents = poll(&fds, 1, 10000 /* wait 5 seconds */);
+ if (nevents != 0)
+ _exit(EXIT_FAILURE);
+ if (fds.revents & POLLIN)
+ _exit(EXIT_FAILURE);
+ if (fds.revents & POLLHUP)
+ _exit(EXIT_FAILURE);
+ _exit(EXIT_SUCCESS);
+ }
+
+ /* Retrieve the tid of the thread. */
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &pid_thread, sizeof(pid_thread)), sizeof(pid_thread));
+
+ /* Opening a thread as a PIDFD_THREAD must succeed. */
+ pidfd_thread = sys_pidfd_open(pid_thread, PIDFD_THREAD);
+ ASSERT_GE(pidfd_thread, 0);
+
+ /* Now that we've opened a thread-specific pidfd the thread can exec. */
+ ASSERT_EQ(write_nointr(ipc_sockets[0], &pid_thread, sizeof(pid_thread)), sizeof(pid_thread));
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ ASSERT_EQ(wait_for_pid(pid_poller), 0);
+
+ /* Wait until the kernel has SIGKILLed the thread. */
+ fds.events = POLLHUP;
+ fds.fd = pidfd_thread;
+ nevents = poll(&fds, 1, -1);
+ ASSERT_EQ(nevents, 1);
+ /* The thread has been reaped. */
+ ASSERT_TRUE(!!(fds.revents & POLLHUP));
+
+ /* Retrieve thread-specific exit info from pidfd. */
+ ASSERT_EQ(ioctl(pidfd_thread, PIDFD_GET_INFO, &info), 0);
+ ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT));
+ /*
+ * While the kernel will have SIGKILLed the whole thread-group
+ * during exec it will cause the individual threads to exit
+ * cleanly.
+ */
+ ASSERT_TRUE(WIFEXITED(info.exit_code));
+ ASSERT_EQ(WEXITSTATUS(info.exit_code), 0);
+
+ /*
+ * The thread-group leader is still alive, the thread has taken
+ * over its struct pid and thus its pid number.
+ */
+ info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
+ ASSERT_EQ(ioctl(pidfd_leader, PIDFD_GET_INFO, &info), 0);
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_CREDS));
+ ASSERT_FALSE(!!(info.mask & PIDFD_INFO_EXIT));
+ ASSERT_EQ(info.pid, pid_leader);
+
+ /* Take down the thread-group leader. */
+ EXPECT_EQ(sys_pidfd_send_signal(pidfd_leader, SIGKILL, NULL, 0), 0);
+
+ /*
+ * Afte the exec we're dealing with an empty thread-group so now
+ * we must see an exit notification on the thread-specific pidfd
+ * for the thread-group leader as there's no subthread that can
+ * revive the struct pid.
+ */
+ fds.events = POLLIN;
+ fds.fd = pidfd_leader_thread;
+ nevents = poll(&fds, 1, -1);
+ ASSERT_EQ(nevents, 1);
+ ASSERT_TRUE(!!(fds.revents & POLLIN));
+ ASSERT_FALSE(!!(fds.revents & POLLHUP));
+
+ EXPECT_EQ(sys_waitid(P_PIDFD, pidfd_leader, NULL, WEXITED), 0);
+
+ /* Retrieve exit information for the thread-group leader. */
+ info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
+ ASSERT_EQ(ioctl(pidfd_leader, PIDFD_GET_INFO, &info), 0);
+ ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT));
+
+ EXPECT_EQ(close(pidfd_leader), 0);
+ EXPECT_EQ(close(pidfd_thread), 0);
+}
+
+static void *pidfd_info_thread_exec_sane(void *arg)
+{
+ pid_t pid_thread = gettid();
+ int ipc_socket = *(int *)arg;
+
+ /* Inform the grand-parent what the tid of this thread is. */
+ if (write_nointr(ipc_socket, &pid_thread, sizeof(pid_thread)) != sizeof(pid_thread))
+ return NULL;
+
+ if (read_nointr(ipc_socket, &pid_thread, sizeof(pid_thread)) != sizeof(pid_thread))
+ return NULL;
+
+ close(ipc_socket);
+
+ sys_execveat(AT_FDCWD, "pidfd_exec_helper", NULL, NULL, 0);
+ return NULL;
+}
+
+TEST_F(pidfd_info, thread_group_exec_thread)
+{
+ pid_t pid_leader, pid_poller, pid_thread;
+ pthread_t thread;
+ int nevents, pidfd_leader, pidfd_leader_thread, pidfd_thread, ret;
+ int ipc_sockets[2];
+ struct pollfd fds = {};
+ struct pidfd_info info = {
+ .mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT,
+ };
+
+ ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ EXPECT_EQ(ret, 0);
+
+ pid_leader = create_child(&pidfd_leader, 0);
+ EXPECT_GE(pid_leader, 0);
+
+ if (pid_leader == 0) {
+ close(ipc_sockets[0]);
+
+ /* The thread will outlive the thread-group leader. */
+ if (pthread_create(&thread, NULL, pidfd_info_thread_exec_sane, &ipc_sockets[1]))
+ syscall(__NR_exit, EXIT_FAILURE);
+
+ /*
+ * Pause the thread-group leader. It will be killed once
+ * the subthread execs.
+ */
+ pause();
+ syscall(__NR_exit, EXIT_SUCCESS);
+ }
+
+ /* Retrieve the tid of the thread. */
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &pid_thread, sizeof(pid_thread)), sizeof(pid_thread));
+
+ /* Opening a thread as a PIDFD_THREAD must succeed. */
+ pidfd_thread = sys_pidfd_open(pid_thread, PIDFD_THREAD);
+ ASSERT_GE(pidfd_thread, 0);
+
+ /* Open a thread-specific pidfd for the thread-group leader. */
+ pidfd_leader_thread = sys_pidfd_open(pid_leader, PIDFD_THREAD);
+ ASSERT_GE(pidfd_leader_thread, 0);
+
+ pid_poller = fork();
+ ASSERT_GE(pid_poller, 0);
+ if (pid_poller == 0) {
+ /*
+ * The subthread will now exec. The struct pid of the old
+ * thread-group leader will be assumed by the subthread which
+ * becomes the new thread-group leader. So no exit notification
+ * must be generated. Wait for 5 seconds and call it a success
+ * if no notification has been received.
+ */
+ fds.events = POLLIN;
+ fds.fd = pidfd_leader_thread;
+ nevents = poll(&fds, 1, 10000 /* wait 5 seconds */);
+ if (nevents != 0)
+ _exit(EXIT_FAILURE);
+ if (fds.revents & POLLIN)
+ _exit(EXIT_FAILURE);
+ if (fds.revents & POLLHUP)
+ _exit(EXIT_FAILURE);
+ _exit(EXIT_SUCCESS);
+ }
+
+ /* Now that we've opened a thread-specific pidfd the thread can exec. */
+ ASSERT_EQ(write_nointr(ipc_sockets[0], &pid_thread, sizeof(pid_thread)), sizeof(pid_thread));
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+ ASSERT_EQ(wait_for_pid(pid_poller), 0);
+
+ /* Wait until the kernel has SIGKILLed the thread. */
+ fds.events = POLLHUP;
+ fds.fd = pidfd_thread;
+ nevents = poll(&fds, 1, -1);
+ ASSERT_EQ(nevents, 1);
+ /* The thread has been reaped. */
+ ASSERT_TRUE(!!(fds.revents & POLLHUP));
+
+ /* Retrieve thread-specific exit info from pidfd. */
+ ASSERT_EQ(ioctl(pidfd_thread, PIDFD_GET_INFO, &info), 0);
+ ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT));
+ /*
+ * While the kernel will have SIGKILLed the whole thread-group
+ * during exec it will cause the individual threads to exit
+ * cleanly.
+ */
+ ASSERT_TRUE(WIFEXITED(info.exit_code));
+ ASSERT_EQ(WEXITSTATUS(info.exit_code), 0);
+
+ /*
+ * The thread-group leader is still alive, the thread has taken
+ * over its struct pid and thus its pid number.
+ */
+ info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
+ ASSERT_EQ(ioctl(pidfd_leader, PIDFD_GET_INFO, &info), 0);
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_CREDS));
+ ASSERT_FALSE(!!(info.mask & PIDFD_INFO_EXIT));
+ ASSERT_EQ(info.pid, pid_leader);
+
+ /* Take down the thread-group leader. */
+ EXPECT_EQ(sys_pidfd_send_signal(pidfd_leader, SIGKILL, NULL, 0), 0);
+
+ /*
+ * Afte the exec we're dealing with an empty thread-group so now
+ * we must see an exit notification on the thread-specific pidfd
+ * for the thread-group leader as there's no subthread that can
+ * revive the struct pid.
+ */
+ fds.events = POLLIN;
+ fds.fd = pidfd_leader_thread;
+ nevents = poll(&fds, 1, -1);
+ ASSERT_EQ(nevents, 1);
+ ASSERT_TRUE(!!(fds.revents & POLLIN));
+ ASSERT_FALSE(!!(fds.revents & POLLHUP));
+
+ EXPECT_EQ(sys_waitid(P_PIDFD, pidfd_leader, NULL, WEXITED), 0);
+
+ /* Retrieve exit information for the thread-group leader. */
+ info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
+ ASSERT_EQ(ioctl(pidfd_leader, PIDFD_GET_INFO, &info), 0);
+ ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT));
+
+ EXPECT_EQ(close(pidfd_leader), 0);
+ EXPECT_EQ(close(pidfd_thread), 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/pidfd/pidfd_open_test.c b/tools/testing/selftests/pidfd/pidfd_open_test.c
index ce413a221bac..cd3de40e4977 100644
--- a/tools/testing/selftests/pidfd/pidfd_open_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_open_test.c
@@ -22,32 +22,6 @@
#include "pidfd.h"
#include "../kselftest.h"
-#ifndef PIDFS_IOCTL_MAGIC
-#define PIDFS_IOCTL_MAGIC 0xFF
-#endif
-
-#ifndef PIDFD_GET_INFO
-#define PIDFD_GET_INFO _IOWR(PIDFS_IOCTL_MAGIC, 11, struct pidfd_info)
-#define PIDFD_INFO_CGROUPID (1UL << 0)
-
-struct pidfd_info {
- __u64 request_mask;
- __u64 cgroupid;
- __u32 pid;
- __u32 tgid;
- __u32 ppid;
- __u32 ruid;
- __u32 rgid;
- __u32 euid;
- __u32 egid;
- __u32 suid;
- __u32 sgid;
- __u32 fsuid;
- __u32 fsgid;
- __u32 spare0[1];
-};
-#endif
-
static int safe_int(const char *numstr, int *converted)
{
char *err = NULL;
@@ -148,7 +122,7 @@ out:
int main(int argc, char **argv)
{
struct pidfd_info info = {
- .request_mask = PIDFD_INFO_CGROUPID,
+ .mask = PIDFD_INFO_CGROUPID,
};
int pidfd = -1, ret = 1;
pid_t pid;
@@ -227,7 +201,7 @@ int main(int argc, char **argv)
getegid(), info.sgid);
goto on_error;
}
- if ((info.request_mask & PIDFD_INFO_CGROUPID) && info.cgroupid == 0) {
+ if ((info.mask & PIDFD_INFO_CGROUPID) && info.cgroupid == 0) {
ksft_print_msg("cgroupid should not be 0 when PIDFD_INFO_CGROUPID is set\n");
goto on_error;
}
diff --git a/tools/testing/selftests/pidfd/pidfd_setns_test.c b/tools/testing/selftests/pidfd/pidfd_setns_test.c
index 222f8131283b..e6a079b3d5e2 100644
--- a/tools/testing/selftests/pidfd/pidfd_setns_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_setns_test.c
@@ -16,55 +16,10 @@
#include <unistd.h>
#include <sys/socket.h>
#include <sys/stat.h>
-#include <linux/ioctl.h>
#include "pidfd.h"
#include "../kselftest_harness.h"
-#ifndef PIDFS_IOCTL_MAGIC
-#define PIDFS_IOCTL_MAGIC 0xFF
-#endif
-
-#ifndef PIDFD_GET_CGROUP_NAMESPACE
-#define PIDFD_GET_CGROUP_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 1)
-#endif
-
-#ifndef PIDFD_GET_IPC_NAMESPACE
-#define PIDFD_GET_IPC_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 2)
-#endif
-
-#ifndef PIDFD_GET_MNT_NAMESPACE
-#define PIDFD_GET_MNT_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 3)
-#endif
-
-#ifndef PIDFD_GET_NET_NAMESPACE
-#define PIDFD_GET_NET_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 4)
-#endif
-
-#ifndef PIDFD_GET_PID_NAMESPACE
-#define PIDFD_GET_PID_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 5)
-#endif
-
-#ifndef PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE
-#define PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 6)
-#endif
-
-#ifndef PIDFD_GET_TIME_NAMESPACE
-#define PIDFD_GET_TIME_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 7)
-#endif
-
-#ifndef PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE
-#define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8)
-#endif
-
-#ifndef PIDFD_GET_USER_NAMESPACE
-#define PIDFD_GET_USER_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 9)
-#endif
-
-#ifndef PIDFD_GET_UTS_NAMESPACE
-#define PIDFD_GET_UTS_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 10)
-#endif
-
enum {
PIDFD_NS_USER,
PIDFD_NS_MNT,
diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c
index 9faa686f90e4..fcd85cad9f18 100644
--- a/tools/testing/selftests/pidfd/pidfd_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_test.c
@@ -42,12 +42,41 @@ static pid_t pidfd_clone(int flags, int *pidfd, int (*fn)(void *))
#endif
}
-static int signal_received;
+static pthread_t signal_received;
static void set_signal_received_on_sigusr1(int sig)
{
if (sig == SIGUSR1)
- signal_received = 1;
+ signal_received = pthread_self();
+}
+
+static int send_signal(int pidfd)
+{
+ int ret = 0;
+
+ if (sys_pidfd_send_signal(pidfd, SIGUSR1, NULL, 0) < 0) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ if (signal_received != pthread_self()) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+exit:
+ signal_received = 0;
+ return ret;
+}
+
+static void *send_signal_worker(void *arg)
+{
+ int pidfd = (int)(intptr_t)arg;
+ int ret;
+
+ /* We forward any errors for the caller to handle. */
+ ret = send_signal(pidfd);
+ return (void *)(intptr_t)ret;
}
/*
@@ -56,8 +85,11 @@ static void set_signal_received_on_sigusr1(int sig)
*/
static int test_pidfd_send_signal_simple_success(void)
{
- int pidfd, ret;
+ int pidfd;
const char *test_name = "pidfd_send_signal send SIGUSR1";
+ pthread_t thread;
+ void *thread_res;
+ int err;
if (!have_pidfd_send_signal) {
ksft_test_result_skip(
@@ -66,25 +98,45 @@ static int test_pidfd_send_signal_simple_success(void)
return 0;
}
+ signal(SIGUSR1, set_signal_received_on_sigusr1);
+
+ /* Try sending a signal to ourselves via /proc/self. */
pidfd = open("/proc/self", O_DIRECTORY | O_CLOEXEC);
if (pidfd < 0)
ksft_exit_fail_msg(
"%s test: Failed to open process file descriptor\n",
test_name);
+ err = send_signal(pidfd);
+ if (err)
+ ksft_exit_fail_msg(
+ "%s test: Error %d on sending pidfd signal\n",
+ test_name, err);
+ close(pidfd);
- signal(SIGUSR1, set_signal_received_on_sigusr1);
+ /* Now try the same thing only using PIDFD_SELF_THREAD_GROUP. */
+ err = send_signal(PIDFD_SELF_THREAD_GROUP);
+ if (err)
+ ksft_exit_fail_msg(
+ "%s test: Error %d on PIDFD_SELF_THREAD_GROUP signal\n",
+ test_name, err);
- ret = sys_pidfd_send_signal(pidfd, SIGUSR1, NULL, 0);
- close(pidfd);
- if (ret < 0)
- ksft_exit_fail_msg("%s test: Failed to send signal\n",
+ /*
+ * Now try the same thing in a thread and assert thread ID is equal to
+ * worker thread ID.
+ */
+ if (pthread_create(&thread, NULL, send_signal_worker,
+ (void *)(intptr_t)PIDFD_SELF_THREAD))
+ ksft_exit_fail_msg("%s test: Failed to create thread\n",
test_name);
-
- if (signal_received != 1)
- ksft_exit_fail_msg("%s test: Failed to receive signal\n",
+ if (pthread_join(thread, &thread_res))
+ ksft_exit_fail_msg("%s test: Failed to join thread\n",
test_name);
+ err = (int)(intptr_t)thread_res;
+ if (err)
+ ksft_exit_fail_msg(
+ "%s test: Error %d on PIDFD_SELF_THREAD signal\n",
+ test_name, err);
- signal_received = 0;
ksft_test_result_pass("%s test: Sent signal\n", test_name);
return 0;
}
@@ -497,7 +549,7 @@ static int child_poll_leader_exit_test(void *args)
pthread_create(&t2, NULL, test_pidfd_poll_leader_exit_thread, NULL);
/*
- * glibc exit calls exit_group syscall, so explicity call exit only
+ * glibc exit calls exit_group syscall, so explicitly call exit only
* so that only the group leader exits, leaving the threads alone.
*/
*child_exit_secs = time(NULL);
diff --git a/tools/testing/selftests/powerpc/include/pkeys.h b/tools/testing/selftests/powerpc/include/pkeys.h
index 3a0129467de6..d6deb6ffa1b9 100644
--- a/tools/testing/selftests/powerpc/include/pkeys.h
+++ b/tools/testing/selftests/powerpc/include/pkeys.h
@@ -24,6 +24,9 @@
#undef PKEY_DISABLE_EXECUTE
#define PKEY_DISABLE_EXECUTE 0x4
+#undef PKEY_UNRESTRICTED
+#define PKEY_UNRESTRICTED 0x0
+
/* Older versions of libc do not define this */
#ifndef SEGV_PKUERR
#define SEGV_PKUERR 4
@@ -93,7 +96,7 @@ int pkeys_unsupported(void)
SKIP_IF(!hash_mmu);
/* Check if the system call is supported */
- pkey = sys_pkey_alloc(0, 0);
+ pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
SKIP_IF(pkey < 0);
sys_pkey_free(pkey);
diff --git a/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c b/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c
index 0af4f02669a1..29b91b7456eb 100644
--- a/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c
+++ b/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c
@@ -72,7 +72,7 @@ static void segv_handler(int signum, siginfo_t *sinfo, void *ctx)
switch (fault_type) {
case PKEY_DISABLE_ACCESS:
- pkey_set_rights(fault_pkey, 0);
+ pkey_set_rights(fault_pkey, PKEY_UNRESTRICTED);
break;
case PKEY_DISABLE_EXECUTE:
/*
diff --git a/tools/testing/selftests/powerpc/mm/pkey_siginfo.c b/tools/testing/selftests/powerpc/mm/pkey_siginfo.c
index 2db76e56d4cb..e89a164c686b 100644
--- a/tools/testing/selftests/powerpc/mm/pkey_siginfo.c
+++ b/tools/testing/selftests/powerpc/mm/pkey_siginfo.c
@@ -83,7 +83,7 @@ static void segv_handler(int signum, siginfo_t *sinfo, void *ctx)
mprotect(pgstart, pgsize, PROT_EXEC))
_exit(1);
else
- pkey_set_rights(pkey, 0);
+ pkey_set_rights(pkey, PKEY_UNRESTRICTED);
fault_count++;
}
diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/event_alternatives_tests_p10.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/event_alternatives_tests_p10.c
index 8be7aada6523..355f8bbe06c3 100644
--- a/tools/testing/selftests/powerpc/pmu/event_code_tests/event_alternatives_tests_p10.c
+++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/event_alternatives_tests_p10.c
@@ -26,6 +26,7 @@ static int event_alternatives_tests_p10(void)
{
struct event *e, events[5];
int i;
+ int pvr = PVR_VER(mfspr(SPRN_PVR));
/* Check for platform support for the test */
SKIP_IF(platform_check_for_tests());
@@ -36,7 +37,7 @@ static int event_alternatives_tests_p10(void)
* code and using PVR will work correctly for all cases
* including generic compat mode.
*/
- SKIP_IF(PVR_VER(mfspr(SPRN_PVR)) != POWER10);
+ SKIP_IF((pvr != POWER10) && (pvr != POWER11));
SKIP_IF(check_for_generic_compat_pmu());
diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/generic_events_valid_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/generic_events_valid_test.c
index 0d237c15d3f2..a378fa9a5a7b 100644
--- a/tools/testing/selftests/powerpc/pmu/event_code_tests/generic_events_valid_test.c
+++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/generic_events_valid_test.c
@@ -17,6 +17,7 @@
static int generic_events_valid_test(void)
{
struct event event;
+ int pvr = mfspr(SPRN_PVR);
/* Check for platform support for the test */
SKIP_IF(platform_check_for_tests());
@@ -31,7 +32,7 @@ static int generic_events_valid_test(void)
* - PERF_COUNT_HW_STALLED_CYCLES_BACKEND
* - PERF_COUNT_HW_REF_CPU_CYCLES
*/
- if (PVR_VER(mfspr(SPRN_PVR)) == POWER10) {
+ if ((pvr == POWER10) || (pvr == POWER11)) {
event_init_opts(&event, PERF_COUNT_HW_CPU_CYCLES, PERF_TYPE_HARDWARE, "event");
FAIL_IF(event_open(&event));
event_close(&event);
diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_l2l3_sel_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_l2l3_sel_test.c
index 85a636886069..e3c7a0c071e2 100644
--- a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_l2l3_sel_test.c
+++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_l2l3_sel_test.c
@@ -30,7 +30,7 @@ static int group_constraint_l2l3_sel(void)
/*
* Check for platform support for the test.
- * This test is only aplicable on power10
+ * This test is only aplicable on ISA v3.1
*/
SKIP_IF(platform_check_for_tests());
SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_radix_scope_qual_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_radix_scope_qual_test.c
index 9225618b846a..9233175787cc 100644
--- a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_radix_scope_qual_test.c
+++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_radix_scope_qual_test.c
@@ -26,7 +26,7 @@ static int group_constraint_radix_scope_qual(void)
/*
* Check for platform support for the test.
- * This test is aplicable on power10 only.
+ * This test is aplicable on ISA v3.1 only.
*/
SKIP_IF(platform_check_for_tests());
SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_cmp_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_cmp_test.c
index 9f1197104e8c..4b69e7214c0b 100644
--- a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_cmp_test.c
+++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_cmp_test.c
@@ -25,7 +25,7 @@
/*
* Testcase for group constraint check of thresh_cmp bits which is
* used to program thresh compare field in Monitor Mode Control Register A
- * (MMCRA: 9-18 bits for power9 and MMCRA: 8-18 bits for power10).
+ * (MMCRA: 9-18 bits for power9 and MMCRA: 8-18 bits for power10/power11).
* All events in the group should match thresh compare bits otherwise
* event_open for the group will fail.
*/
diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/invalid_event_code_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/invalid_event_code_test.c
index f51fcab837fc..88aa7fd64ec1 100644
--- a/tools/testing/selftests/powerpc/pmu/event_code_tests/invalid_event_code_test.c
+++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/invalid_event_code_test.c
@@ -20,7 +20,7 @@
* Some of the bits in the event code is
* reserved for specific platforms.
* Event code bits 52-59 are reserved in power9,
- * whereas in power10, these are used for programming
+ * whereas in ISA v3.1, these are used for programming
* Monitor Mode Control Register 3 (MMCR3).
* Bit 9 in event code is reserved in power9,
* whereas it is used for programming "radix_scope_qual"
@@ -39,7 +39,7 @@ static int invalid_event_code(void)
/*
* Events using MMCR3 bits and radix scope qual bits
- * should fail in power9 and should succeed in power10.
+ * should fail in power9 and should succeed in power10 ( ISA v3.1 )
* Init the events and check for pass/fail in event open.
*/
if (have_hwcap2(PPC_FEATURE2_ARCH_3_1)) {
diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/reserved_bits_mmcra_sample_elig_mode_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/reserved_bits_mmcra_sample_elig_mode_test.c
index 4c119c821b99..757f454c0dc0 100644
--- a/tools/testing/selftests/powerpc/pmu/event_code_tests/reserved_bits_mmcra_sample_elig_mode_test.c
+++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/reserved_bits_mmcra_sample_elig_mode_test.c
@@ -21,6 +21,7 @@
static int reserved_bits_mmcra_sample_elig_mode(void)
{
struct event event;
+ int pvr = PVR_VER(mfspr(SPRN_PVR));
/* Check for platform support for the test */
SKIP_IF(platform_check_for_tests());
@@ -56,10 +57,10 @@ static int reserved_bits_mmcra_sample_elig_mode(void)
/*
* MMCRA Random Sampling Mode (SM) value 0x10
- * is reserved in power10 and 0xC is reserved in
+ * is reserved in power10/power11 and 0xC is reserved in
* power9.
*/
- if (PVR_VER(mfspr(SPRN_PVR)) == POWER10) {
+ if ((pvr == POWER10) || (pvr == POWER11)) {
event_init(&event, 0x100401e0);
FAIL_IF(!event_open(&event));
} else if (PVR_VER(mfspr(SPRN_PVR)) == POWER9) {
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
index 9f79bec5fce7..0c4ed299c3b8 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
@@ -5,7 +5,8 @@ TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test
mmcr3_src_test mmcra_thresh_marked_sample_test mmcra_thresh_cmp_test \
mmcra_bhrb_ind_call_test mmcra_bhrb_any_test mmcra_bhrb_cond_test \
mmcra_bhrb_disable_test bhrb_no_crash_wo_pmu_test intr_regs_no_crash_wo_pmu_test \
- bhrb_filter_map_test mmcr1_sel_unit_cache_test mmcra_bhrb_disable_no_branch_test
+ bhrb_filter_map_test mmcr1_sel_unit_cache_test mmcra_bhrb_disable_no_branch_test \
+ check_extended_reg_test
top_srcdir = ../../../../../..
include ../../../lib.mk
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/bhrb_filter_map_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/bhrb_filter_map_test.c
index 3f43c315c666..3ad71d49ae65 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/bhrb_filter_map_test.c
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/bhrb_filter_map_test.c
@@ -14,7 +14,7 @@
* A perf sampling test to check bhrb filter
* map. All the branch filters are not supported
* in powerpc. Supported filters in:
- * power10: any, any_call, ind_call, cond
+ * power10/power11: any, any_call, ind_call, cond
* power9: any, any_call
*
* Testcase checks event open for invalid bhrb filter
@@ -24,13 +24,13 @@
*/
/* Invalid types for powerpc */
-/* Valid bhrb filters in power9/power10 */
+/* Valid bhrb filters in power9/power10/power11 */
int bhrb_filter_map_valid_common[] = {
PERF_SAMPLE_BRANCH_ANY,
PERF_SAMPLE_BRANCH_ANY_CALL,
};
-/* Valid bhrb filters in power10 */
+/* Valid bhrb filters in power10/power11 */
int bhrb_filter_map_valid_p10[] = {
PERF_SAMPLE_BRANCH_IND_CALL,
PERF_SAMPLE_BRANCH_COND,
@@ -69,7 +69,7 @@ static int bhrb_filter_map_test(void)
FAIL_IF(!event_open(&event));
}
- /* valid filter maps for power9/power10 which are expected to pass in event_open */
+ /* valid filter maps for power9/power10/power11 which are expected to pass in event_open */
for (i = 0; i < ARRAY_SIZE(bhrb_filter_map_valid_common); i++) {
event.attr.branch_sample_type = bhrb_filter_map_valid_common[i];
FAIL_IF(event_open(&event));
@@ -77,19 +77,22 @@ static int bhrb_filter_map_test(void)
}
/*
- * filter maps which are valid in power10 and invalid in power9.
+ * filter maps which are valid in power10/power11 and invalid in power9.
* PVR check is used here since PMU specific data like bhrb filter
* alternative tests is handled by respective PMU driver code and
* using PVR will work correctly for all cases including generic
* compat mode.
*/
- if (PVR_VER(mfspr(SPRN_PVR)) == POWER10) {
+ switch (PVR_VER(mfspr(SPRN_PVR))) {
+ case POWER11:
+ case POWER10:
for (i = 0; i < ARRAY_SIZE(bhrb_filter_map_valid_p10); i++) {
event.attr.branch_sample_type = bhrb_filter_map_valid_p10[i];
FAIL_IF(event_open(&event));
event_close(&event);
}
- } else {
+ break;
+ default:
for (i = 0; i < ARRAY_SIZE(bhrb_filter_map_valid_p10); i++) {
event.attr.branch_sample_type = bhrb_filter_map_valid_p10[i];
FAIL_IF(!event_open(&event));
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/check_extended_reg_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/check_extended_reg_test.c
new file mode 100644
index 000000000000..865bc69f920c
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/check_extended_reg_test.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2024, Kajol Jain, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "misc.h"
+#include "utils.h"
+
+/*
+ * A perf sampling test to check extended
+ * reg support.
+ */
+static int check_extended_reg_test(void)
+{
+ /* Check for platform support for the test */
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00));
+
+ /* Skip for Generic compat PMU */
+ SKIP_IF(check_for_generic_compat_pmu());
+
+ /* Check if platform supports extended regs */
+ platform_extended_mask = perf_get_platform_reg_mask();
+ FAIL_IF(check_extended_regs_support());
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(check_extended_reg_test, "check_extended_reg_test");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c
index eac6420abdf1..8a538b6182a1 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c
@@ -59,6 +59,7 @@ static void init_ev_encodes(void)
ev_shift_thd_stop = 32;
switch (pvr) {
+ case POWER11:
case POWER10:
ev_mask_thd_cmp = 0x3ffff;
ev_shift_thd_cmp = 0;
@@ -91,7 +92,7 @@ static void init_ev_encodes(void)
}
/* Return the extended regs mask value */
-static u64 perf_get_platform_reg_mask(void)
+u64 perf_get_platform_reg_mask(void)
{
if (have_hwcap2(PPC_FEATURE2_ARCH_3_1))
return PERF_POWER10_MASK;
@@ -129,8 +130,14 @@ int platform_check_for_tests(void)
* Check for supported platforms
* for sampling test
*/
- if ((pvr != POWER10) && (pvr != POWER9))
+ switch (pvr) {
+ case POWER11:
+ case POWER10:
+ case POWER9:
+ break;
+ default:
goto out;
+ }
/*
* Check PMU driver registered by looking for
@@ -490,6 +497,13 @@ int get_thresh_cmp_val(struct event event)
* Utility function to check for generic compat PMU
* by comparing base_platform value from auxv and real
* PVR value.
+ * auxv_base_platform() func gives information of "base platform"
+ * corresponding to PVR value. Incase, if the distro doesn't
+ * support platform PVR (missing cputable support), base platform
+ * in auxv will have a default value other than the real PVR's.
+ * In this case, ISAv3 PMU (generic compat PMU) will be registered
+ * in the system. auxv_generic_compat_pmu() makes use of the base
+ * platform value from auxv to do this check.
*/
static bool auxv_generic_compat_pmu(void)
{
@@ -499,6 +513,8 @@ static bool auxv_generic_compat_pmu(void)
base_pvr = POWER9;
else if (!strcmp(auxv_base_platform(), "power10"))
base_pvr = POWER10;
+ else if (!strcmp(auxv_base_platform(), "power11"))
+ base_pvr = POWER11;
return (!base_pvr);
}
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h
index 64e25cce1435..357e9f0fc0f7 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h
@@ -8,10 +8,12 @@
#include <sys/stat.h>
#include "../event.h"
+#define POWER11 0x82
#define POWER10 0x80
#define POWER9 0x4e
#define PERF_POWER9_MASK 0x7f8ffffffffffff
#define PERF_POWER10_MASK 0x7ffffffffffffff
+#define PERF_POWER11_MASK PERF_POWER10_MASK
#define MMCR0_FC56 0x00000010UL /* freeze counters 5 and 6 */
#define MMCR0_PMCCEXT 0x00000200UL /* PMCCEXT control */
@@ -37,6 +39,8 @@ extern int pvr;
extern u64 platform_extended_mask;
extern int check_pvr_for_sampling_tests(void);
extern int platform_check_for_tests(void);
+extern int check_extended_regs_support(void);
+extern u64 perf_get_platform_reg_mask(void);
/*
* Event code field extraction macro.
@@ -165,21 +169,21 @@ static inline int get_mmcr2_fcta(u64 mmcr2, int pmc)
static inline int get_mmcr2_l2l3(u64 mmcr2, int pmc)
{
- if (pvr == POWER10)
+ if (have_hwcap2(PPC_FEATURE2_ARCH_3_1))
return ((mmcr2 & 0xf8) >> 3);
return 0;
}
static inline int get_mmcr3_src(u64 mmcr3, int pmc)
{
- if (pvr != POWER10)
+ if (!have_hwcap2(PPC_FEATURE2_ARCH_3_1))
return 0;
return ((mmcr3 >> ((49 - (15 * ((pmc) - 1))))) & 0x7fff);
}
static inline int get_mmcra_thd_cmp(u64 mmcra, int pmc)
{
- if (pvr == POWER10)
+ if (have_hwcap2(PPC_FEATURE2_ARCH_3_1))
return ((mmcra >> 45) & 0x7ff);
return ((mmcra >> 45) & 0x3ff);
}
@@ -191,7 +195,7 @@ static inline int get_mmcra_sm(u64 mmcra, int pmc)
static inline u64 get_mmcra_bhrb_disable(u64 mmcra, int pmc)
{
- if (pvr == POWER10)
+ if (have_hwcap2(PPC_FEATURE2_ARCH_3_1))
return mmcra & BHRB_DISABLE;
return 0;
}
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_cond_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_cond_test.c
index 3e08176eb7f8..809de8d58b3b 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_cond_test.c
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_cond_test.c
@@ -29,7 +29,7 @@ static int mmcra_bhrb_cond_test(void)
/*
* Check for platform support for the test.
- * This test is only aplicable on power10
+ * This test is only aplicable on ISA v3.1
*/
SKIP_IF(check_pvr_for_sampling_tests());
SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_no_branch_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_no_branch_test.c
index 488c865387e4..fa0dc15f9123 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_no_branch_test.c
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_no_branch_test.c
@@ -26,7 +26,7 @@ static int mmcra_bhrb_disable_no_branch_test(void)
/*
* Check for platform support for the test.
- * This test is only aplicable on power10
+ * This test is only aplicable on ISA v3.1
*/
SKIP_IF(check_pvr_for_sampling_tests());
SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_test.c
index 186a853c0f62..bc3161ab003d 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_test.c
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_test.c
@@ -26,7 +26,7 @@ static int mmcra_bhrb_disable_test(void)
/*
* Check for platform support for the test.
- * This test is only aplicable on power10
+ * This test is only aplicable on ISA v3.1
*/
SKIP_IF(check_pvr_for_sampling_tests());
SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_ind_call_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_ind_call_test.c
index f0706730c099..fd6c9f12212c 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_ind_call_test.c
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_ind_call_test.c
@@ -29,7 +29,7 @@ static int mmcra_bhrb_ind_call_test(void)
/*
* Check for platform support for the test.
- * This test is only aplicable on power10
+ * This test is only aplicable on ISA v3.1
*/
SKIP_IF(check_pvr_for_sampling_tests());
SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
index f061434af452..7ff53caeb4aa 100644
--- a/tools/testing/selftests/powerpc/ptrace/core-pkey.c
+++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
@@ -95,16 +95,16 @@ static int child(struct shared_info *info)
/* Get some pkeys so that we can change their bits in the AMR. */
pkey1 = sys_pkey_alloc(0, PKEY_DISABLE_EXECUTE);
if (pkey1 < 0) {
- pkey1 = sys_pkey_alloc(0, 0);
+ pkey1 = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
FAIL_IF(pkey1 < 0);
disable_execute = false;
}
- pkey2 = sys_pkey_alloc(0, 0);
+ pkey2 = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
FAIL_IF(pkey2 < 0);
- pkey3 = sys_pkey_alloc(0, 0);
+ pkey3 = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
FAIL_IF(pkey3 < 0);
info->amr |= 3ul << pkeyshift(pkey1) | 2ul << pkeyshift(pkey2);
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
index fc633014424f..10f63042cf91 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
@@ -57,16 +57,16 @@ static int child(struct shared_info *info)
/* Get some pkeys so that we can change their bits in the AMR. */
pkey1 = sys_pkey_alloc(0, PKEY_DISABLE_EXECUTE);
if (pkey1 < 0) {
- pkey1 = sys_pkey_alloc(0, 0);
+ pkey1 = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
CHILD_FAIL_IF(pkey1 < 0, &info->child_sync);
disable_execute = false;
}
- pkey2 = sys_pkey_alloc(0, 0);
+ pkey2 = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
CHILD_FAIL_IF(pkey2 < 0, &info->child_sync);
- pkey3 = sys_pkey_alloc(0, 0);
+ pkey3 = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
CHILD_FAIL_IF(pkey3 < 0, &info->child_sync);
info->amr1 |= 3ul << pkeyshift(pkey1);
diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c
index 58064151f2c8..edc08a4433fd 100644
--- a/tools/testing/selftests/ptp/testptp.c
+++ b/tools/testing/selftests/ptp/testptp.c
@@ -140,6 +140,7 @@ static void usage(char *progname)
" -H val set output phase to 'val' nanoseconds (requires -p)\n"
" -w val set output pulse width to 'val' nanoseconds (requires -p)\n"
" -P val enable or disable (val=1|0) the system clock PPS\n"
+ " -r open the ptp clock in readonly mode\n"
" -s set the ptp clock time from the system time\n"
" -S set the system time from the ptp clock time\n"
" -t val shift the ptp clock time by 'val' seconds\n"
@@ -188,6 +189,7 @@ int main(int argc, char *argv[])
int pin_index = -1, pin_func;
int pps = -1;
int seconds = 0;
+ int readonly = 0;
int settime = 0;
int channel = -1;
clockid_t ext_clockid = CLOCK_REALTIME;
@@ -200,7 +202,7 @@ int main(int argc, char *argv[])
progname = strrchr(argv[0], '/');
progname = progname ? 1+progname : argv[0];
- while (EOF != (c = getopt(argc, argv, "cd:e:f:F:ghH:i:k:lL:n:o:p:P:sSt:T:w:x:Xy:z"))) {
+ while (EOF != (c = getopt(argc, argv, "cd:e:f:F:ghH:i:k:lL:n:o:p:P:rsSt:T:w:x:Xy:z"))) {
switch (c) {
case 'c':
capabilities = 1;
@@ -252,6 +254,9 @@ int main(int argc, char *argv[])
case 'P':
pps = atoi(optarg);
break;
+ case 'r':
+ readonly = 1;
+ break;
case 's':
settime = 1;
break;
@@ -308,7 +313,7 @@ int main(int argc, char *argv[])
}
}
- fd = open(device, O_RDWR);
+ fd = open(device, readonly ? O_RDONLY : O_RDWR);
if (fd < 0) {
fprintf(stderr, "opening %s: %s\n", device, strerror(errno));
return -1;
@@ -436,14 +441,16 @@ int main(int argc, char *argv[])
}
if (extts) {
- memset(&extts_request, 0, sizeof(extts_request));
- extts_request.index = index;
- extts_request.flags = PTP_ENABLE_FEATURE;
- if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) {
- perror("PTP_EXTTS_REQUEST");
- extts = 0;
- } else {
- puts("external time stamp request okay");
+ if (!readonly) {
+ memset(&extts_request, 0, sizeof(extts_request));
+ extts_request.index = index;
+ extts_request.flags = PTP_ENABLE_FEATURE;
+ if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) {
+ perror("PTP_EXTTS_REQUEST");
+ extts = 0;
+ } else {
+ puts("external time stamp request okay");
+ }
}
for (; extts; extts--) {
cnt = read(fd, &event, sizeof(event));
@@ -455,10 +462,12 @@ int main(int argc, char *argv[])
event.t.sec, event.t.nsec);
fflush(stdout);
}
- /* Disable the feature again. */
- extts_request.flags = 0;
- if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) {
- perror("PTP_EXTTS_REQUEST");
+ if (!readonly) {
+ /* Disable the feature again. */
+ extts_request.flags = 0;
+ if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) {
+ perror("PTP_EXTTS_REQUEST");
+ }
}
}
diff --git a/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh b/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh
index 2e63ef009d59..2db12c5cad9c 100755
--- a/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh
+++ b/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh
@@ -49,7 +49,7 @@ do
do
err=
val=$((d*1000+t*10+c))
- tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 5s --configs "SRCU-P" --bootargs "rcutorture.test_srcu_lockdep=$val" --trust-make --datestamp "$ds/$val" > "$T/kvm.sh.out" 2>&1
+ tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 5s --configs "SRCU-P" --kconfig "CONFIG_FORCE_NEED_SRCU_NMI_SAFE=y" --bootargs "rcutorture.test_srcu_lockdep=$val rcutorture.reader_flavor=0x2" --trust-make --datestamp "$ds/$val" > "$T/kvm.sh.out" 2>&1
ret=$?
mv "$T/kvm.sh.out" "$RCUTORTURE/res/$ds/$val"
if test "$d" -ne 0 && test "$ret" -eq 0
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
index 2db39f298d18..fb61703690cb 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
@@ -2,3 +2,4 @@ rcutorture.torture_type=srcud
rcupdate.rcu_self_test=1
rcutorture.fwd_progress=3
srcutree.big_cpu_lim=5
+rcutorture.reader_flavor=0x8
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot
index c419cac233ee..54f5c9053474 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot
@@ -2,3 +2,9 @@ rcutree.gp_preinit_delay=3
rcutree.gp_init_delay=3
rcutree.gp_cleanup_delay=3
rcupdate.rcu_self_test=1
+
+# This part is for synchronize_rcu() testing
+rcutorture.nfakewriters=-1
+rcutorture.gp_sync=1
+rcupdate.rcu_normal=1
+rcutree.rcu_normal_wake_from_gp=1
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 b/tools/testing/selftests/rcutorture/configs/rcu/TREE07
index d30922d8c883..352393bc5c56 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE07
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07
@@ -1,7 +1,8 @@
CONFIG_SMP=y
CONFIG_NR_CPUS=16
-CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_NONE=n
CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT_LAZY=y
CONFIG_PREEMPT=n
CONFIG_PREEMPT_DYNAMIC=n
#CHECK#CONFIG_TREE_RCU=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE10 b/tools/testing/selftests/rcutorture/configs/rcu/TREE10
index 759ee51d3ddc..420632b030dc 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE10
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE10
@@ -1,6 +1,7 @@
CONFIG_SMP=y
CONFIG_NR_CPUS=74
-CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_LAZY=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
CONFIG_PREEMPT_DYNAMIC=n
diff --git a/tools/testing/selftests/riscv/vector/.gitignore b/tools/testing/selftests/riscv/vector/.gitignore
index 9ae7964491d5..7d9c87cd0649 100644
--- a/tools/testing/selftests/riscv/vector/.gitignore
+++ b/tools/testing/selftests/riscv/vector/.gitignore
@@ -1,3 +1,4 @@
vstate_exec_nolibc
vstate_prctl
-v_initval_nolibc
+v_initval
+v_exec_initval_nolibc
diff --git a/tools/testing/selftests/riscv/vector/Makefile b/tools/testing/selftests/riscv/vector/Makefile
index bfff0ff4f3be..6f7497f4e7b3 100644
--- a/tools/testing/selftests/riscv/vector/Makefile
+++ b/tools/testing/selftests/riscv/vector/Makefile
@@ -2,18 +2,27 @@
# Copyright (C) 2021 ARM Limited
# Originally tools/testing/arm64/abi/Makefile
-TEST_GEN_PROGS := vstate_prctl v_initval_nolibc
-TEST_GEN_PROGS_EXTENDED := vstate_exec_nolibc
+TEST_GEN_PROGS := v_initval vstate_prctl
+TEST_GEN_PROGS_EXTENDED := vstate_exec_nolibc v_exec_initval_nolibc
include ../../lib.mk
-$(OUTPUT)/vstate_prctl: vstate_prctl.c ../hwprobe/sys_hwprobe.S
+$(OUTPUT)/sys_hwprobe.o: ../hwprobe/sys_hwprobe.S
+ $(CC) -static -c -o$@ $(CFLAGS) $^
+
+$(OUTPUT)/v_helpers.o: v_helpers.c
+ $(CC) -static -c -o$@ $(CFLAGS) $^
+
+$(OUTPUT)/vstate_prctl: vstate_prctl.c $(OUTPUT)/sys_hwprobe.o $(OUTPUT)/v_helpers.o
$(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^
$(OUTPUT)/vstate_exec_nolibc: vstate_exec_nolibc.c
$(CC) -nostdlib -static -include ../../../../include/nolibc/nolibc.h \
-Wall $(CFLAGS) $(LDFLAGS) $^ -o $@ -lgcc
-$(OUTPUT)/v_initval_nolibc: v_initval_nolibc.c
+$(OUTPUT)/v_initval: v_initval.c $(OUTPUT)/sys_hwprobe.o $(OUTPUT)/v_helpers.o
+ $(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^
+
+$(OUTPUT)/v_exec_initval_nolibc: v_exec_initval_nolibc.c
$(CC) -nostdlib -static -include ../../../../include/nolibc/nolibc.h \
-Wall $(CFLAGS) $(LDFLAGS) $^ -o $@ -lgcc
diff --git a/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c b/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c
new file mode 100644
index 000000000000..35c0812e32de
--- /dev/null
+++ b/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Get values of vector registers as soon as the program starts to test if
+ * is properly cleaning the values before starting a new program. Vector
+ * registers are caller saved, so no function calls may happen before reading
+ * the values. To further ensure consistency, this file is compiled without
+ * libc and without auto-vectorization.
+ *
+ * To be "clean" all values must be either all ones or all zeroes.
+ */
+
+#define __stringify_1(x...) #x
+#define __stringify(x...) __stringify_1(x)
+
+int main(int argc, char **argv)
+{
+ char prev_value = 0, value;
+ unsigned long vl;
+ int first = 1;
+
+ if (argc > 2 && strcmp(argv[2], "x"))
+ asm volatile (
+ // 0 | zimm[10:0] | rs1 | 1 1 1 | rd |1010111| vsetvli
+ // vsetvli t4, x0, e8, m1, d1
+ ".4byte 0b00000000000000000111111011010111\n\t"
+ "mv %[vl], t4\n\t"
+ : [vl] "=r" (vl) : : "t4"
+ );
+ else
+ asm volatile (
+ ".option push\n\t"
+ ".option arch, +v\n\t"
+ "vsetvli %[vl], x0, e8, m1, ta, ma\n\t"
+ ".option pop\n\t"
+ : [vl] "=r" (vl)
+ );
+
+#define CHECK_VECTOR_REGISTER(register) ({ \
+ for (int i = 0; i < vl; i++) { \
+ asm volatile ( \
+ ".option push\n\t" \
+ ".option arch, +v\n\t" \
+ "vmv.x.s %0, " __stringify(register) "\n\t" \
+ "vsrl.vi " __stringify(register) ", " __stringify(register) ", 8\n\t" \
+ ".option pop\n\t" \
+ : "=r" (value)); \
+ if (first) { \
+ first = 0; \
+ } else if (value != prev_value || !(value == 0x00 || value == 0xff)) { \
+ printf("Register " __stringify(register) \
+ " values not clean! value: %u\n", value); \
+ exit(-1); \
+ } \
+ prev_value = value; \
+ } \
+})
+
+ CHECK_VECTOR_REGISTER(v0);
+ CHECK_VECTOR_REGISTER(v1);
+ CHECK_VECTOR_REGISTER(v2);
+ CHECK_VECTOR_REGISTER(v3);
+ CHECK_VECTOR_REGISTER(v4);
+ CHECK_VECTOR_REGISTER(v5);
+ CHECK_VECTOR_REGISTER(v6);
+ CHECK_VECTOR_REGISTER(v7);
+ CHECK_VECTOR_REGISTER(v8);
+ CHECK_VECTOR_REGISTER(v9);
+ CHECK_VECTOR_REGISTER(v10);
+ CHECK_VECTOR_REGISTER(v11);
+ CHECK_VECTOR_REGISTER(v12);
+ CHECK_VECTOR_REGISTER(v13);
+ CHECK_VECTOR_REGISTER(v14);
+ CHECK_VECTOR_REGISTER(v15);
+ CHECK_VECTOR_REGISTER(v16);
+ CHECK_VECTOR_REGISTER(v17);
+ CHECK_VECTOR_REGISTER(v18);
+ CHECK_VECTOR_REGISTER(v19);
+ CHECK_VECTOR_REGISTER(v20);
+ CHECK_VECTOR_REGISTER(v21);
+ CHECK_VECTOR_REGISTER(v22);
+ CHECK_VECTOR_REGISTER(v23);
+ CHECK_VECTOR_REGISTER(v24);
+ CHECK_VECTOR_REGISTER(v25);
+ CHECK_VECTOR_REGISTER(v26);
+ CHECK_VECTOR_REGISTER(v27);
+ CHECK_VECTOR_REGISTER(v28);
+ CHECK_VECTOR_REGISTER(v29);
+ CHECK_VECTOR_REGISTER(v30);
+ CHECK_VECTOR_REGISTER(v31);
+
+#undef CHECK_VECTOR_REGISTER
+
+ return 0;
+}
diff --git a/tools/testing/selftests/riscv/vector/v_helpers.c b/tools/testing/selftests/riscv/vector/v_helpers.c
new file mode 100644
index 000000000000..01a8799dcb78
--- /dev/null
+++ b/tools/testing/selftests/riscv/vector/v_helpers.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "../hwprobe/hwprobe.h"
+#include <asm/vendor/thead.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/wait.h>
+
+bool is_xtheadvector_supported(void)
+{
+ struct riscv_hwprobe pair;
+
+ pair.key = RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0;
+ riscv_hwprobe(&pair, 1, 0, NULL, 0);
+ return pair.value & RISCV_HWPROBE_VENDOR_EXT_XTHEADVECTOR;
+}
+
+bool is_vector_supported(void)
+{
+ struct riscv_hwprobe pair;
+
+ pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0;
+ riscv_hwprobe(&pair, 1, 0, NULL, 0);
+ return pair.value & RISCV_HWPROBE_EXT_ZVE32X;
+}
+
+int launch_test(char *next_program, int test_inherit, int xtheadvector)
+{
+ char *exec_argv[4], *exec_envp[1];
+ int rc, pid, status;
+
+ pid = fork();
+ if (pid < 0) {
+ printf("fork failed %d", pid);
+ return -1;
+ }
+
+ if (!pid) {
+ exec_argv[0] = next_program;
+ exec_argv[1] = test_inherit != 0 ? "x" : NULL;
+ exec_argv[2] = xtheadvector != 0 ? "x" : NULL;
+ exec_argv[3] = NULL;
+ exec_envp[0] = NULL;
+ /* launch the program again to check inherit */
+ rc = execve(next_program, exec_argv, exec_envp);
+ if (rc) {
+ perror("execve");
+ printf("child execve failed %d\n", rc);
+ exit(-1);
+ }
+ }
+
+ rc = waitpid(-1, &status, 0);
+ if (rc < 0) {
+ printf("waitpid failed\n");
+ return -3;
+ }
+
+ if ((WIFEXITED(status) && WEXITSTATUS(status) == -1) ||
+ WIFSIGNALED(status)) {
+ printf("child exited abnormally\n");
+ return -4;
+ }
+
+ return WEXITSTATUS(status);
+}
diff --git a/tools/testing/selftests/riscv/vector/v_helpers.h b/tools/testing/selftests/riscv/vector/v_helpers.h
new file mode 100644
index 000000000000..763cddfe26da
--- /dev/null
+++ b/tools/testing/selftests/riscv/vector/v_helpers.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#include <stdbool.h>
+
+bool is_xtheadvector_supported(void);
+
+bool is_vector_supported(void);
+
+int launch_test(char *next_program, int test_inherit, int xtheadvector);
diff --git a/tools/testing/selftests/riscv/vector/v_initval.c b/tools/testing/selftests/riscv/vector/v_initval.c
new file mode 100644
index 000000000000..be9e1d18ad29
--- /dev/null
+++ b/tools/testing/selftests/riscv/vector/v_initval.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "../../kselftest_harness.h"
+#include "v_helpers.h"
+
+#define NEXT_PROGRAM "./v_exec_initval_nolibc"
+
+TEST(v_initval)
+{
+ int xtheadvector = 0;
+
+ if (!is_vector_supported()) {
+ if (is_xtheadvector_supported())
+ xtheadvector = 1;
+ else
+ SKIP(return, "Vector not supported");
+ }
+
+ ASSERT_EQ(0, launch_test(NEXT_PROGRAM, 0, xtheadvector));
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/riscv/vector/v_initval_nolibc.c b/tools/testing/selftests/riscv/vector/v_initval_nolibc.c
deleted file mode 100644
index 6174ffe016dc..000000000000
--- a/tools/testing/selftests/riscv/vector/v_initval_nolibc.c
+++ /dev/null
@@ -1,72 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-
-#include "../../kselftest.h"
-#define MAX_VSIZE (8192 * 32)
-
-void dump(char *ptr, int size)
-{
- int i = 0;
-
- for (i = 0; i < size; i++) {
- if (i != 0) {
- if (i % 16 == 0)
- printf("\n");
- else if (i % 8 == 0)
- printf(" ");
- }
- printf("%02x ", ptr[i]);
- }
- printf("\n");
-}
-
-int main(void)
-{
- int i;
- unsigned long vl;
- char *datap, *tmp;
-
- ksft_set_plan(1);
-
- datap = malloc(MAX_VSIZE);
- if (!datap) {
- ksft_test_result_fail("fail to allocate memory for size = %d\n", MAX_VSIZE);
- exit(-1);
- }
-
- tmp = datap;
- asm volatile (
- ".option push\n\t"
- ".option arch, +v\n\t"
- "vsetvli %0, x0, e8, m8, ta, ma\n\t"
- "vse8.v v0, (%2)\n\t"
- "add %1, %2, %0\n\t"
- "vse8.v v8, (%1)\n\t"
- "add %1, %1, %0\n\t"
- "vse8.v v16, (%1)\n\t"
- "add %1, %1, %0\n\t"
- "vse8.v v24, (%1)\n\t"
- ".option pop\n\t"
- : "=&r" (vl), "=r" (tmp) : "r" (datap) : "memory");
-
- ksft_print_msg("vl = %lu\n", vl);
-
- if (datap[0] != 0x00 && datap[0] != 0xff) {
- ksft_test_result_fail("v-regesters are not properly initialized\n");
- dump(datap, vl * 4);
- exit(-1);
- }
-
- for (i = 1; i < vl * 4; i++) {
- if (datap[i] != datap[0]) {
- ksft_test_result_fail("detect stale values on v-regesters\n");
- dump(datap, vl * 4);
- exit(-2);
- }
- }
-
- free(datap);
-
- ksft_test_result_pass("tests for v_initval_nolibc pass\n");
- ksft_exit_pass();
- return 0;
-}
diff --git a/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c b/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c
index 1f9969bed235..7b7d6f21acb4 100644
--- a/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c
+++ b/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c
@@ -6,13 +6,16 @@
int main(int argc, char **argv)
{
- int rc, pid, status, test_inherit = 0;
+ int rc, pid, status, test_inherit = 0, xtheadvector = 0;
long ctrl, ctrl_c;
char *exec_argv[2], *exec_envp[2];
- if (argc > 1)
+ if (argc > 1 && strcmp(argv[1], "x"))
test_inherit = 1;
+ if (argc > 2 && strcmp(argv[2], "x"))
+ xtheadvector = 1;
+
ctrl = my_syscall1(__NR_prctl, PR_RISCV_V_GET_CONTROL);
if (ctrl < 0) {
puts("PR_RISCV_V_GET_CONTROL is not supported\n");
@@ -53,11 +56,14 @@ int main(int argc, char **argv)
puts("child's vstate_ctrl not equal to parent's\n");
exit(-1);
}
- asm volatile (".option push\n\t"
- ".option arch, +v\n\t"
- "vsetvli x0, x0, e32, m8, ta, ma\n\t"
- ".option pop\n\t"
- );
+ if (xtheadvector)
+ asm volatile (".4byte 0x00007ed7");
+ else
+ asm volatile (".option push\n\t"
+ ".option arch, +v\n\t"
+ "vsetvli x0, x0, e32, m8, ta, ma\n\t"
+ ".option pop\n\t"
+ );
exit(ctrl);
}
}
diff --git a/tools/testing/selftests/riscv/vector/vstate_prctl.c b/tools/testing/selftests/riscv/vector/vstate_prctl.c
index 40b3bffcbb40..62fbb17a0556 100644
--- a/tools/testing/selftests/riscv/vector/vstate_prctl.c
+++ b/tools/testing/selftests/riscv/vector/vstate_prctl.c
@@ -3,181 +3,244 @@
#include <unistd.h>
#include <errno.h>
#include <sys/wait.h>
+#include <sys/types.h>
+#include <stdlib.h>
-#include "../hwprobe/hwprobe.h"
-#include "../../kselftest.h"
+#include "../../kselftest_harness.h"
+#include "v_helpers.h"
#define NEXT_PROGRAM "./vstate_exec_nolibc"
-static int launch_test(int test_inherit)
-{
- char *exec_argv[3], *exec_envp[1];
- int rc, pid, status;
-
- pid = fork();
- if (pid < 0) {
- ksft_test_result_fail("fork failed %d", pid);
- return -1;
- }
- if (!pid) {
- exec_argv[0] = NEXT_PROGRAM;
- exec_argv[1] = test_inherit != 0 ? "x" : NULL;
- exec_argv[2] = NULL;
- exec_envp[0] = NULL;
- /* launch the program again to check inherit */
- rc = execve(NEXT_PROGRAM, exec_argv, exec_envp);
- if (rc) {
- perror("execve");
- ksft_test_result_fail("child execve failed %d\n", rc);
- exit(-1);
- }
- }
-
- rc = waitpid(-1, &status, 0);
- if (rc < 0) {
- ksft_test_result_fail("waitpid failed\n");
- return -3;
- }
-
- if ((WIFEXITED(status) && WEXITSTATUS(status) == -1) ||
- WIFSIGNALED(status)) {
- ksft_test_result_fail("child exited abnormally\n");
- return -4;
- }
-
- return WEXITSTATUS(status);
-}
-
-int test_and_compare_child(long provided, long expected, int inherit)
+int test_and_compare_child(long provided, long expected, int inherit, int xtheadvector)
{
int rc;
rc = prctl(PR_RISCV_V_SET_CONTROL, provided);
if (rc != 0) {
- ksft_test_result_fail("prctl with provided arg %lx failed with code %d\n",
- provided, rc);
+ printf("prctl with provided arg %lx failed with code %d\n",
+ provided, rc);
return -1;
}
- rc = launch_test(inherit);
+ rc = launch_test(NEXT_PROGRAM, inherit, xtheadvector);
if (rc != expected) {
- ksft_test_result_fail("Test failed, check %d != %ld\n", rc,
- expected);
+ printf("Test failed, check %d != %ld\n", rc, expected);
return -2;
}
return 0;
}
-#define PR_RISCV_V_VSTATE_CTRL_CUR_SHIFT 0
-#define PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT 2
+#define PR_RISCV_V_VSTATE_CTRL_CUR_SHIFT 0
+#define PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT 2
-int main(void)
+TEST(get_control_no_v)
{
- struct riscv_hwprobe pair;
- long flag, expected;
long rc;
- ksft_set_plan(1);
+ if (is_vector_supported() || is_xtheadvector_supported())
+ SKIP(return, "Test expects vector to be not supported");
- pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0;
- rc = riscv_hwprobe(&pair, 1, 0, NULL, 0);
- if (rc < 0) {
- ksft_test_result_fail("hwprobe() failed with %ld\n", rc);
- return -1;
- }
+ rc = prctl(PR_RISCV_V_GET_CONTROL);
+ EXPECT_EQ(-1, rc)
+ TH_LOG("GET_CONTROL should fail on kernel/hw without ZVE32X");
+ EXPECT_EQ(EINVAL, errno)
+ TH_LOG("GET_CONTROL should fail on kernel/hw without ZVE32X");
+}
- if (pair.key != RISCV_HWPROBE_KEY_IMA_EXT_0) {
- ksft_test_result_fail("hwprobe cannot probe RISCV_HWPROBE_KEY_IMA_EXT_0\n");
- return -2;
- }
+TEST(set_control_no_v)
+{
+ long rc;
- if (!(pair.value & RISCV_HWPROBE_EXT_ZVE32X)) {
- rc = prctl(PR_RISCV_V_GET_CONTROL);
- if (rc != -1 || errno != EINVAL) {
- ksft_test_result_fail("GET_CONTROL should fail on kernel/hw without ZVE32X\n");
- return -3;
- }
-
- rc = prctl(PR_RISCV_V_SET_CONTROL, PR_RISCV_V_VSTATE_CTRL_ON);
- if (rc != -1 || errno != EINVAL) {
- ksft_test_result_fail("SET_CONTROL should fail on kernel/hw without ZVE32X\n");
- return -4;
- }
-
- ksft_test_result_skip("Vector not supported\n");
- return 0;
- }
+ if (is_vector_supported() || is_xtheadvector_supported())
+ SKIP(return, "Test expects vector to be not supported");
+
+ rc = prctl(PR_RISCV_V_SET_CONTROL, PR_RISCV_V_VSTATE_CTRL_ON);
+ EXPECT_EQ(-1, rc)
+ TH_LOG("SET_CONTROL should fail on kernel/hw without ZVE32X");
+ EXPECT_EQ(EINVAL, errno)
+ TH_LOG("SET_CONTROL should fail on kernel/hw without ZVE32X");
+}
+
+TEST(vstate_on_current)
+{
+ long flag;
+ long rc;
+
+ if (!is_vector_supported() && !is_xtheadvector_supported())
+ SKIP(return, "Vector not supported");
flag = PR_RISCV_V_VSTATE_CTRL_ON;
rc = prctl(PR_RISCV_V_SET_CONTROL, flag);
- if (rc != 0) {
- ksft_test_result_fail("Enabling V for current should always success\n");
- return -5;
- }
+ EXPECT_EQ(0, rc) TH_LOG("Enabling V for current should always succeed");
+}
+
+TEST(vstate_off_eperm)
+{
+ long flag;
+ long rc;
+
+ if (!is_vector_supported() && !is_xtheadvector_supported())
+ SKIP(return, "Vector not supported");
flag = PR_RISCV_V_VSTATE_CTRL_OFF;
rc = prctl(PR_RISCV_V_SET_CONTROL, flag);
- if (rc != -1 || errno != EPERM) {
- ksft_test_result_fail("Disabling current's V alive must fail with EPERM(%d)\n",
- errno);
- return -5;
+ EXPECT_EQ(EPERM, errno)
+ TH_LOG("Disabling V in current thread with V enabled must fail with EPERM(%d)", errno);
+ EXPECT_EQ(-1, rc)
+ TH_LOG("Disabling V in current thread with V enabled must fail with EPERM(%d)", errno);
+}
+
+TEST(vstate_on_no_nesting)
+{
+ long flag;
+ int xtheadvector = 0;
+
+ if (!is_vector_supported()) {
+ if (is_xtheadvector_supported())
+ xtheadvector = 1;
+ else
+ SKIP(return, "Vector not supported");
}
/* Turn on next's vector explicitly and test */
flag = PR_RISCV_V_VSTATE_CTRL_ON << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT;
- if (test_and_compare_child(flag, PR_RISCV_V_VSTATE_CTRL_ON, 0))
- return -6;
+
+ EXPECT_EQ(0, test_and_compare_child(flag, PR_RISCV_V_VSTATE_CTRL_ON, 0, xtheadvector));
+}
+
+TEST(vstate_off_nesting)
+{
+ long flag;
+ int xtheadvector = 0;
+
+ if (!is_vector_supported()) {
+ if (is_xtheadvector_supported())
+ xtheadvector = 1;
+ else
+ SKIP(return, "Vector not supported");
+ }
/* Turn off next's vector explicitly and test */
flag = PR_RISCV_V_VSTATE_CTRL_OFF << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT;
- if (test_and_compare_child(flag, PR_RISCV_V_VSTATE_CTRL_OFF, 0))
- return -7;
+
+ EXPECT_EQ(0, test_and_compare_child(flag, PR_RISCV_V_VSTATE_CTRL_OFF, 1, xtheadvector));
+}
+
+TEST(vstate_on_inherit_no_nesting)
+{
+ long flag, expected;
+ int xtheadvector = 0;
+
+ if (!is_vector_supported()) {
+ if (is_xtheadvector_supported())
+ xtheadvector = 1;
+ else
+ SKIP(return, "Vector not supported");
+ }
+
+ /* Turn on next's vector explicitly and test no inherit */
+ flag = PR_RISCV_V_VSTATE_CTRL_ON << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT;
+ flag |= PR_RISCV_V_VSTATE_CTRL_INHERIT;
+ expected = flag | PR_RISCV_V_VSTATE_CTRL_ON;
+
+ EXPECT_EQ(0, test_and_compare_child(flag, expected, 0, xtheadvector));
+}
+
+TEST(vstate_on_inherit)
+{
+ long flag, expected;
+ int xtheadvector = 0;
+
+ if (!is_vector_supported()) {
+ if (is_xtheadvector_supported())
+ xtheadvector = 1;
+ else
+ SKIP(return, "Vector not supported");
+ }
/* Turn on next's vector explicitly and test inherit */
flag = PR_RISCV_V_VSTATE_CTRL_ON << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT;
flag |= PR_RISCV_V_VSTATE_CTRL_INHERIT;
expected = flag | PR_RISCV_V_VSTATE_CTRL_ON;
- if (test_and_compare_child(flag, expected, 0))
- return -8;
- if (test_and_compare_child(flag, expected, 1))
- return -9;
+ EXPECT_EQ(0, test_and_compare_child(flag, expected, 1, xtheadvector));
+}
+
+TEST(vstate_off_inherit_no_nesting)
+{
+ long flag, expected;
+ int xtheadvector = 0;
+
+ if (!is_vector_supported()) {
+ if (is_xtheadvector_supported())
+ xtheadvector = 1;
+ else
+ SKIP(return, "Vector not supported");
+ }
+ /* Turn off next's vector explicitly and test no inherit */
+ flag = PR_RISCV_V_VSTATE_CTRL_OFF << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT;
+ flag |= PR_RISCV_V_VSTATE_CTRL_INHERIT;
+ expected = flag | PR_RISCV_V_VSTATE_CTRL_OFF;
+
+ EXPECT_EQ(0, test_and_compare_child(flag, expected, 0, xtheadvector));
+}
+
+TEST(vstate_off_inherit)
+{
+ long flag, expected;
+ int xtheadvector = 0;
+
+ if (!is_vector_supported()) {
+ if (is_xtheadvector_supported())
+ xtheadvector = 1;
+ else
+ SKIP(return, "Vector not supported");
+ }
/* Turn off next's vector explicitly and test inherit */
flag = PR_RISCV_V_VSTATE_CTRL_OFF << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT;
flag |= PR_RISCV_V_VSTATE_CTRL_INHERIT;
expected = flag | PR_RISCV_V_VSTATE_CTRL_OFF;
- if (test_and_compare_child(flag, expected, 0))
- return -10;
- if (test_and_compare_child(flag, expected, 1))
- return -11;
+ EXPECT_EQ(0, test_and_compare_child(flag, expected, 1, xtheadvector));
+}
+
+/* arguments should fail with EINVAL */
+TEST(inval_set_control_1)
+{
+ int rc;
+
+ if (!is_vector_supported() && !is_xtheadvector_supported())
+ SKIP(return, "Vector not supported");
- /* arguments should fail with EINVAL */
rc = prctl(PR_RISCV_V_SET_CONTROL, 0xff0);
- if (rc != -1 || errno != EINVAL) {
- ksft_test_result_fail("Undefined control argument should return EINVAL\n");
- return -12;
- }
+ EXPECT_EQ(-1, rc);
+ EXPECT_EQ(EINVAL, errno);
+}
+
+/* arguments should fail with EINVAL */
+TEST(inval_set_control_2)
+{
+ int rc;
+
+ if (!is_vector_supported() && !is_xtheadvector_supported())
+ SKIP(return, "Vector not supported");
rc = prctl(PR_RISCV_V_SET_CONTROL, 0x3);
- if (rc != -1 || errno != EINVAL) {
- ksft_test_result_fail("Undefined control argument should return EINVAL\n");
- return -12;
- }
+ EXPECT_EQ(-1, rc);
+ EXPECT_EQ(EINVAL, errno);
+}
- rc = prctl(PR_RISCV_V_SET_CONTROL, 0xc);
- if (rc != -1 || errno != EINVAL) {
- ksft_test_result_fail("Undefined control argument should return EINVAL\n");
- return -12;
- }
+/* arguments should fail with EINVAL */
+TEST(inval_set_control_3)
+{
+ int rc;
- rc = prctl(PR_RISCV_V_SET_CONTROL, 0xc);
- if (rc != -1 || errno != EINVAL) {
- ksft_test_result_fail("Undefined control argument should return EINVAL\n");
- return -12;
- }
+ if (!is_vector_supported() && !is_xtheadvector_supported())
+ SKIP(return, "Vector not supported");
- ksft_test_result_pass("tests for riscv_v_vstate_ctrl pass\n");
- ksft_exit_pass();
- return 0;
+ rc = prctl(PR_RISCV_V_SET_CONTROL, 0xc);
+ EXPECT_EQ(-1, rc);
+ EXPECT_EQ(EINVAL, errno);
}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/rseq/.gitignore b/tools/testing/selftests/rseq/.gitignore
index 16496de5f6ce..0fda241fa62b 100644
--- a/tools/testing/selftests/rseq/.gitignore
+++ b/tools/testing/selftests/rseq/.gitignore
@@ -9,3 +9,4 @@ param_test_compare_twice
param_test_mm_cid
param_test_mm_cid_benchmark
param_test_mm_cid_compare_twice
+syscall_errors_test
diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile
index 5a3432fceb58..0d0a5fae5954 100644
--- a/tools/testing/selftests/rseq/Makefile
+++ b/tools/testing/selftests/rseq/Makefile
@@ -16,11 +16,12 @@ OVERRIDE_TARGETS = 1
TEST_GEN_PROGS = basic_test basic_percpu_ops_test basic_percpu_ops_mm_cid_test param_test \
param_test_benchmark param_test_compare_twice param_test_mm_cid \
- param_test_mm_cid_benchmark param_test_mm_cid_compare_twice
+ param_test_mm_cid_benchmark param_test_mm_cid_compare_twice \
+ syscall_errors_test
TEST_GEN_PROGS_EXTENDED = librseq.so
-TEST_PROGS = run_param_test.sh
+TEST_PROGS = run_param_test.sh run_syscall_errors_test.sh
TEST_FILES := settings
@@ -54,3 +55,7 @@ $(OUTPUT)/param_test_mm_cid_benchmark: param_test.c $(TEST_GEN_PROGS_EXTENDED) \
$(OUTPUT)/param_test_mm_cid_compare_twice: param_test.c $(TEST_GEN_PROGS_EXTENDED) \
rseq.h rseq-*.h
$(CC) $(CFLAGS) -DBUILDOPT_RSEQ_PERCPU_MM_CID -DRSEQ_COMPARE_TWICE $< $(LDLIBS) -lrseq -o $@
+
+$(OUTPUT)/syscall_errors_test: syscall_errors_test.c $(TEST_GEN_PROGS_EXTENDED) \
+ rseq.h rseq-*.h
+ $(CC) $(CFLAGS) $< $(LDLIBS) -lrseq -o $@
diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c
index 2f37961240ca..05d03e679e06 100644
--- a/tools/testing/selftests/rseq/param_test.c
+++ b/tools/testing/selftests/rseq/param_test.c
@@ -226,8 +226,32 @@ unsigned int yield_mod_cnt, nr_abort;
"addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \
"bnez " INJECT_ASM_REG ", 222b\n\t" \
"333:\n\t"
+#elif defined(__or1k__)
+#define RSEQ_INJECT_INPUT \
+ , [loop_cnt_1]"m"(loop_cnt[1]) \
+ , [loop_cnt_2]"m"(loop_cnt[2]) \
+ , [loop_cnt_3]"m"(loop_cnt[3]) \
+ , [loop_cnt_4]"m"(loop_cnt[4]) \
+ , [loop_cnt_5]"m"(loop_cnt[5]) \
+ , [loop_cnt_6]"m"(loop_cnt[6])
+#define INJECT_ASM_REG "r31"
+
+#define RSEQ_INJECT_CLOBBER \
+ , INJECT_ASM_REG
+
+#define RSEQ_INJECT_ASM(n) \
+ "l.lwz " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
+ "l.sfeqi " INJECT_ASM_REG ", 0\n\t" \
+ "l.bf 333f\n\t" \
+ " l.nop\n\t" \
+ "222:\n\t" \
+ "l.addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \
+ "l.sfeqi " INJECT_ASM_REG ", 0\n\t" \
+ "l.bf 222f\n\t" \
+ " l.nop\n\t" \
+ "333:\n\t"
#else
#error unsupported target
#endif
diff --git a/tools/testing/selftests/rseq/rseq-or1k-bits.h b/tools/testing/selftests/rseq/rseq-or1k-bits.h
new file mode 100644
index 000000000000..15d0e8200cd1
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-or1k-bits.h
@@ -0,0 +1,412 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+
+#include "rseq-bits-template.h"
+
+#if defined(RSEQ_TEMPLATE_MO_RELAXED) && \
+ (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID))
+
+static inline __always_inline
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_storev)(intptr_t *v, intptr_t expect, intptr_t newv,
+ int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]")
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]")
+#endif
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]")
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
+ RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]")
+#endif
+ RSEQ_ASM_OP_FINAL_STORE(v, newv, 3)
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG_1
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __always_inline
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpnev_storeoffp_load)(intptr_t *v, intptr_t expectnot,
+ off_t voffp, intptr_t *load, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]")
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]")
+#endif
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ RSEQ_ASM_OP_CMPNE(v, expectnot, "%l[cmpfail]")
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
+ RSEQ_ASM_OP_CMPNE(v, expectnot, "%l[error2]")
+#endif
+ RSEQ_ASM_OP_R_LOAD(v)
+ RSEQ_ASM_OP_R_STORE(load)
+ RSEQ_ASM_OP_R_LOAD_OFF(voffp)
+ RSEQ_ASM_OP_R_FINAL_STORE(v, 3)
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ [v] "m" (*v),
+ [expectnot] "r" (expectnot),
+ [load] "m" (*load),
+ [voffp] "Ir" (voffp)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG_1
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __always_inline
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_addv)(intptr_t *v, intptr_t count, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
+#endif
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
+#endif
+ RSEQ_ASM_OP_R_LOAD(v)
+ RSEQ_ASM_OP_R_ADD(count)
+ RSEQ_ASM_OP_R_FINAL_STORE(v, 3)
+ RSEQ_INJECT_ASM(4)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ [v] "m" (*v),
+ [count] "r" (count)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG_1
+ RSEQ_INJECT_CLOBBER
+ : abort
+#ifdef RSEQ_COMPARE_TWICE
+ , error1
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+static inline __always_inline
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_cmpeqv_storev)(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t expect2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]")
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]")
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error3]")
+#endif
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]")
+ RSEQ_INJECT_ASM(4)
+ RSEQ_ASM_OP_CMPEQ(v2, expect2, "%l[cmpfail]")
+ RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
+ RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]")
+ RSEQ_ASM_OP_CMPEQ(v2, expect2, "%l[error3]")
+#endif
+ RSEQ_ASM_OP_FINAL_STORE(v, newv, 3)
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [v2] "m" (*v2),
+ [expect2] "r" (expect2),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG_1
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2, error3
+#endif
+ );
+
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+error3:
+ rseq_bug("2nd expected value comparison failed");
+#endif
+}
+
+#define RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
+
+/*
+ * pval = *(ptr+off)
+ * *pval += inc;
+ */
+static inline __always_inline
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_offset_deref_addv)(intptr_t *ptr, off_t off, intptr_t inc,
+ int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
+#endif
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
+#endif
+ RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, inc, 3)
+ RSEQ_INJECT_ASM(4)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ [ptr] "r" (ptr),
+ [off] "r" (off),
+ [inc] "r" (inc)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG_1
+ RSEQ_INJECT_CLOBBER
+ : abort
+#ifdef RSEQ_COMPARE_TWICE
+ , error1
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+#endif /* #if defined(RSEQ_TEMPLATE_MO_RELAXED) &&
+ (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) */
+
+#if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && \
+ (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID))
+
+static inline __always_inline
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trystorev_storev)(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]")
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]")
+#endif
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]")
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
+ RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]")
+#endif
+ RSEQ_ASM_OP_STORE(v2, newv2)
+ RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_TEMPLATE_MO_RELEASE
+ RSEQ_ASM_OP_FINAL_STORE_RELEASE(v, newv, 3)
+#else
+ RSEQ_ASM_OP_FINAL_STORE(v, newv, 3)
+#endif
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ [expect] "r" (expect),
+ [v] "m" (*v),
+ [newv] "r" (newv),
+ [v2] "m" (*v2),
+ [newv2] "r" (newv2)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG_1
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __always_inline
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trymemcpy_storev)(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+ __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]")
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]")
+#endif
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]")
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
+ RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]")
+#endif
+ RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len)
+ RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_TEMPLATE_MO_RELEASE
+ RSEQ_ASM_OP_FINAL_STORE_RELEASE(v, newv, 3)
+#else
+ RSEQ_ASM_OP_FINAL_STORE(v, newv, 3)
+#endif
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ [expect] "r" (expect),
+ [v] "m" (*v),
+ [newv] "r" (newv),
+ [dst] "r" (dst),
+ [src] "r" (src),
+ [len] "r" (len)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG_1, RSEQ_ASM_TMP_REG_2,
+ RSEQ_ASM_TMP_REG_3, RSEQ_ASM_TMP_REG_4
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+#endif /* #if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) &&
+ (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) */
+
+#include "rseq-bits-reset.h"
diff --git a/tools/testing/selftests/rseq/rseq-or1k-thread-pointer.h b/tools/testing/selftests/rseq/rseq-or1k-thread-pointer.h
new file mode 100644
index 000000000000..cda740f7aff3
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-or1k-thread-pointer.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */
+#ifndef _RSEQ_OR1K_THREAD_POINTER
+#define _RSEQ_OR1K_THREAD_POINTER
+
+static inline void *rseq_thread_pointer(void)
+{
+ void *__thread_register;
+
+ __asm__ ("l.or %0, r10, r0" : "=r" (__thread_register));
+ return __thread_register;
+}
+
+#endif
diff --git a/tools/testing/selftests/rseq/rseq-or1k.h b/tools/testing/selftests/rseq/rseq-or1k.h
new file mode 100644
index 000000000000..9e78eebdf79a
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-or1k.h
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+
+/*
+ * Select the instruction "l.nop 0x35" as the RSEQ_SIG.
+ */
+#define RSEQ_SIG 0x15000035
+
+#define rseq_smp_mb() __asm__ __volatile__ ("l.msync" ::: "memory")
+#define rseq_smp_rmb() rseq_smp_mb()
+#define rseq_smp_wmb() rseq_smp_mb()
+#define RSEQ_ASM_TMP_REG_1 "r31"
+#define RSEQ_ASM_TMP_REG_2 "r29"
+#define RSEQ_ASM_TMP_REG_3 "r27"
+#define RSEQ_ASM_TMP_REG_4 "r25"
+
+#define rseq_smp_load_acquire(p) \
+__extension__ ({ \
+ rseq_unqual_scalar_typeof(*(p)) ____p1 = RSEQ_READ_ONCE(*(p)); \
+ rseq_smp_mb(); \
+ ____p1; \
+})
+
+#define rseq_smp_acquire__after_ctrl_dep() rseq_smp_rmb()
+
+#define rseq_smp_store_release(p, v) \
+do { \
+ rseq_smp_mb(); \
+ RSEQ_WRITE_ONCE(*(p), v); \
+} while (0)
+
+#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, start_ip, \
+ post_commit_offset, abort_ip) \
+ ".pushsection __rseq_cs, \"aw\"\n" \
+ ".balign 32\n" \
+ __rseq_str(label) ":\n" \
+ ".long " __rseq_str(version) ", " __rseq_str(flags) "\n" \
+ ".long 0x0, " __rseq_str(start_ip) ", " \
+ "0x0, " __rseq_str(post_commit_offset) ", " \
+ "0x0, " __rseq_str(abort_ip) "\n" \
+ ".popsection\n\t" \
+ ".pushsection __rseq_cs_ptr_array, \"aw\"\n" \
+ ".long 0x0, " __rseq_str(label) "b\n" \
+ ".popsection\n"
+
+#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \
+ __RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \
+ ((post_commit_ip) - (start_ip)), abort_ip)
+
+/*
+ * Exit points of a rseq critical section consist of all instructions outside
+ * of the critical section where a critical section can either branch to or
+ * reach through the normal course of its execution. The abort IP and the
+ * post-commit IP are already part of the __rseq_cs section and should not be
+ * explicitly defined as additional exit points. Knowing all exit points is
+ * useful to assist debuggers stepping over the critical section.
+ */
+#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip) \
+ ".pushsection __rseq_exit_point_array, \"aw\"\n" \
+ ".long 0x0, " __rseq_str(start_ip) ", 0x0, " __rseq_str(exit_ip) "\n" \
+ ".popsection\n"
+
+#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
+ RSEQ_INJECT_ASM(1) \
+ "l.movhi " RSEQ_ASM_TMP_REG_1 ", hi(" __rseq_str(cs_label) ")\n"\
+ "l.ori " RSEQ_ASM_TMP_REG_1 ", " RSEQ_ASM_TMP_REG_1 \
+ ", lo(" __rseq_str(cs_label) ")\n"\
+ "l.sw %[" __rseq_str(rseq_cs) "], " RSEQ_ASM_TMP_REG_1 "\n" \
+ __rseq_str(label) ":\n"
+
+#define RSEQ_ASM_DEFINE_ABORT(label, abort_label) \
+ "l.j 222f\n" \
+ " l.nop\n" \
+ ".balign 4\n" \
+ ".long " __rseq_str(RSEQ_SIG) "\n" \
+ __rseq_str(label) ":\n" \
+ "l.j %l[" __rseq_str(abort_label) "]\n" \
+ " l.nop\n" \
+ "222:\n"
+
+#define RSEQ_ASM_OP_STORE(var, value) \
+ "l.sw %[" __rseq_str(var) "], %[" __rseq_str(value) "]\n"
+
+#define RSEQ_ASM_OP_CMPEQ(var, expect, label) \
+ "l.lwz " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \
+ "l.sfne " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "]\n" \
+ "l.bf " __rseq_str(label) "\n" \
+ " l.nop\n"
+
+#define RSEQ_ASM_OP_CMPNE(var, expect, label) \
+ "l.lwz " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \
+ "l.sfeq " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "]\n" \
+ "l.bf " __rseq_str(label) "\n" \
+ " l.nop\n"
+
+#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \
+ RSEQ_INJECT_ASM(2) \
+ RSEQ_ASM_OP_CMPEQ(current_cpu_id, cpu_id, label)
+
+#define RSEQ_ASM_OP_R_LOAD(var) \
+ "l.lwz " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n"
+
+#define RSEQ_ASM_OP_R_STORE(var) \
+ "l.sw %[" __rseq_str(var) "], " RSEQ_ASM_TMP_REG_1 "\n"
+
+#define RSEQ_ASM_OP_R_LOAD_OFF(offset) \
+ "l.lwz " RSEQ_ASM_TMP_REG_1 ", " \
+ "%[" __rseq_str(offset) "](" RSEQ_ASM_TMP_REG_1 ")\n"
+
+#define RSEQ_ASM_OP_R_ADD(count) \
+ "l.add " RSEQ_ASM_TMP_REG_1 ", " RSEQ_ASM_TMP_REG_1 \
+ ", %[" __rseq_str(count) "]\n"
+
+#define RSEQ_ASM_OP_FINAL_STORE(var, value, post_commit_label) \
+ RSEQ_ASM_OP_STORE(var, value) \
+ __rseq_str(post_commit_label) ":\n"
+
+#define RSEQ_ASM_OP_FINAL_STORE_RELEASE(var, value, post_commit_label) \
+ "l.msync\n" \
+ RSEQ_ASM_OP_STORE(var, value) \
+ __rseq_str(post_commit_label) ":\n"
+
+#define RSEQ_ASM_OP_R_FINAL_STORE(var, post_commit_label) \
+ "l.sw %[" __rseq_str(var) "], " RSEQ_ASM_TMP_REG_1 "\n" \
+ __rseq_str(post_commit_label) ":\n"
+
+#define RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len) \
+ "l.sfeq %[" __rseq_str(len) "], r0\n" \
+ "l.bf 333f\n" \
+ " l.nop\n" \
+ "l.ori " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(len) "], 0\n" \
+ "l.ori " RSEQ_ASM_TMP_REG_2 ", %[" __rseq_str(src) "], 0\n" \
+ "l.ori " RSEQ_ASM_TMP_REG_3 ", %[" __rseq_str(dst) "], 0\n" \
+ "222:\n" \
+ "l.lbz " RSEQ_ASM_TMP_REG_4 ", 0(" RSEQ_ASM_TMP_REG_2 ")\n" \
+ "l.sb 0(" RSEQ_ASM_TMP_REG_3 "), " RSEQ_ASM_TMP_REG_4 "\n" \
+ "l.addi " RSEQ_ASM_TMP_REG_1 ", " RSEQ_ASM_TMP_REG_1 ", -1\n" \
+ "l.addi " RSEQ_ASM_TMP_REG_2 ", " RSEQ_ASM_TMP_REG_2 ", 1\n" \
+ "l.addi " RSEQ_ASM_TMP_REG_3 ", " RSEQ_ASM_TMP_REG_3 ", 1\n" \
+ "l.sfne " RSEQ_ASM_TMP_REG_1 ", r0\n" \
+ "l.bf 222b\n" \
+ " l.nop\n" \
+ "333:\n"
+
+#define RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, inc, post_commit_label) \
+ "l.ori " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(ptr) "], 0\n" \
+ RSEQ_ASM_OP_R_ADD(off) \
+ "l.lwz " RSEQ_ASM_TMP_REG_1 ", 0(" RSEQ_ASM_TMP_REG_1 ")\n" \
+ RSEQ_ASM_OP_R_ADD(inc) \
+ __rseq_str(post_commit_label) ":\n"
+
+/* Per-cpu-id indexing. */
+
+#define RSEQ_TEMPLATE_CPU_ID
+#define RSEQ_TEMPLATE_MO_RELAXED
+#include "rseq-or1k-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELAXED
+
+#define RSEQ_TEMPLATE_MO_RELEASE
+#include "rseq-or1k-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELEASE
+#undef RSEQ_TEMPLATE_CPU_ID
+
+/* Per-mm-cid indexing. */
+
+#define RSEQ_TEMPLATE_MM_CID
+#define RSEQ_TEMPLATE_MO_RELAXED
+#include "rseq-or1k-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELAXED
+
+#define RSEQ_TEMPLATE_MO_RELEASE
+#include "rseq-or1k-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELEASE
+#undef RSEQ_TEMPLATE_MM_CID
+
+/* APIs which are not based on cpu ids. */
+
+#define RSEQ_TEMPLATE_CPU_ID_NONE
+#define RSEQ_TEMPLATE_MO_RELAXED
+#include "rseq-or1k-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELAXED
+#undef RSEQ_TEMPLATE_CPU_ID_NONE
diff --git a/tools/testing/selftests/rseq/rseq-riscv-bits.h b/tools/testing/selftests/rseq/rseq-riscv-bits.h
index de31a0143139..f02f411d550d 100644
--- a/tools/testing/selftests/rseq/rseq-riscv-bits.h
+++ b/tools/testing/selftests/rseq/rseq-riscv-bits.h
@@ -243,7 +243,7 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_offset_deref_addv)(intptr_t *ptr, off_t off, i
#ifdef RSEQ_COMPARE_TWICE
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
#endif
- RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, 3)
+ RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, inc, 3)
RSEQ_INJECT_ASM(4)
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
@@ -251,8 +251,8 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_offset_deref_addv)(intptr_t *ptr, off_t off, i
[current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
[rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[ptr] "r" (ptr),
- [off] "er" (off),
- [inc] "er" (inc)
+ [off] "r" (off),
+ [inc] "r" (inc)
RSEQ_INJECT_INPUT
: "memory", RSEQ_ASM_TMP_REG_1
RSEQ_INJECT_CLOBBER
diff --git a/tools/testing/selftests/rseq/rseq-riscv.h b/tools/testing/selftests/rseq/rseq-riscv.h
index 37e598d0a365..67d544aaa9a3 100644
--- a/tools/testing/selftests/rseq/rseq-riscv.h
+++ b/tools/testing/selftests/rseq/rseq-riscv.h
@@ -158,7 +158,7 @@ do { \
"bnez " RSEQ_ASM_TMP_REG_1 ", 222b\n" \
"333:\n"
-#define RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, post_commit_label) \
+#define RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, inc, post_commit_label) \
"mv " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(ptr) "]\n" \
RSEQ_ASM_OP_R_ADD(off) \
REG_L RSEQ_ASM_TMP_REG_1 ", 0(" RSEQ_ASM_TMP_REG_1 ")\n" \
diff --git a/tools/testing/selftests/rseq/rseq-thread-pointer.h b/tools/testing/selftests/rseq/rseq-thread-pointer.h
index 977c25d758b2..3d5019307a1b 100644
--- a/tools/testing/selftests/rseq/rseq-thread-pointer.h
+++ b/tools/testing/selftests/rseq/rseq-thread-pointer.h
@@ -12,6 +12,8 @@
#include "rseq-x86-thread-pointer.h"
#elif defined(__PPC__)
#include "rseq-ppc-thread-pointer.h"
+#elif defined(__or1k__)
+#include "rseq-or1k-thread-pointer.h"
#else
#include "rseq-generic-thread-pointer.h"
#endif
diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c
index f6156790c3b4..663a9cef1952 100644
--- a/tools/testing/selftests/rseq/rseq.c
+++ b/tools/testing/selftests/rseq/rseq.c
@@ -71,9 +71,20 @@ static int rseq_ownership;
/* Original struct rseq allocation size is 32 bytes. */
#define ORIG_RSEQ_ALLOC_SIZE 32
+/*
+ * Use a union to ensure we allocate a TLS area of 1024 bytes to accomodate an
+ * rseq registration that is larger than the current rseq ABI.
+ */
+union rseq_tls {
+ struct rseq_abi abi;
+ char dummy[RSEQ_THREAD_AREA_ALLOC_SIZE];
+};
+
static
-__thread struct rseq_abi __rseq_abi __attribute__((tls_model("initial-exec"), aligned(RSEQ_THREAD_AREA_ALLOC_SIZE))) = {
- .cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED,
+__thread union rseq_tls __rseq __attribute__((tls_model("initial-exec"))) = {
+ .abi = {
+ .cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED,
+ },
};
static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len,
@@ -87,7 +98,7 @@ static int sys_getcpu(unsigned *cpu, unsigned *node)
return syscall(__NR_getcpu, cpu, node, NULL);
}
-int rseq_available(void)
+bool rseq_available(void)
{
int rc;
@@ -96,9 +107,9 @@ int rseq_available(void)
abort();
switch (errno) {
case ENOSYS:
- return 0;
+ return false;
case EINVAL:
- return 1;
+ return true;
default:
abort();
}
@@ -149,7 +160,7 @@ int rseq_register_current_thread(void)
/* Treat libc's ownership as a successful registration. */
return 0;
}
- rc = sys_rseq(&__rseq_abi, get_rseq_min_alloc_size(), 0, RSEQ_SIG);
+ rc = sys_rseq(&__rseq.abi, get_rseq_min_alloc_size(), 0, RSEQ_SIG);
if (rc) {
/*
* After at least one thread has registered successfully
@@ -183,7 +194,7 @@ int rseq_unregister_current_thread(void)
/* Treat libc's ownership as a successful unregistration. */
return 0;
}
- rc = sys_rseq(&__rseq_abi, get_rseq_min_alloc_size(), RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG);
+ rc = sys_rseq(&__rseq.abi, get_rseq_min_alloc_size(), RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG);
if (rc)
return -1;
return 0;
@@ -249,7 +260,7 @@ void rseq_init(void)
rseq_ownership = 1;
/* Calculate the offset of the rseq area from the thread pointer. */
- rseq_offset = (void *)&__rseq_abi - rseq_thread_pointer();
+ rseq_offset = (void *)&__rseq.abi - rseq_thread_pointer();
/* rseq flags are deprecated, always set to 0. */
rseq_flags = 0;
diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h
index 062d10925a10..f51a5fdb0444 100644
--- a/tools/testing/selftests/rseq/rseq.h
+++ b/tools/testing/selftests/rseq/rseq.h
@@ -129,6 +129,8 @@ static inline struct rseq_abi *rseq_get_abi(void)
#include <rseq-s390.h>
#elif defined(__riscv)
#include <rseq-riscv.h>
+#elif defined(__or1k__)
+#include <rseq-or1k.h>
#else
#error unsupported target
#endif
@@ -158,6 +160,11 @@ int32_t rseq_fallback_current_cpu(void);
int32_t rseq_fallback_current_node(void);
/*
+ * Returns true if rseq is supported.
+ */
+bool rseq_available(void);
+
+/*
* Values returned can be either the current CPU number, -1 (rseq is
* uninitialized), or -2 (rseq initialization has failed).
*/
diff --git a/tools/testing/selftests/rseq/run_syscall_errors_test.sh b/tools/testing/selftests/rseq/run_syscall_errors_test.sh
new file mode 100755
index 000000000000..9272246b39f2
--- /dev/null
+++ b/tools/testing/selftests/rseq/run_syscall_errors_test.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+# SPDX-License-Identifier: MIT
+# SPDX-FileCopyrightText: 2024 Michael Jeanson <mjeanson@efficios.com>
+
+GLIBC_TUNABLES="${GLIBC_TUNABLES:-}:glibc.pthread.rseq=0" ./syscall_errors_test
diff --git a/tools/testing/selftests/rseq/syscall_errors_test.c b/tools/testing/selftests/rseq/syscall_errors_test.c
new file mode 100644
index 000000000000..a5d9e1f8a2dc
--- /dev/null
+++ b/tools/testing/selftests/rseq/syscall_errors_test.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: MIT
+// SPDX-FileCopyrightText: 2024 Michael Jeanson <mjeanson@efficios.com>
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <assert.h>
+#include <stdint.h>
+#include <syscall.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "rseq.h"
+
+static int sys_rseq(void *rseq_abi, uint32_t rseq_len,
+ int flags, uint32_t sig)
+{
+ return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig);
+}
+
+/*
+ * Check the value of errno on some expected failures of the rseq syscall.
+ */
+
+int main(void)
+{
+ struct rseq_abi *global_rseq = rseq_get_abi();
+ int ret;
+ int errno_copy;
+
+ if (!rseq_available()) {
+ fprintf(stderr, "rseq syscall unavailable");
+ goto error;
+ }
+
+ /* The current thread is NOT registered. */
+
+ /* EINVAL */
+ errno = 0;
+ ret = sys_rseq(global_rseq, 32, -1, RSEQ_SIG);
+ errno_copy = errno;
+ fprintf(stderr, "Registration with invalid flag fails with errno set to EINVAL (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy));
+ if (ret == 0 || errno_copy != EINVAL)
+ goto error;
+
+ errno = 0;
+ ret = sys_rseq((char *) global_rseq + 1, 32, 0, RSEQ_SIG);
+ errno_copy = errno;
+ fprintf(stderr, "Registration with unaligned rseq_abi fails with errno set to EINVAL (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy));
+ if (ret == 0 || errno_copy != EINVAL)
+ goto error;
+
+ errno = 0;
+ ret = sys_rseq(global_rseq, 31, 0, RSEQ_SIG);
+ errno_copy = errno;
+ fprintf(stderr, "Registration with invalid size fails with errno set to EINVAL (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy));
+ if (ret == 0 || errno_copy != EINVAL)
+ goto error;
+
+
+#if defined(__LP64__) && (!defined(__s390__) && !defined(__s390x__))
+ /*
+ * We haven't found a reliable way to find an invalid address when
+ * running a 32bit userspace on a 64bit kernel, so only run this test
+ * on 64bit builds for the moment.
+ *
+ * Also exclude architectures that select
+ * CONFIG_ALTERNATE_USER_ADDRESS_SPACE where the kernel and userspace
+ * have their own address space and this failure can't happen.
+ */
+
+ /* EFAULT */
+ errno = 0;
+ ret = sys_rseq((void *) -4096UL, 32, 0, RSEQ_SIG);
+ errno_copy = errno;
+ fprintf(stderr, "Registration with invalid address fails with errno set to EFAULT (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy));
+ if (ret == 0 || errno_copy != EFAULT)
+ goto error;
+#endif
+
+ errno = 0;
+ ret = sys_rseq(global_rseq, 32, 0, RSEQ_SIG);
+ errno_copy = errno;
+ fprintf(stderr, "Registration succeeds for the current thread (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy));
+ if (ret != 0 && errno != 0)
+ goto error;
+
+ /* The current thread is registered. */
+
+ /* EBUSY */
+ errno = 0;
+ ret = sys_rseq(global_rseq, 32, 0, RSEQ_SIG);
+ errno_copy = errno;
+ fprintf(stderr, "Double registration fails with errno set to EBUSY (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy));
+ if (ret == 0 || errno_copy != EBUSY)
+ goto error;
+
+ /* EPERM */
+ errno = 0;
+ ret = sys_rseq(global_rseq, 32, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG + 1);
+ errno_copy = errno;
+ fprintf(stderr, "Unregistration with wrong RSEQ_SIG fails with errno to EPERM (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy));
+ if (ret == 0 || errno_copy != EPERM)
+ goto error;
+
+ errno = 0;
+ ret = sys_rseq(global_rseq, 32, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG);
+ errno_copy = errno;
+ fprintf(stderr, "Unregistration succeeds for the current thread (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy));
+ if (ret != 0)
+ goto error;
+
+ errno = 0;
+ ret = sys_rseq(global_rseq, 32, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG);
+ errno_copy = errno;
+ fprintf(stderr, "Double unregistration fails with errno set to EINVAL (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy));
+ if (ret == 0 || errno_copy != EINVAL)
+ goto error;
+
+ return 0;
+error:
+ return -1;
+}
diff --git a/tools/testing/selftests/sched/config b/tools/testing/selftests/sched/config
index e8b09aa7c0c4..1bb8bf6d7fd4 100644
--- a/tools/testing/selftests/sched/config
+++ b/tools/testing/selftests/sched/config
@@ -1 +1 @@
-CONFIG_SCHED_DEBUG=y
+# empty
diff --git a/tools/testing/selftests/sched_ext/config b/tools/testing/selftests/sched_ext/config
index 0de9b4ee249d..aa901b05c8ad 100644
--- a/tools/testing/selftests/sched_ext/config
+++ b/tools/testing/selftests/sched_ext/config
@@ -1,4 +1,3 @@
-CONFIG_SCHED_DEBUG=y
CONFIG_SCHED_CLASS_EXT=y
CONFIG_CGROUPS=y
CONFIG_CGROUP_SCHED=y
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 8c3a73461475..b2f76a52215a 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -47,6 +47,7 @@
#include <linux/kcmp.h>
#include <sys/resource.h>
#include <sys/capability.h>
+#include <linux/perf_event.h>
#include <unistd.h>
#include <sys/syscall.h>
@@ -68,6 +69,10 @@
# define PR_SET_PTRACER 0x59616d61
#endif
+#ifndef noinline
+#define noinline __attribute__((noinline))
+#endif
+
#ifndef PR_SET_NO_NEW_PRIVS
#define PR_SET_NO_NEW_PRIVS 38
#define PR_GET_NO_NEW_PRIVS 39
@@ -150,6 +155,12 @@ struct seccomp_data {
# endif
#endif
+#ifndef __NR_uretprobe
+# if defined(__x86_64__)
+# define __NR_uretprobe 335
+# endif
+#endif
+
#ifndef SECCOMP_SET_MODE_STRICT
#define SECCOMP_SET_MODE_STRICT 0
#endif
@@ -4888,6 +4899,200 @@ TEST(tsync_vs_dead_thread_leader)
EXPECT_EQ(0, status);
}
+noinline int probed(void)
+{
+ return 1;
+}
+
+static int parse_uint_from_file(const char *file, const char *fmt)
+{
+ int err = -1, ret;
+ FILE *f;
+
+ f = fopen(file, "re");
+ if (f) {
+ err = fscanf(f, fmt, &ret);
+ fclose(f);
+ }
+ return err == 1 ? ret : err;
+}
+
+static int determine_uprobe_perf_type(void)
+{
+ const char *file = "/sys/bus/event_source/devices/uprobe/type";
+
+ return parse_uint_from_file(file, "%d\n");
+}
+
+static int determine_uprobe_retprobe_bit(void)
+{
+ const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe";
+
+ return parse_uint_from_file(file, "config:%d\n");
+}
+
+static ssize_t get_uprobe_offset(const void *addr)
+{
+ size_t start, base, end;
+ bool found = false;
+ char buf[256];
+ FILE *f;
+
+ f = fopen("/proc/self/maps", "r");
+ if (!f)
+ return -1;
+
+ while (fscanf(f, "%zx-%zx %s %zx %*[^\n]\n", &start, &end, buf, &base) == 4) {
+ if (buf[2] == 'x' && (uintptr_t)addr >= start && (uintptr_t)addr < end) {
+ found = true;
+ break;
+ }
+ }
+ fclose(f);
+ return found ? (uintptr_t)addr - start + base : -1;
+}
+
+FIXTURE(URETPROBE) {
+ int fd;
+};
+
+FIXTURE_VARIANT(URETPROBE) {
+ /*
+ * All of the URETPROBE behaviors can be tested with either
+ * uretprobe attached or not
+ */
+ bool attach;
+};
+
+FIXTURE_VARIANT_ADD(URETPROBE, attached) {
+ .attach = true,
+};
+
+FIXTURE_VARIANT_ADD(URETPROBE, not_attached) {
+ .attach = false,
+};
+
+FIXTURE_SETUP(URETPROBE)
+{
+ const size_t attr_sz = sizeof(struct perf_event_attr);
+ struct perf_event_attr attr;
+ ssize_t offset;
+ int type, bit;
+
+#ifndef __NR_uretprobe
+ SKIP(return, "__NR_uretprobe syscall not defined");
+#endif
+
+ if (!variant->attach)
+ return;
+
+ memset(&attr, 0, attr_sz);
+
+ type = determine_uprobe_perf_type();
+ ASSERT_GE(type, 0);
+ bit = determine_uprobe_retprobe_bit();
+ ASSERT_GE(bit, 0);
+ offset = get_uprobe_offset(probed);
+ ASSERT_GE(offset, 0);
+
+ attr.config |= 1 << bit;
+ attr.size = attr_sz;
+ attr.type = type;
+ attr.config1 = ptr_to_u64("/proc/self/exe");
+ attr.config2 = offset;
+
+ self->fd = syscall(__NR_perf_event_open, &attr,
+ getpid() /* pid */, -1 /* cpu */, -1 /* group_fd */,
+ PERF_FLAG_FD_CLOEXEC);
+}
+
+FIXTURE_TEARDOWN(URETPROBE)
+{
+ /* we could call close(self->fd), but we'd need extra filter for
+ * that and since we are calling _exit right away..
+ */
+}
+
+static int run_probed_with_filter(struct sock_fprog *prog)
+{
+ if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) ||
+ seccomp(SECCOMP_SET_MODE_FILTER, 0, prog)) {
+ return -1;
+ }
+
+ probed();
+ return 0;
+}
+
+TEST_F(URETPROBE, uretprobe_default_allow)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+
+ ASSERT_EQ(0, run_probed_with_filter(&prog));
+}
+
+TEST_F(URETPROBE, uretprobe_default_block)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit_group, 1, 0),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+
+ ASSERT_EQ(0, run_probed_with_filter(&prog));
+}
+
+TEST_F(URETPROBE, uretprobe_block_uretprobe_syscall)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+#ifdef __NR_uretprobe
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uretprobe, 0, 1),
+#endif
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+
+ ASSERT_EQ(0, run_probed_with_filter(&prog));
+}
+
+TEST_F(URETPROBE, uretprobe_default_block_with_uretprobe_syscall)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+#ifdef __NR_uretprobe
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uretprobe, 2, 0),
+#endif
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit_group, 1, 0),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+
+ ASSERT_EQ(0, run_probed_with_filter(&prog));
+}
+
/*
* TODO:
* - expand NNP testing
diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh
index 84472b436c07..db1616857d89 100755
--- a/tools/testing/selftests/sysctl/sysctl.sh
+++ b/tools/testing/selftests/sysctl/sysctl.sh
@@ -21,7 +21,7 @@ TEST_FILE=$(mktemp)
# ENABLED: 1 if enabled, 0 otherwise
# TARGET: test target file required on the test_sysctl module
# SKIP_NO_TARGET: 1 skip if TARGET not there
-# 0 run eventhough TARGET not there
+# 0 run even though TARGET not there
#
# Once these are enabled please leave them as-is. Write your own test,
# we have tons of space.
@@ -764,7 +764,7 @@ sysctl_test_0007()
fi
if [ ! -f /proc/cmdline ]; then
- echo -e "SKIPPING\nThere is no /proc/cmdline to check for paramter"
+ echo -e "SKIPPING\nThere is no /proc/cmdline to check for parameter"
return $ksft_skip
fi
@@ -857,7 +857,7 @@ list_tests()
echo
echo "TEST_ID x NUM_TEST"
echo "TEST_ID: Test ID"
- echo "NUM_TESTS: Number of recommended times to run the test"
+ echo "NUM_TESTS: Recommended number of times to run the test"
echo
echo "0001 x $(get_test_count 0001) - tests proc_dointvec_minmax()"
echo "0002 x $(get_test_count 0002) - tests proc_dostring()"
@@ -884,7 +884,7 @@ usage()
echo "Valid tests: 0001-$MAX_TEST"
echo ""
echo " all Runs all tests (default)"
- echo " -t Run test ID the number amount of times is recommended"
+ echo " -t Run test ID the recommended number of times"
echo " -w Watch test ID run until it runs into an error"
echo " -c Run test ID once"
echo " -s Run test ID x test-count number of times"
@@ -898,7 +898,7 @@ usage()
echo Example uses:
echo
echo "$TEST_NAME.sh -- executes all tests"
- echo "$TEST_NAME.sh -t 0002 -- Executes test ID 0002 number of times is recomended"
+ echo "$TEST_NAME.sh -t 0002 -- Executes test ID 0002 the recommended number of times"
echo "$TEST_NAME.sh -w 0002 -- Watch test ID 0002 run until an error occurs"
echo "$TEST_NAME.sh -s 0002 -- Run test ID 0002 once"
echo "$TEST_NAME.sh -c 0002 3 -- Run test ID 0002 three times"
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
index dd8109768f8f..5596f4df0e9f 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
@@ -689,7 +689,7 @@
"cmdUnderTest": "$TC actions add action police rate 7mbit burst 1m continue index 1",
"expExitCode": "0",
"verifyCmd": "$TC actions get action police index 1",
- "matchPattern": "action order [0-9]*: police 0x1 rate 7Mbit burst 1024Kb mtu 2Kb action continue",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 7Mbit burst (1024Kb|1Mb) mtu 2Kb action continue",
"matchCount": "1",
"teardown": [
"$TC actions flush action police"
@@ -716,7 +716,7 @@
"cmdUnderTest": "$TC actions add action police rate 7mbit burst 1m drop index 1",
"expExitCode": "0",
"verifyCmd": "$TC actions ls action police",
- "matchPattern": "action order [0-9]*: police 0x1 rate 7Mbit burst 1024Kb mtu 2Kb action drop",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 7Mbit burst (1024Kb|1Mb) mtu 2Kb action drop",
"matchCount": "1",
"teardown": [
"$TC actions flush action police"
@@ -743,7 +743,7 @@
"cmdUnderTest": "$TC actions add action police rate 7mbit burst 1m ok index 1",
"expExitCode": "0",
"verifyCmd": "$TC actions ls action police",
- "matchPattern": "action order [0-9]*: police 0x1 rate 7Mbit burst 1024Kb mtu 2Kb action pass",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 7Mbit burst (1024Kb|1Mb) mtu 2Kb action pass",
"matchCount": "1",
"teardown": [
"$TC actions flush action police"
@@ -770,7 +770,7 @@
"cmdUnderTest": "$TC actions add action police rate 7mbit burst 1m reclassify index 1",
"expExitCode": "0",
"verifyCmd": "$TC actions get action police index 1",
- "matchPattern": "action order [0-9]*: police 0x1 rate 7Mbit burst 1024Kb mtu 2Kb action reclassify",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 7Mbit burst (1024Kb|1Mb) mtu 2Kb action reclassify",
"matchCount": "1",
"teardown": [
"$TC actions flush action police"
@@ -797,7 +797,7 @@
"cmdUnderTest": "$TC actions add action police rate 7mbit burst 1m pipe index 1",
"expExitCode": "0",
"verifyCmd": "$TC actions ls action police",
- "matchPattern": "action order [0-9]*: police 0x1 rate 7Mbit burst 1024Kb mtu 2Kb action pipe",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 7Mbit burst (1024Kb|1Mb) mtu 2Kb action pipe",
"matchCount": "1",
"teardown": [
"$TC actions flush action police"
diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
index d3dd65b05b5f..9044ac054167 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
@@ -94,5 +94,37 @@
"$TC qdisc del dev $DUMMY ingress",
"$IP addr del 10.10.10.10/24 dev $DUMMY"
]
- }
+ },
+ {
+ "id": "a4b9",
+ "name": "Test class qlen notification",
+ "category": [
+ "qdisc"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.10.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY root handle 1: drr",
+ "$TC filter add dev $DUMMY parent 1: basic classid 1:1",
+ "$TC class add dev $DUMMY parent 1: classid 1:1 drr",
+ "$TC qdisc add dev $DUMMY parent 1:1 handle 2: netem",
+ "$TC qdisc add dev $DUMMY parent 2: handle 3: drr",
+ "$TC filter add dev $DUMMY parent 3: basic action drop",
+ "$TC class add dev $DUMMY parent 3: classid 3:1 drr",
+ "$TC class del dev $DUMMY classid 1:1",
+ "$TC class add dev $DUMMY parent 1: classid 1:1 drr"
+ ],
+ "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.10.1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC qdisc ls dev $DUMMY",
+ "matchPattern": "drr 1: root",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY root handle 1: drr",
+ "$IP addr del 10.10.10.10/24 dev $DUMMY"
+ ]
+ }
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json
index 7126ec3485cb..2b61d8d79bde 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json
@@ -61,5 +61,30 @@
"teardown": [
"$TC qdisc del dev $DUMMY handle 1: root"
]
+ },
+ {
+ "id": "4009",
+ "name": "Reject creation of DRR class with classid TC_H_ROOT",
+ "category": [
+ "qdisc",
+ "drr"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY root handle ffff: drr",
+ "$TC filter add dev $DUMMY parent ffff: basic classid ffff:1",
+ "$TC class add dev $DUMMY parent ffff: classid ffff:1 drr",
+ "$TC filter add dev $DUMMY parent ffff: prio 1 u32 match u16 0x0000 0xfe00 at 2 flowid ffff:ffff"
+ ],
+ "cmdUnderTest": "$TC class add dev $DUMMY parent ffff: classid ffff:ffff drr",
+ "expExitCode": "2",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "class drr ffff:ffff",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY root"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json
index ae3d286a32b2..6f20d033670d 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json
@@ -313,6 +313,29 @@
"matchPattern": "qdisc bfifo 1: root",
"matchCount": "0",
"teardown": [
+ ]
+ },
+ {
+ "id": "d774",
+ "name": "Check pfifo_head_drop qdisc enqueue behaviour when limit == 0",
+ "category": [
+ "qdisc",
+ "pfifo_head_drop"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP addr add 10.10.10.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY root handle 1: pfifo_head_drop limit 0",
+ "$IP link set dev $DUMMY up || true"
+ ],
+ "cmdUnderTest": "ping -c2 -W0.01 -I $DUMMY 10.10.10.1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC -s qdisc show dev $DUMMY",
+ "matchPattern": "dropped 2",
+ "matchCount": "1",
+ "teardown": [
]
}
]
diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c
index 9814b3a1c77d..f0eceb0faf34 100644
--- a/tools/testing/selftests/timers/posix_timers.c
+++ b/tools/testing/selftests/timers/posix_timers.c
@@ -7,6 +7,7 @@
* Kernel loop code stolen from Steven Rostedt <srostedt@redhat.com>
*/
#define _GNU_SOURCE
+#include <sys/prctl.h>
#include <sys/time.h>
#include <sys/types.h>
#include <stdio.h>
@@ -599,14 +600,84 @@ static void check_overrun(int which, const char *name)
"check_overrun %s\n", name);
}
+#include <sys/syscall.h>
+
+static int do_timer_create(int *id)
+{
+ return syscall(__NR_timer_create, CLOCK_MONOTONIC, NULL, id);
+}
+
+static int do_timer_delete(int id)
+{
+ return syscall(__NR_timer_delete, id);
+}
+
+#ifndef PR_TIMER_CREATE_RESTORE_IDS
+# define PR_TIMER_CREATE_RESTORE_IDS 77
+# define PR_TIMER_CREATE_RESTORE_IDS_OFF 0
+# define PR_TIMER_CREATE_RESTORE_IDS_ON 1
+# define PR_TIMER_CREATE_RESTORE_IDS_GET 2
+#endif
+
+static void check_timer_create_exact(void)
+{
+ int id;
+
+ if (prctl(PR_TIMER_CREATE_RESTORE_IDS, PR_TIMER_CREATE_RESTORE_IDS_ON, 0, 0, 0)) {
+ switch (errno) {
+ case EINVAL:
+ ksft_test_result_skip("check timer create exact, not supported\n");
+ return;
+ default:
+ ksft_test_result_skip("check timer create exact, errno = %d\n", errno);
+ return;
+ }
+ }
+
+ if (prctl(PR_TIMER_CREATE_RESTORE_IDS, PR_TIMER_CREATE_RESTORE_IDS_GET, 0, 0, 0) != 1)
+ fatal_error(NULL, "prctl(GET) failed\n");
+
+ id = 8;
+ if (do_timer_create(&id) < 0)
+ fatal_error(NULL, "timer_create()");
+
+ if (do_timer_delete(id))
+ fatal_error(NULL, "timer_delete()");
+
+ if (prctl(PR_TIMER_CREATE_RESTORE_IDS, PR_TIMER_CREATE_RESTORE_IDS_OFF, 0, 0, 0))
+ fatal_error(NULL, "prctl(OFF)");
+
+ if (prctl(PR_TIMER_CREATE_RESTORE_IDS, PR_TIMER_CREATE_RESTORE_IDS_GET, 0, 0, 0) != 0)
+ fatal_error(NULL, "prctl(GET) failed\n");
+
+ if (id != 8) {
+ ksft_test_result_fail("check timer create exact %d != 8\n", id);
+ return;
+ }
+
+ /* Validate that it went back to normal mode and allocates ID 9 */
+ if (do_timer_create(&id) < 0)
+ fatal_error(NULL, "timer_create()");
+
+ if (do_timer_delete(id))
+ fatal_error(NULL, "timer_delete()");
+
+ if (id == 9)
+ ksft_test_result_pass("check timer create exact\n");
+ else
+ ksft_test_result_fail("check timer create exact. Disabling failed.\n");
+}
+
int main(int argc, char **argv)
{
ksft_print_header();
- ksft_set_plan(18);
+ ksft_set_plan(19);
ksft_print_msg("Testing posix timers. False negative may happen on CPU execution \n");
ksft_print_msg("based timers if other threads run on the CPU...\n");
+ check_timer_create_exact();
+
check_itimer(ITIMER_VIRTUAL, "ITIMER_VIRTUAL");
check_itimer(ITIMER_PROF, "ITIMER_PROF");
check_itimer(ITIMER_REAL, "ITIMER_REAL");
diff --git a/tools/testing/selftests/timers/skew_consistency.c b/tools/testing/selftests/timers/skew_consistency.c
index 83450145fe65..46c391d7f45d 100644
--- a/tools/testing/selftests/timers/skew_consistency.c
+++ b/tools/testing/selftests/timers/skew_consistency.c
@@ -47,7 +47,7 @@ int main(int argc, char **argv)
pid = fork();
if (!pid)
- return system("./inconsistency-check -c 1 -t 600");
+ return system("./inconsistency-check -t 60");
ppm = 500;
ret = 0;
diff --git a/tools/testing/selftests/ublk/.gitignore b/tools/testing/selftests/ublk/.gitignore
new file mode 100644
index 000000000000..8b2871ea7751
--- /dev/null
+++ b/tools/testing/selftests/ublk/.gitignore
@@ -0,0 +1,3 @@
+kublk
+/tools
+*-verify.state
diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile
new file mode 100644
index 000000000000..7817afe29005
--- /dev/null
+++ b/tools/testing/selftests/ublk/Makefile
@@ -0,0 +1,27 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CFLAGS += -O3 -Wl,-no-as-needed -Wall -I $(top_srcdir)
+LDLIBS += -lpthread -lm -luring
+
+TEST_PROGS := test_generic_01.sh
+
+TEST_PROGS += test_null_01.sh
+TEST_PROGS += test_null_02.sh
+TEST_PROGS += test_loop_01.sh
+TEST_PROGS += test_loop_02.sh
+TEST_PROGS += test_loop_03.sh
+TEST_PROGS += test_loop_04.sh
+TEST_PROGS += test_stripe_01.sh
+TEST_PROGS += test_stripe_02.sh
+
+TEST_PROGS += test_stress_01.sh
+TEST_PROGS += test_stress_02.sh
+
+TEST_GEN_PROGS_EXTENDED = kublk
+
+include ../lib.mk
+
+$(TEST_GEN_PROGS_EXTENDED): kublk.c null.c file_backed.c common.c stripe.c
+
+check:
+ shellcheck -x -f gcc *.sh
diff --git a/tools/testing/selftests/ublk/common.c b/tools/testing/selftests/ublk/common.c
new file mode 100644
index 000000000000..01580a6f8519
--- /dev/null
+++ b/tools/testing/selftests/ublk/common.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "kublk.h"
+
+void backing_file_tgt_deinit(struct ublk_dev *dev)
+{
+ int i;
+
+ for (i = 1; i < dev->nr_fds; i++) {
+ fsync(dev->fds[i]);
+ close(dev->fds[i]);
+ }
+}
+
+int backing_file_tgt_init(struct ublk_dev *dev)
+{
+ int fd, i;
+
+ assert(dev->nr_fds == 1);
+
+ for (i = 0; i < dev->tgt.nr_backing_files; i++) {
+ char *file = dev->tgt.backing_file[i];
+ unsigned long bytes;
+ struct stat st;
+
+ ublk_dbg(UBLK_DBG_DEV, "%s: file %d: %s\n", __func__, i, file);
+
+ fd = open(file, O_RDWR | O_DIRECT);
+ if (fd < 0) {
+ ublk_err("%s: backing file %s can't be opened: %s\n",
+ __func__, file, strerror(errno));
+ return -EBADF;
+ }
+
+ if (fstat(fd, &st) < 0) {
+ close(fd);
+ return -EBADF;
+ }
+
+ if (S_ISREG(st.st_mode))
+ bytes = st.st_size;
+ else if (S_ISBLK(st.st_mode)) {
+ if (ioctl(fd, BLKGETSIZE64, &bytes) != 0)
+ return -1;
+ } else {
+ return -EINVAL;
+ }
+
+ dev->tgt.backing_file_size[i] = bytes;
+ dev->fds[dev->nr_fds] = fd;
+ dev->nr_fds += 1;
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/ublk/config b/tools/testing/selftests/ublk/config
new file mode 100644
index 000000000000..592b0ba4d661
--- /dev/null
+++ b/tools/testing/selftests/ublk/config
@@ -0,0 +1 @@
+CONFIG_BLK_DEV_UBLK=m
diff --git a/tools/testing/selftests/ublk/file_backed.c b/tools/testing/selftests/ublk/file_backed.c
new file mode 100644
index 000000000000..6f34eabfae97
--- /dev/null
+++ b/tools/testing/selftests/ublk/file_backed.c
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "kublk.h"
+
+static enum io_uring_op ublk_to_uring_op(const struct ublksrv_io_desc *iod, int zc)
+{
+ unsigned ublk_op = ublksrv_get_op(iod);
+
+ if (ublk_op == UBLK_IO_OP_READ)
+ return zc ? IORING_OP_READ_FIXED : IORING_OP_READ;
+ else if (ublk_op == UBLK_IO_OP_WRITE)
+ return zc ? IORING_OP_WRITE_FIXED : IORING_OP_WRITE;
+ assert(0);
+}
+
+static int loop_queue_flush_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag)
+{
+ unsigned ublk_op = ublksrv_get_op(iod);
+ struct io_uring_sqe *sqe[1];
+
+ ublk_queue_alloc_sqes(q, sqe, 1);
+ io_uring_prep_fsync(sqe[0], 1 /*fds[1]*/, IORING_FSYNC_DATASYNC);
+ io_uring_sqe_set_flags(sqe[0], IOSQE_FIXED_FILE);
+ /* bit63 marks us as tgt io */
+ sqe[0]->user_data = build_user_data(tag, ublk_op, 0, 1);
+ return 1;
+}
+
+static int loop_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag)
+{
+ unsigned ublk_op = ublksrv_get_op(iod);
+ int zc = ublk_queue_use_zc(q);
+ enum io_uring_op op = ublk_to_uring_op(iod, zc);
+ struct io_uring_sqe *sqe[3];
+
+ if (!zc) {
+ ublk_queue_alloc_sqes(q, sqe, 1);
+ if (!sqe[0])
+ return -ENOMEM;
+
+ io_uring_prep_rw(op, sqe[0], 1 /*fds[1]*/,
+ (void *)iod->addr,
+ iod->nr_sectors << 9,
+ iod->start_sector << 9);
+ io_uring_sqe_set_flags(sqe[0], IOSQE_FIXED_FILE);
+ /* bit63 marks us as tgt io */
+ sqe[0]->user_data = build_user_data(tag, ublk_op, 0, 1);
+ return 1;
+ }
+
+ ublk_queue_alloc_sqes(q, sqe, 3);
+
+ io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, tag);
+ sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK;
+ sqe[0]->user_data = build_user_data(tag,
+ ublk_cmd_op_nr(sqe[0]->cmd_op), 0, 1);
+
+ io_uring_prep_rw(op, sqe[1], 1 /*fds[1]*/, 0,
+ iod->nr_sectors << 9,
+ iod->start_sector << 9);
+ sqe[1]->buf_index = tag;
+ sqe[1]->flags |= IOSQE_FIXED_FILE | IOSQE_IO_HARDLINK;
+ sqe[1]->user_data = build_user_data(tag, ublk_op, 0, 1);
+
+ io_uring_prep_buf_unregister(sqe[2], 0, tag, q->q_id, tag);
+ sqe[2]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[2]->cmd_op), 0, 1);
+
+ return 2;
+}
+
+static int loop_queue_tgt_io(struct ublk_queue *q, int tag)
+{
+ const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
+ unsigned ublk_op = ublksrv_get_op(iod);
+ int ret;
+
+ switch (ublk_op) {
+ case UBLK_IO_OP_FLUSH:
+ ret = loop_queue_flush_io(q, iod, tag);
+ break;
+ case UBLK_IO_OP_WRITE_ZEROES:
+ case UBLK_IO_OP_DISCARD:
+ ret = -ENOTSUP;
+ break;
+ case UBLK_IO_OP_READ:
+ case UBLK_IO_OP_WRITE:
+ ret = loop_queue_tgt_rw_io(q, iod, tag);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ ublk_dbg(UBLK_DBG_IO, "%s: tag %d ublk io %x %llx %u\n", __func__, tag,
+ iod->op_flags, iod->start_sector, iod->nr_sectors << 9);
+ return ret;
+}
+
+static int ublk_loop_queue_io(struct ublk_queue *q, int tag)
+{
+ int queued = loop_queue_tgt_io(q, tag);
+
+ ublk_queued_tgt_io(q, tag, queued);
+ return 0;
+}
+
+static void ublk_loop_io_done(struct ublk_queue *q, int tag,
+ const struct io_uring_cqe *cqe)
+{
+ unsigned op = user_data_to_op(cqe->user_data);
+ struct ublk_io *io = ublk_get_io(q, tag);
+
+ if (cqe->res < 0 || op != ublk_cmd_op_nr(UBLK_U_IO_UNREGISTER_IO_BUF)) {
+ if (!io->result)
+ io->result = cqe->res;
+ if (cqe->res < 0)
+ ublk_err("%s: io failed op %x user_data %lx\n",
+ __func__, op, cqe->user_data);
+ }
+
+ /* buffer register op is IOSQE_CQE_SKIP_SUCCESS */
+ if (op == ublk_cmd_op_nr(UBLK_U_IO_REGISTER_IO_BUF))
+ io->tgt_ios += 1;
+
+ if (ublk_completed_tgt_io(q, tag))
+ ublk_complete_io(q, tag, io->result);
+}
+
+static int ublk_loop_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
+{
+ unsigned long long bytes;
+ int ret;
+ struct ublk_params p = {
+ .types = UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DMA_ALIGN,
+ .basic = {
+ .attrs = UBLK_ATTR_VOLATILE_CACHE,
+ .logical_bs_shift = 9,
+ .physical_bs_shift = 12,
+ .io_opt_shift = 12,
+ .io_min_shift = 9,
+ .max_sectors = dev->dev_info.max_io_buf_bytes >> 9,
+ },
+ .dma = {
+ .alignment = 511,
+ },
+ };
+
+ ret = backing_file_tgt_init(dev);
+ if (ret)
+ return ret;
+
+ if (dev->tgt.nr_backing_files != 1)
+ return -EINVAL;
+
+ bytes = dev->tgt.backing_file_size[0];
+ dev->tgt.dev_size = bytes;
+ p.basic.dev_sectors = bytes >> 9;
+ dev->tgt.params = p;
+
+ return 0;
+}
+
+const struct ublk_tgt_ops loop_tgt_ops = {
+ .name = "loop",
+ .init_tgt = ublk_loop_tgt_init,
+ .deinit_tgt = backing_file_tgt_deinit,
+ .queue_io = ublk_loop_queue_io,
+ .tgt_io_done = ublk_loop_io_done,
+};
diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c
new file mode 100644
index 000000000000..05147b53c361
--- /dev/null
+++ b/tools/testing/selftests/ublk/kublk.c
@@ -0,0 +1,1138 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: uring_cmd based ublk
+ */
+
+#include "kublk.h"
+
+unsigned int ublk_dbg_mask = UBLK_LOG;
+static const struct ublk_tgt_ops *tgt_ops_list[] = {
+ &null_tgt_ops,
+ &loop_tgt_ops,
+ &stripe_tgt_ops,
+};
+
+static const struct ublk_tgt_ops *ublk_find_tgt(const char *name)
+{
+ const struct ublk_tgt_ops *ops;
+ int i;
+
+ if (name == NULL)
+ return NULL;
+
+ for (i = 0; sizeof(tgt_ops_list) / sizeof(ops); i++)
+ if (strcmp(tgt_ops_list[i]->name, name) == 0)
+ return tgt_ops_list[i];
+ return NULL;
+}
+
+static inline int ublk_setup_ring(struct io_uring *r, int depth,
+ int cq_depth, unsigned flags)
+{
+ struct io_uring_params p;
+
+ memset(&p, 0, sizeof(p));
+ p.flags = flags | IORING_SETUP_CQSIZE;
+ p.cq_entries = cq_depth;
+
+ return io_uring_queue_init_params(depth, r, &p);
+}
+
+static void ublk_ctrl_init_cmd(struct ublk_dev *dev,
+ struct io_uring_sqe *sqe,
+ struct ublk_ctrl_cmd_data *data)
+{
+ struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
+ struct ublksrv_ctrl_cmd *cmd = (struct ublksrv_ctrl_cmd *)ublk_get_sqe_cmd(sqe);
+
+ sqe->fd = dev->ctrl_fd;
+ sqe->opcode = IORING_OP_URING_CMD;
+ sqe->ioprio = 0;
+
+ if (data->flags & CTRL_CMD_HAS_BUF) {
+ cmd->addr = data->addr;
+ cmd->len = data->len;
+ }
+
+ if (data->flags & CTRL_CMD_HAS_DATA)
+ cmd->data[0] = data->data[0];
+
+ cmd->dev_id = info->dev_id;
+ cmd->queue_id = -1;
+
+ ublk_set_sqe_cmd_op(sqe, data->cmd_op);
+
+ io_uring_sqe_set_data(sqe, cmd);
+}
+
+static int __ublk_ctrl_cmd(struct ublk_dev *dev,
+ struct ublk_ctrl_cmd_data *data)
+{
+ struct io_uring_sqe *sqe;
+ struct io_uring_cqe *cqe;
+ int ret = -EINVAL;
+
+ sqe = io_uring_get_sqe(&dev->ring);
+ if (!sqe) {
+ ublk_err("%s: can't get sqe ret %d\n", __func__, ret);
+ return ret;
+ }
+
+ ublk_ctrl_init_cmd(dev, sqe, data);
+
+ ret = io_uring_submit(&dev->ring);
+ if (ret < 0) {
+ ublk_err("uring submit ret %d\n", ret);
+ return ret;
+ }
+
+ ret = io_uring_wait_cqe(&dev->ring, &cqe);
+ if (ret < 0) {
+ ublk_err("wait cqe: %s\n", strerror(-ret));
+ return ret;
+ }
+ io_uring_cqe_seen(&dev->ring, cqe);
+
+ return cqe->res;
+}
+
+static int ublk_ctrl_stop_dev(struct ublk_dev *dev)
+{
+ struct ublk_ctrl_cmd_data data = {
+ .cmd_op = UBLK_CMD_STOP_DEV,
+ };
+
+ return __ublk_ctrl_cmd(dev, &data);
+}
+
+static int ublk_ctrl_start_dev(struct ublk_dev *dev,
+ int daemon_pid)
+{
+ struct ublk_ctrl_cmd_data data = {
+ .cmd_op = UBLK_U_CMD_START_DEV,
+ .flags = CTRL_CMD_HAS_DATA,
+ };
+
+ dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid;
+
+ return __ublk_ctrl_cmd(dev, &data);
+}
+
+static int ublk_ctrl_add_dev(struct ublk_dev *dev)
+{
+ struct ublk_ctrl_cmd_data data = {
+ .cmd_op = UBLK_U_CMD_ADD_DEV,
+ .flags = CTRL_CMD_HAS_BUF,
+ .addr = (__u64) (uintptr_t) &dev->dev_info,
+ .len = sizeof(struct ublksrv_ctrl_dev_info),
+ };
+
+ return __ublk_ctrl_cmd(dev, &data);
+}
+
+static int ublk_ctrl_del_dev(struct ublk_dev *dev)
+{
+ struct ublk_ctrl_cmd_data data = {
+ .cmd_op = UBLK_U_CMD_DEL_DEV,
+ .flags = 0,
+ };
+
+ return __ublk_ctrl_cmd(dev, &data);
+}
+
+static int ublk_ctrl_get_info(struct ublk_dev *dev)
+{
+ struct ublk_ctrl_cmd_data data = {
+ .cmd_op = UBLK_U_CMD_GET_DEV_INFO,
+ .flags = CTRL_CMD_HAS_BUF,
+ .addr = (__u64) (uintptr_t) &dev->dev_info,
+ .len = sizeof(struct ublksrv_ctrl_dev_info),
+ };
+
+ return __ublk_ctrl_cmd(dev, &data);
+}
+
+static int ublk_ctrl_set_params(struct ublk_dev *dev,
+ struct ublk_params *params)
+{
+ struct ublk_ctrl_cmd_data data = {
+ .cmd_op = UBLK_U_CMD_SET_PARAMS,
+ .flags = CTRL_CMD_HAS_BUF,
+ .addr = (__u64) (uintptr_t) params,
+ .len = sizeof(*params),
+ };
+ params->len = sizeof(*params);
+ return __ublk_ctrl_cmd(dev, &data);
+}
+
+static int ublk_ctrl_get_params(struct ublk_dev *dev,
+ struct ublk_params *params)
+{
+ struct ublk_ctrl_cmd_data data = {
+ .cmd_op = UBLK_CMD_GET_PARAMS,
+ .flags = CTRL_CMD_HAS_BUF,
+ .addr = (__u64)params,
+ .len = sizeof(*params),
+ };
+
+ params->len = sizeof(*params);
+
+ return __ublk_ctrl_cmd(dev, &data);
+}
+
+static int ublk_ctrl_get_features(struct ublk_dev *dev,
+ __u64 *features)
+{
+ struct ublk_ctrl_cmd_data data = {
+ .cmd_op = UBLK_U_CMD_GET_FEATURES,
+ .flags = CTRL_CMD_HAS_BUF,
+ .addr = (__u64) (uintptr_t) features,
+ .len = sizeof(*features),
+ };
+
+ return __ublk_ctrl_cmd(dev, &data);
+}
+
+static const char *ublk_dev_state_desc(struct ublk_dev *dev)
+{
+ switch (dev->dev_info.state) {
+ case UBLK_S_DEV_DEAD:
+ return "DEAD";
+ case UBLK_S_DEV_LIVE:
+ return "LIVE";
+ case UBLK_S_DEV_QUIESCED:
+ return "QUIESCED";
+ default:
+ return "UNKNOWN";
+ };
+}
+
+static void ublk_ctrl_dump(struct ublk_dev *dev)
+{
+ struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
+ struct ublk_params p;
+ int ret;
+
+ ret = ublk_ctrl_get_params(dev, &p);
+ if (ret < 0) {
+ ublk_err("failed to get params %m\n");
+ return;
+ }
+
+ ublk_log("dev id %d: nr_hw_queues %d queue_depth %d block size %d dev_capacity %lld\n",
+ info->dev_id, info->nr_hw_queues, info->queue_depth,
+ 1 << p.basic.logical_bs_shift, p.basic.dev_sectors);
+ ublk_log("\tmax rq size %d daemon pid %d flags 0x%llx state %s\n",
+ info->max_io_buf_bytes, info->ublksrv_pid, info->flags,
+ ublk_dev_state_desc(dev));
+ fflush(stdout);
+}
+
+static void ublk_ctrl_deinit(struct ublk_dev *dev)
+{
+ close(dev->ctrl_fd);
+ free(dev);
+}
+
+static struct ublk_dev *ublk_ctrl_init(void)
+{
+ struct ublk_dev *dev = (struct ublk_dev *)calloc(1, sizeof(*dev));
+ struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
+ int ret;
+
+ dev->ctrl_fd = open(CTRL_DEV, O_RDWR);
+ if (dev->ctrl_fd < 0) {
+ free(dev);
+ return NULL;
+ }
+
+ info->max_io_buf_bytes = UBLK_IO_MAX_BYTES;
+
+ ret = ublk_setup_ring(&dev->ring, UBLK_CTRL_RING_DEPTH,
+ UBLK_CTRL_RING_DEPTH, IORING_SETUP_SQE128);
+ if (ret < 0) {
+ ublk_err("queue_init: %s\n", strerror(-ret));
+ free(dev);
+ return NULL;
+ }
+ dev->nr_fds = 1;
+
+ return dev;
+}
+
+static int __ublk_queue_cmd_buf_sz(unsigned depth)
+{
+ int size = depth * sizeof(struct ublksrv_io_desc);
+ unsigned int page_sz = getpagesize();
+
+ return round_up(size, page_sz);
+}
+
+static int ublk_queue_max_cmd_buf_sz(void)
+{
+ return __ublk_queue_cmd_buf_sz(UBLK_MAX_QUEUE_DEPTH);
+}
+
+static int ublk_queue_cmd_buf_sz(struct ublk_queue *q)
+{
+ return __ublk_queue_cmd_buf_sz(q->q_depth);
+}
+
+static void ublk_queue_deinit(struct ublk_queue *q)
+{
+ int i;
+ int nr_ios = q->q_depth;
+
+ io_uring_unregister_buffers(&q->ring);
+
+ io_uring_unregister_ring_fd(&q->ring);
+
+ if (q->ring.ring_fd > 0) {
+ io_uring_unregister_files(&q->ring);
+ close(q->ring.ring_fd);
+ q->ring.ring_fd = -1;
+ }
+
+ if (q->io_cmd_buf)
+ munmap(q->io_cmd_buf, ublk_queue_cmd_buf_sz(q));
+
+ for (i = 0; i < nr_ios; i++)
+ free(q->ios[i].buf_addr);
+}
+
+static int ublk_queue_init(struct ublk_queue *q)
+{
+ struct ublk_dev *dev = q->dev;
+ int depth = dev->dev_info.queue_depth;
+ int i, ret = -1;
+ int cmd_buf_size, io_buf_size;
+ unsigned long off;
+ int ring_depth = dev->tgt.sq_depth, cq_depth = dev->tgt.cq_depth;
+
+ q->tgt_ops = dev->tgt.ops;
+ q->state = 0;
+ q->q_depth = depth;
+ q->cmd_inflight = 0;
+ q->tid = gettid();
+
+ if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY) {
+ q->state |= UBLKSRV_NO_BUF;
+ q->state |= UBLKSRV_ZC;
+ }
+
+ cmd_buf_size = ublk_queue_cmd_buf_sz(q);
+ off = UBLKSRV_CMD_BUF_OFFSET + q->q_id * ublk_queue_max_cmd_buf_sz();
+ q->io_cmd_buf = (char *)mmap(0, cmd_buf_size, PROT_READ,
+ MAP_SHARED | MAP_POPULATE, dev->fds[0], off);
+ if (q->io_cmd_buf == MAP_FAILED) {
+ ublk_err("ublk dev %d queue %d map io_cmd_buf failed %m\n",
+ q->dev->dev_info.dev_id, q->q_id);
+ goto fail;
+ }
+
+ io_buf_size = dev->dev_info.max_io_buf_bytes;
+ for (i = 0; i < q->q_depth; i++) {
+ q->ios[i].buf_addr = NULL;
+ q->ios[i].flags = UBLKSRV_NEED_FETCH_RQ | UBLKSRV_IO_FREE;
+
+ if (q->state & UBLKSRV_NO_BUF)
+ continue;
+
+ if (posix_memalign((void **)&q->ios[i].buf_addr,
+ getpagesize(), io_buf_size)) {
+ ublk_err("ublk dev %d queue %d io %d posix_memalign failed %m\n",
+ dev->dev_info.dev_id, q->q_id, i);
+ goto fail;
+ }
+ }
+
+ ret = ublk_setup_ring(&q->ring, ring_depth, cq_depth,
+ IORING_SETUP_COOP_TASKRUN);
+ if (ret < 0) {
+ ublk_err("ublk dev %d queue %d setup io_uring failed %d\n",
+ q->dev->dev_info.dev_id, q->q_id, ret);
+ goto fail;
+ }
+
+ if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY) {
+ ret = io_uring_register_buffers_sparse(&q->ring, q->q_depth);
+ if (ret) {
+ ublk_err("ublk dev %d queue %d register spare buffers failed %d",
+ dev->dev_info.dev_id, q->q_id, ret);
+ goto fail;
+ }
+ }
+
+ io_uring_register_ring_fd(&q->ring);
+
+ ret = io_uring_register_files(&q->ring, dev->fds, dev->nr_fds);
+ if (ret) {
+ ublk_err("ublk dev %d queue %d register files failed %d\n",
+ q->dev->dev_info.dev_id, q->q_id, ret);
+ goto fail;
+ }
+
+ return 0;
+ fail:
+ ublk_queue_deinit(q);
+ ublk_err("ublk dev %d queue %d failed\n",
+ dev->dev_info.dev_id, q->q_id);
+ return -ENOMEM;
+}
+
+#define WAIT_USEC 100000
+#define MAX_WAIT_USEC (3 * 1000000)
+static int ublk_dev_prep(const struct dev_ctx *ctx, struct ublk_dev *dev)
+{
+ int dev_id = dev->dev_info.dev_id;
+ unsigned int wait_usec = 0;
+ int ret = 0, fd = -1;
+ char buf[64];
+
+ snprintf(buf, 64, "%s%d", UBLKC_DEV, dev_id);
+
+ while (wait_usec < MAX_WAIT_USEC) {
+ fd = open(buf, O_RDWR);
+ if (fd >= 0)
+ break;
+ usleep(WAIT_USEC);
+ wait_usec += WAIT_USEC;
+ }
+ if (fd < 0) {
+ ublk_err("can't open %s %s\n", buf, strerror(errno));
+ return -1;
+ }
+
+ dev->fds[0] = fd;
+ if (dev->tgt.ops->init_tgt)
+ ret = dev->tgt.ops->init_tgt(ctx, dev);
+ if (ret)
+ close(dev->fds[0]);
+ return ret;
+}
+
+static void ublk_dev_unprep(struct ublk_dev *dev)
+{
+ if (dev->tgt.ops->deinit_tgt)
+ dev->tgt.ops->deinit_tgt(dev);
+ close(dev->fds[0]);
+}
+
+int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag)
+{
+ struct ublksrv_io_cmd *cmd;
+ struct io_uring_sqe *sqe[1];
+ unsigned int cmd_op = 0;
+ __u64 user_data;
+
+ /* only freed io can be issued */
+ if (!(io->flags & UBLKSRV_IO_FREE))
+ return 0;
+
+ /* we issue because we need either fetching or committing */
+ if (!(io->flags &
+ (UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP)))
+ return 0;
+
+ if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP)
+ cmd_op = UBLK_U_IO_COMMIT_AND_FETCH_REQ;
+ else if (io->flags & UBLKSRV_NEED_FETCH_RQ)
+ cmd_op = UBLK_U_IO_FETCH_REQ;
+
+ if (io_uring_sq_space_left(&q->ring) < 1)
+ io_uring_submit(&q->ring);
+
+ ublk_queue_alloc_sqes(q, sqe, 1);
+ if (!sqe[0]) {
+ ublk_err("%s: run out of sqe %d, tag %d\n",
+ __func__, q->q_id, tag);
+ return -1;
+ }
+
+ cmd = (struct ublksrv_io_cmd *)ublk_get_sqe_cmd(sqe[0]);
+
+ if (cmd_op == UBLK_U_IO_COMMIT_AND_FETCH_REQ)
+ cmd->result = io->result;
+
+ /* These fields should be written once, never change */
+ ublk_set_sqe_cmd_op(sqe[0], cmd_op);
+ sqe[0]->fd = 0; /* dev->fds[0] */
+ sqe[0]->opcode = IORING_OP_URING_CMD;
+ sqe[0]->flags = IOSQE_FIXED_FILE;
+ sqe[0]->rw_flags = 0;
+ cmd->tag = tag;
+ cmd->q_id = q->q_id;
+ if (!(q->state & UBLKSRV_NO_BUF))
+ cmd->addr = (__u64) (uintptr_t) io->buf_addr;
+ else
+ cmd->addr = 0;
+
+ user_data = build_user_data(tag, _IOC_NR(cmd_op), 0, 0);
+ io_uring_sqe_set_data64(sqe[0], user_data);
+
+ io->flags = 0;
+
+ q->cmd_inflight += 1;
+
+ ublk_dbg(UBLK_DBG_IO_CMD, "%s: (qid %d tag %u cmd_op %u) iof %x stopping %d\n",
+ __func__, q->q_id, tag, cmd_op,
+ io->flags, !!(q->state & UBLKSRV_QUEUE_STOPPING));
+ return 1;
+}
+
+static void ublk_submit_fetch_commands(struct ublk_queue *q)
+{
+ int i = 0;
+
+ for (i = 0; i < q->q_depth; i++)
+ ublk_queue_io_cmd(q, &q->ios[i], i);
+}
+
+static int ublk_queue_is_idle(struct ublk_queue *q)
+{
+ return !io_uring_sq_ready(&q->ring) && !q->io_inflight;
+}
+
+static int ublk_queue_is_done(struct ublk_queue *q)
+{
+ return (q->state & UBLKSRV_QUEUE_STOPPING) && ublk_queue_is_idle(q);
+}
+
+static inline void ublksrv_handle_tgt_cqe(struct ublk_queue *q,
+ struct io_uring_cqe *cqe)
+{
+ unsigned tag = user_data_to_tag(cqe->user_data);
+
+ if (cqe->res < 0 && cqe->res != -EAGAIN)
+ ublk_err("%s: failed tgt io: res %d qid %u tag %u, cmd_op %u\n",
+ __func__, cqe->res, q->q_id,
+ user_data_to_tag(cqe->user_data),
+ user_data_to_op(cqe->user_data));
+
+ if (q->tgt_ops->tgt_io_done)
+ q->tgt_ops->tgt_io_done(q, tag, cqe);
+}
+
+static void ublk_handle_cqe(struct io_uring *r,
+ struct io_uring_cqe *cqe, void *data)
+{
+ struct ublk_queue *q = container_of(r, struct ublk_queue, ring);
+ unsigned tag = user_data_to_tag(cqe->user_data);
+ unsigned cmd_op = user_data_to_op(cqe->user_data);
+ int fetch = (cqe->res != UBLK_IO_RES_ABORT) &&
+ !(q->state & UBLKSRV_QUEUE_STOPPING);
+ struct ublk_io *io;
+
+ if (cqe->res < 0 && cqe->res != -ENODEV)
+ ublk_err("%s: res %d userdata %llx queue state %x\n", __func__,
+ cqe->res, cqe->user_data, q->state);
+
+ ublk_dbg(UBLK_DBG_IO_CMD, "%s: res %d (qid %d tag %u cmd_op %u target %d/%d) stopping %d\n",
+ __func__, cqe->res, q->q_id, tag, cmd_op,
+ is_target_io(cqe->user_data),
+ user_data_to_tgt_data(cqe->user_data),
+ (q->state & UBLKSRV_QUEUE_STOPPING));
+
+ /* Don't retrieve io in case of target io */
+ if (is_target_io(cqe->user_data)) {
+ ublksrv_handle_tgt_cqe(q, cqe);
+ return;
+ }
+
+ io = &q->ios[tag];
+ q->cmd_inflight--;
+
+ if (!fetch) {
+ q->state |= UBLKSRV_QUEUE_STOPPING;
+ io->flags &= ~UBLKSRV_NEED_FETCH_RQ;
+ }
+
+ if (cqe->res == UBLK_IO_RES_OK) {
+ assert(tag < q->q_depth);
+ if (q->tgt_ops->queue_io)
+ q->tgt_ops->queue_io(q, tag);
+ } else {
+ /*
+ * COMMIT_REQ will be completed immediately since no fetching
+ * piggyback is required.
+ *
+ * Marking IO_FREE only, then this io won't be issued since
+ * we only issue io with (UBLKSRV_IO_FREE | UBLKSRV_NEED_*)
+ *
+ * */
+ io->flags = UBLKSRV_IO_FREE;
+ }
+}
+
+static int ublk_reap_events_uring(struct io_uring *r)
+{
+ struct io_uring_cqe *cqe;
+ unsigned head;
+ int count = 0;
+
+ io_uring_for_each_cqe(r, head, cqe) {
+ ublk_handle_cqe(r, cqe, NULL);
+ count += 1;
+ }
+ io_uring_cq_advance(r, count);
+
+ return count;
+}
+
+static int ublk_process_io(struct ublk_queue *q)
+{
+ int ret, reapped;
+
+ ublk_dbg(UBLK_DBG_QUEUE, "dev%d-q%d: to_submit %d inflight cmd %u stopping %d\n",
+ q->dev->dev_info.dev_id,
+ q->q_id, io_uring_sq_ready(&q->ring),
+ q->cmd_inflight,
+ (q->state & UBLKSRV_QUEUE_STOPPING));
+
+ if (ublk_queue_is_done(q))
+ return -ENODEV;
+
+ ret = io_uring_submit_and_wait(&q->ring, 1);
+ reapped = ublk_reap_events_uring(&q->ring);
+
+ ublk_dbg(UBLK_DBG_QUEUE, "submit result %d, reapped %d stop %d idle %d\n",
+ ret, reapped, (q->state & UBLKSRV_QUEUE_STOPPING),
+ (q->state & UBLKSRV_QUEUE_IDLE));
+
+ return reapped;
+}
+
+static void *ublk_io_handler_fn(void *data)
+{
+ struct ublk_queue *q = data;
+ int dev_id = q->dev->dev_info.dev_id;
+ int ret;
+
+ ret = ublk_queue_init(q);
+ if (ret) {
+ ublk_err("ublk dev %d queue %d init queue failed\n",
+ dev_id, q->q_id);
+ return NULL;
+ }
+ ublk_dbg(UBLK_DBG_QUEUE, "tid %d: ublk dev %d queue %d started\n",
+ q->tid, dev_id, q->q_id);
+
+ /* submit all io commands to ublk driver */
+ ublk_submit_fetch_commands(q);
+ do {
+ if (ublk_process_io(q) < 0)
+ break;
+ } while (1);
+
+ ublk_dbg(UBLK_DBG_QUEUE, "ublk dev %d queue %d exited\n", dev_id, q->q_id);
+ ublk_queue_deinit(q);
+ return NULL;
+}
+
+static void ublk_set_parameters(struct ublk_dev *dev)
+{
+ int ret;
+
+ ret = ublk_ctrl_set_params(dev, &dev->tgt.params);
+ if (ret)
+ ublk_err("dev %d set basic parameter failed %d\n",
+ dev->dev_info.dev_id, ret);
+}
+
+static int ublk_send_dev_event(const struct dev_ctx *ctx, int dev_id)
+{
+ uint64_t id;
+ int evtfd = ctx->_evtfd;
+
+ if (evtfd < 0)
+ return -EBADF;
+
+ if (dev_id >= 0)
+ id = dev_id + 1;
+ else
+ id = ERROR_EVTFD_DEVID;
+
+ if (write(evtfd, &id, sizeof(id)) != sizeof(id))
+ return -EINVAL;
+
+ return 0;
+}
+
+
+static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
+{
+ int ret, i;
+ void *thread_ret;
+ const struct ublksrv_ctrl_dev_info *dinfo = &dev->dev_info;
+
+ ublk_dbg(UBLK_DBG_DEV, "%s enter\n", __func__);
+
+ ret = ublk_dev_prep(ctx, dev);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < dinfo->nr_hw_queues; i++) {
+ dev->q[i].dev = dev;
+ dev->q[i].q_id = i;
+ pthread_create(&dev->q[i].thread, NULL,
+ ublk_io_handler_fn,
+ &dev->q[i]);
+ }
+
+ /* everything is fine now, start us */
+ ublk_set_parameters(dev);
+ ret = ublk_ctrl_start_dev(dev, getpid());
+ if (ret < 0) {
+ ublk_err("%s: ublk_ctrl_start_dev failed: %d\n", __func__, ret);
+ goto fail;
+ }
+
+ ublk_ctrl_get_info(dev);
+ if (ctx->fg)
+ ublk_ctrl_dump(dev);
+ else
+ ublk_send_dev_event(ctx, dev->dev_info.dev_id);
+
+ /* wait until we are terminated */
+ for (i = 0; i < dinfo->nr_hw_queues; i++)
+ pthread_join(dev->q[i].thread, &thread_ret);
+ fail:
+ ublk_dev_unprep(dev);
+ ublk_dbg(UBLK_DBG_DEV, "%s exit\n", __func__);
+
+ return ret;
+}
+
+static int wait_ublk_dev(const char *path, int evt_mask, unsigned timeout)
+{
+#define EV_SIZE (sizeof(struct inotify_event))
+#define EV_BUF_LEN (128 * (EV_SIZE + 16))
+ struct pollfd pfd;
+ int fd, wd;
+ int ret = -EINVAL;
+ const char *dev_name = basename(path);
+
+ fd = inotify_init();
+ if (fd < 0) {
+ ublk_dbg(UBLK_DBG_DEV, "%s: inotify init failed\n", __func__);
+ return fd;
+ }
+
+ wd = inotify_add_watch(fd, "/dev", evt_mask);
+ if (wd == -1) {
+ ublk_dbg(UBLK_DBG_DEV, "%s: add watch for /dev failed\n", __func__);
+ goto fail;
+ }
+
+ pfd.fd = fd;
+ pfd.events = POLL_IN;
+ while (1) {
+ int i = 0;
+ char buffer[EV_BUF_LEN];
+ ret = poll(&pfd, 1, 1000 * timeout);
+
+ if (ret == -1) {
+ ublk_err("%s: poll inotify failed: %d\n", __func__, ret);
+ goto rm_watch;
+ } else if (ret == 0) {
+ ublk_err("%s: poll inotify timeout\n", __func__);
+ ret = -ETIMEDOUT;
+ goto rm_watch;
+ }
+
+ ret = read(fd, buffer, EV_BUF_LEN);
+ if (ret < 0) {
+ ublk_err("%s: read inotify fd failed\n", __func__);
+ goto rm_watch;
+ }
+
+ while (i < ret) {
+ struct inotify_event *event = (struct inotify_event *)&buffer[i];
+
+ ublk_dbg(UBLK_DBG_DEV, "%s: inotify event %x %s\n",
+ __func__, event->mask, event->name);
+ if (event->mask & evt_mask) {
+ if (!strcmp(event->name, dev_name)) {
+ ret = 0;
+ goto rm_watch;
+ }
+ }
+ i += EV_SIZE + event->len;
+ }
+ }
+rm_watch:
+ inotify_rm_watch(fd, wd);
+fail:
+ close(fd);
+ return ret;
+}
+
+static int ublk_stop_io_daemon(const struct ublk_dev *dev)
+{
+ int daemon_pid = dev->dev_info.ublksrv_pid;
+ int dev_id = dev->dev_info.dev_id;
+ char ublkc[64];
+ int ret = 0;
+
+ if (daemon_pid < 0)
+ return 0;
+
+ /* daemon may be dead already */
+ if (kill(daemon_pid, 0) < 0)
+ goto wait;
+
+ snprintf(ublkc, sizeof(ublkc), "/dev/%s%d", "ublkc", dev_id);
+
+ /* ublk char device may be gone already */
+ if (access(ublkc, F_OK) != 0)
+ goto wait;
+
+ /* Wait until ublk char device is closed, when the daemon is shutdown */
+ ret = wait_ublk_dev(ublkc, IN_CLOSE, 10);
+ /* double check and since it may be closed before starting inotify */
+ if (ret == -ETIMEDOUT)
+ ret = kill(daemon_pid, 0) < 0;
+wait:
+ waitpid(daemon_pid, NULL, 0);
+ ublk_dbg(UBLK_DBG_DEV, "%s: pid %d dev_id %d ret %d\n",
+ __func__, daemon_pid, dev_id, ret);
+
+ return ret;
+}
+
+static int __cmd_dev_add(const struct dev_ctx *ctx)
+{
+ unsigned nr_queues = ctx->nr_hw_queues;
+ const char *tgt_type = ctx->tgt_type;
+ unsigned depth = ctx->queue_depth;
+ __u64 features;
+ const struct ublk_tgt_ops *ops;
+ struct ublksrv_ctrl_dev_info *info;
+ struct ublk_dev *dev;
+ int dev_id = ctx->dev_id;
+ int ret, i;
+
+ ops = ublk_find_tgt(tgt_type);
+ if (!ops) {
+ ublk_err("%s: no such tgt type, type %s\n",
+ __func__, tgt_type);
+ return -ENODEV;
+ }
+
+ if (nr_queues > UBLK_MAX_QUEUES || depth > UBLK_QUEUE_DEPTH) {
+ ublk_err("%s: invalid nr_queues or depth queues %u depth %u\n",
+ __func__, nr_queues, depth);
+ return -EINVAL;
+ }
+
+ dev = ublk_ctrl_init();
+ if (!dev) {
+ ublk_err("%s: can't alloc dev id %d, type %s\n",
+ __func__, dev_id, tgt_type);
+ return -ENOMEM;
+ }
+
+ /* kernel doesn't support get_features */
+ ret = ublk_ctrl_get_features(dev, &features);
+ if (ret < 0)
+ return -EINVAL;
+
+ if (!(features & UBLK_F_CMD_IOCTL_ENCODE))
+ return -ENOTSUP;
+
+ info = &dev->dev_info;
+ info->dev_id = ctx->dev_id;
+ info->nr_hw_queues = nr_queues;
+ info->queue_depth = depth;
+ info->flags = ctx->flags;
+ dev->tgt.ops = ops;
+ dev->tgt.sq_depth = depth;
+ dev->tgt.cq_depth = depth;
+
+ for (i = 0; i < MAX_BACK_FILES; i++) {
+ if (ctx->files[i]) {
+ strcpy(dev->tgt.backing_file[i], ctx->files[i]);
+ dev->tgt.nr_backing_files++;
+ }
+ }
+
+ ret = ublk_ctrl_add_dev(dev);
+ if (ret < 0) {
+ ublk_err("%s: can't add dev id %d, type %s ret %d\n",
+ __func__, dev_id, tgt_type, ret);
+ goto fail;
+ }
+
+ ret = ublk_start_daemon(ctx, dev);
+ ublk_dbg(UBLK_DBG_DEV, "%s: daemon exit %d\b", ret);
+ if (ret < 0)
+ ublk_ctrl_del_dev(dev);
+
+fail:
+ if (ret < 0)
+ ublk_send_dev_event(ctx, -1);
+ ublk_ctrl_deinit(dev);
+ return ret;
+}
+
+static int __cmd_dev_list(struct dev_ctx *ctx);
+
+static int cmd_dev_add(struct dev_ctx *ctx)
+{
+ int res;
+
+ if (ctx->fg)
+ goto run;
+
+ ctx->_evtfd = eventfd(0, 0);
+ if (ctx->_evtfd < 0) {
+ ublk_err("%s: failed to create eventfd %s\n", __func__, strerror(errno));
+ exit(-1);
+ }
+
+ setsid();
+ res = fork();
+ if (res == 0) {
+run:
+ res = __cmd_dev_add(ctx);
+ return res;
+ } else if (res > 0) {
+ uint64_t id;
+
+ res = read(ctx->_evtfd, &id, sizeof(id));
+ close(ctx->_evtfd);
+ if (res == sizeof(id) && id != ERROR_EVTFD_DEVID) {
+ ctx->dev_id = id - 1;
+ return __cmd_dev_list(ctx);
+ }
+ exit(EXIT_FAILURE);
+ } else {
+ return res;
+ }
+}
+
+static int __cmd_dev_del(struct dev_ctx *ctx)
+{
+ int number = ctx->dev_id;
+ struct ublk_dev *dev;
+ int ret;
+
+ dev = ublk_ctrl_init();
+ dev->dev_info.dev_id = number;
+
+ ret = ublk_ctrl_get_info(dev);
+ if (ret < 0)
+ goto fail;
+
+ ret = ublk_ctrl_stop_dev(dev);
+ if (ret < 0)
+ ublk_err("%s: stop dev %d failed ret %d\n", __func__, number, ret);
+
+ ret = ublk_stop_io_daemon(dev);
+ if (ret < 0)
+ ublk_err("%s: stop daemon id %d dev %d, ret %d\n",
+ __func__, dev->dev_info.ublksrv_pid, number, ret);
+ ublk_ctrl_del_dev(dev);
+fail:
+ ublk_ctrl_deinit(dev);
+
+ return (ret >= 0) ? 0 : ret;
+}
+
+static int cmd_dev_del(struct dev_ctx *ctx)
+{
+ int i;
+
+ if (ctx->dev_id >= 0 || !ctx->all)
+ return __cmd_dev_del(ctx);
+
+ for (i = 0; i < 255; i++) {
+ ctx->dev_id = i;
+ __cmd_dev_del(ctx);
+ }
+ return 0;
+}
+
+static int __cmd_dev_list(struct dev_ctx *ctx)
+{
+ struct ublk_dev *dev = ublk_ctrl_init();
+ int ret;
+
+ if (!dev)
+ return -ENODEV;
+
+ dev->dev_info.dev_id = ctx->dev_id;
+
+ ret = ublk_ctrl_get_info(dev);
+ if (ret < 0) {
+ if (ctx->logging)
+ ublk_err("%s: can't get dev info from %d: %d\n",
+ __func__, ctx->dev_id, ret);
+ } else {
+ ublk_ctrl_dump(dev);
+ }
+
+ ublk_ctrl_deinit(dev);
+
+ return ret;
+}
+
+static int cmd_dev_list(struct dev_ctx *ctx)
+{
+ int i;
+
+ if (ctx->dev_id >= 0 || !ctx->all)
+ return __cmd_dev_list(ctx);
+
+ ctx->logging = false;
+ for (i = 0; i < 255; i++) {
+ ctx->dev_id = i;
+ __cmd_dev_list(ctx);
+ }
+ return 0;
+}
+
+static int cmd_dev_get_features(void)
+{
+#define const_ilog2(x) (63 - __builtin_clzll(x))
+ static const char *feat_map[] = {
+ [const_ilog2(UBLK_F_SUPPORT_ZERO_COPY)] = "ZERO_COPY",
+ [const_ilog2(UBLK_F_URING_CMD_COMP_IN_TASK)] = "COMP_IN_TASK",
+ [const_ilog2(UBLK_F_NEED_GET_DATA)] = "GET_DATA",
+ [const_ilog2(UBLK_F_USER_RECOVERY)] = "USER_RECOVERY",
+ [const_ilog2(UBLK_F_USER_RECOVERY_REISSUE)] = "RECOVERY_REISSUE",
+ [const_ilog2(UBLK_F_UNPRIVILEGED_DEV)] = "UNPRIVILEGED_DEV",
+ [const_ilog2(UBLK_F_CMD_IOCTL_ENCODE)] = "CMD_IOCTL_ENCODE",
+ [const_ilog2(UBLK_F_USER_COPY)] = "USER_COPY",
+ [const_ilog2(UBLK_F_ZONED)] = "ZONED",
+ [const_ilog2(UBLK_F_USER_RECOVERY_FAIL_IO)] = "RECOVERY_FAIL_IO",
+ };
+ struct ublk_dev *dev;
+ __u64 features = 0;
+ int ret;
+
+ dev = ublk_ctrl_init();
+ if (!dev) {
+ fprintf(stderr, "ublksrv_ctrl_init failed id\n");
+ return -EOPNOTSUPP;
+ }
+
+ ret = ublk_ctrl_get_features(dev, &features);
+ if (!ret) {
+ int i;
+
+ printf("ublk_drv features: 0x%llx\n", features);
+
+ for (i = 0; i < sizeof(features) * 8; i++) {
+ const char *feat;
+
+ if (!((1ULL << i) & features))
+ continue;
+ if (i < sizeof(feat_map) / sizeof(feat_map[0]))
+ feat = feat_map[i];
+ else
+ feat = "unknown";
+ printf("\t%-20s: 0x%llx\n", feat, 1ULL << i);
+ }
+ }
+
+ return ret;
+}
+
+static int cmd_dev_help(char *exe)
+{
+ printf("%s add -t [null|loop] [-q nr_queues] [-d depth] [-n dev_id] [backfile1] [backfile2] ...\n", exe);
+ printf("\t default: nr_queues=2(max 4), depth=128(max 128), dev_id=-1(auto allocation)\n");
+ printf("%s del [-n dev_id] -a \n", exe);
+ printf("\t -a delete all devices -n delete specified device\n");
+ printf("%s list [-n dev_id] -a \n", exe);
+ printf("\t -a list all devices, -n list specified device, default -a \n");
+ printf("%s features\n", exe);
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ static const struct option longopts[] = {
+ { "all", 0, NULL, 'a' },
+ { "type", 1, NULL, 't' },
+ { "number", 1, NULL, 'n' },
+ { "queues", 1, NULL, 'q' },
+ { "depth", 1, NULL, 'd' },
+ { "debug_mask", 1, NULL, 0 },
+ { "quiet", 0, NULL, 0 },
+ { "zero_copy", 0, NULL, 'z' },
+ { "foreground", 0, NULL, 0 },
+ { "chunk_size", 1, NULL, 0 },
+ { 0, 0, 0, 0 }
+ };
+ int option_idx, opt;
+ const char *cmd = argv[1];
+ struct dev_ctx ctx = {
+ .queue_depth = 128,
+ .nr_hw_queues = 2,
+ .dev_id = -1,
+ .tgt_type = "unknown",
+ .chunk_size = 65536, /* def chunk size is 64K */
+ };
+ int ret = -EINVAL, i;
+
+ if (argc == 1)
+ return ret;
+
+ optind = 2;
+ while ((opt = getopt_long(argc, argv, "t:n:d:q:az",
+ longopts, &option_idx)) != -1) {
+ switch (opt) {
+ case 'a':
+ ctx.all = 1;
+ break;
+ case 'n':
+ ctx.dev_id = strtol(optarg, NULL, 10);
+ break;
+ case 't':
+ if (strlen(optarg) < sizeof(ctx.tgt_type))
+ strcpy(ctx.tgt_type, optarg);
+ break;
+ case 'q':
+ ctx.nr_hw_queues = strtol(optarg, NULL, 10);
+ break;
+ case 'd':
+ ctx.queue_depth = strtol(optarg, NULL, 10);
+ break;
+ case 'z':
+ ctx.flags |= UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_USER_COPY;
+ break;
+ case 0:
+ if (!strcmp(longopts[option_idx].name, "debug_mask"))
+ ublk_dbg_mask = strtol(optarg, NULL, 16);
+ if (!strcmp(longopts[option_idx].name, "quiet"))
+ ublk_dbg_mask = 0;
+ if (!strcmp(longopts[option_idx].name, "foreground"))
+ ctx.fg = 1;
+ if (!strcmp(longopts[option_idx].name, "chunk_size"))
+ ctx.chunk_size = strtol(optarg, NULL, 10);
+ }
+ }
+
+ i = optind;
+ while (i < argc && ctx.nr_files < MAX_BACK_FILES) {
+ ctx.files[ctx.nr_files++] = argv[i++];
+ }
+
+ if (!strcmp(cmd, "add"))
+ ret = cmd_dev_add(&ctx);
+ else if (!strcmp(cmd, "del"))
+ ret = cmd_dev_del(&ctx);
+ else if (!strcmp(cmd, "list")) {
+ ctx.all = 1;
+ ret = cmd_dev_list(&ctx);
+ } else if (!strcmp(cmd, "help"))
+ ret = cmd_dev_help(argv[0]);
+ else if (!strcmp(cmd, "features"))
+ ret = cmd_dev_get_features();
+ else
+ cmd_dev_help(argv[0]);
+
+ return ret;
+}
diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h
new file mode 100644
index 000000000000..f31a5c4d4143
--- /dev/null
+++ b/tools/testing/selftests/ublk/kublk.h
@@ -0,0 +1,370 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef KUBLK_INTERNAL_H
+#define KUBLK_INTERNAL_H
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <pthread.h>
+#include <getopt.h>
+#include <limits.h>
+#include <poll.h>
+#include <fcntl.h>
+#include <sys/syscall.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include <sys/inotify.h>
+#include <sys/wait.h>
+#include <sys/eventfd.h>
+#include <sys/uio.h>
+#include <liburing.h>
+#include <linux/ublk_cmd.h>
+#include "ublk_dep.h"
+
+#define __maybe_unused __attribute__((unused))
+#define MAX_BACK_FILES 4
+#ifndef min
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#endif
+
+/****************** part 1: libublk ********************/
+
+#define CTRL_DEV "/dev/ublk-control"
+#define UBLKC_DEV "/dev/ublkc"
+#define UBLKB_DEV "/dev/ublkb"
+#define UBLK_CTRL_RING_DEPTH 32
+#define ERROR_EVTFD_DEVID -2
+
+/* queue idle timeout */
+#define UBLKSRV_IO_IDLE_SECS 20
+
+#define UBLK_IO_MAX_BYTES (1 << 20)
+#define UBLK_MAX_QUEUES 4
+#define UBLK_QUEUE_DEPTH 128
+
+#define UBLK_DBG_DEV (1U << 0)
+#define UBLK_DBG_QUEUE (1U << 1)
+#define UBLK_DBG_IO_CMD (1U << 2)
+#define UBLK_DBG_IO (1U << 3)
+#define UBLK_DBG_CTRL_CMD (1U << 4)
+#define UBLK_LOG (1U << 5)
+
+struct ublk_dev;
+struct ublk_queue;
+
+struct dev_ctx {
+ char tgt_type[16];
+ unsigned long flags;
+ unsigned nr_hw_queues;
+ unsigned queue_depth;
+ int dev_id;
+ int nr_files;
+ char *files[MAX_BACK_FILES];
+ unsigned int logging:1;
+ unsigned int all:1;
+ unsigned int fg:1;
+
+ /* stripe */
+ unsigned int chunk_size;
+
+ int _evtfd;
+};
+
+struct ublk_ctrl_cmd_data {
+ __u32 cmd_op;
+#define CTRL_CMD_HAS_DATA 1
+#define CTRL_CMD_HAS_BUF 2
+ __u32 flags;
+
+ __u64 data[2];
+ __u64 addr;
+ __u32 len;
+};
+
+struct ublk_io {
+ char *buf_addr;
+
+#define UBLKSRV_NEED_FETCH_RQ (1UL << 0)
+#define UBLKSRV_NEED_COMMIT_RQ_COMP (1UL << 1)
+#define UBLKSRV_IO_FREE (1UL << 2)
+ unsigned short flags;
+ unsigned short refs; /* used by target code only */
+
+ int result;
+
+ unsigned short tgt_ios;
+ void *private_data;
+};
+
+struct ublk_tgt_ops {
+ const char *name;
+ int (*init_tgt)(const struct dev_ctx *ctx, struct ublk_dev *);
+ void (*deinit_tgt)(struct ublk_dev *);
+
+ int (*queue_io)(struct ublk_queue *, int tag);
+ void (*tgt_io_done)(struct ublk_queue *,
+ int tag, const struct io_uring_cqe *);
+};
+
+struct ublk_tgt {
+ unsigned long dev_size;
+ unsigned int sq_depth;
+ unsigned int cq_depth;
+ const struct ublk_tgt_ops *ops;
+ struct ublk_params params;
+
+ int nr_backing_files;
+ unsigned long backing_file_size[MAX_BACK_FILES];
+ char backing_file[MAX_BACK_FILES][PATH_MAX];
+};
+
+struct ublk_queue {
+ int q_id;
+ int q_depth;
+ unsigned int cmd_inflight;
+ unsigned int io_inflight;
+ struct ublk_dev *dev;
+ const struct ublk_tgt_ops *tgt_ops;
+ char *io_cmd_buf;
+ struct io_uring ring;
+ struct ublk_io ios[UBLK_QUEUE_DEPTH];
+#define UBLKSRV_QUEUE_STOPPING (1U << 0)
+#define UBLKSRV_QUEUE_IDLE (1U << 1)
+#define UBLKSRV_NO_BUF (1U << 2)
+#define UBLKSRV_ZC (1U << 3)
+ unsigned state;
+ pid_t tid;
+ pthread_t thread;
+};
+
+struct ublk_dev {
+ struct ublk_tgt tgt;
+ struct ublksrv_ctrl_dev_info dev_info;
+ struct ublk_queue q[UBLK_MAX_QUEUES];
+
+ int fds[MAX_BACK_FILES + 1]; /* fds[0] points to /dev/ublkcN */
+ int nr_fds;
+ int ctrl_fd;
+ struct io_uring ring;
+
+ void *private_data;
+};
+
+#ifndef offsetof
+#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER)
+#endif
+
+#ifndef container_of
+#define container_of(ptr, type, member) ({ \
+ unsigned long __mptr = (unsigned long)(ptr); \
+ ((type *)(__mptr - offsetof(type, member))); })
+#endif
+
+#define round_up(val, rnd) \
+ (((val) + ((rnd) - 1)) & ~((rnd) - 1))
+
+
+extern unsigned int ublk_dbg_mask;
+extern int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag);
+
+static inline int is_target_io(__u64 user_data)
+{
+ return (user_data & (1ULL << 63)) != 0;
+}
+
+static inline __u64 build_user_data(unsigned tag, unsigned op,
+ unsigned tgt_data, unsigned is_target_io)
+{
+ assert(!(tag >> 16) && !(op >> 8) && !(tgt_data >> 16));
+
+ return tag | (op << 16) | (tgt_data << 24) | (__u64)is_target_io << 63;
+}
+
+static inline unsigned int user_data_to_tag(__u64 user_data)
+{
+ return user_data & 0xffff;
+}
+
+static inline unsigned int user_data_to_op(__u64 user_data)
+{
+ return (user_data >> 16) & 0xff;
+}
+
+static inline unsigned int user_data_to_tgt_data(__u64 user_data)
+{
+ return (user_data >> 24) & 0xffff;
+}
+
+static inline unsigned short ublk_cmd_op_nr(unsigned int op)
+{
+ return _IOC_NR(op);
+}
+
+static inline void ublk_err(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+}
+
+static inline void ublk_log(const char *fmt, ...)
+{
+ if (ublk_dbg_mask & UBLK_LOG) {
+ va_list ap;
+
+ va_start(ap, fmt);
+ vfprintf(stdout, fmt, ap);
+ }
+}
+
+static inline void ublk_dbg(int level, const char *fmt, ...)
+{
+ if (level & ublk_dbg_mask) {
+ va_list ap;
+
+ va_start(ap, fmt);
+ vfprintf(stdout, fmt, ap);
+ }
+}
+
+static inline int ublk_queue_alloc_sqes(struct ublk_queue *q,
+ struct io_uring_sqe *sqes[], int nr_sqes)
+{
+ unsigned left = io_uring_sq_space_left(&q->ring);
+ int i;
+
+ if (left < nr_sqes)
+ io_uring_submit(&q->ring);
+
+ for (i = 0; i < nr_sqes; i++) {
+ sqes[i] = io_uring_get_sqe(&q->ring);
+ if (!sqes[i])
+ return i;
+ }
+
+ return nr_sqes;
+}
+
+static inline void io_uring_prep_buf_register(struct io_uring_sqe *sqe,
+ int dev_fd, int tag, int q_id, __u64 index)
+{
+ struct ublksrv_io_cmd *cmd = (struct ublksrv_io_cmd *)sqe->cmd;
+
+ io_uring_prep_read(sqe, dev_fd, 0, 0, 0);
+ sqe->opcode = IORING_OP_URING_CMD;
+ sqe->flags |= IOSQE_FIXED_FILE;
+ sqe->cmd_op = UBLK_U_IO_REGISTER_IO_BUF;
+
+ cmd->tag = tag;
+ cmd->addr = index;
+ cmd->q_id = q_id;
+}
+
+static inline void io_uring_prep_buf_unregister(struct io_uring_sqe *sqe,
+ int dev_fd, int tag, int q_id, __u64 index)
+{
+ struct ublksrv_io_cmd *cmd = (struct ublksrv_io_cmd *)sqe->cmd;
+
+ io_uring_prep_read(sqe, dev_fd, 0, 0, 0);
+ sqe->opcode = IORING_OP_URING_CMD;
+ sqe->flags |= IOSQE_FIXED_FILE;
+ sqe->cmd_op = UBLK_U_IO_UNREGISTER_IO_BUF;
+
+ cmd->tag = tag;
+ cmd->addr = index;
+ cmd->q_id = q_id;
+}
+
+static inline void *ublk_get_sqe_cmd(const struct io_uring_sqe *sqe)
+{
+ return (void *)&sqe->cmd;
+}
+
+static inline void ublk_set_io_res(struct ublk_queue *q, int tag, int res)
+{
+ q->ios[tag].result = res;
+}
+
+static inline int ublk_get_io_res(const struct ublk_queue *q, unsigned tag)
+{
+ return q->ios[tag].result;
+}
+
+static inline void ublk_mark_io_done(struct ublk_io *io, int res)
+{
+ io->flags |= (UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_IO_FREE);
+ io->result = res;
+}
+
+static inline const struct ublksrv_io_desc *ublk_get_iod(const struct ublk_queue *q, int tag)
+{
+ return (struct ublksrv_io_desc *)&(q->io_cmd_buf[tag * sizeof(struct ublksrv_io_desc)]);
+}
+
+static inline void ublk_set_sqe_cmd_op(struct io_uring_sqe *sqe, __u32 cmd_op)
+{
+ __u32 *addr = (__u32 *)&sqe->off;
+
+ addr[0] = cmd_op;
+ addr[1] = 0;
+}
+
+static inline struct ublk_io *ublk_get_io(struct ublk_queue *q, unsigned tag)
+{
+ return &q->ios[tag];
+}
+
+static inline int ublk_complete_io(struct ublk_queue *q, unsigned tag, int res)
+{
+ struct ublk_io *io = &q->ios[tag];
+
+ ublk_mark_io_done(io, res);
+
+ return ublk_queue_io_cmd(q, io, tag);
+}
+
+static inline void ublk_queued_tgt_io(struct ublk_queue *q, unsigned tag, int queued)
+{
+ if (queued < 0)
+ ublk_complete_io(q, tag, queued);
+ else {
+ struct ublk_io *io = ublk_get_io(q, tag);
+
+ q->io_inflight += queued;
+ io->tgt_ios = queued;
+ io->result = 0;
+ }
+}
+
+static inline int ublk_completed_tgt_io(struct ublk_queue *q, unsigned tag)
+{
+ struct ublk_io *io = ublk_get_io(q, tag);
+
+ q->io_inflight--;
+
+ return --io->tgt_ios == 0;
+}
+
+static inline int ublk_queue_use_zc(const struct ublk_queue *q)
+{
+ return q->state & UBLKSRV_ZC;
+}
+
+extern const struct ublk_tgt_ops null_tgt_ops;
+extern const struct ublk_tgt_ops loop_tgt_ops;
+extern const struct ublk_tgt_ops stripe_tgt_ops;
+
+void backing_file_tgt_deinit(struct ublk_dev *dev);
+int backing_file_tgt_init(struct ublk_dev *dev);
+
+static inline unsigned int ilog2(unsigned int x)
+{
+ if (x == 0)
+ return 0;
+ return (sizeof(x) * 8 - 1) - __builtin_clz(x);
+}
+#endif
diff --git a/tools/testing/selftests/ublk/null.c b/tools/testing/selftests/ublk/null.c
new file mode 100644
index 000000000000..899875ff50fe
--- /dev/null
+++ b/tools/testing/selftests/ublk/null.c
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include "kublk.h"
+
+#ifndef IORING_NOP_INJECT_RESULT
+#define IORING_NOP_INJECT_RESULT (1U << 0)
+#endif
+
+#ifndef IORING_NOP_FIXED_BUFFER
+#define IORING_NOP_FIXED_BUFFER (1U << 3)
+#endif
+
+static int ublk_null_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
+{
+ const struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
+ unsigned long dev_size = 250UL << 30;
+
+ dev->tgt.dev_size = dev_size;
+ dev->tgt.params = (struct ublk_params) {
+ .types = UBLK_PARAM_TYPE_BASIC,
+ .basic = {
+ .logical_bs_shift = 9,
+ .physical_bs_shift = 12,
+ .io_opt_shift = 12,
+ .io_min_shift = 9,
+ .max_sectors = info->max_io_buf_bytes >> 9,
+ .dev_sectors = dev_size >> 9,
+ },
+ };
+
+ if (info->flags & UBLK_F_SUPPORT_ZERO_COPY)
+ dev->tgt.sq_depth = dev->tgt.cq_depth = 2 * info->queue_depth;
+ return 0;
+}
+
+static int null_queue_zc_io(struct ublk_queue *q, int tag)
+{
+ const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
+ unsigned ublk_op = ublksrv_get_op(iod);
+ struct io_uring_sqe *sqe[3];
+
+ ublk_queue_alloc_sqes(q, sqe, 3);
+
+ io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, tag);
+ sqe[0]->user_data = build_user_data(tag,
+ ublk_cmd_op_nr(sqe[0]->cmd_op), 0, 1);
+ sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK;
+
+ io_uring_prep_nop(sqe[1]);
+ sqe[1]->buf_index = tag;
+ sqe[1]->flags |= IOSQE_FIXED_FILE | IOSQE_IO_HARDLINK;
+ sqe[1]->rw_flags = IORING_NOP_FIXED_BUFFER | IORING_NOP_INJECT_RESULT;
+ sqe[1]->len = iod->nr_sectors << 9; /* injected result */
+ sqe[1]->user_data = build_user_data(tag, ublk_op, 0, 1);
+
+ io_uring_prep_buf_unregister(sqe[2], 0, tag, q->q_id, tag);
+ sqe[2]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[2]->cmd_op), 0, 1);
+
+ // buf register is marked as IOSQE_CQE_SKIP_SUCCESS
+ return 2;
+}
+
+static void ublk_null_io_done(struct ublk_queue *q, int tag,
+ const struct io_uring_cqe *cqe)
+{
+ unsigned op = user_data_to_op(cqe->user_data);
+ struct ublk_io *io = ublk_get_io(q, tag);
+
+ if (cqe->res < 0 || op != ublk_cmd_op_nr(UBLK_U_IO_UNREGISTER_IO_BUF)) {
+ if (!io->result)
+ io->result = cqe->res;
+ if (cqe->res < 0)
+ ublk_err("%s: io failed op %x user_data %lx\n",
+ __func__, op, cqe->user_data);
+ }
+
+ /* buffer register op is IOSQE_CQE_SKIP_SUCCESS */
+ if (op == ublk_cmd_op_nr(UBLK_U_IO_REGISTER_IO_BUF))
+ io->tgt_ios += 1;
+
+ if (ublk_completed_tgt_io(q, tag))
+ ublk_complete_io(q, tag, io->result);
+}
+
+static int ublk_null_queue_io(struct ublk_queue *q, int tag)
+{
+ const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
+ int zc = ublk_queue_use_zc(q);
+ int queued;
+
+ if (!zc) {
+ ublk_complete_io(q, tag, iod->nr_sectors << 9);
+ return 0;
+ }
+
+ queued = null_queue_zc_io(q, tag);
+ ublk_queued_tgt_io(q, tag, queued);
+ return 0;
+}
+
+const struct ublk_tgt_ops null_tgt_ops = {
+ .name = "null",
+ .init_tgt = ublk_null_tgt_init,
+ .queue_io = ublk_null_queue_io,
+ .tgt_io_done = ublk_null_io_done,
+};
diff --git a/tools/testing/selftests/ublk/stripe.c b/tools/testing/selftests/ublk/stripe.c
new file mode 100644
index 000000000000..98c564b12f3c
--- /dev/null
+++ b/tools/testing/selftests/ublk/stripe.c
@@ -0,0 +1,318 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "kublk.h"
+
+#define NR_STRIPE MAX_BACK_FILES
+
+struct stripe_conf {
+ unsigned nr_files;
+ unsigned shift;
+};
+
+struct stripe {
+ loff_t start;
+ unsigned nr_sects;
+ int seq;
+
+ struct iovec *vec;
+ unsigned nr_vec;
+ unsigned cap;
+};
+
+struct stripe_array {
+ struct stripe s[NR_STRIPE];
+ unsigned nr;
+ struct iovec _vec[];
+};
+
+static inline const struct stripe_conf *get_chunk_shift(const struct ublk_queue *q)
+{
+ return (struct stripe_conf *)q->dev->private_data;
+}
+
+static inline unsigned calculate_nr_vec(const struct stripe_conf *conf,
+ const struct ublksrv_io_desc *iod)
+{
+ const unsigned shift = conf->shift - 9;
+ const unsigned unit_sects = conf->nr_files << shift;
+ loff_t start = iod->start_sector;
+ loff_t end = start + iod->nr_sectors;
+
+ return (end / unit_sects) - (start / unit_sects) + 1;
+}
+
+static struct stripe_array *alloc_stripe_array(const struct stripe_conf *conf,
+ const struct ublksrv_io_desc *iod)
+{
+ unsigned nr_vecs = calculate_nr_vec(conf, iod);
+ unsigned total = nr_vecs * conf->nr_files;
+ struct stripe_array *s;
+ int i;
+
+ s = malloc(sizeof(*s) + total * sizeof(struct iovec));
+
+ s->nr = 0;
+ for (i = 0; i < conf->nr_files; i++) {
+ struct stripe *t = &s->s[i];
+
+ t->nr_vec = 0;
+ t->vec = &s->_vec[i * nr_vecs];
+ t->nr_sects = 0;
+ t->cap = nr_vecs;
+ }
+
+ return s;
+}
+
+static void free_stripe_array(struct stripe_array *s)
+{
+ free(s);
+}
+
+static void calculate_stripe_array(const struct stripe_conf *conf,
+ const struct ublksrv_io_desc *iod, struct stripe_array *s)
+{
+ const unsigned shift = conf->shift - 9;
+ const unsigned chunk_sects = 1 << shift;
+ const unsigned unit_sects = conf->nr_files << shift;
+ off64_t start = iod->start_sector;
+ off64_t end = start + iod->nr_sectors;
+ unsigned long done = 0;
+ unsigned idx = 0;
+
+ while (start < end) {
+ unsigned nr_sects = chunk_sects - (start & (chunk_sects - 1));
+ loff_t unit_off = (start / unit_sects) * unit_sects;
+ unsigned seq = (start - unit_off) >> shift;
+ struct stripe *this = &s->s[idx];
+ loff_t stripe_off = (unit_off / conf->nr_files) +
+ (start & (chunk_sects - 1));
+
+ if (nr_sects > end - start)
+ nr_sects = end - start;
+ if (this->nr_sects == 0) {
+ this->nr_sects = nr_sects;
+ this->start = stripe_off;
+ this->seq = seq;
+ s->nr += 1;
+ } else {
+ assert(seq == this->seq);
+ assert(this->start + this->nr_sects == stripe_off);
+ this->nr_sects += nr_sects;
+ }
+
+ assert(this->nr_vec < this->cap);
+ this->vec[this->nr_vec].iov_base = (void *)(iod->addr + done);
+ this->vec[this->nr_vec++].iov_len = nr_sects << 9;
+
+ start += nr_sects;
+ done += nr_sects << 9;
+ idx = (idx + 1) % conf->nr_files;
+ }
+}
+
+static inline enum io_uring_op stripe_to_uring_op(const struct ublksrv_io_desc *iod)
+{
+ unsigned ublk_op = ublksrv_get_op(iod);
+
+ if (ublk_op == UBLK_IO_OP_READ)
+ return IORING_OP_READV;
+ else if (ublk_op == UBLK_IO_OP_WRITE)
+ return IORING_OP_WRITEV;
+ assert(0);
+}
+
+static int stripe_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag)
+{
+ const struct stripe_conf *conf = get_chunk_shift(q);
+ enum io_uring_op op = stripe_to_uring_op(iod);
+ struct io_uring_sqe *sqe[NR_STRIPE];
+ struct stripe_array *s = alloc_stripe_array(conf, iod);
+ struct ublk_io *io = ublk_get_io(q, tag);
+ int i;
+
+ io->private_data = s;
+ calculate_stripe_array(conf, iod, s);
+
+ ublk_queue_alloc_sqes(q, sqe, s->nr);
+ for (i = 0; i < s->nr; i++) {
+ struct stripe *t = &s->s[i];
+
+ io_uring_prep_rw(op, sqe[i],
+ t->seq + 1,
+ (void *)t->vec,
+ t->nr_vec,
+ t->start << 9);
+ io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE);
+ /* bit63 marks us as tgt io */
+ sqe[i]->user_data = build_user_data(tag, ublksrv_get_op(iod), i, 1);
+ }
+ return s->nr;
+}
+
+static int handle_flush(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag)
+{
+ const struct stripe_conf *conf = get_chunk_shift(q);
+ struct io_uring_sqe *sqe[NR_STRIPE];
+ int i;
+
+ ublk_queue_alloc_sqes(q, sqe, conf->nr_files);
+ for (i = 0; i < conf->nr_files; i++) {
+ io_uring_prep_fsync(sqe[i], i + 1, IORING_FSYNC_DATASYNC);
+ io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE);
+ sqe[i]->user_data = build_user_data(tag, UBLK_IO_OP_FLUSH, 0, 1);
+ }
+ return conf->nr_files;
+}
+
+static int stripe_queue_tgt_io(struct ublk_queue *q, int tag)
+{
+ const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
+ unsigned ublk_op = ublksrv_get_op(iod);
+ int ret = 0;
+
+ switch (ublk_op) {
+ case UBLK_IO_OP_FLUSH:
+ ret = handle_flush(q, iod, tag);
+ break;
+ case UBLK_IO_OP_WRITE_ZEROES:
+ case UBLK_IO_OP_DISCARD:
+ ret = -ENOTSUP;
+ break;
+ case UBLK_IO_OP_READ:
+ case UBLK_IO_OP_WRITE:
+ ret = stripe_queue_tgt_rw_io(q, iod, tag);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ ublk_dbg(UBLK_DBG_IO, "%s: tag %d ublk io %x %llx %u ret %d\n", __func__, tag,
+ iod->op_flags, iod->start_sector, iod->nr_sectors << 9, ret);
+ return ret;
+}
+
+static int ublk_stripe_queue_io(struct ublk_queue *q, int tag)
+{
+ int queued = stripe_queue_tgt_io(q, tag);
+
+ ublk_queued_tgt_io(q, tag, queued);
+ return 0;
+}
+
+static void ublk_stripe_io_done(struct ublk_queue *q, int tag,
+ const struct io_uring_cqe *cqe)
+{
+ const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
+ unsigned op = user_data_to_op(cqe->user_data);
+ struct ublk_io *io = ublk_get_io(q, tag);
+ int res = cqe->res;
+
+ if (res < 0) {
+ if (!io->result)
+ io->result = res;
+ ublk_err("%s: io failure %d tag %u\n", __func__, res, tag);
+ }
+
+ /* fail short READ/WRITE simply */
+ if (op == UBLK_IO_OP_READ || op == UBLK_IO_OP_WRITE) {
+ unsigned seq = user_data_to_tgt_data(cqe->user_data);
+ struct stripe_array *s = io->private_data;
+
+ if (res < s->s[seq].vec->iov_len)
+ io->result = -EIO;
+ }
+
+ if (ublk_completed_tgt_io(q, tag)) {
+ int res = io->result;
+
+ if (!res)
+ res = iod->nr_sectors << 9;
+
+ ublk_complete_io(q, tag, res);
+
+ free_stripe_array(io->private_data);
+ io->private_data = NULL;
+ }
+}
+
+static int ublk_stripe_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
+{
+ struct ublk_params p = {
+ .types = UBLK_PARAM_TYPE_BASIC,
+ .basic = {
+ .attrs = UBLK_ATTR_VOLATILE_CACHE,
+ .logical_bs_shift = 9,
+ .physical_bs_shift = 12,
+ .io_opt_shift = 12,
+ .io_min_shift = 9,
+ .max_sectors = dev->dev_info.max_io_buf_bytes >> 9,
+ },
+ };
+ unsigned chunk_size = ctx->chunk_size;
+ struct stripe_conf *conf;
+ unsigned chunk_shift;
+ loff_t bytes = 0;
+ int ret, i;
+
+ if ((chunk_size & (chunk_size - 1)) || !chunk_size) {
+ ublk_err("invalid chunk size %u\n", chunk_size);
+ return -EINVAL;
+ }
+
+ if (chunk_size < 4096 || chunk_size > 512 * 1024) {
+ ublk_err("invalid chunk size %u\n", chunk_size);
+ return -EINVAL;
+ }
+
+ chunk_shift = ilog2(chunk_size);
+
+ ret = backing_file_tgt_init(dev);
+ if (ret)
+ return ret;
+
+ if (!dev->tgt.nr_backing_files || dev->tgt.nr_backing_files > NR_STRIPE)
+ return -EINVAL;
+
+ assert(dev->nr_fds == dev->tgt.nr_backing_files + 1);
+
+ for (i = 0; i < dev->tgt.nr_backing_files; i++)
+ dev->tgt.backing_file_size[i] &= ~((1 << chunk_shift) - 1);
+
+ for (i = 0; i < dev->tgt.nr_backing_files; i++) {
+ unsigned long size = dev->tgt.backing_file_size[i];
+
+ if (size != dev->tgt.backing_file_size[0])
+ return -EINVAL;
+ bytes += size;
+ }
+
+ conf = malloc(sizeof(*conf));
+ conf->shift = chunk_shift;
+ conf->nr_files = dev->tgt.nr_backing_files;
+
+ dev->private_data = conf;
+ dev->tgt.dev_size = bytes;
+ p.basic.dev_sectors = bytes >> 9;
+ dev->tgt.params = p;
+ dev->tgt.sq_depth = dev->dev_info.queue_depth * conf->nr_files;
+ dev->tgt.cq_depth = dev->dev_info.queue_depth * conf->nr_files;
+
+ printf("%s: shift %u files %u\n", __func__, conf->shift, conf->nr_files);
+
+ return 0;
+}
+
+static void ublk_stripe_tgt_deinit(struct ublk_dev *dev)
+{
+ free(dev->private_data);
+ backing_file_tgt_deinit(dev);
+}
+
+const struct ublk_tgt_ops stripe_tgt_ops = {
+ .name = "stripe",
+ .init_tgt = ublk_stripe_tgt_init,
+ .deinit_tgt = ublk_stripe_tgt_deinit,
+ .queue_io = ublk_stripe_queue_io,
+ .tgt_io_done = ublk_stripe_io_done,
+};
diff --git a/tools/testing/selftests/ublk/test_common.sh b/tools/testing/selftests/ublk/test_common.sh
new file mode 100755
index 000000000000..75f54ac6b1c4
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_common.sh
@@ -0,0 +1,246 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+UBLK_SKIP_CODE=4
+
+_have_program() {
+ if command -v "$1" >/dev/null 2>&1; then
+ return 0
+ fi
+ return 1
+}
+
+_get_disk_dev_t() {
+ local dev_id=$1
+ local dev
+ local major
+ local minor
+
+ dev=/dev/ublkb"${dev_id}"
+ major=$(stat -c '%Hr' "$dev")
+ minor=$(stat -c '%Lr' "$dev")
+
+ echo $(( (major & 0xfff) << 20 | (minor & 0xfffff) ))
+}
+
+_create_backfile() {
+ local my_size=$1
+ local my_file
+
+ my_file=$(mktemp ublk_file_"${my_size}"_XXXXX)
+ truncate -s "${my_size}" "${my_file}"
+ echo "$my_file"
+}
+
+_remove_backfile() {
+ local file=$1
+
+ [ -f "$file" ] && rm -f "$file"
+}
+
+_create_tmp_dir() {
+ local my_file;
+
+ my_file=$(mktemp -d ublk_dir_XXXXX)
+ echo "$my_file"
+}
+
+_remove_tmp_dir() {
+ local dir=$1
+
+ [ -d "$dir" ] && rmdir "$dir"
+}
+
+_mkfs_mount_test()
+{
+ local dev=$1
+ local err_code=0
+ local mnt_dir;
+
+ mnt_dir=$(_create_tmp_dir)
+ mkfs.ext4 -F "$dev" > /dev/null 2>&1
+ err_code=$?
+ if [ $err_code -ne 0 ]; then
+ return $err_code
+ fi
+
+ mount -t ext4 "$dev" "$mnt_dir" > /dev/null 2>&1
+ umount "$dev"
+ err_code=$?
+ _remove_tmp_dir "$mnt_dir"
+ if [ $err_code -ne 0 ]; then
+ return $err_code
+ fi
+}
+
+_check_root() {
+ local ksft_skip=4
+
+ if [ $UID != 0 ]; then
+ echo please run this as root >&2
+ exit $ksft_skip
+ fi
+}
+
+_remove_ublk_devices() {
+ ${UBLK_PROG} del -a
+ modprobe -r ublk_drv > /dev/null 2>&1
+}
+
+_get_ublk_dev_state() {
+ ${UBLK_PROG} list -n "$1" | grep "state" | awk '{print $11}'
+}
+
+_get_ublk_daemon_pid() {
+ ${UBLK_PROG} list -n "$1" | grep "pid" | awk '{print $7}'
+}
+
+_prep_test() {
+ _check_root
+ local type=$1
+ shift 1
+ modprobe ublk_drv > /dev/null 2>&1
+ [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "ublk $type: $*"
+}
+
+_remove_test_files()
+{
+ local files=$*
+
+ for file in ${files}; do
+ [ -f "${file}" ] && rm -f "${file}"
+ done
+}
+
+_show_result()
+{
+ if [ "$UBLK_TEST_SHOW_RESULT" -ne 0 ]; then
+ if [ "$2" -eq 0 ]; then
+ echo "$1 : [PASS]"
+ elif [ "$2" -eq 4 ]; then
+ echo "$1 : [SKIP]"
+ else
+ echo "$1 : [FAIL]"
+ fi
+ fi
+ [ "$2" -ne 0 ] && exit "$2"
+ return 0
+}
+
+# don't call from sub-shell, otherwise can't exit
+_check_add_dev()
+{
+ local tid=$1
+ local code=$2
+ shift 2
+ if [ "${code}" -ne 0 ]; then
+ _remove_test_files "$@"
+ _show_result "${tid}" "${code}"
+ fi
+}
+
+_cleanup_test() {
+ "${UBLK_PROG}" del -a
+ rm -f "$UBLK_TMP"
+}
+
+_have_feature()
+{
+ if $UBLK_PROG "features" | grep "$1" > /dev/null 2>&1; then
+ return 0
+ fi
+ return 1
+}
+
+_add_ublk_dev() {
+ local kublk_temp;
+ local dev_id;
+
+ if [ ! -c /dev/ublk-control ]; then
+ return ${UBLK_SKIP_CODE}
+ fi
+ if echo "$@" | grep -q "\-z"; then
+ if ! _have_feature "ZERO_COPY"; then
+ return ${UBLK_SKIP_CODE}
+ fi
+ fi
+
+ kublk_temp=$(mktemp /tmp/kublk-XXXXXX)
+ if ! "${UBLK_PROG}" add "$@" > "${kublk_temp}" 2>&1; then
+ echo "fail to add ublk dev $*"
+ rm -f "${kublk_temp}"
+ return 255
+ fi
+
+ dev_id=$(grep "dev id" "${kublk_temp}" | awk -F '[ :]' '{print $3}')
+ udevadm settle
+ rm -f "${kublk_temp}"
+ echo "${dev_id}"
+}
+
+# kill the ublk daemon and return ublk device state
+__ublk_kill_daemon()
+{
+ local dev_id=$1
+ local exp_state=$2
+ local daemon_pid
+ local state
+
+ daemon_pid=$(_get_ublk_daemon_pid "${dev_id}")
+ state=$(_get_ublk_dev_state "${dev_id}")
+
+ for ((j=0;j<50;j++)); do
+ [ "$state" == "$exp_state" ] && break
+ kill -9 "$daemon_pid" > /dev/null 2>&1
+ sleep 1
+ state=$(_get_ublk_dev_state "${dev_id}")
+ done
+ echo "$state"
+}
+
+__remove_ublk_dev_return() {
+ local dev_id=$1
+
+ ${UBLK_PROG} del -n "${dev_id}"
+ local res=$?
+ udevadm settle
+ return ${res}
+}
+
+__run_io_and_remove()
+{
+ local dev_id=$1
+ local size=$2
+ local kill_server=$3
+
+ fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \
+ --rw=readwrite --iodepth=64 --size="${size}" --numjobs=4 \
+ --runtime=20 --time_based > /dev/null 2>&1 &
+ sleep 2
+ if [ "${kill_server}" = "yes" ]; then
+ local state
+ state=$(__ublk_kill_daemon "${dev_id}" "DEAD")
+ if [ "$state" != "DEAD" ]; then
+ echo "device isn't dead($state) after killing daemon"
+ return 255
+ fi
+ fi
+ if ! __remove_ublk_dev_return "${dev_id}"; then
+ echo "delete dev ${dev_id} failed"
+ return 255
+ fi
+ wait
+}
+
+_ublk_test_top_dir()
+{
+ cd "$(dirname "$0")" && pwd
+}
+
+UBLK_TMP=$(mktemp ublk_test_XXXXX)
+UBLK_PROG=$(_ublk_test_top_dir)/kublk
+UBLK_TEST_QUIET=1
+UBLK_TEST_SHOW_RESULT=1
+export UBLK_PROG
+export UBLK_TEST_QUIET
+export UBLK_TEST_SHOW_RESULT
diff --git a/tools/testing/selftests/ublk/test_generic_01.sh b/tools/testing/selftests/ublk/test_generic_01.sh
new file mode 100755
index 000000000000..9227a208ba53
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_generic_01.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="generic_01"
+ERR_CODE=0
+
+if ! _have_program bpftrace; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "null" "sequential io order"
+
+dev_id=$(_add_ublk_dev -t null)
+_check_add_dev $TID $?
+
+dev_t=$(_get_disk_dev_t "$dev_id")
+bpftrace trace/seq_io.bt "$dev_t" "W" 1 > "$UBLK_TMP" 2>&1 &
+btrace_pid=$!
+sleep 2
+
+if ! kill -0 "$btrace_pid" > /dev/null 2>&1; then
+ _cleanup_test "null"
+ exit "$UBLK_SKIP_CODE"
+fi
+
+# run fio over this ublk disk
+fio --name=write_seq \
+ --filename=/dev/ublkb"${dev_id}" \
+ --ioengine=libaio --iodepth=16 \
+ --rw=write \
+ --size=512M \
+ --direct=1 \
+ --bs=4k > /dev/null 2>&1
+ERR_CODE=$?
+kill "$btrace_pid"
+wait
+if grep -q "io_out_of_order" "$UBLK_TMP"; then
+ cat "$UBLK_TMP"
+ ERR_CODE=255
+fi
+_cleanup_test "null"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_loop_01.sh b/tools/testing/selftests/ublk/test_loop_01.sh
new file mode 100755
index 000000000000..c882d2a08e13
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_loop_01.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="loop_01"
+ERR_CODE=0
+
+_prep_test "loop" "write and verify test"
+
+backfile_0=$(_create_backfile 256M)
+
+dev_id=$(_add_ublk_dev -t loop "$backfile_0")
+_check_add_dev $TID $? "${backfile_0}"
+
+# run fio over the ublk disk
+fio --name=write_and_verify \
+ --filename=/dev/ublkb"${dev_id}" \
+ --ioengine=libaio --iodepth=16 \
+ --rw=write \
+ --size=256M \
+ --direct=1 \
+ --verify=crc32c \
+ --do_verify=1 \
+ --bs=4k > /dev/null 2>&1
+ERR_CODE=$?
+
+_cleanup_test "loop"
+
+_remove_backfile "$backfile_0"
+
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_loop_02.sh b/tools/testing/selftests/ublk/test_loop_02.sh
new file mode 100755
index 000000000000..03863d825e07
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_loop_02.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="loop_02"
+ERR_CODE=0
+
+_prep_test "loop" "mkfs & mount & umount"
+
+backfile_0=$(_create_backfile 256M)
+dev_id=$(_add_ublk_dev -t loop "$backfile_0")
+_check_add_dev $TID $? "$backfile_0"
+
+_mkfs_mount_test /dev/ublkb"${dev_id}"
+ERR_CODE=$?
+
+_cleanup_test "loop"
+
+_remove_backfile "$backfile_0"
+
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_loop_03.sh b/tools/testing/selftests/ublk/test_loop_03.sh
new file mode 100755
index 000000000000..269c96787d7d
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_loop_03.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="loop_03"
+ERR_CODE=0
+
+_prep_test "loop" "write and verify over zero copy"
+
+backfile_0=$(_create_backfile 256M)
+dev_id=$(_add_ublk_dev -t loop -z "$backfile_0")
+_check_add_dev $TID $? "$backfile_0"
+
+# run fio over the ublk disk
+fio --name=write_and_verify \
+ --filename=/dev/ublkb"${dev_id}" \
+ --ioengine=libaio --iodepth=64 \
+ --rw=write \
+ --size=256M \
+ --direct=1 \
+ --verify=crc32c \
+ --do_verify=1 \
+ --bs=4k > /dev/null 2>&1
+ERR_CODE=$?
+
+_cleanup_test "loop"
+
+_remove_backfile "$backfile_0"
+
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_loop_04.sh b/tools/testing/selftests/ublk/test_loop_04.sh
new file mode 100755
index 000000000000..1435422c38ec
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_loop_04.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="loop_04"
+ERR_CODE=0
+
+_prep_test "loop" "mkfs & mount & umount with zero copy"
+
+backfile_0=$(_create_backfile 256M)
+dev_id=$(_add_ublk_dev -t loop -z "$backfile_0")
+_check_add_dev $TID $? "$backfile_0"
+
+_mkfs_mount_test /dev/ublkb"${dev_id}"
+ERR_CODE=$?
+
+_cleanup_test "loop"
+
+_remove_backfile "$backfile_0"
+
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_null_01.sh b/tools/testing/selftests/ublk/test_null_01.sh
new file mode 100755
index 000000000000..a34203f72668
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_null_01.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="null_01"
+ERR_CODE=0
+
+_prep_test "null" "basic IO test"
+
+dev_id=$(_add_ublk_dev -t null)
+_check_add_dev $TID $?
+
+# run fio over the two disks
+fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio --rw=readwrite --iodepth=32 --size=256M > /dev/null 2>&1
+ERR_CODE=$?
+
+_cleanup_test "null"
+
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_null_02.sh b/tools/testing/selftests/ublk/test_null_02.sh
new file mode 100755
index 000000000000..5633ca876655
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_null_02.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="null_02"
+ERR_CODE=0
+
+_prep_test "null" "basic IO test with zero copy"
+
+dev_id=$(_add_ublk_dev -t null -z)
+_check_add_dev $TID $?
+
+# run fio over the two disks
+fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio --rw=readwrite --iodepth=32 --size=256M > /dev/null 2>&1
+ERR_CODE=$?
+
+_cleanup_test "null"
+
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stress_01.sh b/tools/testing/selftests/ublk/test_stress_01.sh
new file mode 100755
index 000000000000..7177f6c57bc5
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_stress_01.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+TID="stress_01"
+ERR_CODE=0
+DEV_ID=-1
+
+ublk_io_and_remove()
+{
+ local size=$1
+ shift 1
+ local backfile=""
+ if echo "$@" | grep -q "loop"; then
+ backfile=${*: -1}
+ fi
+ DEV_ID=$(_add_ublk_dev "$@")
+ _check_add_dev $TID $? "${backfile}"
+
+ [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs. remove device(ublk add $*)"
+ if ! __run_io_and_remove "${DEV_ID}" "${size}" "no"; then
+ echo "/dev/ublkc${DEV_ID} isn't removed"
+ _remove_backfile "${backfile}"
+ exit 255
+ fi
+}
+
+_prep_test "stress" "run IO and remove device"
+
+ublk_io_and_remove 8G -t null
+ERR_CODE=$?
+if [ ${ERR_CODE} -ne 0 ]; then
+ _show_result $TID $ERR_CODE
+fi
+
+BACK_FILE=$(_create_backfile 256M)
+ublk_io_and_remove 256M -t loop "${BACK_FILE}"
+ERR_CODE=$?
+if [ ${ERR_CODE} -ne 0 ]; then
+ _show_result $TID $ERR_CODE
+fi
+
+ublk_io_and_remove 256M -t loop -z "${BACK_FILE}"
+ERR_CODE=$?
+_cleanup_test "stress"
+_remove_backfile "${BACK_FILE}"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stress_02.sh b/tools/testing/selftests/ublk/test_stress_02.sh
new file mode 100755
index 000000000000..2a8e60579a06
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_stress_02.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+TID="stress_02"
+ERR_CODE=0
+DEV_ID=-1
+
+ublk_io_and_kill_daemon()
+{
+ local size=$1
+ shift 1
+ local backfile=""
+ if echo "$@" | grep -q "loop"; then
+ backfile=${*: -1}
+ fi
+ DEV_ID=$(_add_ublk_dev "$@")
+ _check_add_dev $TID $? "${backfile}"
+
+ [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs kill ublk server(ublk add $*)"
+ if ! __run_io_and_remove "${DEV_ID}" "${size}" "yes"; then
+ echo "/dev/ublkc${DEV_ID} isn't removed res ${res}"
+ _remove_backfile "${backfile}"
+ exit 255
+ fi
+}
+
+_prep_test "stress" "run IO and kill ublk server"
+
+ublk_io_and_kill_daemon 8G -t null
+ERR_CODE=$?
+if [ ${ERR_CODE} -ne 0 ]; then
+ _show_result $TID $ERR_CODE
+fi
+
+BACK_FILE=$(_create_backfile 256M)
+ublk_io_and_kill_daemon 256M -t loop "${BACK_FILE}"
+ERR_CODE=$?
+if [ ${ERR_CODE} -ne 0 ]; then
+ _show_result $TID $ERR_CODE
+fi
+
+ublk_io_and_kill_daemon 256M -t loop -z "${BACK_FILE}"
+ERR_CODE=$?
+_cleanup_test "stress"
+_remove_backfile "${BACK_FILE}"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stripe_01.sh b/tools/testing/selftests/ublk/test_stripe_01.sh
new file mode 100755
index 000000000000..c01f3dc325ab
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_stripe_01.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="stripe_01"
+ERR_CODE=0
+
+_prep_test "stripe" "write and verify test"
+
+backfile_0=$(_create_backfile 256M)
+backfile_1=$(_create_backfile 256M)
+
+dev_id=$(_add_ublk_dev -t stripe "$backfile_0" "$backfile_1")
+_check_add_dev $TID $? "${backfile_0}"
+
+# run fio over the ublk disk
+fio --name=write_and_verify \
+ --filename=/dev/ublkb"${dev_id}" \
+ --ioengine=libaio --iodepth=32 \
+ --rw=write \
+ --size=512M \
+ --direct=1 \
+ --verify=crc32c \
+ --do_verify=1 \
+ --bs=4k > /dev/null 2>&1
+ERR_CODE=$?
+
+_cleanup_test "stripe"
+
+_remove_backfile "$backfile_0"
+_remove_backfile "$backfile_1"
+
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stripe_02.sh b/tools/testing/selftests/ublk/test_stripe_02.sh
new file mode 100755
index 000000000000..e8a45fa82dde
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_stripe_02.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="stripe_02"
+ERR_CODE=0
+
+_prep_test "stripe" "mkfs & mount & umount"
+
+backfile_0=$(_create_backfile 256M)
+backfile_1=$(_create_backfile 256M)
+dev_id=$(_add_ublk_dev -t stripe "$backfile_0" "$backfile_1")
+_check_add_dev $TID $? "$backfile_0" "$backfile_1"
+
+_mkfs_mount_test /dev/ublkb"${dev_id}"
+ERR_CODE=$?
+
+_cleanup_test "stripe"
+
+_remove_backfile "$backfile_0"
+_remove_backfile "$backfile_1"
+
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/trace/seq_io.bt b/tools/testing/selftests/ublk/trace/seq_io.bt
new file mode 100644
index 000000000000..272ac54c9d5f
--- /dev/null
+++ b/tools/testing/selftests/ublk/trace/seq_io.bt
@@ -0,0 +1,25 @@
+/*
+ $1: dev_t
+ $2: RWBS
+ $3: strlen($2)
+*/
+BEGIN {
+ @last_rw[$1, str($2)] = 0;
+}
+tracepoint:block:block_rq_complete
+{
+ $dev = $1;
+ if ((int64)args.dev == $1 && !strncmp(args.rwbs, str($2), $3)) {
+ $last = @last_rw[$dev, str($2)];
+ if ((uint64)args.sector != $last) {
+ printf("io_out_of_order: exp %llu actual %llu\n",
+ args.sector, $last);
+ }
+ @last_rw[$dev, str($2)] = (args.sector + args.nr_sector);
+ }
+ @ios = count();
+}
+
+END {
+ clear(@last_rw);
+}
diff --git a/tools/testing/selftests/ublk/ublk_dep.h b/tools/testing/selftests/ublk/ublk_dep.h
new file mode 100644
index 000000000000..f68fa7eac939
--- /dev/null
+++ b/tools/testing/selftests/ublk/ublk_dep.h
@@ -0,0 +1,18 @@
+#ifndef UBLK_DEP_H
+#define UBLK_DEP_H
+
+#ifndef UBLK_U_IO_REGISTER_IO_BUF
+#define UBLK_U_IO_REGISTER_IO_BUF \
+ _IOWR('u', 0x23, struct ublksrv_io_cmd)
+#define UBLK_U_IO_UNREGISTER_IO_BUF \
+ _IOWR('u', 0x24, struct ublksrv_io_cmd)
+#endif
+
+#ifndef UBLK_F_USER_RECOVERY_FAIL_IO
+#define UBLK_F_USER_RECOVERY_FAIL_IO (1ULL << 9)
+#endif
+
+#ifndef UBLK_F_ZONED
+#define UBLK_F_ZONED (1ULL << 8)
+#endif
+#endif
diff --git a/tools/testing/selftests/user_events/dyn_test.c b/tools/testing/selftests/user_events/dyn_test.c
index bdf9ab127488..54c9412f8dee 100644
--- a/tools/testing/selftests/user_events/dyn_test.c
+++ b/tools/testing/selftests/user_events/dyn_test.c
@@ -127,6 +127,8 @@ static int parse_abi(int *check, const char *value)
close(fd);
+ wait_for_delete();
+
return ret;
}
diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
index 1cf14a8da438..12a0614b9fd4 100644
--- a/tools/testing/selftests/vDSO/Makefile
+++ b/tools/testing/selftests/vDSO/Makefile
@@ -19,13 +19,20 @@ LDLIBS += -lgcc_s
endif
include ../lib.mk
+
+CFLAGS += $(TOOLS_INCLUDES)
+
+CFLAGS_NOLIBC := -nostdlib -nostdinc -ffreestanding -fno-asynchronous-unwind-tables \
+ -fno-stack-protector -include $(top_srcdir)/tools/include/nolibc/nolibc.h \
+ -I$(top_srcdir)/tools/include/nolibc/ $(KHDR_INCLUDES)
+
$(OUTPUT)/vdso_test_gettimeofday: parse_vdso.c vdso_test_gettimeofday.c
$(OUTPUT)/vdso_test_getcpu: parse_vdso.c vdso_test_getcpu.c
$(OUTPUT)/vdso_test_abi: parse_vdso.c vdso_test_abi.c
$(OUTPUT)/vdso_test_clock_getres: vdso_test_clock_getres.c
-$(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c
-$(OUTPUT)/vdso_standalone_test_x86: CFLAGS +=-nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector
+$(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c | headers
+$(OUTPUT)/vdso_standalone_test_x86: CFLAGS:=$(CFLAGS_NOLIBC) $(CFLAGS)
$(OUTPUT)/vdso_test_correctness: vdso_test_correctness.c
$(OUTPUT)/vdso_test_correctness: LDFLAGS += -ldl
diff --git a/tools/testing/selftests/vDSO/parse_vdso.c b/tools/testing/selftests/vDSO/parse_vdso.c
index 2fe5e983cb22..3ff00fb624a4 100644
--- a/tools/testing/selftests/vDSO/parse_vdso.c
+++ b/tools/testing/selftests/vDSO/parse_vdso.c
@@ -19,13 +19,14 @@
#include <stdint.h>
#include <string.h>
#include <limits.h>
-#include <elf.h>
+#include <linux/auxvec.h>
+#include <linux/elf.h>
#include "parse_vdso.h"
/* And here's the code. */
#ifndef ELF_BITS
-# if ULONG_MAX > 0xffffffffUL
+# if __SIZEOF_LONG__ >= 8
# define ELF_BITS 64
# else
# define ELF_BITS 32
@@ -53,7 +54,7 @@ static struct vdso_info
/* Symbol table */
ELF(Sym) *symtab;
const char *symstrings;
- ELF(Word) *gnu_hash;
+ ELF(Word) *gnu_hash, *gnu_bucket;
ELF_HASH_ENTRY *bucket, *chain;
ELF_HASH_ENTRY nbucket, nchain;
@@ -185,8 +186,8 @@ void vdso_init_from_sysinfo_ehdr(uintptr_t base)
/* The bucket array is located after the header (4 uint32) and the bloom
* filter (size_t array of gnu_hash[2] elements).
*/
- vdso_info.bucket = vdso_info.gnu_hash + 4 +
- sizeof(size_t) / 4 * vdso_info.gnu_hash[2];
+ vdso_info.gnu_bucket = vdso_info.gnu_hash + 4 +
+ sizeof(size_t) / 4 * vdso_info.gnu_hash[2];
} else {
vdso_info.nbucket = hash[0];
vdso_info.nchain = hash[1];
@@ -268,11 +269,11 @@ void *vdso_sym(const char *version, const char *name)
if (vdso_info.gnu_hash) {
uint32_t h1 = gnu_hash(name), h2, *hashval;
- i = vdso_info.bucket[h1 % vdso_info.nbucket];
+ i = vdso_info.gnu_bucket[h1 % vdso_info.nbucket];
if (i == 0)
return 0;
h1 |= 1;
- hashval = vdso_info.bucket + vdso_info.nbucket +
+ hashval = vdso_info.gnu_bucket + vdso_info.nbucket +
(i - vdso_info.gnu_hash[1]);
for (;; i++) {
ELF(Sym) *sym = &vdso_info.symtab[i];
@@ -297,17 +298,3 @@ void *vdso_sym(const char *version, const char *name)
return 0;
}
-
-void vdso_init_from_auxv(void *auxv)
-{
- ELF(auxv_t) *elf_auxv = auxv;
- for (int i = 0; elf_auxv[i].a_type != AT_NULL; i++)
- {
- if (elf_auxv[i].a_type == AT_SYSINFO_EHDR) {
- vdso_init_from_sysinfo_ehdr(elf_auxv[i].a_un.a_val);
- return;
- }
- }
-
- vdso_info.valid = false;
-}
diff --git a/tools/testing/selftests/vDSO/parse_vdso.h b/tools/testing/selftests/vDSO/parse_vdso.h
index de0453067d7c..09d068ed11f9 100644
--- a/tools/testing/selftests/vDSO/parse_vdso.h
+++ b/tools/testing/selftests/vDSO/parse_vdso.h
@@ -26,6 +26,5 @@
*/
void *vdso_sym(const char *version, const char *name);
void vdso_init_from_sysinfo_ehdr(uintptr_t base);
-void vdso_init_from_auxv(void *auxv);
#endif
diff --git a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
index 644915862af8..9ce795b806f0 100644
--- a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
+++ b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
@@ -1,142 +1,58 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * vdso_test.c: Sample code to test parse_vdso.c on x86
- * Copyright (c) 2011-2014 Andy Lutomirski
+ * vdso_test_gettimeofday.c: Sample code to test parse_vdso.c and
+ * vDSO gettimeofday()
+ * Copyright (c) 2014 Andy Lutomirski
*
- * You can amuse yourself by compiling with:
- * gcc -std=gnu99 -nostdlib
- * -Os -fno-asynchronous-unwind-tables -flto -lgcc_s
- * vdso_standalone_test_x86.c parse_vdso.c
- * to generate a small binary. On x86_64, you can omit -lgcc_s
- * if you want the binary to be completely standalone.
+ * Compile with:
+ * gcc -std=gnu99 vdso_test_gettimeofday.c parse_vdso_gettimeofday.c
+ *
+ * Tested on x86, 32-bit and 64-bit. It may work on other architectures, too.
*/
-#include <sys/syscall.h>
+#include <stdio.h>
+#ifndef NOLIBC
+#include <sys/auxv.h>
#include <sys/time.h>
-#include <unistd.h>
-#include <stdint.h>
-
-#include "parse_vdso.h"
-
-/* We need some libc functions... */
-int strcmp(const char *a, const char *b)
-{
- /* This implementation is buggy: it never returns -1. */
- while (*a || *b) {
- if (*a != *b)
- return 1;
- if (*a == 0 || *b == 0)
- return 1;
- a++;
- b++;
- }
-
- return 0;
-}
-
-/*
- * The clang build needs this, although gcc does not.
- * Stolen from lib/string.c.
- */
-void *memcpy(void *dest, const void *src, size_t count)
-{
- char *tmp = dest;
- const char *s = src;
-
- while (count--)
- *tmp++ = *s++;
- return dest;
-}
-
-/* ...and two syscalls. This is x86-specific. */
-static inline long x86_syscall3(long nr, long a0, long a1, long a2)
-{
- long ret;
-#ifdef __x86_64__
- asm volatile ("syscall" : "=a" (ret) : "a" (nr),
- "D" (a0), "S" (a1), "d" (a2) :
- "cc", "memory", "rcx",
- "r8", "r9", "r10", "r11" );
-#else
- asm volatile ("int $0x80" : "=a" (ret) : "a" (nr),
- "b" (a0), "c" (a1), "d" (a2) :
- "cc", "memory" );
#endif
- return ret;
-}
-static inline long linux_write(int fd, const void *data, size_t len)
-{
- return x86_syscall3(__NR_write, fd, (long)data, (long)len);
-}
+#include "../kselftest.h"
+#include "parse_vdso.h"
+#include "vdso_config.h"
+#include "vdso_call.h"
-static inline void linux_exit(int code)
+int main(int argc, char **argv)
{
- x86_syscall3(__NR_exit, code, 0, 0);
-}
+ const char *version = versions[VDSO_VERSION];
+ const char **name = (const char **)&names[VDSO_NAMES];
-void to_base10(char *lastdig, time_t n)
-{
- while (n) {
- *lastdig = (n % 10) + '0';
- n /= 10;
- lastdig--;
+ unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR);
+ if (!sysinfo_ehdr) {
+ printf("AT_SYSINFO_EHDR is not present!\n");
+ return KSFT_SKIP;
}
-}
-
-void c_main(void **stack)
-{
- /* Parse the stack */
- long argc = (long)*stack;
- stack += argc + 2;
-
- /* Now we're pointing at the environment. Skip it. */
- while(*stack)
- stack++;
- stack++;
- /* Now we're pointing at auxv. Initialize the vDSO parser. */
- vdso_init_from_auxv((void *)stack);
+ vdso_init_from_sysinfo_ehdr(getauxval(AT_SYSINFO_EHDR));
/* Find gettimeofday. */
typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
- gtod_t gtod = (gtod_t)vdso_sym("LINUX_2.6", "__vdso_gettimeofday");
+ gtod_t gtod = (gtod_t)vdso_sym(version, name[0]);
- if (!gtod)
- linux_exit(1);
+ if (!gtod) {
+ printf("Could not find %s\n", name[0]);
+ return KSFT_SKIP;
+ }
struct timeval tv;
- long ret = gtod(&tv, 0);
+ long ret = VDSO_CALL(gtod, 2, &tv, 0);
if (ret == 0) {
- char buf[] = "The time is .000000\n";
- to_base10(buf + 31, tv.tv_sec);
- to_base10(buf + 38, tv.tv_usec);
- linux_write(1, buf, sizeof(buf) - 1);
+ printf("The time is %lld.%06lld\n",
+ (long long)tv.tv_sec, (long long)tv.tv_usec);
} else {
- linux_exit(ret);
+ printf("%s failed\n", name[0]);
+ return KSFT_FAIL;
}
- linux_exit(0);
+ return 0;
}
-
-/*
- * This is the real entry point. It passes the initial stack into
- * the C entry point.
- */
-asm (
- ".text\n"
- ".global _start\n"
- ".type _start,@function\n"
- "_start:\n\t"
-#ifdef __x86_64__
- "mov %rsp,%rdi\n\t"
- "and $-16,%rsp\n\t"
- "sub $8,%rsp\n\t"
- "jmp c_main"
-#else
- "push %esp\n\t"
- "call c_main\n\t"
- "int $3"
-#endif
- );
diff --git a/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c b/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c
index e31b18ffae33..9ce795b806f0 100644
--- a/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c
+++ b/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c
@@ -10,11 +10,11 @@
* Tested on x86, 32-bit and 64-bit. It may work on other architectures, too.
*/
-#include <stdint.h>
-#include <elf.h>
#include <stdio.h>
+#ifndef NOLIBC
#include <sys/auxv.h>
#include <sys/time.h>
+#endif
#include "../kselftest.h"
#include "parse_vdso.h"
diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config
index 139fd9aa8b12..c305d2f613f0 100644
--- a/tools/testing/selftests/wireguard/qemu/debug.config
+++ b/tools/testing/selftests/wireguard/qemu/debug.config
@@ -27,7 +27,6 @@ CONFIG_DEBUG_KMEMLEAK=y
CONFIG_DEBUG_STACK_USAGE=y
CONFIG_DEBUG_SHIRQ=y
CONFIG_WQ_WATCHDOG=y
-CONFIG_SCHED_DEBUG=y
CONFIG_SCHED_INFO=y
CONFIG_SCHEDSTATS=y
CONFIG_SCHED_STACK_END_CHECK=y
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index d51249f14e2f..28422c32cc8f 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -19,7 +19,7 @@ TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP \
vdso_restorer
TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering \
- corrupt_xstate_header amx lam test_shadow_stack
+ corrupt_xstate_header amx lam test_shadow_stack avx
# Some selftests require 32bit support enabled also on 64bit systems
TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall
@@ -132,3 +132,7 @@ $(OUTPUT)/check_initial_reg_state_64: CFLAGS += -Wl,-ereal_start -static
$(OUTPUT)/nx_stack_32: CFLAGS += -Wl,-z,noexecstack
$(OUTPUT)/nx_stack_64: CFLAGS += -Wl,-z,noexecstack
+
+$(OUTPUT)/avx_64: CFLAGS += -mno-avx -mno-avx512f
+$(OUTPUT)/amx_64: EXTRA_FILES += xstate.c
+$(OUTPUT)/avx_64: EXTRA_FILES += xstate.c
diff --git a/tools/testing/selftests/x86/amx.c b/tools/testing/selftests/x86/amx.c
index 1fdf35a4d7f6..40769c16de1b 100644
--- a/tools/testing/selftests/x86/amx.c
+++ b/tools/testing/selftests/x86/amx.c
@@ -3,7 +3,6 @@
#define _GNU_SOURCE
#include <err.h>
#include <errno.h>
-#include <pthread.h>
#include <setjmp.h>
#include <stdio.h>
#include <string.h>
@@ -14,169 +13,27 @@
#include <sys/auxv.h>
#include <sys/mman.h>
#include <sys/shm.h>
-#include <sys/ptrace.h>
#include <sys/syscall.h>
#include <sys/wait.h>
-#include <sys/uio.h>
-#include "../kselftest.h" /* For __cpuid_count() */
+#include "helpers.h"
+#include "xstate.h"
#ifndef __x86_64__
# error This test is 64-bit only
#endif
-#define XSAVE_HDR_OFFSET 512
-#define XSAVE_HDR_SIZE 64
-
-struct xsave_buffer {
- union {
- struct {
- char legacy[XSAVE_HDR_OFFSET];
- char header[XSAVE_HDR_SIZE];
- char extended[0];
- };
- char bytes[0];
- };
-};
-
-static inline void xsave(struct xsave_buffer *xbuf, uint64_t rfbm)
-{
- uint32_t rfbm_lo = rfbm;
- uint32_t rfbm_hi = rfbm >> 32;
-
- asm volatile("xsave (%%rdi)"
- : : "D" (xbuf), "a" (rfbm_lo), "d" (rfbm_hi)
- : "memory");
-}
-
-static inline void xrstor(struct xsave_buffer *xbuf, uint64_t rfbm)
-{
- uint32_t rfbm_lo = rfbm;
- uint32_t rfbm_hi = rfbm >> 32;
-
- asm volatile("xrstor (%%rdi)"
- : : "D" (xbuf), "a" (rfbm_lo), "d" (rfbm_hi));
-}
-
/* err() exits and will not return */
#define fatal_error(msg, ...) err(1, "[FAIL]\t" msg, ##__VA_ARGS__)
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
- int flags)
-{
- struct sigaction sa;
-
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = handler;
- sa.sa_flags = SA_SIGINFO | flags;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- fatal_error("sigaction");
-}
-
-static void clearhandler(int sig)
-{
- struct sigaction sa;
-
- memset(&sa, 0, sizeof(sa));
- sa.sa_handler = SIG_DFL;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- fatal_error("sigaction");
-}
-
-#define XFEATURE_XTILECFG 17
-#define XFEATURE_XTILEDATA 18
#define XFEATURE_MASK_XTILECFG (1 << XFEATURE_XTILECFG)
#define XFEATURE_MASK_XTILEDATA (1 << XFEATURE_XTILEDATA)
#define XFEATURE_MASK_XTILE (XFEATURE_MASK_XTILECFG | XFEATURE_MASK_XTILEDATA)
-#define CPUID_LEAF1_ECX_XSAVE_MASK (1 << 26)
-#define CPUID_LEAF1_ECX_OSXSAVE_MASK (1 << 27)
-
-static uint32_t xbuf_size;
-
-static struct {
- uint32_t xbuf_offset;
- uint32_t size;
-} xtiledata;
-
-#define CPUID_LEAF_XSTATE 0xd
-#define CPUID_SUBLEAF_XSTATE_USER 0x0
-#define TILE_CPUID 0x1d
-#define TILE_PALETTE_ID 0x1
-
-static void check_cpuid_xtiledata(void)
-{
- uint32_t eax, ebx, ecx, edx;
-
- __cpuid_count(CPUID_LEAF_XSTATE, CPUID_SUBLEAF_XSTATE_USER,
- eax, ebx, ecx, edx);
-
- /*
- * EBX enumerates the size (in bytes) required by the XSAVE
- * instruction for an XSAVE area containing all the user state
- * components corresponding to bits currently set in XCR0.
- *
- * Stash that off so it can be used to allocate buffers later.
- */
- xbuf_size = ebx;
-
- __cpuid_count(CPUID_LEAF_XSTATE, XFEATURE_XTILEDATA,
- eax, ebx, ecx, edx);
- /*
- * eax: XTILEDATA state component size
- * ebx: XTILEDATA state component offset in user buffer
- */
- if (!eax || !ebx)
- fatal_error("xstate cpuid: invalid tile data size/offset: %d/%d",
- eax, ebx);
-
- xtiledata.size = eax;
- xtiledata.xbuf_offset = ebx;
-}
+struct xstate_info xtiledata;
/* The helpers for managing XSAVE buffer and tile states: */
-struct xsave_buffer *alloc_xbuf(void)
-{
- struct xsave_buffer *xbuf;
-
- /* XSAVE buffer should be 64B-aligned. */
- xbuf = aligned_alloc(64, xbuf_size);
- if (!xbuf)
- fatal_error("aligned_alloc()");
- return xbuf;
-}
-
-static inline void clear_xstate_header(struct xsave_buffer *buffer)
-{
- memset(&buffer->header, 0, sizeof(buffer->header));
-}
-
-static inline void set_xstatebv(struct xsave_buffer *buffer, uint64_t bv)
-{
- /* XSTATE_BV is at the beginning of the header: */
- *(uint64_t *)(&buffer->header) = bv;
-}
-
-static void set_rand_tiledata(struct xsave_buffer *xbuf)
-{
- int *ptr = (int *)&xbuf->bytes[xtiledata.xbuf_offset];
- int data;
- int i;
-
- /*
- * Ensure that 'data' is never 0. This ensures that
- * the registers are never in their initial configuration
- * and thus never tracked as being in the init state.
- */
- data = rand() | 1;
-
- for (i = 0; i < xtiledata.size / sizeof(int); i++, ptr++)
- *ptr = data;
-}
-
struct xsave_buffer *stashed_xsave;
static void init_stashed_xsave(void)
@@ -192,21 +49,6 @@ static void free_stashed_xsave(void)
free(stashed_xsave);
}
-/* See 'struct _fpx_sw_bytes' at sigcontext.h */
-#define SW_BYTES_OFFSET 464
-/* N.B. The struct's field name varies so read from the offset. */
-#define SW_BYTES_BV_OFFSET (SW_BYTES_OFFSET + 8)
-
-static inline struct _fpx_sw_bytes *get_fpx_sw_bytes(void *buffer)
-{
- return (struct _fpx_sw_bytes *)(buffer + SW_BYTES_OFFSET);
-}
-
-static inline uint64_t get_fpx_sw_bytes_features(void *buffer)
-{
- return *(uint64_t *)(buffer + SW_BYTES_BV_OFFSET);
-}
-
/* Work around printf() being unsafe in signals: */
#define SIGNAL_BUF_LEN 1000
char signal_message_buffer[SIGNAL_BUF_LEN];
@@ -304,17 +146,10 @@ static inline bool load_rand_tiledata(struct xsave_buffer *xbuf)
{
clear_xstate_header(xbuf);
set_xstatebv(xbuf, XFEATURE_MASK_XTILEDATA);
- set_rand_tiledata(xbuf);
+ set_rand_data(&xtiledata, xbuf);
return xrstor_safe(xbuf, XFEATURE_MASK_XTILEDATA);
}
-/* Return XTILEDATA to its initial configuration. */
-static inline void init_xtiledata(void)
-{
- clear_xstate_header(stashed_xsave);
- xrstor_safe(stashed_xsave, XFEATURE_MASK_XTILEDATA);
-}
-
enum expected_result { FAIL_EXPECTED, SUCCESS_EXPECTED };
/* arch_prctl() and sigaltstack() test */
@@ -587,14 +422,6 @@ static inline bool __validate_tiledata_regs(struct xsave_buffer *xbuf1)
return true;
}
-static inline void validate_tiledata_regs_same(struct xsave_buffer *xbuf)
-{
- int ret = __validate_tiledata_regs(xbuf);
-
- if (ret != 0)
- fatal_error("TILEDATA registers changed");
-}
-
static inline void validate_tiledata_regs_changed(struct xsave_buffer *xbuf)
{
int ret = __validate_tiledata_regs(xbuf);
@@ -651,251 +478,6 @@ static void test_fork(void)
_exit(0);
}
-/* Context switching test */
-
-static struct _ctxtswtest_cfg {
- unsigned int iterations;
- unsigned int num_threads;
-} ctxtswtest_config;
-
-struct futex_info {
- pthread_t thread;
- int nr;
- pthread_mutex_t mutex;
- struct futex_info *next;
-};
-
-static void *check_tiledata(void *info)
-{
- struct futex_info *finfo = (struct futex_info *)info;
- struct xsave_buffer *xbuf;
- int i;
-
- xbuf = alloc_xbuf();
- if (!xbuf)
- fatal_error("unable to allocate XSAVE buffer");
-
- /*
- * Load random data into 'xbuf' and then restore
- * it to the tile registers themselves.
- */
- load_rand_tiledata(xbuf);
- for (i = 0; i < ctxtswtest_config.iterations; i++) {
- pthread_mutex_lock(&finfo->mutex);
-
- /*
- * Ensure the register values have not
- * diverged from those recorded in 'xbuf'.
- */
- validate_tiledata_regs_same(xbuf);
-
- /* Load new, random values into xbuf and registers */
- load_rand_tiledata(xbuf);
-
- /*
- * The last thread's last unlock will be for
- * thread 0's mutex. However, thread 0 will
- * have already exited the loop and the mutex
- * will already be unlocked.
- *
- * Because this is not an ERRORCHECK mutex,
- * that inconsistency will be silently ignored.
- */
- pthread_mutex_unlock(&finfo->next->mutex);
- }
-
- free(xbuf);
- /*
- * Return this thread's finfo, which is
- * a unique value for this thread.
- */
- return finfo;
-}
-
-static int create_threads(int num, struct futex_info *finfo)
-{
- int i;
-
- for (i = 0; i < num; i++) {
- int next_nr;
-
- finfo[i].nr = i;
- /*
- * Thread 'i' will wait on this mutex to
- * be unlocked. Lock it immediately after
- * initialization:
- */
- pthread_mutex_init(&finfo[i].mutex, NULL);
- pthread_mutex_lock(&finfo[i].mutex);
-
- next_nr = (i + 1) % num;
- finfo[i].next = &finfo[next_nr];
-
- if (pthread_create(&finfo[i].thread, NULL, check_tiledata, &finfo[i]))
- fatal_error("pthread_create()");
- }
- return 0;
-}
-
-static void affinitize_cpu0(void)
-{
- cpu_set_t cpuset;
-
- CPU_ZERO(&cpuset);
- CPU_SET(0, &cpuset);
-
- if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
- fatal_error("sched_setaffinity to CPU 0");
-}
-
-static void test_context_switch(void)
-{
- struct futex_info *finfo;
- int i;
-
- /* Affinitize to one CPU to force context switches */
- affinitize_cpu0();
-
- req_xtiledata_perm();
-
- printf("[RUN]\tCheck tiledata context switches, %d iterations, %d threads.\n",
- ctxtswtest_config.iterations,
- ctxtswtest_config.num_threads);
-
-
- finfo = malloc(sizeof(*finfo) * ctxtswtest_config.num_threads);
- if (!finfo)
- fatal_error("malloc()");
-
- create_threads(ctxtswtest_config.num_threads, finfo);
-
- /*
- * This thread wakes up thread 0
- * Thread 0 will wake up 1
- * Thread 1 will wake up 2
- * ...
- * the last thread will wake up 0
- *
- * ... this will repeat for the configured
- * number of iterations.
- */
- pthread_mutex_unlock(&finfo[0].mutex);
-
- /* Wait for all the threads to finish: */
- for (i = 0; i < ctxtswtest_config.num_threads; i++) {
- void *thread_retval;
- int rc;
-
- rc = pthread_join(finfo[i].thread, &thread_retval);
-
- if (rc)
- fatal_error("pthread_join() failed for thread %d err: %d\n",
- i, rc);
-
- if (thread_retval != &finfo[i])
- fatal_error("unexpected thread retval for thread %d: %p\n",
- i, thread_retval);
-
- }
-
- printf("[OK]\tNo incorrect case was found.\n");
-
- free(finfo);
-}
-
-/* Ptrace test */
-
-/*
- * Make sure the ptracee has the expanded kernel buffer on the first
- * use. Then, initialize the state before performing the state
- * injection from the ptracer.
- */
-static inline void ptracee_firstuse_tiledata(void)
-{
- load_rand_tiledata(stashed_xsave);
- init_xtiledata();
-}
-
-/*
- * Ptracer injects the randomized tile data state. It also reads
- * before and after that, which will execute the kernel's state copy
- * functions. So, the tester is advised to double-check any emitted
- * kernel messages.
- */
-static void ptracer_inject_tiledata(pid_t target)
-{
- struct xsave_buffer *xbuf;
- struct iovec iov;
-
- xbuf = alloc_xbuf();
- if (!xbuf)
- fatal_error("unable to allocate XSAVE buffer");
-
- printf("\tRead the init'ed tiledata via ptrace().\n");
-
- iov.iov_base = xbuf;
- iov.iov_len = xbuf_size;
-
- memset(stashed_xsave, 0, xbuf_size);
-
- if (ptrace(PTRACE_GETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov))
- fatal_error("PTRACE_GETREGSET");
-
- if (!__compare_tiledata_state(stashed_xsave, xbuf))
- printf("[OK]\tThe init'ed tiledata was read from ptracee.\n");
- else
- printf("[FAIL]\tThe init'ed tiledata was not read from ptracee.\n");
-
- printf("\tInject tiledata via ptrace().\n");
-
- load_rand_tiledata(xbuf);
-
- memcpy(&stashed_xsave->bytes[xtiledata.xbuf_offset],
- &xbuf->bytes[xtiledata.xbuf_offset],
- xtiledata.size);
-
- if (ptrace(PTRACE_SETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov))
- fatal_error("PTRACE_SETREGSET");
-
- if (ptrace(PTRACE_GETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov))
- fatal_error("PTRACE_GETREGSET");
-
- if (!__compare_tiledata_state(stashed_xsave, xbuf))
- printf("[OK]\tTiledata was correctly written to ptracee.\n");
- else
- printf("[FAIL]\tTiledata was not correctly written to ptracee.\n");
-}
-
-static void test_ptrace(void)
-{
- pid_t child;
- int status;
-
- child = fork();
- if (child < 0) {
- err(1, "fork");
- } else if (!child) {
- if (ptrace(PTRACE_TRACEME, 0, NULL, NULL))
- err(1, "PTRACE_TRACEME");
-
- ptracee_firstuse_tiledata();
-
- raise(SIGTRAP);
- _exit(0);
- }
-
- do {
- wait(&status);
- } while (WSTOPSIG(status) != SIGTRAP);
-
- ptracer_inject_tiledata(child);
-
- ptrace(PTRACE_DETACH, child, NULL, NULL);
- wait(&status);
- if (!WIFEXITED(status) || WEXITSTATUS(status))
- err(1, "ptrace test");
-}
-
int main(void)
{
unsigned long features;
@@ -907,7 +489,11 @@ int main(void)
return KSFT_SKIP;
}
- check_cpuid_xtiledata();
+ xtiledata = get_xstate_info(XFEATURE_XTILEDATA);
+ if (!xtiledata.size || !xtiledata.xbuf_offset) {
+ fatal_error("xstate cpuid: invalid tile data size/offset: %d/%d",
+ xtiledata.size, xtiledata.xbuf_offset);
+ }
init_stashed_xsave();
sethandler(SIGILL, handle_noperm, 0);
@@ -919,11 +505,11 @@ int main(void)
test_fork();
- ctxtswtest_config.iterations = 10;
- ctxtswtest_config.num_threads = 5;
- test_context_switch();
-
- test_ptrace();
+ /*
+ * Perform generic xstate tests for context switching, ptrace,
+ * and signal.
+ */
+ test_xstate(XFEATURE_XTILEDATA);
clearhandler(SIGILL);
free_stashed_xsave();
diff --git a/tools/testing/selftests/x86/avx.c b/tools/testing/selftests/x86/avx.c
new file mode 100644
index 000000000000..11d5367c235f
--- /dev/null
+++ b/tools/testing/selftests/x86/avx.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE /* Required for inline xstate helpers */
+#include "xstate.h"
+
+int main(void)
+{
+ test_xstate(XFEATURE_YMM);
+ test_xstate(XFEATURE_OPMASK);
+ test_xstate(XFEATURE_ZMM_Hi256);
+ test_xstate(XFEATURE_Hi16_ZMM);
+}
diff --git a/tools/testing/selftests/x86/corrupt_xstate_header.c b/tools/testing/selftests/x86/corrupt_xstate_header.c
index cf9ce8fbb656..93a89a5997ca 100644
--- a/tools/testing/selftests/x86/corrupt_xstate_header.c
+++ b/tools/testing/selftests/x86/corrupt_xstate_header.c
@@ -18,6 +18,7 @@
#include <sys/wait.h>
#include "../kselftest.h" /* For __cpuid_count() */
+#include "helpers.h"
static inline int xsave_enabled(void)
{
@@ -29,19 +30,6 @@ static inline int xsave_enabled(void)
return ecx & (1U << 27);
}
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
- int flags)
-{
- struct sigaction sa;
-
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = handler;
- sa.sa_flags = SA_SIGINFO | flags;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
-
static void sigusr1(int sig, siginfo_t *info, void *uc_void)
{
ucontext_t *uc = uc_void;
diff --git a/tools/testing/selftests/x86/entry_from_vm86.c b/tools/testing/selftests/x86/entry_from_vm86.c
index d1e919b0c1dc..5cb8393737d0 100644
--- a/tools/testing/selftests/x86/entry_from_vm86.c
+++ b/tools/testing/selftests/x86/entry_from_vm86.c
@@ -24,31 +24,11 @@
#include <errno.h>
#include <sys/vm86.h>
+#include "helpers.h"
+
static unsigned long load_addr = 0x10000;
static int nerrs = 0;
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
- int flags)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = handler;
- sa.sa_flags = SA_SIGINFO | flags;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
-
-static void clearhandler(int sig)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_handler = SIG_DFL;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
-
static sig_atomic_t got_signal;
static void sighandler(int sig, siginfo_t *info, void *ctx_void)
diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c
index 50cf32de6313..0a75252d31b6 100644
--- a/tools/testing/selftests/x86/fsgsbase.c
+++ b/tools/testing/selftests/x86/fsgsbase.c
@@ -28,6 +28,8 @@
#include <sys/wait.h>
#include <setjmp.h>
+#include "helpers.h"
+
#ifndef __x86_64__
# error This test is 64-bit only
#endif
@@ -39,28 +41,6 @@ static unsigned short *shared_scratch;
static int nerrs;
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
- int flags)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = handler;
- sa.sa_flags = SA_SIGINFO | flags;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
-
-static void clearhandler(int sig)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_handler = SIG_DFL;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
-
static void sigsegv(int sig, siginfo_t *si, void *ctx_void)
{
ucontext_t *ctx = (ucontext_t*)ctx_void;
diff --git a/tools/testing/selftests/x86/helpers.h b/tools/testing/selftests/x86/helpers.h
index 4ef42c4559a9..6deaad035161 100644
--- a/tools/testing/selftests/x86/helpers.h
+++ b/tools/testing/selftests/x86/helpers.h
@@ -2,8 +2,13 @@
#ifndef __SELFTESTS_X86_HELPERS_H
#define __SELFTESTS_X86_HELPERS_H
+#include <signal.h>
+#include <string.h>
+
#include <asm/processor-flags.h>
+#include "../kselftest.h"
+
static inline unsigned long get_eflags(void)
{
#ifdef __x86_64__
@@ -22,4 +27,27 @@ static inline void set_eflags(unsigned long eflags)
#endif
}
+static inline void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), int flags)
+{
+ struct sigaction sa;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ ksft_exit_fail_msg("sigaction failed");
+}
+
+static inline void clearhandler(int sig)
+{
+ struct sigaction sa;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = SIG_DFL;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ ksft_exit_fail_msg("sigaction failed");
+}
+
#endif /* __SELFTESTS_X86_HELPERS_H */
diff --git a/tools/testing/selftests/x86/ioperm.c b/tools/testing/selftests/x86/ioperm.c
index 57ec5e99edb9..69d5fb7050c2 100644
--- a/tools/testing/selftests/x86/ioperm.c
+++ b/tools/testing/selftests/x86/ioperm.c
@@ -20,30 +20,9 @@
#include <sched.h>
#include <sys/io.h>
-static int nerrs = 0;
-
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
- int flags)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = handler;
- sa.sa_flags = SA_SIGINFO | flags;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-
-}
+#include "helpers.h"
-static void clearhandler(int sig)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_handler = SIG_DFL;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
+static int nerrs = 0;
static jmp_buf jmpbuf;
diff --git a/tools/testing/selftests/x86/iopl.c b/tools/testing/selftests/x86/iopl.c
index 7e3e09c1abac..457b6715542b 100644
--- a/tools/testing/selftests/x86/iopl.c
+++ b/tools/testing/selftests/x86/iopl.c
@@ -20,30 +20,9 @@
#include <sched.h>
#include <sys/io.h>
-static int nerrs = 0;
-
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
- int flags)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = handler;
- sa.sa_flags = SA_SIGINFO | flags;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-
-}
+#include "helpers.h"
-static void clearhandler(int sig)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_handler = SIG_DFL;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
+static int nerrs = 0;
static jmp_buf jmpbuf;
diff --git a/tools/testing/selftests/x86/lam.c b/tools/testing/selftests/x86/lam.c
index 0ea4f6813930..18d736640ece 100644
--- a/tools/testing/selftests/x86/lam.c
+++ b/tools/testing/selftests/x86/lam.c
@@ -4,6 +4,7 @@
#include <stdlib.h>
#include <string.h>
#include <sys/syscall.h>
+#include <sys/ioctl.h>
#include <time.h>
#include <signal.h>
#include <setjmp.h>
@@ -43,7 +44,15 @@
#define FUNC_INHERITE 0x20
#define FUNC_PASID 0x40
+/* get_user() pointer test cases */
+#define GET_USER_USER 0
+#define GET_USER_KERNEL_TOP 1
+#define GET_USER_KERNEL_BOT 2
+#define GET_USER_KERNEL 3
+
#define TEST_MASK 0x7f
+#define L5_SIGN_EXT_MASK (0xFFUL << 56)
+#define L4_SIGN_EXT_MASK (0x1FFFFUL << 47)
#define LOW_ADDR (0x1UL << 30)
#define HIGH_ADDR (0x3UL << 48)
@@ -115,23 +124,42 @@ static void segv_handler(int sig)
siglongjmp(segv_env, 1);
}
-static inline int cpu_has_lam(void)
+static inline int lam_is_available(void)
{
unsigned int cpuinfo[4];
+ unsigned long bits = 0;
+ int ret;
__cpuid_count(0x7, 1, cpuinfo[0], cpuinfo[1], cpuinfo[2], cpuinfo[3]);
- return (cpuinfo[0] & (1 << 26));
+ /* Check if cpu supports LAM */
+ if (!(cpuinfo[0] & (1 << 26))) {
+ ksft_print_msg("LAM is not supported!\n");
+ return 0;
+ }
+
+ /* Return 0 if CONFIG_ADDRESS_MASKING is not set */
+ ret = syscall(SYS_arch_prctl, ARCH_GET_MAX_TAG_BITS, &bits);
+ if (ret) {
+ ksft_print_msg("LAM is disabled in the kernel!\n");
+ return 0;
+ }
+
+ return 1;
}
-/* Check 5-level page table feature in CPUID.(EAX=07H, ECX=00H):ECX.[bit 16] */
-static inline int cpu_has_la57(void)
+static inline int la57_enabled(void)
{
- unsigned int cpuinfo[4];
+ int ret;
+ void *p;
+
+ p = mmap((void *)HIGH_ADDR, PAGE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
- __cpuid_count(0x7, 0, cpuinfo[0], cpuinfo[1], cpuinfo[2], cpuinfo[3]);
+ ret = p == MAP_FAILED ? 0 : 1;
- return (cpuinfo[2] & (1 << 16));
+ munmap(p, PAGE_SIZE);
+ return ret;
}
/*
@@ -237,7 +265,7 @@ static uint64_t set_metadata(uint64_t src, unsigned long lam)
* both pointers should point to the same address.
*
* @return:
- * 0: value on the pointer with metadate and value on original are same
+ * 0: value on the pointer with metadata and value on original are same
* 1: not same.
*/
static int handle_lam_test(void *src, unsigned int lam)
@@ -322,7 +350,7 @@ static int handle_mmap(struct testcases *test)
flags, -1, 0);
if (ptr == MAP_FAILED) {
if (test->addr == HIGH_ADDR)
- if (!cpu_has_la57())
+ if (!la57_enabled())
return 3; /* unsupport LA57 */
return 1;
}
@@ -370,6 +398,78 @@ static int handle_syscall(struct testcases *test)
return ret;
}
+static int get_user_syscall(struct testcases *test)
+{
+ uint64_t ptr_address, bitmask;
+ int fd, ret = 0;
+ void *ptr;
+
+ if (la57_enabled()) {
+ bitmask = L5_SIGN_EXT_MASK;
+ ptr_address = HIGH_ADDR;
+ } else {
+ bitmask = L4_SIGN_EXT_MASK;
+ ptr_address = LOW_ADDR;
+ }
+
+ ptr = mmap((void *)ptr_address, PAGE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
+
+ if (ptr == MAP_FAILED) {
+ perror("failed to map byte to pass into get_user");
+ return 1;
+ }
+
+ if (set_lam(test->lam) != 0) {
+ ret = 2;
+ goto error;
+ }
+
+ fd = memfd_create("lam_ioctl", 0);
+ if (fd == -1) {
+ munmap(ptr, PAGE_SIZE);
+ exit(EXIT_FAILURE);
+ }
+
+ switch (test->later) {
+ case GET_USER_USER:
+ /* Control group - properly tagged user pointer */
+ ptr = (void *)set_metadata((uint64_t)ptr, test->lam);
+ break;
+ case GET_USER_KERNEL_TOP:
+ /* Kernel address with top bit cleared */
+ bitmask &= (bitmask >> 1);
+ ptr = (void *)((uint64_t)ptr | bitmask);
+ break;
+ case GET_USER_KERNEL_BOT:
+ /* Kernel address with bottom sign-extension bit cleared */
+ bitmask &= (bitmask << 1);
+ ptr = (void *)((uint64_t)ptr | bitmask);
+ break;
+ case GET_USER_KERNEL:
+ /* Try to pass a kernel address */
+ ptr = (void *)((uint64_t)ptr | bitmask);
+ break;
+ default:
+ printf("Invalid test case value passed!\n");
+ break;
+ }
+
+ /*
+ * Use FIOASYNC ioctl because it utilizes get_user() internally and is
+ * very non-invasive to the system. Pass differently tagged pointers to
+ * get_user() in order to verify that valid user pointers are going
+ * through and invalid kernel/non-canonical pointers are not.
+ */
+ if (ioctl(fd, FIOASYNC, ptr) != 0)
+ ret = 1;
+
+ close(fd);
+error:
+ munmap(ptr, PAGE_SIZE);
+ return ret;
+}
+
int sys_uring_setup(unsigned int entries, struct io_uring_params *p)
{
return (int)syscall(__NR_io_uring_setup, entries, p);
@@ -596,8 +696,10 @@ int do_uring(unsigned long lam)
fi->file_fd = file_fd;
ring = malloc(sizeof(*ring));
- if (!ring)
+ if (!ring) {
+ free(fi);
return 1;
+ }
memset(ring, 0, sizeof(struct io_ring));
@@ -883,6 +985,33 @@ static struct testcases syscall_cases[] = {
.test_func = handle_syscall,
.msg = "SYSCALL:[Negative] Disable LAM. Dereferencing pointer with metadata.\n",
},
+ {
+ .later = GET_USER_USER,
+ .lam = LAM_U57_BITS,
+ .test_func = get_user_syscall,
+ .msg = "GET_USER: get_user() and pass a properly tagged user pointer.\n",
+ },
+ {
+ .later = GET_USER_KERNEL_TOP,
+ .expected = 1,
+ .lam = LAM_U57_BITS,
+ .test_func = get_user_syscall,
+ .msg = "GET_USER:[Negative] get_user() with a kernel pointer and the top bit cleared.\n",
+ },
+ {
+ .later = GET_USER_KERNEL_BOT,
+ .expected = 1,
+ .lam = LAM_U57_BITS,
+ .test_func = get_user_syscall,
+ .msg = "GET_USER:[Negative] get_user() with a kernel pointer and the bottom sign-extension bit cleared.\n",
+ },
+ {
+ .later = GET_USER_KERNEL,
+ .expected = 1,
+ .lam = LAM_U57_BITS,
+ .test_func = get_user_syscall,
+ .msg = "GET_USER:[Negative] get_user() and pass a kernel pointer.\n",
+ },
};
static struct testcases mmap_cases[] = {
@@ -1181,10 +1310,8 @@ int main(int argc, char **argv)
tests_cnt = 0;
- if (!cpu_has_lam()) {
- ksft_print_msg("Unsupported LAM feature!\n");
+ if (!lam_is_available())
return KSFT_SKIP;
- }
while ((c = getopt(argc, argv, "ht:")) != -1) {
switch (c) {
diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c
index 3a29346e1452..bb99a71380a5 100644
--- a/tools/testing/selftests/x86/ldt_gdt.c
+++ b/tools/testing/selftests/x86/ldt_gdt.c
@@ -26,6 +26,8 @@
#include <asm/prctl.h>
#include <sys/prctl.h>
+#include "helpers.h"
+
#define AR_ACCESSED (1<<8)
#define AR_TYPE_RODATA (0 * (1<<9))
@@ -506,20 +508,6 @@ static void fix_sa_restorer(int sig)
}
#endif
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
- int flags)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = handler;
- sa.sa_flags = SA_SIGINFO | flags;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-
- fix_sa_restorer(sig);
-}
-
static jmp_buf jmpbuf;
static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
@@ -549,9 +537,11 @@ static void do_multicpu_tests(void)
}
sethandler(SIGSEGV, sigsegv, 0);
+ fix_sa_restorer(SIGSEGV);
#ifdef __i386__
/* True 32-bit kernels send SIGILL instead of SIGSEGV on IRET faults. */
sethandler(SIGILL, sigsegv, 0);
+ fix_sa_restorer(SIGILL);
#endif
printf("[RUN]\tCross-CPU LDT invalidation\n");
diff --git a/tools/testing/selftests/x86/mov_ss_trap.c b/tools/testing/selftests/x86/mov_ss_trap.c
index cc3de6ff9fba..f22cb6b382f9 100644
--- a/tools/testing/selftests/x86/mov_ss_trap.c
+++ b/tools/testing/selftests/x86/mov_ss_trap.c
@@ -36,7 +36,7 @@
#include <setjmp.h>
#include <sys/prctl.h>
-#define X86_EFLAGS_RF (1UL << 16)
+#include "helpers.h"
#if __x86_64__
# define REG_IP REG_RIP
@@ -94,18 +94,6 @@ static void enable_watchpoint(void)
}
}
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
- int flags)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = handler;
- sa.sa_flags = SA_SIGINFO | flags;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
-
static char const * const signames[] = {
[SIGSEGV] = "SIGSEGV",
[SIGBUS] = "SIBGUS",
diff --git a/tools/testing/selftests/x86/ptrace_syscall.c b/tools/testing/selftests/x86/ptrace_syscall.c
index 12aaa063196e..360ec88d5432 100644
--- a/tools/testing/selftests/x86/ptrace_syscall.c
+++ b/tools/testing/selftests/x86/ptrace_syscall.c
@@ -15,6 +15,8 @@
#include <asm/ptrace-abi.h>
#include <sys/auxv.h>
+#include "helpers.h"
+
/* Bitness-agnostic defines for user_regs_struct fields. */
#ifdef __x86_64__
# define user_syscall_nr orig_rax
@@ -93,18 +95,6 @@ static siginfo_t wait_trap(pid_t chld)
return si;
}
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
- int flags)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = handler;
- sa.sa_flags = SA_SIGINFO | flags;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
-
static void setsigign(int sig, int flags)
{
struct sigaction sa;
@@ -116,16 +106,6 @@ static void setsigign(int sig, int flags)
err(1, "sigaction");
}
-static void clearhandler(int sig)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_handler = SIG_DFL;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
-
#ifdef __x86_64__
# define REG_BP REG_RBP
#else
diff --git a/tools/testing/selftests/x86/sigaltstack.c b/tools/testing/selftests/x86/sigaltstack.c
index f689af75e979..0ae1b784498c 100644
--- a/tools/testing/selftests/x86/sigaltstack.c
+++ b/tools/testing/selftests/x86/sigaltstack.c
@@ -14,6 +14,8 @@
#include <sys/resource.h>
#include <setjmp.h>
+#include "helpers.h"
+
/* sigaltstack()-enforced minimum stack */
#define ENFORCED_MINSIGSTKSZ 2048
@@ -27,30 +29,6 @@ static bool sigalrm_expected;
static unsigned long at_minstack_size;
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
- int flags)
-{
- struct sigaction sa;
-
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = handler;
- sa.sa_flags = SA_SIGINFO | flags;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
-
-static void clearhandler(int sig)
-{
- struct sigaction sa;
-
- memset(&sa, 0, sizeof(sa));
- sa.sa_handler = SIG_DFL;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
-
static int setup_altstack(void *start, unsigned long size)
{
stack_t ss;
diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c
index 0b75b29f794b..26ef562f4232 100644
--- a/tools/testing/selftests/x86/sigreturn.c
+++ b/tools/testing/selftests/x86/sigreturn.c
@@ -46,6 +46,8 @@
#include <sys/ptrace.h>
#include <sys/user.h>
+#include "helpers.h"
+
/* Pull in AR_xyz defines. */
typedef unsigned int u32;
typedef unsigned short u16;
@@ -138,28 +140,6 @@ static unsigned short LDT3(int idx)
return (idx << 3) | 7;
}
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
- int flags)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = handler;
- sa.sa_flags = SA_SIGINFO | flags;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
-
-static void clearhandler(int sig)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_handler = SIG_DFL;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
-
static void add_ldt(const struct user_desc *desc, unsigned short *var,
const char *name)
{
diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c
index 9a30f443e928..280d7a22b9c9 100644
--- a/tools/testing/selftests/x86/single_step_syscall.c
+++ b/tools/testing/selftests/x86/single_step_syscall.c
@@ -33,28 +33,6 @@
#include "helpers.h"
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
- int flags)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = handler;
- sa.sa_flags = SA_SIGINFO | flags;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
-
-static void clearhandler(int sig)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_handler = SIG_DFL;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
-
static volatile sig_atomic_t sig_traps, sig_eflags;
sigjmp_buf jmpbuf;
diff --git a/tools/testing/selftests/x86/syscall_arg_fault.c b/tools/testing/selftests/x86/syscall_arg_fault.c
index 48ab065a76f9..f67a2df335ba 100644
--- a/tools/testing/selftests/x86/syscall_arg_fault.c
+++ b/tools/testing/selftests/x86/syscall_arg_fault.c
@@ -17,18 +17,6 @@
#include "helpers.h"
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
- int flags)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = handler;
- sa.sa_flags = SA_SIGINFO | flags;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
-
static sigjmp_buf jmpbuf;
static volatile sig_atomic_t n_errs;
diff --git a/tools/testing/selftests/x86/syscall_nt.c b/tools/testing/selftests/x86/syscall_nt.c
index a108b80dd082..f9c9814160f0 100644
--- a/tools/testing/selftests/x86/syscall_nt.c
+++ b/tools/testing/selftests/x86/syscall_nt.c
@@ -18,18 +18,6 @@
static unsigned int nerrs;
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
- int flags)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = handler;
- sa.sa_flags = SA_SIGINFO | flags;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
-
static void sigtrap(int sig, siginfo_t *si, void *ctx_void)
{
}
diff --git a/tools/testing/selftests/x86/syscall_numbering.c b/tools/testing/selftests/x86/syscall_numbering.c
index 991591718bb0..41c42b7b54a6 100644
--- a/tools/testing/selftests/x86/syscall_numbering.c
+++ b/tools/testing/selftests/x86/syscall_numbering.c
@@ -25,6 +25,7 @@
#include <sys/mman.h>
#include <linux/ptrace.h>
+#include "../kselftest.h"
/* Common system call numbers */
#define SYS_READ 0
@@ -313,7 +314,7 @@ static void test_syscall_numbering(void)
* The MSB is supposed to be ignored, so we loop over a few
* to test that out.
*/
- for (size_t i = 0; i < sizeof(msbs)/sizeof(msbs[0]); i++) {
+ for (size_t i = 0; i < ARRAY_SIZE(msbs); i++) {
int msb = msbs[i];
run("Checking system calls with msb = %d (0x%x)\n",
msb, msb);
diff --git a/tools/testing/selftests/x86/sysret_rip.c b/tools/testing/selftests/x86/sysret_rip.c
index b30de9aaa6d4..5fb531e3ad7c 100644
--- a/tools/testing/selftests/x86/sysret_rip.c
+++ b/tools/testing/selftests/x86/sysret_rip.c
@@ -22,6 +22,8 @@
#include <sys/mman.h>
#include <assert.h>
+#include "helpers.h"
+
/*
* These items are in clang_helpers_64.S, in order to avoid clang inline asm
* limitations:
@@ -31,28 +33,6 @@ extern const char test_page[];
static void const *current_test_page_addr = test_page;
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
- int flags)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = handler;
- sa.sa_flags = SA_SIGINFO | flags;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
-
-static void clearhandler(int sig)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_handler = SIG_DFL;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
-
/* State used by our signal handlers. */
static gregset_t initial_regs;
diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c
index 6de11b4df458..05e1e6774fba 100644
--- a/tools/testing/selftests/x86/test_vsyscall.c
+++ b/tools/testing/selftests/x86/test_vsyscall.c
@@ -310,19 +310,6 @@ static void test_getcpu(int cpu)
static jmp_buf jmpbuf;
static volatile unsigned long segv_err;
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
- int flags)
-{
- struct sigaction sa;
-
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = handler;
- sa.sa_flags = SA_SIGINFO | flags;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- ksft_exit_fail_msg("sigaction failed\n");
-}
-
static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
{
ucontext_t *ctx = (ucontext_t *)ctx_void;
diff --git a/tools/testing/selftests/x86/unwind_vdso.c b/tools/testing/selftests/x86/unwind_vdso.c
index 4c311e1af4c7..9cc17588d818 100644
--- a/tools/testing/selftests/x86/unwind_vdso.c
+++ b/tools/testing/selftests/x86/unwind_vdso.c
@@ -43,18 +43,6 @@ int main()
#include <dlfcn.h>
#include <unwind.h>
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
- int flags)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = handler;
- sa.sa_flags = SA_SIGINFO | flags;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
-
static volatile sig_atomic_t nerrs;
static unsigned long sysinfo;
static bool got_sysinfo = false;
diff --git a/tools/testing/selftests/x86/xstate.c b/tools/testing/selftests/x86/xstate.c
new file mode 100644
index 000000000000..23c1d6c964ea
--- /dev/null
+++ b/tools/testing/selftests/x86/xstate.c
@@ -0,0 +1,477 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <elf.h>
+#include <pthread.h>
+#include <stdbool.h>
+
+#include <asm/prctl.h>
+#include <sys/ptrace.h>
+#include <sys/syscall.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+
+#include "helpers.h"
+#include "xstate.h"
+
+/*
+ * The userspace xstate test suite is designed to be generic and operates
+ * with randomized xstate data. However, some states require special handling:
+ *
+ * - PKRU and XTILECFG need specific adjustments, such as modifying
+ * randomization behavior or using fixed values.
+ * - But, PKRU already has a dedicated test suite in /tools/selftests/mm.
+ * - Legacy states (FP and SSE) are excluded, as they are not considered
+ * part of extended states (xstates) and their usage is already deeply
+ * integrated into user-space libraries.
+ */
+#define XFEATURE_MASK_TEST_SUPPORTED \
+ ((1 << XFEATURE_YMM) | \
+ (1 << XFEATURE_OPMASK) | \
+ (1 << XFEATURE_ZMM_Hi256) | \
+ (1 << XFEATURE_Hi16_ZMM) | \
+ (1 << XFEATURE_XTILEDATA))
+
+static inline uint64_t xgetbv(uint32_t index)
+{
+ uint32_t eax, edx;
+
+ asm volatile("xgetbv" : "=a" (eax), "=d" (edx) : "c" (index));
+ return eax + ((uint64_t)edx << 32);
+}
+
+static inline uint64_t get_xstatebv(struct xsave_buffer *xbuf)
+{
+ return *(uint64_t *)(&xbuf->header);
+}
+
+static struct xstate_info xstate;
+
+struct futex_info {
+ unsigned int iterations;
+ struct futex_info *next;
+ pthread_mutex_t mutex;
+ pthread_t thread;
+ bool valid;
+ int nr;
+};
+
+static inline void load_rand_xstate(struct xstate_info *xstate, struct xsave_buffer *xbuf)
+{
+ clear_xstate_header(xbuf);
+ set_xstatebv(xbuf, xstate->mask);
+ set_rand_data(xstate, xbuf);
+ xrstor(xbuf, xstate->mask);
+}
+
+static inline void load_init_xstate(struct xstate_info *xstate, struct xsave_buffer *xbuf)
+{
+ clear_xstate_header(xbuf);
+ xrstor(xbuf, xstate->mask);
+}
+
+static inline void copy_xstate(struct xsave_buffer *xbuf_dst, struct xsave_buffer *xbuf_src)
+{
+ memcpy(&xbuf_dst->bytes[xstate.xbuf_offset],
+ &xbuf_src->bytes[xstate.xbuf_offset],
+ xstate.size);
+}
+
+static inline bool validate_xstate_same(struct xsave_buffer *xbuf1, struct xsave_buffer *xbuf2)
+{
+ int ret;
+
+ ret = memcmp(&xbuf1->bytes[xstate.xbuf_offset],
+ &xbuf2->bytes[xstate.xbuf_offset],
+ xstate.size);
+ return ret == 0;
+}
+
+static inline bool validate_xregs_same(struct xsave_buffer *xbuf1)
+{
+ struct xsave_buffer *xbuf2;
+ bool ret;
+
+ xbuf2 = alloc_xbuf();
+ if (!xbuf2)
+ ksft_exit_fail_msg("failed to allocate XSAVE buffer\n");
+
+ xsave(xbuf2, xstate.mask);
+ ret = validate_xstate_same(xbuf1, xbuf2);
+
+ free(xbuf2);
+ return ret;
+}
+
+/* Context switching test */
+
+static void *check_xstate(void *info)
+{
+ struct futex_info *finfo = (struct futex_info *)info;
+ struct xsave_buffer *xbuf;
+ int i;
+
+ xbuf = alloc_xbuf();
+ if (!xbuf)
+ ksft_exit_fail_msg("unable to allocate XSAVE buffer\n");
+
+ /*
+ * Load random data into 'xbuf' and then restore it to the xstate
+ * registers.
+ */
+ load_rand_xstate(&xstate, xbuf);
+ finfo->valid = true;
+
+ for (i = 0; i < finfo->iterations; i++) {
+ pthread_mutex_lock(&finfo->mutex);
+
+ /*
+ * Ensure the register values have not diverged from the
+ * record. Then reload a new random value. If it failed
+ * ever before, skip it.
+ */
+ if (finfo->valid) {
+ finfo->valid = validate_xregs_same(xbuf);
+ load_rand_xstate(&xstate, xbuf);
+ }
+
+ /*
+ * The last thread's last unlock will be for thread 0's
+ * mutex. However, thread 0 will have already exited the
+ * loop and the mutex will already be unlocked.
+ *
+ * Because this is not an ERRORCHECK mutex, that
+ * inconsistency will be silently ignored.
+ */
+ pthread_mutex_unlock(&finfo->next->mutex);
+ }
+
+ free(xbuf);
+ return finfo;
+}
+
+static void create_threads(uint32_t num_threads, uint32_t iterations, struct futex_info *finfo)
+{
+ int i;
+
+ for (i = 0; i < num_threads; i++) {
+ int next_nr;
+
+ finfo[i].nr = i;
+ finfo[i].iterations = iterations;
+
+ /*
+ * Thread 'i' will wait on this mutex to be unlocked.
+ * Lock it immediately after initialization:
+ */
+ pthread_mutex_init(&finfo[i].mutex, NULL);
+ pthread_mutex_lock(&finfo[i].mutex);
+
+ next_nr = (i + 1) % num_threads;
+ finfo[i].next = &finfo[next_nr];
+
+ if (pthread_create(&finfo[i].thread, NULL, check_xstate, &finfo[i]))
+ ksft_exit_fail_msg("pthread_create() failed\n");
+ }
+}
+
+static bool checkout_threads(uint32_t num_threads, struct futex_info *finfo)
+{
+ void *thread_retval;
+ bool valid = true;
+ int err, i;
+
+ for (i = 0; i < num_threads; i++) {
+ err = pthread_join(finfo[i].thread, &thread_retval);
+ if (err)
+ ksft_exit_fail_msg("pthread_join() failed for thread %d err: %d\n", i, err);
+
+ if (thread_retval != &finfo[i]) {
+ ksft_exit_fail_msg("unexpected thread retval for thread %d: %p\n",
+ i, thread_retval);
+ }
+
+ valid &= finfo[i].valid;
+ }
+
+ return valid;
+}
+
+static void affinitize_cpu0(void)
+{
+ cpu_set_t cpuset;
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(0, &cpuset);
+
+ if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
+ ksft_exit_fail_msg("sched_setaffinity to CPU 0 failed\n");
+}
+
+static void test_context_switch(uint32_t num_threads, uint32_t iterations)
+{
+ struct futex_info *finfo;
+
+ /* Affinitize to one CPU to force context switches */
+ affinitize_cpu0();
+
+ printf("[RUN]\t%s: check context switches, %d iterations, %d threads.\n",
+ xstate.name, iterations, num_threads);
+
+ finfo = malloc(sizeof(*finfo) * num_threads);
+ if (!finfo)
+ ksft_exit_fail_msg("unable allocate memory\n");
+
+ create_threads(num_threads, iterations, finfo);
+
+ /*
+ * This thread wakes up thread 0
+ * Thread 0 will wake up 1
+ * Thread 1 will wake up 2
+ * ...
+ * The last thread will wake up 0
+ *
+ * This will repeat for the configured
+ * number of iterations.
+ */
+ pthread_mutex_unlock(&finfo[0].mutex);
+
+ /* Wait for all the threads to finish: */
+ if (checkout_threads(num_threads, finfo))
+ printf("[OK]\tNo incorrect case was found.\n");
+ else
+ printf("[FAIL]\tFailed with context switching test.\n");
+
+ free(finfo);
+}
+
+/*
+ * Ptrace test for the ABI format as described in arch/x86/include/asm/user.h
+ */
+
+/*
+ * Make sure the ptracee has the expanded kernel buffer on the first use.
+ * Then, initialize the state before performing the state injection from
+ * the ptracer. For non-dynamic states, this is benign.
+ */
+static inline void ptracee_touch_xstate(void)
+{
+ struct xsave_buffer *xbuf;
+
+ xbuf = alloc_xbuf();
+
+ load_rand_xstate(&xstate, xbuf);
+ load_init_xstate(&xstate, xbuf);
+
+ free(xbuf);
+}
+
+/*
+ * Ptracer injects the randomized xstate data. It also reads before and
+ * after that, which will execute the kernel's state copy functions.
+ */
+static void ptracer_inject_xstate(pid_t target)
+{
+ uint32_t xbuf_size = get_xbuf_size();
+ struct xsave_buffer *xbuf1, *xbuf2;
+ struct iovec iov;
+
+ /*
+ * Allocate buffers to keep data while ptracer can write the
+ * other buffer
+ */
+ xbuf1 = alloc_xbuf();
+ xbuf2 = alloc_xbuf();
+ if (!xbuf1 || !xbuf2)
+ ksft_exit_fail_msg("unable to allocate XSAVE buffer\n");
+
+ iov.iov_base = xbuf1;
+ iov.iov_len = xbuf_size;
+
+ if (ptrace(PTRACE_GETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov))
+ ksft_exit_fail_msg("PTRACE_GETREGSET failed\n");
+
+ printf("[RUN]\t%s: inject xstate via ptrace().\n", xstate.name);
+
+ load_rand_xstate(&xstate, xbuf1);
+ copy_xstate(xbuf2, xbuf1);
+
+ if (ptrace(PTRACE_SETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov))
+ ksft_exit_fail_msg("PTRACE_SETREGSET failed\n");
+
+ if (ptrace(PTRACE_GETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov))
+ ksft_exit_fail_msg("PTRACE_GETREGSET failed\n");
+
+ if (*(uint64_t *)get_fpx_sw_bytes(xbuf1) == xgetbv(0))
+ printf("[OK]\t'xfeatures' in SW reserved area was correctly written\n");
+ else
+ printf("[FAIL]\t'xfeatures' in SW reserved area was not correctly written\n");
+
+ if (validate_xstate_same(xbuf2, xbuf1))
+ printf("[OK]\txstate was correctly updated.\n");
+ else
+ printf("[FAIL]\txstate was not correctly updated.\n");
+
+ free(xbuf1);
+ free(xbuf2);
+}
+
+static void test_ptrace(void)
+{
+ pid_t child;
+ int status;
+
+ child = fork();
+ if (child < 0) {
+ ksft_exit_fail_msg("fork() failed\n");
+ } else if (!child) {
+ if (ptrace(PTRACE_TRACEME, 0, NULL, NULL))
+ ksft_exit_fail_msg("PTRACE_TRACEME failed\n");
+
+ ptracee_touch_xstate();
+
+ raise(SIGTRAP);
+ _exit(0);
+ }
+
+ do {
+ wait(&status);
+ } while (WSTOPSIG(status) != SIGTRAP);
+
+ ptracer_inject_xstate(child);
+
+ ptrace(PTRACE_DETACH, child, NULL, NULL);
+ wait(&status);
+ if (!WIFEXITED(status) || WEXITSTATUS(status))
+ ksft_exit_fail_msg("ptracee exit error\n");
+}
+
+/*
+ * Test signal delivery for the ABI compatibility.
+ * See the ABI format: arch/x86/include/uapi/asm/sigcontext.h
+ */
+
+/*
+ * Avoid using printf() in signal handlers as it is not
+ * async-signal-safe.
+ */
+#define SIGNAL_BUF_LEN 1000
+static char signal_message_buffer[SIGNAL_BUF_LEN];
+static void sig_print(char *msg)
+{
+ int left = SIGNAL_BUF_LEN - strlen(signal_message_buffer) - 1;
+
+ strncat(signal_message_buffer, msg, left);
+}
+
+static struct xsave_buffer *stashed_xbuf;
+
+static void validate_sigfpstate(int sig, siginfo_t *si, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t *)ctx_void;
+ void *xbuf = ctx->uc_mcontext.fpregs;
+ struct _fpx_sw_bytes *sw_bytes;
+ uint32_t magic2;
+
+ /* Reset the signal message buffer: */
+ signal_message_buffer[0] = '\0';
+
+ sw_bytes = get_fpx_sw_bytes(xbuf);
+ if (sw_bytes->magic1 == FP_XSTATE_MAGIC1)
+ sig_print("[OK]\t'magic1' is valid\n");
+ else
+ sig_print("[FAIL]\t'magic1' is not valid\n");
+
+ if (get_fpx_sw_bytes_features(xbuf) & xstate.mask)
+ sig_print("[OK]\t'xfeatures' in SW reserved area is valid\n");
+ else
+ sig_print("[FAIL]\t'xfeatures' in SW reserved area is not valid\n");
+
+ if (get_xstatebv(xbuf) & xstate.mask)
+ sig_print("[OK]\t'xfeatures' in XSAVE header is valid\n");
+ else
+ sig_print("[FAIL]\t'xfeatures' in XSAVE header is not valid\n");
+
+ if (validate_xstate_same(stashed_xbuf, xbuf))
+ sig_print("[OK]\txstate delivery was successful\n");
+ else
+ sig_print("[FAIL]\txstate delivery was not successful\n");
+
+ magic2 = *(uint32_t *)(xbuf + sw_bytes->xstate_size);
+ if (magic2 == FP_XSTATE_MAGIC2)
+ sig_print("[OK]\t'magic2' is valid\n");
+ else
+ sig_print("[FAIL]\t'magic2' is not valid\n");
+
+ set_rand_data(&xstate, xbuf);
+ copy_xstate(stashed_xbuf, xbuf);
+}
+
+static void test_signal(void)
+{
+ bool valid_xstate;
+
+ /*
+ * The signal handler will access this to verify xstate context
+ * preservation.
+ */
+ stashed_xbuf = alloc_xbuf();
+ if (!stashed_xbuf)
+ ksft_exit_fail_msg("unable to allocate XSAVE buffer\n");
+
+ printf("[RUN]\t%s: load xstate and raise SIGUSR1\n", xstate.name);
+
+ sethandler(SIGUSR1, validate_sigfpstate, 0);
+
+ load_rand_xstate(&xstate, stashed_xbuf);
+
+ raise(SIGUSR1);
+
+ /*
+ * Immediately record the test result, deferring printf() to
+ * prevent unintended state contamination by that.
+ */
+ valid_xstate = validate_xregs_same(stashed_xbuf);
+ printf("%s", signal_message_buffer);
+
+ printf("[RUN]\t%s: load new xstate from sighandler and check it after sigreturn\n",
+ xstate.name);
+
+ if (valid_xstate)
+ printf("[OK]\txstate was restored correctly\n");
+ else
+ printf("[FAIL]\txstate restoration failed\n");
+
+ clearhandler(SIGUSR1);
+ free(stashed_xbuf);
+}
+
+void test_xstate(uint32_t feature_num)
+{
+ const unsigned int ctxtsw_num_threads = 5, ctxtsw_iterations = 10;
+ unsigned long features;
+ long rc;
+
+ if (!(XFEATURE_MASK_TEST_SUPPORTED & (1 << feature_num))) {
+ ksft_print_msg("The xstate test does not fully support the component %u, yet.\n",
+ feature_num);
+ return;
+ }
+
+ rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_SUPP, &features);
+ if (rc || !(features & (1 << feature_num))) {
+ ksft_print_msg("The kernel does not support feature number: %u\n", feature_num);
+ return;
+ }
+
+ xstate = get_xstate_info(feature_num);
+ if (!xstate.size || !xstate.xbuf_offset) {
+ ksft_exit_fail_msg("invalid state size/offset (%d/%d)\n",
+ xstate.size, xstate.xbuf_offset);
+ }
+
+ test_context_switch(ctxtsw_num_threads, ctxtsw_iterations);
+ test_ptrace();
+ test_signal();
+}
diff --git a/tools/testing/selftests/x86/xstate.h b/tools/testing/selftests/x86/xstate.h
new file mode 100644
index 000000000000..42af36ec852f
--- /dev/null
+++ b/tools/testing/selftests/x86/xstate.h
@@ -0,0 +1,195 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#ifndef __SELFTESTS_X86_XSTATE_H
+#define __SELFTESTS_X86_XSTATE_H
+
+#include <stdint.h>
+
+#include "../kselftest.h"
+
+#define XSAVE_HDR_OFFSET 512
+#define XSAVE_HDR_SIZE 64
+
+/*
+ * List of XSAVE features Linux knows about. Copied from
+ * arch/x86/include/asm/fpu/types.h
+ */
+enum xfeature {
+ XFEATURE_FP,
+ XFEATURE_SSE,
+ XFEATURE_YMM,
+ XFEATURE_BNDREGS,
+ XFEATURE_BNDCSR,
+ XFEATURE_OPMASK,
+ XFEATURE_ZMM_Hi256,
+ XFEATURE_Hi16_ZMM,
+ XFEATURE_PT_UNIMPLEMENTED_SO_FAR,
+ XFEATURE_PKRU,
+ XFEATURE_PASID,
+ XFEATURE_CET_USER,
+ XFEATURE_CET_KERNEL_UNUSED,
+ XFEATURE_RSRVD_COMP_13,
+ XFEATURE_RSRVD_COMP_14,
+ XFEATURE_LBR,
+ XFEATURE_RSRVD_COMP_16,
+ XFEATURE_XTILECFG,
+ XFEATURE_XTILEDATA,
+
+ XFEATURE_MAX,
+};
+
+/* Copied from arch/x86/kernel/fpu/xstate.c */
+static const char *xfeature_names[] =
+{
+ "x87 floating point registers",
+ "SSE registers",
+ "AVX registers",
+ "MPX bounds registers",
+ "MPX CSR",
+ "AVX-512 opmask",
+ "AVX-512 Hi256",
+ "AVX-512 ZMM_Hi256",
+ "Processor Trace (unused)",
+ "Protection Keys User registers",
+ "PASID state",
+ "Control-flow User registers",
+ "Control-flow Kernel registers (unused)",
+ "unknown xstate feature",
+ "unknown xstate feature",
+ "unknown xstate feature",
+ "unknown xstate feature",
+ "AMX Tile config",
+ "AMX Tile data",
+ "unknown xstate feature",
+};
+
+struct xsave_buffer {
+ union {
+ struct {
+ char legacy[XSAVE_HDR_OFFSET];
+ char header[XSAVE_HDR_SIZE];
+ char extended[0];
+ };
+ char bytes[0];
+ };
+};
+
+static inline void xsave(struct xsave_buffer *xbuf, uint64_t rfbm)
+{
+ uint32_t rfbm_hi = rfbm >> 32;
+ uint32_t rfbm_lo = rfbm;
+
+ asm volatile("xsave (%%rdi)"
+ : : "D" (xbuf), "a" (rfbm_lo), "d" (rfbm_hi)
+ : "memory");
+}
+
+static inline void xrstor(struct xsave_buffer *xbuf, uint64_t rfbm)
+{
+ uint32_t rfbm_hi = rfbm >> 32;
+ uint32_t rfbm_lo = rfbm;
+
+ asm volatile("xrstor (%%rdi)"
+ : : "D" (xbuf), "a" (rfbm_lo), "d" (rfbm_hi));
+}
+
+#define CPUID_LEAF_XSTATE 0xd
+#define CPUID_SUBLEAF_XSTATE_USER 0x0
+
+static inline uint32_t get_xbuf_size(void)
+{
+ uint32_t eax, ebx, ecx, edx;
+
+ __cpuid_count(CPUID_LEAF_XSTATE, CPUID_SUBLEAF_XSTATE_USER,
+ eax, ebx, ecx, edx);
+
+ /*
+ * EBX enumerates the size (in bytes) required by the XSAVE
+ * instruction for an XSAVE area containing all the user state
+ * components corresponding to bits currently set in XCR0.
+ */
+ return ebx;
+}
+
+struct xstate_info {
+ const char *name;
+ uint32_t num;
+ uint32_t mask;
+ uint32_t xbuf_offset;
+ uint32_t size;
+};
+
+static inline struct xstate_info get_xstate_info(uint32_t xfeature_num)
+{
+ struct xstate_info xstate = { };
+ uint32_t eax, ebx, ecx, edx;
+
+ if (xfeature_num >= XFEATURE_MAX) {
+ ksft_print_msg("unknown state\n");
+ return xstate;
+ }
+
+ xstate.name = xfeature_names[xfeature_num];
+ xstate.num = xfeature_num;
+ xstate.mask = 1 << xfeature_num;
+
+ __cpuid_count(CPUID_LEAF_XSTATE, xfeature_num,
+ eax, ebx, ecx, edx);
+ xstate.size = eax;
+ xstate.xbuf_offset = ebx;
+ return xstate;
+}
+
+static inline struct xsave_buffer *alloc_xbuf(void)
+{
+ uint32_t xbuf_size = get_xbuf_size();
+
+ /* XSAVE buffer should be 64B-aligned. */
+ return aligned_alloc(64, xbuf_size);
+}
+
+static inline void clear_xstate_header(struct xsave_buffer *xbuf)
+{
+ memset(&xbuf->header, 0, sizeof(xbuf->header));
+}
+
+static inline void set_xstatebv(struct xsave_buffer *xbuf, uint64_t bv)
+{
+ /* XSTATE_BV is at the beginning of the header: */
+ *(uint64_t *)(&xbuf->header) = bv;
+}
+
+/* See 'struct _fpx_sw_bytes' at sigcontext.h */
+#define SW_BYTES_OFFSET 464
+/* N.B. The struct's field name varies so read from the offset. */
+#define SW_BYTES_BV_OFFSET (SW_BYTES_OFFSET + 8)
+
+static inline struct _fpx_sw_bytes *get_fpx_sw_bytes(void *xbuf)
+{
+ return xbuf + SW_BYTES_OFFSET;
+}
+
+static inline uint64_t get_fpx_sw_bytes_features(void *buffer)
+{
+ return *(uint64_t *)(buffer + SW_BYTES_BV_OFFSET);
+}
+
+static inline void set_rand_data(struct xstate_info *xstate, struct xsave_buffer *xbuf)
+{
+ int *ptr = (int *)&xbuf->bytes[xstate->xbuf_offset];
+ int data, i;
+
+ /*
+ * Ensure that 'data' is never 0. This ensures that
+ * the registers are never in their initial configuration
+ * and thus never tracked as being in the init state.
+ */
+ data = rand() | 1;
+
+ for (i = 0; i < xstate->size / sizeof(int); i++, ptr++)
+ *ptr = data;
+}
+
+/* Testing kernel's context switching and ABI support for the xstate. */
+void test_xstate(uint32_t feature_num);
+
+#endif /* __SELFTESTS_X86_XSTATE_H */
diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/vma.c
index 9bcf1736bf18..04ab45e27fb8 100644
--- a/tools/testing/vma/vma.c
+++ b/tools/testing/vma/vma.c
@@ -18,6 +18,12 @@ static bool fail_prealloc;
#define vma_iter_prealloc(vmi, vma) \
(fail_prealloc ? -ENOMEM : mas_preallocate(&(vmi)->mas, (vma), GFP_KERNEL))
+#define CONFIG_DEFAULT_MMAP_MIN_ADDR 65536
+
+unsigned long mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR;
+unsigned long dac_mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR;
+unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
+
/*
* Directly import the VMA implementation here. Our vma_internal.h wrapper
* provides userland-equivalent functionality for everything vma.c uses.
@@ -47,6 +53,11 @@ struct task_struct *get_current(void)
return &__current;
}
+unsigned long rlimit(unsigned int limit)
+{
+ return (unsigned long)-1;
+}
+
/* Helper function to simply allocate a VMA. */
static struct vm_area_struct *alloc_vma(struct mm_struct *mm,
unsigned long start,
@@ -1563,6 +1574,57 @@ static bool test_expand_only_mode(void)
return true;
}
+static bool test_mmap_region_basic(void)
+{
+ struct mm_struct mm = {};
+ unsigned long addr;
+ struct vm_area_struct *vma;
+ VMA_ITERATOR(vmi, &mm, 0);
+
+ current->mm = &mm;
+
+ /* Map at 0x300000, length 0x3000. */
+ addr = __mmap_region(NULL, 0x300000, 0x3000,
+ VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE,
+ 0x300, NULL);
+ ASSERT_EQ(addr, 0x300000);
+
+ /* Map at 0x250000, length 0x3000. */
+ addr = __mmap_region(NULL, 0x250000, 0x3000,
+ VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE,
+ 0x250, NULL);
+ ASSERT_EQ(addr, 0x250000);
+
+ /* Map at 0x303000, merging to 0x300000 of length 0x6000. */
+ addr = __mmap_region(NULL, 0x303000, 0x3000,
+ VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE,
+ 0x303, NULL);
+ ASSERT_EQ(addr, 0x303000);
+
+ /* Map at 0x24d000, merging to 0x250000 of length 0x6000. */
+ addr = __mmap_region(NULL, 0x24d000, 0x3000,
+ VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE,
+ 0x24d, NULL);
+ ASSERT_EQ(addr, 0x24d000);
+
+ ASSERT_EQ(mm.map_count, 2);
+
+ for_each_vma(vmi, vma) {
+ if (vma->vm_start == 0x300000) {
+ ASSERT_EQ(vma->vm_end, 0x306000);
+ ASSERT_EQ(vma->vm_pgoff, 0x300);
+ } else if (vma->vm_start == 0x24d000) {
+ ASSERT_EQ(vma->vm_end, 0x253000);
+ ASSERT_EQ(vma->vm_pgoff, 0x24d);
+ } else {
+ ASSERT_FALSE(true);
+ }
+ }
+
+ cleanup_mm(&mm, &vmi);
+ return true;
+}
+
int main(void)
{
int num_tests = 0, num_fail = 0;
@@ -1596,6 +1658,8 @@ int main(void)
TEST(copy_vma);
TEST(expand_only_mode);
+ TEST(mmap_region_basic);
+
#undef TEST
printf("%d tests run, %d passed, %d failed.\n",
diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
index 1d9fc97b8e80..1eae23039854 100644
--- a/tools/testing/vma/vma_internal.h
+++ b/tools/testing/vma/vma_internal.h
@@ -27,11 +27,23 @@
#include <linux/rbtree.h>
#include <linux/rwsem.h>
+extern unsigned long stack_guard_gap;
+#ifdef CONFIG_MMU
+extern unsigned long mmap_min_addr;
+extern unsigned long dac_mmap_min_addr;
+#else
+#define mmap_min_addr 0UL
+#define dac_mmap_min_addr 0UL
+#endif
+
#define VM_WARN_ON(_expr) (WARN_ON(_expr))
#define VM_WARN_ON_ONCE(_expr) (WARN_ON_ONCE(_expr))
+#define VM_WARN_ON_VMG(_expr, _vmg) (WARN_ON(_expr))
#define VM_BUG_ON(_expr) (BUG_ON(_expr))
#define VM_BUG_ON_VMA(_expr, _vma) (BUG_ON(_expr))
+#define MMF_HAS_MDWE 28
+
#define VM_NONE 0x00000000
#define VM_READ 0x00000001
#define VM_WRITE 0x00000002
@@ -39,6 +51,7 @@
#define VM_SHARED 0x00000008
#define VM_MAYREAD 0x00000010
#define VM_MAYWRITE 0x00000020
+#define VM_MAYEXEC 0x00000040
#define VM_GROWSDOWN 0x00000100
#define VM_PFNMAP 0x00000400
#define VM_LOCKED 0x00002000
@@ -51,6 +64,8 @@
#define VM_STACK VM_GROWSDOWN
#define VM_SHADOW_STACK VM_NONE
#define VM_SOFTDIRTY 0
+#define VM_ARCH_1 0x01000000 /* Architecture-specific flag */
+#define VM_GROWSUP VM_NONE
#define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC)
#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP)
@@ -58,6 +73,20 @@
/* This mask represents all the VMA flag bits used by mlock */
#define VM_LOCKED_MASK (VM_LOCKED | VM_LOCKONFAULT)
+#define TASK_EXEC ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0)
+
+#define VM_DATA_FLAGS_TSK_EXEC (VM_READ | VM_WRITE | TASK_EXEC | \
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC
+
+#define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK)
+
+#define RLIMIT_STACK 3 /* max stack size */
+#define RLIMIT_MEMLOCK 8 /* max locked-in-memory address space */
+
+#define CAP_IPC_LOCK 14
+
#ifdef CONFIG_64BIT
/* VM is sealed, in vm_flags */
#define VM_SEALED _BITUL(63)
@@ -122,10 +151,22 @@ enum {
TASK_COMM_LEN = 16,
};
+/*
+ * Flags for bug emulation.
+ *
+ * These occupy the top three bytes.
+ */
+enum {
+ READ_IMPLIES_EXEC = 0x0400000,
+};
+
struct task_struct {
char comm[TASK_COMM_LEN];
pid_t pid;
struct mm_struct *mm;
+
+ /* Used for emulating ABI behavior of previous Linux versions: */
+ unsigned int personality;
};
struct task_struct *get_current(void);
@@ -186,6 +227,10 @@ struct mm_struct {
unsigned long data_vm; /* VM_WRITE & ~VM_SHARED & ~VM_STACK */
unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE & ~VM_STACK */
unsigned long stack_vm; /* VM_STACK */
+
+ unsigned long def_flags;
+
+ unsigned long flags; /* Must use atomic bitops to access */
};
struct vma_lock {
@@ -373,6 +418,17 @@ struct vm_operations_struct {
unsigned long addr);
};
+struct vm_unmapped_area_info {
+#define VM_UNMAPPED_AREA_TOPDOWN 1
+ unsigned long flags;
+ unsigned long length;
+ unsigned long low_limit;
+ unsigned long high_limit;
+ unsigned long align_mask;
+ unsigned long align_offset;
+ unsigned long start_gap;
+};
+
static inline void vma_iter_invalidate(struct vma_iterator *vmi)
{
mas_pause(&vmi->mas);
@@ -432,6 +488,8 @@ static inline void vma_mark_detached(struct vm_area_struct *vma, bool detached)
extern const struct vm_operations_struct vma_dummy_vm_ops;
+extern unsigned long rlimit(unsigned int limit);
+
static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
{
memset(vma, 0, sizeof(*vma));
@@ -853,6 +911,11 @@ static inline void mmap_write_unlock(struct mm_struct *)
{
}
+static inline int mmap_write_lock_killable(struct mm_struct *)
+{
+ return 0;
+}
+
static inline bool can_modify_mm(struct mm_struct *mm,
unsigned long start,
unsigned long end)
@@ -938,7 +1001,7 @@ static inline bool is_file_hugepages(struct file *)
static inline int security_vm_enough_memory_mm(struct mm_struct *, long)
{
- return true;
+ return 0;
}
static inline bool may_expand_vm(struct mm_struct *, vm_flags_t, unsigned long)
@@ -1033,4 +1096,159 @@ static inline int mmap_file(struct file *, struct vm_area_struct *)
return 0;
}
+static inline unsigned long stack_guard_start_gap(struct vm_area_struct *vma)
+{
+ if (vma->vm_flags & VM_GROWSDOWN)
+ return stack_guard_gap;
+
+ /* See reasoning around the VM_SHADOW_STACK definition */
+ if (vma->vm_flags & VM_SHADOW_STACK)
+ return PAGE_SIZE;
+
+ return 0;
+}
+
+static inline unsigned long vm_start_gap(struct vm_area_struct *vma)
+{
+ unsigned long gap = stack_guard_start_gap(vma);
+ unsigned long vm_start = vma->vm_start;
+
+ vm_start -= gap;
+ if (vm_start > vma->vm_start)
+ vm_start = 0;
+ return vm_start;
+}
+
+static inline unsigned long vm_end_gap(struct vm_area_struct *vma)
+{
+ unsigned long vm_end = vma->vm_end;
+
+ if (vma->vm_flags & VM_GROWSUP) {
+ vm_end += stack_guard_gap;
+ if (vm_end < vma->vm_end)
+ vm_end = -PAGE_SIZE;
+ }
+ return vm_end;
+}
+
+static inline int is_hugepage_only_range(struct mm_struct *mm,
+ unsigned long addr, unsigned long len)
+{
+ return 0;
+}
+
+static inline bool vma_is_accessible(struct vm_area_struct *vma)
+{
+ return vma->vm_flags & VM_ACCESS_FLAGS;
+}
+
+static inline bool capable(int cap)
+{
+ return true;
+}
+
+static inline bool mlock_future_ok(struct mm_struct *mm, unsigned long flags,
+ unsigned long bytes)
+{
+ unsigned long locked_pages, limit_pages;
+
+ if (!(flags & VM_LOCKED) || capable(CAP_IPC_LOCK))
+ return true;
+
+ locked_pages = bytes >> PAGE_SHIFT;
+ locked_pages += mm->locked_vm;
+
+ limit_pages = rlimit(RLIMIT_MEMLOCK);
+ limit_pages >>= PAGE_SHIFT;
+
+ return locked_pages <= limit_pages;
+}
+
+static inline int __anon_vma_prepare(struct vm_area_struct *vma)
+{
+ struct anon_vma *anon_vma = calloc(1, sizeof(struct anon_vma));
+
+ if (!anon_vma)
+ return -ENOMEM;
+
+ anon_vma->root = anon_vma;
+ vma->anon_vma = anon_vma;
+
+ return 0;
+}
+
+static inline int anon_vma_prepare(struct vm_area_struct *vma)
+{
+ if (likely(vma->anon_vma))
+ return 0;
+
+ return __anon_vma_prepare(vma);
+}
+
+static inline void userfaultfd_unmap_complete(struct mm_struct *mm,
+ struct list_head *uf)
+{
+}
+
+/*
+ * Denies creating a writable executable mapping or gaining executable permissions.
+ *
+ * This denies the following:
+ *
+ * a) mmap(PROT_WRITE | PROT_EXEC)
+ *
+ * b) mmap(PROT_WRITE)
+ * mprotect(PROT_EXEC)
+ *
+ * c) mmap(PROT_WRITE)
+ * mprotect(PROT_READ)
+ * mprotect(PROT_EXEC)
+ *
+ * But allows the following:
+ *
+ * d) mmap(PROT_READ | PROT_EXEC)
+ * mmap(PROT_READ | PROT_EXEC | PROT_BTI)
+ *
+ * This is only applicable if the user has set the Memory-Deny-Write-Execute
+ * (MDWE) protection mask for the current process.
+ *
+ * @old specifies the VMA flags the VMA originally possessed, and @new the ones
+ * we propose to set.
+ *
+ * Return: false if proposed change is OK, true if not ok and should be denied.
+ */
+static inline bool map_deny_write_exec(unsigned long old, unsigned long new)
+{
+ /* If MDWE is disabled, we have nothing to deny. */
+ if (!test_bit(MMF_HAS_MDWE, &current->mm->flags))
+ return false;
+
+ /* If the new VMA is not executable, we have nothing to deny. */
+ if (!(new & VM_EXEC))
+ return false;
+
+ /* Under MDWE we do not accept newly writably executable VMAs... */
+ if (new & VM_WRITE)
+ return true;
+
+ /* ...nor previously non-executable VMAs becoming executable. */
+ if (!(old & VM_EXEC))
+ return true;
+
+ return false;
+}
+
+static inline int mapping_map_writable(struct address_space *mapping)
+{
+ int c = atomic_read(&mapping->i_mmap_writable);
+
+ /* Derived from the raw_atomic_inc_unless_negative() implementation. */
+ do {
+ if (c < 0)
+ return -EPERM;
+ } while (!__sync_bool_compare_and_swap(&mapping->i_mmap_writable, c, c+1));
+
+ return 0;
+}
+
#endif /* __MM_VMA_INTERNAL_H */
diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c
index 7058dc614c25..de25892f865f 100644
--- a/tools/testing/vsock/util.c
+++ b/tools/testing/vsock/util.c
@@ -96,41 +96,57 @@ void vsock_wait_remote_close(int fd)
close(epollfd);
}
-/* Bind to <bind_port>, connect to <cid, port> and return the file descriptor. */
-int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_port, int type)
+/* Create socket <type>, bind to <cid, port> and return the file descriptor. */
+int vsock_bind(unsigned int cid, unsigned int port, int type)
{
- struct sockaddr_vm sa_client = {
- .svm_family = AF_VSOCK,
- .svm_cid = VMADDR_CID_ANY,
- .svm_port = bind_port,
- };
- struct sockaddr_vm sa_server = {
+ struct sockaddr_vm sa = {
.svm_family = AF_VSOCK,
.svm_cid = cid,
.svm_port = port,
};
+ int fd;
- int client_fd, ret;
-
- client_fd = socket(AF_VSOCK, type, 0);
- if (client_fd < 0) {
+ fd = socket(AF_VSOCK, type, 0);
+ if (fd < 0) {
perror("socket");
exit(EXIT_FAILURE);
}
- if (bind(client_fd, (struct sockaddr *)&sa_client, sizeof(sa_client))) {
+ if (bind(fd, (struct sockaddr *)&sa, sizeof(sa))) {
perror("bind");
exit(EXIT_FAILURE);
}
+ return fd;
+}
+
+int vsock_connect_fd(int fd, unsigned int cid, unsigned int port)
+{
+ struct sockaddr_vm sa = {
+ .svm_family = AF_VSOCK,
+ .svm_cid = cid,
+ .svm_port = port,
+ };
+ int ret;
+
timeout_begin(TIMEOUT);
do {
- ret = connect(client_fd, (struct sockaddr *)&sa_server, sizeof(sa_server));
+ ret = connect(fd, (struct sockaddr *)&sa, sizeof(sa));
timeout_check("connect");
} while (ret < 0 && errno == EINTR);
timeout_end();
- if (ret < 0) {
+ return ret;
+}
+
+/* Bind to <bind_port>, connect to <cid, port> and return the file descriptor. */
+int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_port, int type)
+{
+ int client_fd;
+
+ client_fd = vsock_bind(VMADDR_CID_ANY, bind_port, type);
+
+ if (vsock_connect_fd(client_fd, cid, port)) {
perror("connect");
exit(EXIT_FAILURE);
}
@@ -141,17 +157,6 @@ int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_po
/* Connect to <cid, port> and return the file descriptor. */
int vsock_connect(unsigned int cid, unsigned int port, int type)
{
- union {
- struct sockaddr sa;
- struct sockaddr_vm svm;
- } addr = {
- .svm = {
- .svm_family = AF_VSOCK,
- .svm_port = port,
- .svm_cid = cid,
- },
- };
- int ret;
int fd;
control_expectln("LISTENING");
@@ -162,20 +167,14 @@ int vsock_connect(unsigned int cid, unsigned int port, int type)
exit(EXIT_FAILURE);
}
- timeout_begin(TIMEOUT);
- do {
- ret = connect(fd, &addr.sa, sizeof(addr.svm));
- timeout_check("connect");
- } while (ret < 0 && errno == EINTR);
- timeout_end();
-
- if (ret < 0) {
+ if (vsock_connect_fd(fd, cid, port)) {
int old_errno = errno;
close(fd);
fd = -1;
errno = old_errno;
}
+
return fd;
}
@@ -192,28 +191,9 @@ int vsock_seqpacket_connect(unsigned int cid, unsigned int port)
/* Listen on <cid, port> and return the file descriptor. */
static int vsock_listen(unsigned int cid, unsigned int port, int type)
{
- union {
- struct sockaddr sa;
- struct sockaddr_vm svm;
- } addr = {
- .svm = {
- .svm_family = AF_VSOCK,
- .svm_port = port,
- .svm_cid = cid,
- },
- };
int fd;
- fd = socket(AF_VSOCK, type, 0);
- if (fd < 0) {
- perror("socket");
- exit(EXIT_FAILURE);
- }
-
- if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) {
- perror("bind");
- exit(EXIT_FAILURE);
- }
+ fd = vsock_bind(cid, port, type);
if (listen(fd, 1) < 0) {
perror("listen");
diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h
index e62f46b2b92a..d1f765ce3eee 100644
--- a/tools/testing/vsock/util.h
+++ b/tools/testing/vsock/util.h
@@ -39,10 +39,12 @@ struct test_case {
void init_signals(void);
unsigned int parse_cid(const char *str);
unsigned int parse_port(const char *str);
+int vsock_connect_fd(int fd, unsigned int cid, unsigned int port);
int vsock_connect(unsigned int cid, unsigned int port, int type);
int vsock_accept(unsigned int cid, unsigned int port,
struct sockaddr_vm *clientaddrp, int type);
int vsock_stream_connect(unsigned int cid, unsigned int port);
+int vsock_bind(unsigned int cid, unsigned int port, int type);
int vsock_bind_connect(unsigned int cid, unsigned int port,
unsigned int bind_port, int type);
int vsock_seqpacket_connect(unsigned int cid, unsigned int port);
diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index 1eebbc0d5f61..d0f6d253ac72 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -113,24 +113,9 @@ static void test_stream_bind_only_client(const struct test_opts *opts)
static void test_stream_bind_only_server(const struct test_opts *opts)
{
- union {
- struct sockaddr sa;
- struct sockaddr_vm svm;
- } addr = {
- .svm = {
- .svm_family = AF_VSOCK,
- .svm_port = opts->peer_port,
- .svm_cid = VMADDR_CID_ANY,
- },
- };
int fd;
- fd = socket(AF_VSOCK, SOCK_STREAM, 0);
-
- if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) {
- perror("bind");
- exit(EXIT_FAILURE);
- }
+ fd = vsock_bind(VMADDR_CID_ANY, opts->peer_port, SOCK_STREAM);
/* Notify the client that the server is ready */
control_writeln("BIND");
@@ -1708,6 +1693,137 @@ static void test_stream_msgzcopy_leak_zcskb_server(const struct test_opts *opts)
close(fd);
}
+#define MAX_PORT_RETRIES 24 /* net/vmw_vsock/af_vsock.c */
+
+/* Test attempts to trigger a transport release for an unbound socket. This can
+ * lead to a reference count mishandling.
+ */
+static void test_stream_transport_uaf_client(const struct test_opts *opts)
+{
+ int sockets[MAX_PORT_RETRIES];
+ struct sockaddr_vm addr;
+ int fd, i, alen;
+
+ fd = vsock_bind(VMADDR_CID_ANY, VMADDR_PORT_ANY, SOCK_STREAM);
+
+ alen = sizeof(addr);
+ if (getsockname(fd, (struct sockaddr *)&addr, &alen)) {
+ perror("getsockname");
+ exit(EXIT_FAILURE);
+ }
+
+ for (i = 0; i < MAX_PORT_RETRIES; ++i)
+ sockets[i] = vsock_bind(VMADDR_CID_ANY, ++addr.svm_port,
+ SOCK_STREAM);
+
+ close(fd);
+ fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+ if (fd < 0) {
+ perror("socket");
+ exit(EXIT_FAILURE);
+ }
+
+ if (!vsock_connect_fd(fd, addr.svm_cid, addr.svm_port)) {
+ perror("Unexpected connect() #1 success");
+ exit(EXIT_FAILURE);
+ }
+
+ /* Vulnerable system may crash now. */
+ if (!vsock_connect_fd(fd, VMADDR_CID_HOST, VMADDR_PORT_ANY)) {
+ perror("Unexpected connect() #2 success");
+ exit(EXIT_FAILURE);
+ }
+
+ close(fd);
+ while (i--)
+ close(sockets[i]);
+
+ control_writeln("DONE");
+}
+
+static void test_stream_transport_uaf_server(const struct test_opts *opts)
+{
+ control_expectln("DONE");
+}
+
+static void test_stream_connect_retry_client(const struct test_opts *opts)
+{
+ int fd;
+
+ fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+ if (fd < 0) {
+ perror("socket");
+ exit(EXIT_FAILURE);
+ }
+
+ if (!vsock_connect_fd(fd, opts->peer_cid, opts->peer_port)) {
+ fprintf(stderr, "Unexpected connect() #1 success\n");
+ exit(EXIT_FAILURE);
+ }
+
+ control_writeln("LISTEN");
+ control_expectln("LISTENING");
+
+ if (vsock_connect_fd(fd, opts->peer_cid, opts->peer_port)) {
+ perror("connect() #2");
+ exit(EXIT_FAILURE);
+ }
+
+ close(fd);
+}
+
+static void test_stream_connect_retry_server(const struct test_opts *opts)
+{
+ int fd;
+
+ control_expectln("LISTEN");
+
+ fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
+ if (fd < 0) {
+ perror("accept");
+ exit(EXIT_FAILURE);
+ }
+
+ vsock_wait_remote_close(fd);
+ close(fd);
+}
+
+static void test_stream_linger_client(const struct test_opts *opts)
+{
+ struct linger optval = {
+ .l_onoff = 1,
+ .l_linger = 1
+ };
+ int fd;
+
+ fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
+ if (fd < 0) {
+ perror("connect");
+ exit(EXIT_FAILURE);
+ }
+
+ if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &optval, sizeof(optval))) {
+ perror("setsockopt(SO_LINGER)");
+ exit(EXIT_FAILURE);
+ }
+
+ close(fd);
+}
+
+static void test_stream_linger_server(const struct test_opts *opts)
+{
+ int fd;
+
+ fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
+ if (fd < 0) {
+ perror("accept");
+ exit(EXIT_FAILURE);
+ }
+
+ vsock_wait_remote_close(fd);
+ close(fd);
+}
+
static struct test_case test_cases[] = {
{
.name = "SOCK_STREAM connection reset",
@@ -1853,6 +1969,21 @@ static struct test_case test_cases[] = {
.run_client = test_stream_msgzcopy_leak_zcskb_client,
.run_server = test_stream_msgzcopy_leak_zcskb_server,
},
+ {
+ .name = "SOCK_STREAM transport release use-after-free",
+ .run_client = test_stream_transport_uaf_client,
+ .run_server = test_stream_transport_uaf_server,
+ },
+ {
+ .name = "SOCK_STREAM retry failed connect()",
+ .run_client = test_stream_connect_retry_client,
+ .run_server = test_stream_connect_retry_server,
+ },
+ {
+ .name = "SOCK_STREAM SO_LINGER null-ptr-deref",
+ .run_client = test_stream_linger_client,
+ .run_server = test_stream_linger_server,
+ },
{},
};
diff --git a/tools/thermal/lib/Makefile b/tools/thermal/lib/Makefile
index f2552f73a64c..056d212f25cf 100644
--- a/tools/thermal/lib/Makefile
+++ b/tools/thermal/lib/Makefile
@@ -39,19 +39,6 @@ libdir = $(prefix)/$(libdir_relative)
libdir_SQ = $(subst ','\'',$(libdir))
libdir_relative_SQ = $(subst ','\'',$(libdir_relative))
-ifeq ("$(origin V)", "command line")
- VERBOSE = $(V)
-endif
-ifndef VERBOSE
- VERBOSE = 0
-endif
-
-ifeq ($(VERBOSE),1)
- Q =
-else
- Q = @
-endif
-
# Set compile option CFLAGS
ifdef EXTRA_CFLAGS
CFLAGS := $(EXTRA_CFLAGS)
diff --git a/tools/tracing/latency/Makefile b/tools/tracing/latency/Makefile
index 6518b03e05c7..257a56b1899f 100644
--- a/tools/tracing/latency/Makefile
+++ b/tools/tracing/latency/Makefile
@@ -37,12 +37,6 @@ FEATURE_TESTS += libtracefs
FEATURE_DISPLAY := libtraceevent
FEATURE_DISPLAY += libtracefs
-ifeq ($(V),1)
- Q =
-else
- Q = @
-endif
-
all: $(LATENCY-COLLECTOR)
include $(srctree)/tools/build/Makefile.include
diff --git a/tools/tracing/rtla/.gitignore b/tools/tracing/rtla/.gitignore
index 293f0dbb0ca2..1a394ad26cc1 100644
--- a/tools/tracing/rtla/.gitignore
+++ b/tools/tracing/rtla/.gitignore
@@ -4,3 +4,4 @@ rtla-static
fixdep
feature
FEATURE-DUMP
+*.skel.h
diff --git a/tools/tracing/rtla/Makefile b/tools/tracing/rtla/Makefile
index a6a7dee16622..746ccf2f5808 100644
--- a/tools/tracing/rtla/Makefile
+++ b/tools/tracing/rtla/Makefile
@@ -33,15 +33,15 @@ DOCSRC := ../../../Documentation/tools/rtla/
FEATURE_TESTS := libtraceevent
FEATURE_TESTS += libtracefs
FEATURE_TESTS += libcpupower
+FEATURE_TESTS += libbpf
+FEATURE_TESTS += clang-bpf-co-re
+FEATURE_TESTS += bpftool-skeletons
FEATURE_DISPLAY := libtraceevent
FEATURE_DISPLAY += libtracefs
FEATURE_DISPLAY += libcpupower
-
-ifeq ($(V),1)
- Q =
-else
- Q = @
-endif
+FEATURE_DISPLAY += libbpf
+FEATURE_DISPLAY += clang-bpf-co-re
+FEATURE_DISPLAY += bpftool-skeletons
all: $(RTLA)
@@ -67,6 +67,17 @@ CFLAGS += $(INCLUDES) $(LIB_INCLUDES)
export CFLAGS OUTPUT srctree
+ifeq ($(BUILD_BPF_SKEL),1)
+src/timerlat.bpf.o: src/timerlat.bpf.c
+ $(QUIET_CLANG)$(CLANG) -g -O2 -target bpf -c $(filter %.c,$^) -o $@
+
+src/timerlat.skel.h: src/timerlat.bpf.o
+ $(QUIET_GENSKEL)$(SYSTEM_BPFTOOL) gen skeleton $< > $@
+else
+src/timerlat.skel.h:
+ $(Q)echo '/* BPF skeleton is disabled */' > src/timerlat.skel.h
+endif
+
$(RTLA): $(RTLA_IN)
$(QUIET_LINK)$(CC) $(LDFLAGS) -o $(RTLA) $(RTLA_IN) $(EXTLIBS)
@@ -77,7 +88,7 @@ static: $(RTLA_IN)
rtla.%: fixdep FORCE
make -f $(srctree)/tools/build/Makefile.build dir=. $@
-$(RTLA_IN): fixdep FORCE
+$(RTLA_IN): fixdep FORCE src/timerlat.skel.h
make $(build)=rtla
clean: doc_clean fixdep-clean
@@ -85,4 +96,7 @@ clean: doc_clean fixdep-clean
$(Q)find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
$(Q)rm -f rtla rtla-static fixdep FEATURE-DUMP rtla-*
$(Q)rm -rf feature
-.PHONY: FORCE clean
+ $(Q)rm -f src/timerlat.bpf.o src/timerlat.skel.h
+check: $(RTLA)
+ RTLA=$(RTLA) prove -o -f tests/
+.PHONY: FORCE clean check
diff --git a/tools/tracing/rtla/Makefile.config b/tools/tracing/rtla/Makefile.config
index 92a6e12e42d3..5f2231d8d626 100644
--- a/tools/tracing/rtla/Makefile.config
+++ b/tools/tracing/rtla/Makefile.config
@@ -53,6 +53,48 @@ else
$(info Please install libcpupower-dev/kernel-tools-libs-devel)
endif
+ifndef BUILD_BPF_SKEL
+ # BPF skeletons are used to implement improved sample collection, enable
+ # them by default.
+ BUILD_BPF_SKEL := 1
+endif
+
+ifeq ($(BUILD_BPF_SKEL),0)
+ $(info BPF skeleton support disabled, building without BPF skeleton support.)
+endif
+
+$(call feature_check,libbpf)
+ifeq ($(feature-libbpf), 1)
+ $(call detected,CONFIG_LIBBPF)
+else
+ $(info libbpf is missing, building without BPF skeleton support.)
+ $(info Please install libbpf-dev/libbpf-devel)
+ BUILD_BPF_SKEL := 0
+endif
+
+$(call feature_check,clang-bpf-co-re)
+ifeq ($(feature-clang-bpf-co-re), 1)
+ $(call detected,CONFIG_CLANG_BPF_CO_RE)
+else
+ $(info clang is missing or does not support BPF CO-RE, building without BPF skeleton support.)
+ $(info Please install clang)
+ BUILD_BPF_SKEL := 0
+endif
+
+$(call feature_check,bpftool-skeletons)
+ifeq ($(feature-bpftool-skeletons), 1)
+ $(call detected,CONFIG_BPFTOOL_SKELETONS)
+else
+ $(info bpftool is missing or not supporting skeletons, building without BPF skeleton support.)
+ $(info Please install bpftool)
+ BUILD_BPF_SKEL := 0
+endif
+
+ifeq ($(BUILD_BPF_SKEL),1)
+ CFLAGS += -DHAVE_BPF_SKEL
+ EXTLIBS += -lbpf
+endif
+
ifeq ($(STOP_ERROR),1)
$(error Please, check the errors above.)
endif
diff --git a/tools/tracing/rtla/Makefile.rtla b/tools/tracing/rtla/Makefile.rtla
index cc1d6b615475..08c1b40883d3 100644
--- a/tools/tracing/rtla/Makefile.rtla
+++ b/tools/tracing/rtla/Makefile.rtla
@@ -34,6 +34,8 @@ INSTALL := install
MKDIR := mkdir
STRIP := strip
BINDIR := /usr/bin
+CTAGS := ctags
+ETAGS := ctags -e
.PHONY: install
install: doc_install
@@ -47,6 +49,18 @@ install: doc_install
@test ! -f $(DESTDIR)$(BINDIR)/timerlat || $(RM) $(DESTDIR)$(BINDIR)/timerlat
@$(LN) -s rtla $(DESTDIR)$(BINDIR)/timerlat
+.PHONY: tags
+tags:
+ $(CTAGS) -R --extras=+f --c-kinds=+p src
+
+.PHONY: TAGS
+TAGS:
+ $(ETAGS) -R --extras=+f --c-kinds=+p src
+
+.PHONY: tags_clean
+tags_clean:
+ $(RM) tags TAGS
+
.PHONY: doc doc_clean doc_install
doc:
$(MAKE) -C $(DOCSRC)
@@ -57,8 +71,7 @@ doc_clean:
doc_install:
$(MAKE) -C $(DOCSRC) install
-# This section is neesary for the tarball, when the tarball
-# support is removed, we can delete these entries.
+# This section is necessary to make the rtla tarball
NAME := rtla
DIRS := src
FILES := Makefile README.txt
diff --git a/tools/tracing/rtla/src/Build b/tools/tracing/rtla/src/Build
index dbed9e31829b..7bb7e39e391a 100644
--- a/tools/tracing/rtla/src/Build
+++ b/tools/tracing/rtla/src/Build
@@ -8,4 +8,5 @@ rtla-y += timerlat_top.o
rtla-y += timerlat_hist.o
rtla-y += timerlat_u.o
rtla-y += timerlat_aa.o
+rtla-y += timerlat_bpf.o
rtla-y += rtla.o
diff --git a/tools/tracing/rtla/src/osnoise.c b/tools/tracing/rtla/src/osnoise.c
index 245e9344932b..2dc3e4539e99 100644
--- a/tools/tracing/rtla/src/osnoise.c
+++ b/tools/tracing/rtla/src/osnoise.c
@@ -3,6 +3,7 @@
* Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
*/
+#define _GNU_SOURCE
#include <sys/types.h>
#include <sys/stat.h>
#include <pthread.h>
@@ -12,9 +13,12 @@
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
+#include <sched.h>
#include "osnoise.h"
-#include "utils.h"
+
+#define DEFAULT_SAMPLE_PERIOD 1000000 /* 1s */
+#define DEFAULT_SAMPLE_RUNTIME 1000000 /* 1s */
/*
* osnoise_get_cpus - return the original "osnoise/cpus" content
@@ -867,7 +871,7 @@ int osnoise_set_workload(struct osnoise_context *context, bool onoff)
retval = osnoise_options_set_option("OSNOISE_WORKLOAD", onoff);
if (retval < 0)
- return -1;
+ return -2;
context->opt_workload = onoff;
@@ -1079,6 +1083,122 @@ out_err:
return NULL;
}
+bool osnoise_trace_is_off(struct osnoise_tool *tool, struct osnoise_tool *record)
+{
+ /*
+ * The tool instance is always present, it is the one used to collect
+ * data.
+ */
+ if (!tracefs_trace_is_on(tool->trace.inst))
+ return true;
+
+ /*
+ * The trace record instance is only enabled when -t is set. IOW, when the system
+ * is tracing.
+ */
+ return record && !tracefs_trace_is_on(record->trace.inst);
+}
+
+/*
+ * osnoise_report_missed_events - report number of events dropped by trace
+ * buffer
+ */
+void
+osnoise_report_missed_events(struct osnoise_tool *tool)
+{
+ unsigned long long total_events;
+
+ if (tool->trace.missed_events == UINT64_MAX)
+ printf("unknown number of events missed, results might not be accurate\n");
+ else if (tool->trace.missed_events > 0) {
+ total_events = tool->trace.processed_events + tool->trace.missed_events;
+
+ printf("%lld (%.2f%%) events missed, results might not be accurate\n",
+ tool->trace.missed_events,
+ (double) tool->trace.missed_events / total_events * 100.0);
+ }
+}
+
+/*
+ * osnoise_apply_config - apply common configs to the initialized tool
+ */
+int
+osnoise_apply_config(struct osnoise_tool *tool, struct osnoise_params *params)
+{
+ int retval;
+
+ if (!params->sleep_time)
+ params->sleep_time = 1;
+
+ retval = osnoise_set_cpus(tool->context, params->cpus ? params->cpus : "all");
+ if (retval) {
+ err_msg("Failed to apply CPUs config\n");
+ goto out_err;
+ }
+
+ if (params->runtime || params->period) {
+ retval = osnoise_set_runtime_period(tool->context,
+ params->runtime,
+ params->period);
+ } else {
+ retval = osnoise_set_runtime_period(tool->context,
+ DEFAULT_SAMPLE_PERIOD,
+ DEFAULT_SAMPLE_RUNTIME);
+ }
+
+ if (retval) {
+ err_msg("Failed to set runtime and/or period\n");
+ goto out_err;
+ }
+
+ retval = osnoise_set_stop_us(tool->context, params->stop_us);
+ if (retval) {
+ err_msg("Failed to set stop us\n");
+ goto out_err;
+ }
+
+ retval = osnoise_set_stop_total_us(tool->context, params->stop_total_us);
+ if (retval) {
+ err_msg("Failed to set stop total us\n");
+ goto out_err;
+ }
+
+ retval = osnoise_set_tracing_thresh(tool->context, params->threshold);
+ if (retval) {
+ err_msg("Failed to set tracing_thresh\n");
+ goto out_err;
+ }
+
+ if (params->hk_cpus) {
+ retval = sched_setaffinity(getpid(), sizeof(params->hk_cpu_set),
+ &params->hk_cpu_set);
+ if (retval == -1) {
+ err_msg("Failed to set rtla to the house keeping CPUs\n");
+ goto out_err;
+ }
+ } else if (params->cpus) {
+ /*
+ * Even if the user do not set a house-keeping CPU, try to
+ * move rtla to a CPU set different to the one where the user
+ * set the workload to run.
+ *
+ * No need to check results as this is an automatic attempt.
+ */
+ auto_house_keeping(&params->monitored_cpus);
+ }
+
+ retval = osnoise_set_workload(tool->context, true);
+ if (retval < -1) {
+ err_msg("Failed to set OSNOISE_WORKLOAD option\n");
+ goto out_err;
+ }
+
+ return 0;
+
+out_err:
+ return -1;
+}
+
static void osnoise_usage(int err)
{
int i;
diff --git a/tools/tracing/rtla/src/osnoise.h b/tools/tracing/rtla/src/osnoise.h
index 555f4f4903cc..ac1c99910744 100644
--- a/tools/tracing/rtla/src/osnoise.h
+++ b/tools/tracing/rtla/src/osnoise.h
@@ -1,6 +1,55 @@
// SPDX-License-Identifier: GPL-2.0
+#pragma once
+
+#include "utils.h"
#include "trace.h"
+enum osnoise_mode {
+ MODE_OSNOISE = 0,
+ MODE_HWNOISE
+};
+
+struct osnoise_params {
+ /* Common params */
+ char *cpus;
+ cpu_set_t monitored_cpus;
+ char *trace_output;
+ char *cgroup_name;
+ unsigned long long runtime;
+ unsigned long long period;
+ long long threshold;
+ long long stop_us;
+ long long stop_total_us;
+ int sleep_time;
+ int duration;
+ int set_sched;
+ int cgroup;
+ int hk_cpus;
+ cpu_set_t hk_cpu_set;
+ struct sched_attr sched_param;
+ struct trace_events *events;
+ int warmup;
+ int buffer_size;
+ union {
+ struct {
+ /* top only */
+ int quiet;
+ int pretty_output;
+ enum osnoise_mode mode;
+ };
+ struct {
+ /* hist only */
+ int output_divisor;
+ char no_header;
+ char no_summary;
+ char no_index;
+ char with_zeros;
+ int bucket_size;
+ int entries;
+ };
+ };
+};
+
/*
* osnoise_context - read, store, write, restore osnoise configs.
*/
@@ -104,6 +153,9 @@ struct osnoise_tool {
void osnoise_destroy_tool(struct osnoise_tool *top);
struct osnoise_tool *osnoise_init_tool(char *tool_name);
struct osnoise_tool *osnoise_init_trace_tool(char *tracer);
+void osnoise_report_missed_events(struct osnoise_tool *tool);
+bool osnoise_trace_is_off(struct osnoise_tool *tool, struct osnoise_tool *record);
+int osnoise_apply_config(struct osnoise_tool *tool, struct osnoise_params *params);
int osnoise_hist_main(int argc, char *argv[]);
int osnoise_top_main(int argc, char **argv);
diff --git a/tools/tracing/rtla/src/osnoise_hist.c b/tools/tracing/rtla/src/osnoise_hist.c
index 214e2c93fde0..d9d15c8f27c7 100644
--- a/tools/tracing/rtla/src/osnoise_hist.c
+++ b/tools/tracing/rtla/src/osnoise_hist.c
@@ -12,40 +12,9 @@
#include <errno.h>
#include <stdio.h>
#include <time.h>
-#include <sched.h>
-#include "utils.h"
#include "osnoise.h"
-struct osnoise_hist_params {
- char *cpus;
- cpu_set_t monitored_cpus;
- char *trace_output;
- char *cgroup_name;
- unsigned long long runtime;
- unsigned long long period;
- long long threshold;
- long long stop_us;
- long long stop_total_us;
- int sleep_time;
- int duration;
- int set_sched;
- int output_divisor;
- int cgroup;
- int hk_cpus;
- cpu_set_t hk_cpu_set;
- struct sched_attr sched_param;
- struct trace_events *events;
- char no_header;
- char no_summary;
- char no_index;
- char with_zeros;
- int bucket_size;
- int entries;
- int warmup;
- int buffer_size;
-};
-
struct osnoise_hist_cpu {
int *samples;
int count;
@@ -126,7 +95,7 @@ cleanup:
static void osnoise_hist_update_multiple(struct osnoise_tool *tool, int cpu,
unsigned long long duration, int count)
{
- struct osnoise_hist_params *params = tool->params;
+ struct osnoise_params *params = tool->params;
struct osnoise_hist_data *data = tool->data;
unsigned long long total_duration;
int entries = data->entries;
@@ -168,7 +137,7 @@ static void osnoise_destroy_trace_hist(struct osnoise_tool *tool)
*/
static int osnoise_init_trace_hist(struct osnoise_tool *tool)
{
- struct osnoise_hist_params *params = tool->params;
+ struct osnoise_params *params = tool->params;
struct osnoise_hist_data *data = tool->data;
int bucket_size;
char buff[128];
@@ -253,7 +222,7 @@ static void osnoise_read_trace_hist(struct osnoise_tool *tool)
*/
static void osnoise_hist_header(struct osnoise_tool *tool)
{
- struct osnoise_hist_params *params = tool->params;
+ struct osnoise_params *params = tool->params;
struct osnoise_hist_data *data = tool->data;
struct trace_seq *s = tool->trace.seq;
char duration[26];
@@ -292,7 +261,7 @@ static void osnoise_hist_header(struct osnoise_tool *tool)
* osnoise_print_summary - print the summary of the hist data to the output
*/
static void
-osnoise_print_summary(struct osnoise_hist_params *params,
+osnoise_print_summary(struct osnoise_params *params,
struct trace_instance *trace,
struct osnoise_hist_data *data)
{
@@ -370,7 +339,7 @@ osnoise_print_summary(struct osnoise_hist_params *params,
* osnoise_print_stats - print data for all CPUs
*/
static void
-osnoise_print_stats(struct osnoise_hist_params *params, struct osnoise_tool *tool)
+osnoise_print_stats(struct osnoise_params *params, struct osnoise_tool *tool)
{
struct osnoise_hist_data *data = tool->data;
struct trace_instance *trace = &tool->trace;
@@ -440,6 +409,7 @@ osnoise_print_stats(struct osnoise_hist_params *params, struct osnoise_tool *too
trace_seq_reset(trace->seq);
osnoise_print_summary(params, trace, data);
+ osnoise_report_missed_events(tool);
}
/*
@@ -507,10 +477,10 @@ static void osnoise_hist_usage(char *usage)
/*
* osnoise_hist_parse_args - allocs, parse and fill the cmd line parameters
*/
-static struct osnoise_hist_params
+static struct osnoise_params
*osnoise_hist_parse_args(int argc, char *argv[])
{
- struct osnoise_hist_params *params;
+ struct osnoise_params *params;
struct trace_events *tevent;
int retval;
int c;
@@ -730,72 +700,13 @@ static struct osnoise_hist_params
* osnoise_hist_apply_config - apply the hist configs to the initialized tool
*/
static int
-osnoise_hist_apply_config(struct osnoise_tool *tool, struct osnoise_hist_params *params)
+osnoise_hist_apply_config(struct osnoise_tool *tool, struct osnoise_params *params)
{
int retval;
- if (!params->sleep_time)
- params->sleep_time = 1;
-
- if (params->cpus) {
- retval = osnoise_set_cpus(tool->context, params->cpus);
- if (retval) {
- err_msg("Failed to apply CPUs config\n");
- goto out_err;
- }
- }
-
- if (params->runtime || params->period) {
- retval = osnoise_set_runtime_period(tool->context,
- params->runtime,
- params->period);
- if (retval) {
- err_msg("Failed to set runtime and/or period\n");
- goto out_err;
- }
- }
-
- if (params->stop_us) {
- retval = osnoise_set_stop_us(tool->context, params->stop_us);
- if (retval) {
- err_msg("Failed to set stop us\n");
- goto out_err;
- }
- }
-
- if (params->stop_total_us) {
- retval = osnoise_set_stop_total_us(tool->context, params->stop_total_us);
- if (retval) {
- err_msg("Failed to set stop total us\n");
- goto out_err;
- }
- }
-
- if (params->threshold) {
- retval = osnoise_set_tracing_thresh(tool->context, params->threshold);
- if (retval) {
- err_msg("Failed to set tracing_thresh\n");
- goto out_err;
- }
- }
-
- if (params->hk_cpus) {
- retval = sched_setaffinity(getpid(), sizeof(params->hk_cpu_set),
- &params->hk_cpu_set);
- if (retval == -1) {
- err_msg("Failed to set rtla to the house keeping CPUs\n");
- goto out_err;
- }
- } else if (params->cpus) {
- /*
- * Even if the user do not set a house-keeping CPU, try to
- * move rtla to a CPU set different to the one where the user
- * set the workload to run.
- *
- * No need to check results as this is an automatic attempt.
- */
- auto_house_keeping(&params->monitored_cpus);
- }
+ retval = osnoise_apply_config(tool, params);
+ if (retval)
+ goto out_err;
return 0;
@@ -807,7 +718,7 @@ out_err:
* osnoise_init_hist - initialize a osnoise hist tool with parameters
*/
static struct osnoise_tool
-*osnoise_init_hist(struct osnoise_hist_params *params)
+*osnoise_init_hist(struct osnoise_params *params)
{
struct osnoise_tool *tool;
int nr_cpus;
@@ -841,7 +752,7 @@ static void stop_hist(int sig)
* osnoise_hist_set_signals - handles the signal to stop the tool
*/
static void
-osnoise_hist_set_signals(struct osnoise_hist_params *params)
+osnoise_hist_set_signals(struct osnoise_params *params)
{
signal(SIGINT, stop_hist);
if (params->duration) {
@@ -852,7 +763,7 @@ osnoise_hist_set_signals(struct osnoise_hist_params *params)
int osnoise_hist_main(int argc, char *argv[])
{
- struct osnoise_hist_params *params;
+ struct osnoise_params *params;
struct osnoise_tool *record = NULL;
struct osnoise_tool *tool = NULL;
struct trace_instance *trace;
@@ -970,7 +881,7 @@ int osnoise_hist_main(int argc, char *argv[])
goto out_hist;
}
- if (trace_is_off(&tool->trace, &record->trace))
+ if (osnoise_trace_is_off(tool, record))
break;
}
@@ -980,12 +891,10 @@ int osnoise_hist_main(int argc, char *argv[])
return_value = 0;
- if (trace_is_off(&tool->trace, &record->trace)) {
+ if (osnoise_trace_is_off(tool, record)) {
printf("rtla osnoise hit stop tracing\n");
- if (params->trace_output) {
- printf(" Saving trace to %s\n", params->trace_output);
- save_trace_to_file(record->trace.inst, params->trace_output);
- }
+ save_trace_to_file(record ? record->trace.inst : NULL,
+ params->trace_output);
}
out_hist:
diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c
index 45647495ce3b..3455ee73e2e6 100644
--- a/tools/tracing/rtla/src/osnoise_top.c
+++ b/tools/tracing/rtla/src/osnoise_top.c
@@ -11,43 +11,8 @@
#include <unistd.h>
#include <stdio.h>
#include <time.h>
-#include <sched.h>
#include "osnoise.h"
-#include "utils.h"
-
-enum osnoise_mode {
- MODE_OSNOISE = 0,
- MODE_HWNOISE
-};
-
-/*
- * osnoise top parameters
- */
-struct osnoise_top_params {
- char *cpus;
- cpu_set_t monitored_cpus;
- char *trace_output;
- char *cgroup_name;
- unsigned long long runtime;
- unsigned long long period;
- long long threshold;
- long long stop_us;
- long long stop_total_us;
- int sleep_time;
- int duration;
- int quiet;
- int set_sched;
- int cgroup;
- int hk_cpus;
- int warmup;
- int buffer_size;
- int pretty_output;
- cpu_set_t hk_cpu_set;
- struct sched_attr sched_param;
- struct trace_events *events;
- enum osnoise_mode mode;
-};
struct osnoise_top_cpu {
unsigned long long sum_runtime;
@@ -158,7 +123,7 @@ osnoise_top_handler(struct trace_seq *s, struct tep_record *record,
*/
static void osnoise_top_header(struct osnoise_tool *top)
{
- struct osnoise_top_params *params = top->params;
+ struct osnoise_params *params = top->params;
struct trace_seq *s = top->trace.seq;
char duration[26];
@@ -218,7 +183,7 @@ static void clear_terminal(struct trace_seq *seq)
*/
static void osnoise_top_print(struct osnoise_tool *tool, int cpu)
{
- struct osnoise_top_params *params = tool->params;
+ struct osnoise_params *params = tool->params;
struct trace_seq *s = tool->trace.seq;
struct osnoise_top_cpu *cpu_data;
struct osnoise_top_data *data;
@@ -258,7 +223,7 @@ static void osnoise_top_print(struct osnoise_tool *tool, int cpu)
* osnoise_print_stats - print data for all cpus
*/
static void
-osnoise_print_stats(struct osnoise_top_params *params, struct osnoise_tool *top)
+osnoise_print_stats(struct osnoise_params *params, struct osnoise_tool *top)
{
struct trace_instance *trace = &top->trace;
static int nr_cpus = -1;
@@ -280,12 +245,13 @@ osnoise_print_stats(struct osnoise_top_params *params, struct osnoise_tool *top)
trace_seq_do_printf(trace->seq);
trace_seq_reset(trace->seq);
+ osnoise_report_missed_events(top);
}
/*
* osnoise_top_usage - prints osnoise top usage message
*/
-static void osnoise_top_usage(struct osnoise_top_params *params, char *usage)
+static void osnoise_top_usage(struct osnoise_params *params, char *usage)
{
int i;
@@ -353,9 +319,9 @@ static void osnoise_top_usage(struct osnoise_top_params *params, char *usage)
/*
* osnoise_top_parse_args - allocs, parse and fill the cmd line parameters
*/
-struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv)
+struct osnoise_params *osnoise_top_parse_args(int argc, char **argv)
{
- struct osnoise_top_params *params;
+ struct osnoise_params *params;
struct trace_events *tevent;
int retval;
int c;
@@ -552,54 +518,13 @@ struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv)
* osnoise_top_apply_config - apply the top configs to the initialized tool
*/
static int
-osnoise_top_apply_config(struct osnoise_tool *tool, struct osnoise_top_params *params)
+osnoise_top_apply_config(struct osnoise_tool *tool, struct osnoise_params *params)
{
int retval;
- if (!params->sleep_time)
- params->sleep_time = 1;
-
- if (params->cpus) {
- retval = osnoise_set_cpus(tool->context, params->cpus);
- if (retval) {
- err_msg("Failed to apply CPUs config\n");
- goto out_err;
- }
- }
-
- if (params->runtime || params->period) {
- retval = osnoise_set_runtime_period(tool->context,
- params->runtime,
- params->period);
- if (retval) {
- err_msg("Failed to set runtime and/or period\n");
- goto out_err;
- }
- }
-
- if (params->stop_us) {
- retval = osnoise_set_stop_us(tool->context, params->stop_us);
- if (retval) {
- err_msg("Failed to set stop us\n");
- goto out_err;
- }
- }
-
- if (params->stop_total_us) {
- retval = osnoise_set_stop_total_us(tool->context, params->stop_total_us);
- if (retval) {
- err_msg("Failed to set stop total us\n");
- goto out_err;
- }
- }
-
- if (params->threshold) {
- retval = osnoise_set_tracing_thresh(tool->context, params->threshold);
- if (retval) {
- err_msg("Failed to set tracing_thresh\n");
- goto out_err;
- }
- }
+ retval = osnoise_apply_config(tool, params);
+ if (retval)
+ goto out_err;
if (params->mode == MODE_HWNOISE) {
retval = osnoise_set_irq_disable(tool->context, 1);
@@ -609,24 +534,6 @@ osnoise_top_apply_config(struct osnoise_tool *tool, struct osnoise_top_params *p
}
}
- if (params->hk_cpus) {
- retval = sched_setaffinity(getpid(), sizeof(params->hk_cpu_set),
- &params->hk_cpu_set);
- if (retval == -1) {
- err_msg("Failed to set rtla to the house keeping CPUs\n");
- goto out_err;
- }
- } else if (params->cpus) {
- /*
- * Even if the user do not set a house-keeping CPU, try to
- * move rtla to a CPU set different to the one where the user
- * set the workload to run.
- *
- * No need to check results as this is an automatic attempt.
- */
- auto_house_keeping(&params->monitored_cpus);
- }
-
if (isatty(STDOUT_FILENO) && !params->quiet)
params->pretty_output = 1;
@@ -639,7 +546,7 @@ out_err:
/*
* osnoise_init_top - initialize a osnoise top tool with parameters
*/
-struct osnoise_tool *osnoise_init_top(struct osnoise_top_params *params)
+struct osnoise_tool *osnoise_init_top(struct osnoise_params *params)
{
struct osnoise_tool *tool;
int nr_cpus;
@@ -673,7 +580,7 @@ static void stop_top(int sig)
/*
* osnoise_top_set_signals - handles the signal to stop the tool
*/
-static void osnoise_top_set_signals(struct osnoise_top_params *params)
+static void osnoise_top_set_signals(struct osnoise_params *params)
{
signal(SIGINT, stop_top);
if (params->duration) {
@@ -684,7 +591,7 @@ static void osnoise_top_set_signals(struct osnoise_top_params *params)
int osnoise_top_main(int argc, char **argv)
{
- struct osnoise_top_params *params;
+ struct osnoise_params *params;
struct osnoise_tool *record = NULL;
struct osnoise_tool *tool = NULL;
struct trace_instance *trace;
@@ -801,7 +708,7 @@ int osnoise_top_main(int argc, char **argv)
if (!params->quiet)
osnoise_print_stats(params, tool);
- if (trace_is_off(&tool->trace, &record->trace))
+ if (osnoise_trace_is_off(tool, record))
break;
}
@@ -810,12 +717,10 @@ int osnoise_top_main(int argc, char **argv)
return_value = 0;
- if (trace_is_off(&tool->trace, &record->trace)) {
+ if (osnoise_trace_is_off(tool, record)) {
printf("osnoise hit stop tracing\n");
- if (params->trace_output) {
- printf(" Saving trace to %s\n", params->trace_output);
- save_trace_to_file(record->trace.inst, params->trace_output);
- }
+ save_trace_to_file(record ? record->trace.inst : NULL,
+ params->trace_output);
}
out_top:
diff --git a/tools/tracing/rtla/src/timerlat.bpf.c b/tools/tracing/rtla/src/timerlat.bpf.c
new file mode 100644
index 000000000000..96196d46e170
--- /dev/null
+++ b/tools/tracing/rtla/src/timerlat.bpf.c
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_tracing.h>
+#include <stdbool.h>
+#include "timerlat_bpf.h"
+
+#define nosubprog __always_inline
+#define MAX_ENTRIES_DEFAULT 4096
+
+char LICENSE[] SEC("license") = "GPL";
+
+struct trace_event_raw_timerlat_sample {
+ unsigned long long timer_latency;
+ int context;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, MAX_ENTRIES_DEFAULT);
+ __type(key, unsigned int);
+ __type(value, unsigned long long);
+} hist_irq SEC(".maps"), hist_thread SEC(".maps"), hist_user SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, SUMMARY_FIELD_N);
+ __type(key, unsigned int);
+ __type(value, unsigned long long);
+} summary_irq SEC(".maps"), summary_thread SEC(".maps"), summary_user SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 1);
+} signal_stop_tracing SEC(".maps");
+
+/* Params to be set by rtla */
+const volatile int bucket_size = 1;
+const volatile int output_divisor = 1000;
+const volatile int entries = 256;
+const volatile int irq_threshold;
+const volatile int thread_threshold;
+const volatile bool aa_only;
+
+int stop_tracing;
+
+nosubprog unsigned long long map_get(void *map,
+ unsigned int key)
+{
+ unsigned long long *value_ptr;
+
+ value_ptr = bpf_map_lookup_elem(map, &key);
+
+ return !value_ptr ? 0 : *value_ptr;
+}
+
+nosubprog void map_set(void *map,
+ unsigned int key,
+ unsigned long long value)
+{
+ bpf_map_update_elem(map, &key, &value, BPF_ANY);
+}
+
+nosubprog void map_increment(void *map,
+ unsigned int key)
+{
+ map_set(map, key, map_get(map, key) + 1);
+}
+
+nosubprog void update_main_hist(void *map,
+ int bucket)
+{
+ if (entries == 0)
+ /* No histogram */
+ return;
+
+ if (bucket >= entries)
+ /* Overflow */
+ return;
+
+ map_increment(map, bucket);
+}
+
+nosubprog void update_summary(void *map,
+ unsigned long long latency,
+ int bucket)
+{
+ if (aa_only)
+ /* Auto-analysis only, nothing to be done here */
+ return;
+
+ map_set(map, SUMMARY_CURRENT, latency);
+
+ if (bucket >= entries)
+ /* Overflow */
+ map_increment(map, SUMMARY_OVERFLOW);
+
+ if (latency > map_get(map, SUMMARY_MAX))
+ map_set(map, SUMMARY_MAX, latency);
+
+ if (latency < map_get(map, SUMMARY_MIN) || map_get(map, SUMMARY_COUNT) == 0)
+ map_set(map, SUMMARY_MIN, latency);
+
+ map_increment(map, SUMMARY_COUNT);
+ map_set(map, SUMMARY_SUM, map_get(map, SUMMARY_SUM) + latency);
+}
+
+nosubprog void set_stop_tracing(void)
+{
+ int value = 0;
+
+ /* Suppress further sample processing */
+ stop_tracing = 1;
+
+ /* Signal to userspace */
+ bpf_ringbuf_output(&signal_stop_tracing, &value, sizeof(value), 0);
+}
+
+SEC("tp/osnoise/timerlat_sample")
+int handle_timerlat_sample(struct trace_event_raw_timerlat_sample *tp_args)
+{
+ unsigned long long latency, latency_us;
+ int bucket;
+
+ if (stop_tracing)
+ return 0;
+
+ latency = tp_args->timer_latency / output_divisor;
+ latency_us = tp_args->timer_latency / 1000;
+ bucket = latency / bucket_size;
+
+ if (tp_args->context == 0) {
+ update_main_hist(&hist_irq, bucket);
+ update_summary(&summary_irq, latency, bucket);
+
+ if (irq_threshold != 0 && latency_us >= irq_threshold)
+ set_stop_tracing();
+ } else if (tp_args->context == 1) {
+ update_main_hist(&hist_thread, bucket);
+ update_summary(&summary_thread, latency, bucket);
+
+ if (thread_threshold != 0 && latency_us >= thread_threshold)
+ set_stop_tracing();
+ } else {
+ update_main_hist(&hist_user, bucket);
+ update_summary(&summary_user, latency, bucket);
+ }
+
+ return 0;
+}
diff --git a/tools/tracing/rtla/src/timerlat.c b/tools/tracing/rtla/src/timerlat.c
index 21cdcc5c4a29..c29e2ba2d7d8 100644
--- a/tools/tracing/rtla/src/timerlat.c
+++ b/tools/tracing/rtla/src/timerlat.c
@@ -2,6 +2,7 @@
/*
* Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
*/
+#define _GNU_SOURCE
#include <sys/types.h>
#include <sys/stat.h>
#include <pthread.h>
@@ -11,9 +12,114 @@
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
+#include <sched.h>
#include "timerlat.h"
+#define DEFAULT_TIMERLAT_PERIOD 1000 /* 1ms */
+
+/*
+ * timerlat_apply_config - apply common configs to the initialized tool
+ */
+int
+timerlat_apply_config(struct osnoise_tool *tool, struct timerlat_params *params)
+{
+ int retval, i;
+
+ if (!params->sleep_time)
+ params->sleep_time = 1;
+
+ retval = osnoise_set_cpus(tool->context, params->cpus ? params->cpus : "all");
+ if (retval) {
+ err_msg("Failed to apply CPUs config\n");
+ goto out_err;
+ }
+
+ if (!params->cpus) {
+ for (i = 0; i < sysconf(_SC_NPROCESSORS_CONF); i++)
+ CPU_SET(i, &params->monitored_cpus);
+ }
+
+ retval = osnoise_set_stop_us(tool->context, params->stop_us);
+ if (retval) {
+ err_msg("Failed to set stop us\n");
+ goto out_err;
+ }
+
+ retval = osnoise_set_stop_total_us(tool->context, params->stop_total_us);
+ if (retval) {
+ err_msg("Failed to set stop total us\n");
+ goto out_err;
+ }
+
+
+ retval = osnoise_set_timerlat_period_us(tool->context,
+ params->timerlat_period_us ?
+ params->timerlat_period_us :
+ DEFAULT_TIMERLAT_PERIOD);
+ if (retval) {
+ err_msg("Failed to set timerlat period\n");
+ goto out_err;
+ }
+
+
+ retval = osnoise_set_print_stack(tool->context, params->print_stack);
+ if (retval) {
+ err_msg("Failed to set print stack\n");
+ goto out_err;
+ }
+
+ if (params->hk_cpus) {
+ retval = sched_setaffinity(getpid(), sizeof(params->hk_cpu_set),
+ &params->hk_cpu_set);
+ if (retval == -1) {
+ err_msg("Failed to set rtla to the house keeping CPUs\n");
+ goto out_err;
+ }
+ } else if (params->cpus) {
+ /*
+ * Even if the user do not set a house-keeping CPU, try to
+ * move rtla to a CPU set different to the one where the user
+ * set the workload to run.
+ *
+ * No need to check results as this is an automatic attempt.
+ */
+ auto_house_keeping(&params->monitored_cpus);
+ }
+
+ /*
+ * If the user did not specify a type of thread, try user-threads first.
+ * Fall back to kernel threads otherwise.
+ */
+ if (!params->kernel_workload && !params->user_data) {
+ retval = tracefs_file_exists(NULL, "osnoise/per_cpu/cpu0/timerlat_fd");
+ if (retval) {
+ debug_msg("User-space interface detected, setting user-threads\n");
+ params->user_workload = 1;
+ params->user_data = 1;
+ } else {
+ debug_msg("User-space interface not detected, setting kernel-threads\n");
+ params->kernel_workload = 1;
+ }
+ }
+
+ /*
+ * Set workload according to type of thread if the kernel supports it.
+ * On kernels without support, user threads will have already failed
+ * on missing timerlat_fd, and kernel threads do not need it.
+ */
+ retval = osnoise_set_workload(tool->context, params->kernel_workload);
+ if (retval < -1) {
+ err_msg("Failed to set OSNOISE_WORKLOAD option\n");
+ goto out_err;
+ }
+
+ return 0;
+
+out_err:
+ return -1;
+}
+
static void timerlat_usage(int err)
{
int i;
diff --git a/tools/tracing/rtla/src/timerlat.h b/tools/tracing/rtla/src/timerlat.h
index 88561bfd14f3..73045aef23fa 100644
--- a/tools/tracing/rtla/src/timerlat.h
+++ b/tools/tracing/rtla/src/timerlat.h
@@ -1,4 +1,58 @@
// SPDX-License-Identifier: GPL-2.0
+#include "osnoise.h"
+
+struct timerlat_params {
+ /* Common params */
+ char *cpus;
+ cpu_set_t monitored_cpus;
+ char *trace_output;
+ char *cgroup_name;
+ unsigned long long runtime;
+ long long stop_us;
+ long long stop_total_us;
+ long long timerlat_period_us;
+ long long print_stack;
+ int sleep_time;
+ int output_divisor;
+ int duration;
+ int set_sched;
+ int dma_latency;
+ int no_aa;
+ int dump_tasks;
+ int cgroup;
+ int hk_cpus;
+ int user_workload;
+ int kernel_workload;
+ int user_data;
+ int warmup;
+ int buffer_size;
+ int deepest_idle_state;
+ cpu_set_t hk_cpu_set;
+ struct sched_attr sched_param;
+ struct trace_events *events;
+ union {
+ struct {
+ /* top only */
+ int quiet;
+ int aa_only;
+ int pretty_output;
+ };
+ struct {
+ /* hist only */
+ char no_irq;
+ char no_thread;
+ char no_header;
+ char no_summary;
+ char no_index;
+ char with_zeros;
+ int bucket_size;
+ int entries;
+ };
+ };
+};
+
+int timerlat_apply_config(struct osnoise_tool *tool, struct timerlat_params *params);
+
int timerlat_hist_main(int argc, char *argv[]);
int timerlat_top_main(int argc, char *argv[]);
int timerlat_main(int argc, char *argv[]);
diff --git a/tools/tracing/rtla/src/timerlat_aa.c b/tools/tracing/rtla/src/timerlat_aa.c
index 7bd80ee2a5b4..31e66ea2b144 100644
--- a/tools/tracing/rtla/src/timerlat_aa.c
+++ b/tools/tracing/rtla/src/timerlat_aa.c
@@ -5,8 +5,6 @@
#include <stdlib.h>
#include <errno.h>
-#include "utils.h"
-#include "osnoise.h"
#include "timerlat.h"
#include <unistd.h>
diff --git a/tools/tracing/rtla/src/timerlat_bpf.c b/tools/tracing/rtla/src/timerlat_bpf.c
new file mode 100644
index 000000000000..5abee884037a
--- /dev/null
+++ b/tools/tracing/rtla/src/timerlat_bpf.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifdef HAVE_BPF_SKEL
+#include "timerlat.h"
+#include "timerlat_bpf.h"
+#include "timerlat.skel.h"
+
+static struct timerlat_bpf *bpf;
+
+/*
+ * timerlat_bpf_init - load and initialize BPF program to collect timerlat data
+ */
+int timerlat_bpf_init(struct timerlat_params *params)
+{
+ int err;
+
+ debug_msg("Loading BPF program\n");
+
+ bpf = timerlat_bpf__open();
+ if (!bpf)
+ return 1;
+
+ /* Pass common options */
+ bpf->rodata->output_divisor = params->output_divisor;
+ bpf->rodata->entries = params->entries;
+ bpf->rodata->irq_threshold = params->stop_us;
+ bpf->rodata->thread_threshold = params->stop_total_us;
+ bpf->rodata->aa_only = params->aa_only;
+
+ if (params->entries != 0) {
+ /* Pass histogram options */
+ bpf->rodata->bucket_size = params->bucket_size;
+
+ /* Set histogram array sizes */
+ bpf_map__set_max_entries(bpf->maps.hist_irq, params->entries);
+ bpf_map__set_max_entries(bpf->maps.hist_thread, params->entries);
+ bpf_map__set_max_entries(bpf->maps.hist_user, params->entries);
+ } else {
+ /* No entries, disable histogram */
+ bpf_map__set_autocreate(bpf->maps.hist_irq, false);
+ bpf_map__set_autocreate(bpf->maps.hist_thread, false);
+ bpf_map__set_autocreate(bpf->maps.hist_user, false);
+ }
+
+ if (params->aa_only) {
+ /* Auto-analysis only, disable summary */
+ bpf_map__set_autocreate(bpf->maps.summary_irq, false);
+ bpf_map__set_autocreate(bpf->maps.summary_thread, false);
+ bpf_map__set_autocreate(bpf->maps.summary_user, false);
+ }
+
+ /* Load and verify BPF program */
+ err = timerlat_bpf__load(bpf);
+ if (err) {
+ timerlat_bpf__destroy(bpf);
+ return err;
+ }
+
+ return 0;
+}
+
+/*
+ * timerlat_bpf_attach - attach BPF program to collect timerlat data
+ */
+int timerlat_bpf_attach(void)
+{
+ debug_msg("Attaching BPF program\n");
+
+ return timerlat_bpf__attach(bpf);
+}
+
+/*
+ * timerlat_bpf_detach - detach BPF program to collect timerlat data
+ */
+void timerlat_bpf_detach(void)
+{
+ timerlat_bpf__detach(bpf);
+}
+
+/*
+ * timerlat_bpf_detach - destroy BPF program to collect timerlat data
+ */
+void timerlat_bpf_destroy(void)
+{
+ timerlat_bpf__destroy(bpf);
+}
+
+static int handle_rb_event(void *ctx, void *data, size_t data_sz)
+{
+ return 0;
+}
+
+/*
+ * timerlat_bpf_wait - wait until tracing is stopped or signal
+ */
+int timerlat_bpf_wait(int timeout)
+{
+ struct ring_buffer *rb;
+ int retval;
+
+ rb = ring_buffer__new(bpf_map__fd(bpf->maps.signal_stop_tracing),
+ handle_rb_event, NULL, NULL);
+ retval = ring_buffer__poll(rb, timeout * 1000);
+ ring_buffer__free(rb);
+
+ return retval;
+}
+
+static int get_value(struct bpf_map *map_irq,
+ struct bpf_map *map_thread,
+ struct bpf_map *map_user,
+ int key,
+ long long *value_irq,
+ long long *value_thread,
+ long long *value_user,
+ int cpus)
+{
+ int err;
+
+ err = bpf_map__lookup_elem(map_irq, &key,
+ sizeof(unsigned int), value_irq,
+ sizeof(long long) * cpus, 0);
+ if (err)
+ return err;
+ err = bpf_map__lookup_elem(map_thread, &key,
+ sizeof(unsigned int), value_thread,
+ sizeof(long long) * cpus, 0);
+ if (err)
+ return err;
+ err = bpf_map__lookup_elem(map_user, &key,
+ sizeof(unsigned int), value_user,
+ sizeof(long long) * cpus, 0);
+ if (err)
+ return err;
+ return 0;
+}
+
+/*
+ * timerlat_bpf_get_hist_value - get value from BPF hist map
+ */
+int timerlat_bpf_get_hist_value(int key,
+ long long *value_irq,
+ long long *value_thread,
+ long long *value_user,
+ int cpus)
+{
+ return get_value(bpf->maps.hist_irq,
+ bpf->maps.hist_thread,
+ bpf->maps.hist_user,
+ key, value_irq, value_thread, value_user, cpus);
+}
+
+/*
+ * timerlat_bpf_get_summary_value - get value from BPF summary map
+ */
+int timerlat_bpf_get_summary_value(enum summary_field key,
+ long long *value_irq,
+ long long *value_thread,
+ long long *value_user,
+ int cpus)
+{
+ return get_value(bpf->maps.summary_irq,
+ bpf->maps.summary_thread,
+ bpf->maps.summary_user,
+ key, value_irq, value_thread, value_user, cpus);
+}
+#endif /* HAVE_BPF_SKEL */
diff --git a/tools/tracing/rtla/src/timerlat_bpf.h b/tools/tracing/rtla/src/timerlat_bpf.h
new file mode 100644
index 000000000000..f1b54dbddb0e
--- /dev/null
+++ b/tools/tracing/rtla/src/timerlat_bpf.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#pragma once
+
+enum summary_field {
+ SUMMARY_CURRENT,
+ SUMMARY_MIN,
+ SUMMARY_MAX,
+ SUMMARY_COUNT,
+ SUMMARY_SUM,
+ SUMMARY_OVERFLOW,
+ SUMMARY_FIELD_N
+};
+
+#ifndef __bpf__
+#ifdef HAVE_BPF_SKEL
+int timerlat_bpf_init(struct timerlat_params *params);
+int timerlat_bpf_attach(void);
+void timerlat_bpf_detach(void);
+void timerlat_bpf_destroy(void);
+int timerlat_bpf_wait(int timeout);
+int timerlat_bpf_get_hist_value(int key,
+ long long *value_irq,
+ long long *value_thread,
+ long long *value_user,
+ int cpus);
+int timerlat_bpf_get_summary_value(enum summary_field key,
+ long long *value_irq,
+ long long *value_thread,
+ long long *value_user,
+ int cpus);
+static inline int have_libbpf_support(void) { return 1; }
+#else
+static inline int timerlat_bpf_init(struct timerlat_params *params)
+{
+ return -1;
+}
+static inline int timerlat_bpf_attach(void) { return -1; }
+static inline void timerlat_bpf_detach(void) { };
+static inline void timerlat_bpf_destroy(void) { };
+static inline int timerlat_bpf_wait(int timeout) { return -1; }
+static inline int timerlat_bpf_get_hist_value(int key,
+ long long *value_irq,
+ long long *value_thread,
+ long long *value_user,
+ int cpus)
+{
+ return -1;
+}
+static inline int timerlat_bpf_get_summary_value(enum summary_field key,
+ long long *value_irq,
+ long long *value_thread,
+ long long *value_user,
+ int cpus)
+{
+ return -1;
+}
+static inline int have_libbpf_support(void) { return 0; }
+#endif /* HAVE_BPF_SKEL */
+#endif /* __bpf__ */
diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c
index 4403cc4eba30..9d9efeedc4c2 100644
--- a/tools/tracing/rtla/src/timerlat_hist.c
+++ b/tools/tracing/rtla/src/timerlat_hist.c
@@ -14,49 +14,10 @@
#include <sched.h>
#include <pthread.h>
-#include "utils.h"
-#include "osnoise.h"
#include "timerlat.h"
#include "timerlat_aa.h"
#include "timerlat_u.h"
-
-struct timerlat_hist_params {
- char *cpus;
- cpu_set_t monitored_cpus;
- char *trace_output;
- char *cgroup_name;
- unsigned long long runtime;
- long long stop_us;
- long long stop_total_us;
- long long timerlat_period_us;
- long long print_stack;
- int sleep_time;
- int output_divisor;
- int duration;
- int set_sched;
- int dma_latency;
- int cgroup;
- int hk_cpus;
- int no_aa;
- int dump_tasks;
- int user_workload;
- int kernel_workload;
- int user_hist;
- cpu_set_t hk_cpu_set;
- struct sched_attr sched_param;
- struct trace_events *events;
- char no_irq;
- char no_thread;
- char no_header;
- char no_summary;
- char no_index;
- char with_zeros;
- int bucket_size;
- int entries;
- int warmup;
- int buffer_size;
- int deepest_idle_state;
-};
+#include "timerlat_bpf.h"
struct timerlat_hist_cpu {
int *irq;
@@ -174,7 +135,7 @@ timerlat_hist_update(struct osnoise_tool *tool, int cpu,
unsigned long long context,
unsigned long long latency)
{
- struct timerlat_hist_params *params = tool->params;
+ struct timerlat_params *params = tool->params;
struct timerlat_hist_data *data = tool->data;
int entries = data->entries;
int bucket;
@@ -234,11 +195,94 @@ timerlat_hist_handler(struct trace_seq *s, struct tep_record *record,
}
/*
+ * timerlat_hist_bpf_pull_data - copy data from BPF maps into userspace
+ */
+static int timerlat_hist_bpf_pull_data(struct osnoise_tool *tool)
+{
+ struct timerlat_hist_data *data = tool->data;
+ int i, j, err;
+ long long value_irq[data->nr_cpus],
+ value_thread[data->nr_cpus],
+ value_user[data->nr_cpus];
+
+ /* Pull histogram */
+ for (i = 0; i < data->entries; i++) {
+ err = timerlat_bpf_get_hist_value(i, value_irq, value_thread,
+ value_user, data->nr_cpus);
+ if (err)
+ return err;
+ for (j = 0; j < data->nr_cpus; j++) {
+ data->hist[j].irq[i] = value_irq[j];
+ data->hist[j].thread[i] = value_thread[j];
+ data->hist[j].user[i] = value_user[j];
+ }
+ }
+
+ /* Pull summary */
+ err = timerlat_bpf_get_summary_value(SUMMARY_COUNT,
+ value_irq, value_thread, value_user,
+ data->nr_cpus);
+ if (err)
+ return err;
+ for (i = 0; i < data->nr_cpus; i++) {
+ data->hist[i].irq_count = value_irq[i];
+ data->hist[i].thread_count = value_thread[i];
+ data->hist[i].user_count = value_user[i];
+ }
+
+ err = timerlat_bpf_get_summary_value(SUMMARY_MIN,
+ value_irq, value_thread, value_user,
+ data->nr_cpus);
+ if (err)
+ return err;
+ for (i = 0; i < data->nr_cpus; i++) {
+ data->hist[i].min_irq = value_irq[i];
+ data->hist[i].min_thread = value_thread[i];
+ data->hist[i].min_user = value_user[i];
+ }
+
+ err = timerlat_bpf_get_summary_value(SUMMARY_MAX,
+ value_irq, value_thread, value_user,
+ data->nr_cpus);
+ if (err)
+ return err;
+ for (i = 0; i < data->nr_cpus; i++) {
+ data->hist[i].max_irq = value_irq[i];
+ data->hist[i].max_thread = value_thread[i];
+ data->hist[i].max_user = value_user[i];
+ }
+
+ err = timerlat_bpf_get_summary_value(SUMMARY_SUM,
+ value_irq, value_thread, value_user,
+ data->nr_cpus);
+ if (err)
+ return err;
+ for (i = 0; i < data->nr_cpus; i++) {
+ data->hist[i].sum_irq = value_irq[i];
+ data->hist[i].sum_thread = value_thread[i];
+ data->hist[i].sum_user = value_user[i];
+ }
+
+ err = timerlat_bpf_get_summary_value(SUMMARY_OVERFLOW,
+ value_irq, value_thread, value_user,
+ data->nr_cpus);
+ if (err)
+ return err;
+ for (i = 0; i < data->nr_cpus; i++) {
+ data->hist[i].irq[data->entries] = value_irq[i];
+ data->hist[i].thread[data->entries] = value_thread[i];
+ data->hist[i].user[data->entries] = value_user[i];
+ }
+
+ return 0;
+}
+
+/*
* timerlat_hist_header - print the header of the tracer to the output
*/
static void timerlat_hist_header(struct osnoise_tool *tool)
{
- struct timerlat_hist_params *params = tool->params;
+ struct timerlat_params *params = tool->params;
struct timerlat_hist_data *data = tool->data;
struct trace_seq *s = tool->trace.seq;
char duration[26];
@@ -271,7 +315,7 @@ static void timerlat_hist_header(struct osnoise_tool *tool)
if (!params->no_thread)
trace_seq_printf(s, " Thr-%03d", cpu);
- if (params->user_hist)
+ if (params->user_data)
trace_seq_printf(s, " Usr-%03d", cpu);
}
trace_seq_printf(s, "\n");
@@ -300,7 +344,7 @@ static void format_summary_value(struct trace_seq *seq,
* timerlat_print_summary - print the summary of the hist data to the output
*/
static void
-timerlat_print_summary(struct timerlat_hist_params *params,
+timerlat_print_summary(struct timerlat_params *params,
struct trace_instance *trace,
struct timerlat_hist_data *data)
{
@@ -327,7 +371,7 @@ timerlat_print_summary(struct timerlat_hist_params *params,
trace_seq_printf(trace->seq, "%9llu ",
data->hist[cpu].thread_count);
- if (params->user_hist)
+ if (params->user_data)
trace_seq_printf(trace->seq, "%9llu ",
data->hist[cpu].user_count);
}
@@ -355,7 +399,7 @@ timerlat_print_summary(struct timerlat_hist_params *params,
data->hist[cpu].min_thread,
false);
- if (params->user_hist)
+ if (params->user_data)
format_summary_value(trace->seq,
data->hist[cpu].user_count,
data->hist[cpu].min_user,
@@ -385,7 +429,7 @@ timerlat_print_summary(struct timerlat_hist_params *params,
data->hist[cpu].sum_thread,
true);
- if (params->user_hist)
+ if (params->user_data)
format_summary_value(trace->seq,
data->hist[cpu].user_count,
data->hist[cpu].sum_user,
@@ -415,7 +459,7 @@ timerlat_print_summary(struct timerlat_hist_params *params,
data->hist[cpu].max_thread,
false);
- if (params->user_hist)
+ if (params->user_data)
format_summary_value(trace->seq,
data->hist[cpu].user_count,
data->hist[cpu].max_user,
@@ -427,7 +471,7 @@ timerlat_print_summary(struct timerlat_hist_params *params,
}
static void
-timerlat_print_stats_all(struct timerlat_hist_params *params,
+timerlat_print_stats_all(struct timerlat_params *params,
struct trace_instance *trace,
struct timerlat_hist_data *data)
{
@@ -477,7 +521,7 @@ timerlat_print_stats_all(struct timerlat_hist_params *params,
if (!params->no_thread)
trace_seq_printf(trace->seq, " Thr");
- if (params->user_hist)
+ if (params->user_data)
trace_seq_printf(trace->seq, " Usr");
trace_seq_printf(trace->seq, "\n");
@@ -493,7 +537,7 @@ timerlat_print_stats_all(struct timerlat_hist_params *params,
trace_seq_printf(trace->seq, "%9llu ",
sum.thread_count);
- if (params->user_hist)
+ if (params->user_data)
trace_seq_printf(trace->seq, "%9llu ",
sum.user_count);
@@ -514,7 +558,7 @@ timerlat_print_stats_all(struct timerlat_hist_params *params,
sum.min_thread,
false);
- if (params->user_hist)
+ if (params->user_data)
format_summary_value(trace->seq,
sum.user_count,
sum.min_user,
@@ -537,7 +581,7 @@ timerlat_print_stats_all(struct timerlat_hist_params *params,
sum.sum_thread,
true);
- if (params->user_hist)
+ if (params->user_data)
format_summary_value(trace->seq,
sum.user_count,
sum.sum_user,
@@ -560,7 +604,7 @@ timerlat_print_stats_all(struct timerlat_hist_params *params,
sum.max_thread,
false);
- if (params->user_hist)
+ if (params->user_data)
format_summary_value(trace->seq,
sum.user_count,
sum.max_user,
@@ -575,7 +619,7 @@ timerlat_print_stats_all(struct timerlat_hist_params *params,
* timerlat_print_stats - print data for each CPUs
*/
static void
-timerlat_print_stats(struct timerlat_hist_params *params, struct osnoise_tool *tool)
+timerlat_print_stats(struct timerlat_params *params, struct osnoise_tool *tool)
{
struct timerlat_hist_data *data = tool->data;
struct trace_instance *trace = &tool->trace;
@@ -610,7 +654,7 @@ timerlat_print_stats(struct timerlat_hist_params *params, struct osnoise_tool *t
data->hist[cpu].thread[bucket]);
}
- if (params->user_hist) {
+ if (params->user_data) {
total += data->hist[cpu].user[bucket];
trace_seq_printf(trace->seq, "%9d ",
data->hist[cpu].user[bucket]);
@@ -646,7 +690,7 @@ timerlat_print_stats(struct timerlat_hist_params *params, struct osnoise_tool *t
trace_seq_printf(trace->seq, "%9d ",
data->hist[cpu].thread[data->entries]);
- if (params->user_hist)
+ if (params->user_data)
trace_seq_printf(trace->seq, "%9d ",
data->hist[cpu].user[data->entries]);
}
@@ -656,6 +700,7 @@ timerlat_print_stats(struct timerlat_hist_params *params, struct osnoise_tool *t
timerlat_print_summary(params, trace, data);
timerlat_print_stats_all(params, trace, data);
+ osnoise_report_missed_events(tool);
}
/*
@@ -733,10 +778,10 @@ static void timerlat_hist_usage(char *usage)
/*
* timerlat_hist_parse_args - allocs, parse and fill the cmd line parameters
*/
-static struct timerlat_hist_params
+static struct timerlat_params
*timerlat_hist_parse_args(int argc, char *argv[])
{
- struct timerlat_hist_params *params;
+ struct timerlat_params *params;
struct trace_events *tevent;
int auto_thresh;
int retval;
@@ -920,7 +965,7 @@ static struct timerlat_hist_params
params->user_workload = 1;
/* fallback: -u implies in -U */
case 'U':
- params->user_hist = 1;
+ params->user_data = 1;
break;
case '0': /* no irq */
params->no_irq = 1;
@@ -1016,97 +1061,13 @@ static struct timerlat_hist_params
* timerlat_hist_apply_config - apply the hist configs to the initialized tool
*/
static int
-timerlat_hist_apply_config(struct osnoise_tool *tool, struct timerlat_hist_params *params)
+timerlat_hist_apply_config(struct osnoise_tool *tool, struct timerlat_params *params)
{
- int retval, i;
-
- if (!params->sleep_time)
- params->sleep_time = 1;
-
- if (params->cpus) {
- retval = osnoise_set_cpus(tool->context, params->cpus);
- if (retval) {
- err_msg("Failed to apply CPUs config\n");
- goto out_err;
- }
- } else {
- for (i = 0; i < sysconf(_SC_NPROCESSORS_CONF); i++)
- CPU_SET(i, &params->monitored_cpus);
- }
-
- if (params->stop_us) {
- retval = osnoise_set_stop_us(tool->context, params->stop_us);
- if (retval) {
- err_msg("Failed to set stop us\n");
- goto out_err;
- }
- }
-
- if (params->stop_total_us) {
- retval = osnoise_set_stop_total_us(tool->context, params->stop_total_us);
- if (retval) {
- err_msg("Failed to set stop total us\n");
- goto out_err;
- }
- }
-
- if (params->timerlat_period_us) {
- retval = osnoise_set_timerlat_period_us(tool->context, params->timerlat_period_us);
- if (retval) {
- err_msg("Failed to set timerlat period\n");
- goto out_err;
- }
- }
-
- if (params->print_stack) {
- retval = osnoise_set_print_stack(tool->context, params->print_stack);
- if (retval) {
- err_msg("Failed to set print stack\n");
- goto out_err;
- }
- }
-
- if (params->hk_cpus) {
- retval = sched_setaffinity(getpid(), sizeof(params->hk_cpu_set),
- &params->hk_cpu_set);
- if (retval == -1) {
- err_msg("Failed to set rtla to the house keeping CPUs\n");
- goto out_err;
- }
- } else if (params->cpus) {
- /*
- * Even if the user do not set a house-keeping CPU, try to
- * move rtla to a CPU set different to the one where the user
- * set the workload to run.
- *
- * No need to check results as this is an automatic attempt.
- */
- auto_house_keeping(&params->monitored_cpus);
- }
-
- /*
- * If the user did not specify a type of thread, try user-threads first.
- * Fall back to kernel threads otherwise.
- */
- if (!params->kernel_workload && !params->user_hist) {
- retval = tracefs_file_exists(NULL, "osnoise/per_cpu/cpu0/timerlat_fd");
- if (retval) {
- debug_msg("User-space interface detected, setting user-threads\n");
- params->user_workload = 1;
- params->user_hist = 1;
- } else {
- debug_msg("User-space interface not detected, setting kernel-threads\n");
- params->kernel_workload = 1;
- }
- }
+ int retval;
- if (params->user_hist) {
- retval = osnoise_set_workload(tool->context, 0);
- if (retval) {
- err_msg("Failed to set OSNOISE_WORKLOAD option\n");
- goto out_err;
- }
- }
+ retval = timerlat_apply_config(tool, params);
+ if (retval)
+ goto out_err;
return 0;
@@ -1118,7 +1079,7 @@ out_err:
* timerlat_init_hist - initialize a timerlat hist tool with parameters
*/
static struct osnoise_tool
-*timerlat_init_hist(struct timerlat_hist_params *params)
+*timerlat_init_hist(struct timerlat_params *params)
{
struct osnoise_tool *tool;
int nr_cpus;
@@ -1146,16 +1107,27 @@ out_err:
}
static int stop_tracing;
+static struct trace_instance *hist_inst = NULL;
static void stop_hist(int sig)
{
+ if (stop_tracing) {
+ /*
+ * Stop requested twice in a row; abort event processing and
+ * exit immediately
+ */
+ tracefs_iterate_stop(hist_inst->inst);
+ return;
+ }
stop_tracing = 1;
+ if (hist_inst)
+ trace_instance_stop(hist_inst);
}
/*
* timerlat_hist_set_signals - handles the signal to stop the tool
*/
static void
-timerlat_hist_set_signals(struct timerlat_hist_params *params)
+timerlat_hist_set_signals(struct timerlat_params *params)
{
signal(SIGINT, stop_hist);
if (params->duration) {
@@ -1166,7 +1138,7 @@ timerlat_hist_set_signals(struct timerlat_hist_params *params)
int timerlat_hist_main(int argc, char *argv[])
{
- struct timerlat_hist_params *params;
+ struct timerlat_params *params;
struct osnoise_tool *record = NULL;
struct timerlat_u_params params_u;
struct osnoise_tool *tool = NULL;
@@ -1177,6 +1149,7 @@ int timerlat_hist_main(int argc, char *argv[])
pthread_t timerlat_u;
int retval;
int nr_cpus, i;
+ bool no_bpf = false;
params = timerlat_hist_parse_args(argc, argv);
if (!params)
@@ -1195,6 +1168,30 @@ int timerlat_hist_main(int argc, char *argv[])
}
trace = &tool->trace;
+ /*
+ * Save trace instance into global variable so that SIGINT can stop
+ * the timerlat tracer.
+ * Otherwise, rtla could loop indefinitely when overloaded.
+ */
+ hist_inst = trace;
+
+ if (getenv("RTLA_NO_BPF") && strncmp(getenv("RTLA_NO_BPF"), "1", 2) == 0) {
+ debug_msg("RTLA_NO_BPF set, disabling BPF\n");
+ no_bpf = true;
+ }
+
+ if (!no_bpf && !tep_find_event_by_name(trace->tep, "osnoise", "timerlat_sample")) {
+ debug_msg("osnoise:timerlat_sample missing, disabling BPF\n");
+ no_bpf = true;
+ }
+
+ if (!no_bpf) {
+ retval = timerlat_bpf_init(params);
+ if (retval) {
+ debug_msg("Could not enable BPF\n");
+ no_bpf = true;
+ }
+ }
retval = enable_timerlat(trace);
if (retval) {
@@ -1323,35 +1320,55 @@ int timerlat_hist_main(int argc, char *argv[])
trace_instance_start(&record->trace);
if (!params->no_aa)
trace_instance_start(&aa->trace);
- trace_instance_start(trace);
+ if (no_bpf) {
+ trace_instance_start(trace);
+ } else {
+ retval = timerlat_bpf_attach();
+ if (retval) {
+ err_msg("Error attaching BPF program\n");
+ goto out_hist;
+ }
+ }
tool->start_time = time(NULL);
timerlat_hist_set_signals(params);
- while (!stop_tracing) {
- sleep(params->sleep_time);
-
- retval = tracefs_iterate_raw_events(trace->tep,
- trace->inst,
- NULL,
- 0,
- collect_registered_events,
- trace);
- if (retval < 0) {
- err_msg("Error iterating on events\n");
- goto out_hist;
- }
-
- if (trace_is_off(&tool->trace, &record->trace))
- break;
+ if (no_bpf) {
+ while (!stop_tracing) {
+ sleep(params->sleep_time);
+
+ retval = tracefs_iterate_raw_events(trace->tep,
+ trace->inst,
+ NULL,
+ 0,
+ collect_registered_events,
+ trace);
+ if (retval < 0) {
+ err_msg("Error iterating on events\n");
+ goto out_hist;
+ }
- /* is there still any user-threads ? */
- if (params->user_workload) {
- if (params_u.stopped_running) {
- debug_msg("timerlat user-space threads stopped!\n");
+ if (osnoise_trace_is_off(tool, record))
break;
+
+ /* is there still any user-threads ? */
+ if (params->user_workload) {
+ if (params_u.stopped_running) {
+ debug_msg("timerlat user-space threads stopped!\n");
+ break;
+ }
}
}
+ } else
+ timerlat_bpf_wait(-1);
+
+ if (!no_bpf) {
+ timerlat_bpf_detach();
+ retval = timerlat_hist_bpf_pull_data(tool);
+ if (retval) {
+ err_msg("Error pulling BPF data\n");
+ goto out_hist;
+ }
}
if (params->user_workload && !params_u.stopped_running) {
@@ -1363,16 +1380,14 @@ int timerlat_hist_main(int argc, char *argv[])
return_value = 0;
- if (trace_is_off(&tool->trace, &record->trace)) {
+ if (osnoise_trace_is_off(tool, record) && !stop_tracing) {
printf("rtla timerlat hit stop tracing\n");
if (!params->no_aa)
timerlat_auto_analysis(params->stop_us, params->stop_total_us);
- if (params->trace_output) {
- printf(" Saving trace to %s\n", params->trace_output);
- save_trace_to_file(record->trace.inst, params->trace_output);
- }
+ save_trace_to_file(record ? record->trace.inst : NULL,
+ params->trace_output);
}
out_hist:
@@ -1395,6 +1410,8 @@ out_free:
osnoise_destroy_tool(tool);
free(params);
free_cpu_idle_disable_states();
+ if (!no_bpf)
+ timerlat_bpf_destroy();
out_exit:
exit(return_value);
}
diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c
index 059b468981e4..79cb6f28967f 100644
--- a/tools/tracing/rtla/src/timerlat_top.c
+++ b/tools/tracing/rtla/src/timerlat_top.c
@@ -15,44 +15,10 @@
#include <sched.h>
#include <pthread.h>
-#include "utils.h"
-#include "osnoise.h"
#include "timerlat.h"
#include "timerlat_aa.h"
#include "timerlat_u.h"
-
-struct timerlat_top_params {
- char *cpus;
- cpu_set_t monitored_cpus;
- char *trace_output;
- char *cgroup_name;
- unsigned long long runtime;
- long long stop_us;
- long long stop_total_us;
- long long timerlat_period_us;
- long long print_stack;
- int sleep_time;
- int output_divisor;
- int duration;
- int quiet;
- int set_sched;
- int dma_latency;
- int no_aa;
- int aa_only;
- int dump_tasks;
- int cgroup;
- int hk_cpus;
- int user_top;
- int user_workload;
- int kernel_workload;
- int pretty_output;
- int warmup;
- int buffer_size;
- int deepest_idle_state;
- cpu_set_t hk_cpu_set;
- struct sched_attr sched_param;
- struct trace_events *events;
-};
+#include "timerlat_bpf.h"
struct timerlat_top_cpu {
unsigned long long irq_count;
@@ -162,9 +128,13 @@ timerlat_top_update(struct osnoise_tool *tool, int cpu,
unsigned long long thread,
unsigned long long latency)
{
+ struct timerlat_params *params = tool->params;
struct timerlat_top_data *data = tool->data;
struct timerlat_top_cpu *cpu_data = &data->cpu_data[cpu];
+ if (params->output_divisor)
+ latency = latency / params->output_divisor;
+
if (!thread) {
cpu_data->irq_count++;
cpu_data->cur_irq = latency;
@@ -194,7 +164,7 @@ timerlat_top_handler(struct trace_seq *s, struct tep_record *record,
struct tep_event *event, void *context)
{
struct trace_instance *trace = context;
- struct timerlat_top_params *params;
+ struct timerlat_params *params;
unsigned long long latency, thread;
struct osnoise_tool *top;
int cpu = record->cpu;
@@ -213,9 +183,79 @@ timerlat_top_handler(struct trace_seq *s, struct tep_record *record,
}
/*
+ * timerlat_top_bpf_pull_data - copy data from BPF maps into userspace
+ */
+static int timerlat_top_bpf_pull_data(struct osnoise_tool *tool)
+{
+ struct timerlat_top_data *data = tool->data;
+ int i, err;
+ long long value_irq[data->nr_cpus],
+ value_thread[data->nr_cpus],
+ value_user[data->nr_cpus];
+
+ /* Pull summary */
+ err = timerlat_bpf_get_summary_value(SUMMARY_CURRENT,
+ value_irq, value_thread, value_user,
+ data->nr_cpus);
+ if (err)
+ return err;
+ for (i = 0; i < data->nr_cpus; i++) {
+ data->cpu_data[i].cur_irq = value_irq[i];
+ data->cpu_data[i].cur_thread = value_thread[i];
+ data->cpu_data[i].cur_user = value_user[i];
+ }
+
+ err = timerlat_bpf_get_summary_value(SUMMARY_COUNT,
+ value_irq, value_thread, value_user,
+ data->nr_cpus);
+ if (err)
+ return err;
+ for (i = 0; i < data->nr_cpus; i++) {
+ data->cpu_data[i].irq_count = value_irq[i];
+ data->cpu_data[i].thread_count = value_thread[i];
+ data->cpu_data[i].user_count = value_user[i];
+ }
+
+ err = timerlat_bpf_get_summary_value(SUMMARY_MIN,
+ value_irq, value_thread, value_user,
+ data->nr_cpus);
+ if (err)
+ return err;
+ for (i = 0; i < data->nr_cpus; i++) {
+ data->cpu_data[i].min_irq = value_irq[i];
+ data->cpu_data[i].min_thread = value_thread[i];
+ data->cpu_data[i].min_user = value_user[i];
+ }
+
+ err = timerlat_bpf_get_summary_value(SUMMARY_MAX,
+ value_irq, value_thread, value_user,
+ data->nr_cpus);
+ if (err)
+ return err;
+ for (i = 0; i < data->nr_cpus; i++) {
+ data->cpu_data[i].max_irq = value_irq[i];
+ data->cpu_data[i].max_thread = value_thread[i];
+ data->cpu_data[i].max_user = value_user[i];
+ }
+
+ err = timerlat_bpf_get_summary_value(SUMMARY_SUM,
+ value_irq, value_thread, value_user,
+ data->nr_cpus);
+ if (err)
+ return err;
+ for (i = 0; i < data->nr_cpus; i++) {
+ data->cpu_data[i].sum_irq = value_irq[i];
+ data->cpu_data[i].sum_thread = value_thread[i];
+ data->cpu_data[i].sum_user = value_user[i];
+ }
+
+ return 0;
+}
+
+/*
* timerlat_top_header - print the header of the tool output
*/
-static void timerlat_top_header(struct timerlat_top_params *params, struct osnoise_tool *top)
+static void timerlat_top_header(struct timerlat_params *params, struct osnoise_tool *top)
{
struct trace_seq *s = top->trace.seq;
char duration[26];
@@ -226,7 +266,7 @@ static void timerlat_top_header(struct timerlat_top_params *params, struct osnoi
trace_seq_printf(s, "\033[2;37;40m");
trace_seq_printf(s, " Timer Latency ");
- if (params->user_top)
+ if (params->user_data)
trace_seq_printf(s, " ");
if (params->pretty_output)
@@ -237,7 +277,7 @@ static void timerlat_top_header(struct timerlat_top_params *params, struct osnoi
params->output_divisor == 1 ? "ns" : "us",
params->output_divisor == 1 ? "ns" : "us");
- if (params->user_top) {
+ if (params->user_data) {
trace_seq_printf(s, " | Ret user Timer Latency (%s)",
params->output_divisor == 1 ? "ns" : "us");
}
@@ -247,7 +287,7 @@ static void timerlat_top_header(struct timerlat_top_params *params, struct osnoi
trace_seq_printf(s, "\033[2;30;47m");
trace_seq_printf(s, "CPU COUNT | cur min avg max | cur min avg max");
- if (params->user_top)
+ if (params->user_data)
trace_seq_printf(s, " | cur min avg max");
if (params->pretty_output)
@@ -263,15 +303,11 @@ static const char *no_value = " -";
static void timerlat_top_print(struct osnoise_tool *top, int cpu)
{
- struct timerlat_top_params *params = top->params;
+ struct timerlat_params *params = top->params;
struct timerlat_top_data *data = top->data;
struct timerlat_top_cpu *cpu_data = &data->cpu_data[cpu];
- int divisor = params->output_divisor;
struct trace_seq *s = top->trace.seq;
- if (divisor == 0)
- return;
-
/*
* Skip if no data is available: is this cpu offline?
*/
@@ -286,23 +322,23 @@ static void timerlat_top_print(struct osnoise_tool *top, int cpu)
if (!cpu_data->irq_count) {
trace_seq_printf(s, "%s %s %s %s |", no_value, no_value, no_value, no_value);
} else {
- trace_seq_printf(s, "%9llu ", cpu_data->cur_irq / params->output_divisor);
- trace_seq_printf(s, "%9llu ", cpu_data->min_irq / params->output_divisor);
- trace_seq_printf(s, "%9llu ", (cpu_data->sum_irq / cpu_data->irq_count) / divisor);
- trace_seq_printf(s, "%9llu |", cpu_data->max_irq / divisor);
+ trace_seq_printf(s, "%9llu ", cpu_data->cur_irq);
+ trace_seq_printf(s, "%9llu ", cpu_data->min_irq);
+ trace_seq_printf(s, "%9llu ", cpu_data->sum_irq / cpu_data->irq_count);
+ trace_seq_printf(s, "%9llu |", cpu_data->max_irq);
}
if (!cpu_data->thread_count) {
trace_seq_printf(s, "%s %s %s %s", no_value, no_value, no_value, no_value);
} else {
- trace_seq_printf(s, "%9llu ", cpu_data->cur_thread / divisor);
- trace_seq_printf(s, "%9llu ", cpu_data->min_thread / divisor);
+ trace_seq_printf(s, "%9llu ", cpu_data->cur_thread);
+ trace_seq_printf(s, "%9llu ", cpu_data->min_thread);
trace_seq_printf(s, "%9llu ",
- (cpu_data->sum_thread / cpu_data->thread_count) / divisor);
- trace_seq_printf(s, "%9llu", cpu_data->max_thread / divisor);
+ cpu_data->sum_thread / cpu_data->thread_count);
+ trace_seq_printf(s, "%9llu", cpu_data->max_thread);
}
- if (!params->user_top) {
+ if (!params->user_data) {
trace_seq_printf(s, "\n");
return;
}
@@ -312,11 +348,11 @@ static void timerlat_top_print(struct osnoise_tool *top, int cpu)
if (!cpu_data->user_count) {
trace_seq_printf(s, "%s %s %s %s\n", no_value, no_value, no_value, no_value);
} else {
- trace_seq_printf(s, "%9llu ", cpu_data->cur_user / divisor);
- trace_seq_printf(s, "%9llu ", cpu_data->min_user / divisor);
+ trace_seq_printf(s, "%9llu ", cpu_data->cur_user);
+ trace_seq_printf(s, "%9llu ", cpu_data->min_user);
trace_seq_printf(s, "%9llu ",
- (cpu_data->sum_user / cpu_data->user_count) / divisor);
- trace_seq_printf(s, "%9llu\n", cpu_data->max_user / divisor);
+ cpu_data->sum_user / cpu_data->user_count);
+ trace_seq_printf(s, "%9llu\n", cpu_data->max_user);
}
}
@@ -327,15 +363,11 @@ static void
timerlat_top_print_sum(struct osnoise_tool *top, struct timerlat_top_cpu *summary)
{
const char *split = "----------------------------------------";
- struct timerlat_top_params *params = top->params;
+ struct timerlat_params *params = top->params;
unsigned long long count = summary->irq_count;
- int divisor = params->output_divisor;
struct trace_seq *s = top->trace.seq;
int e = 0;
- if (divisor == 0)
- return;
-
/*
* Skip if no data is available: is this cpu offline?
*/
@@ -348,7 +380,7 @@ timerlat_top_print_sum(struct osnoise_tool *top, struct timerlat_top_cpu *summar
}
trace_seq_printf(s, "%.*s|%.*s|%.*s", 15, split, 40, split, 39, split);
- if (params->user_top)
+ if (params->user_data)
trace_seq_printf(s, "-|%.*s", 39, split);
trace_seq_printf(s, "\n");
@@ -358,22 +390,22 @@ timerlat_top_print_sum(struct osnoise_tool *top, struct timerlat_top_cpu *summar
trace_seq_printf(s, " %s %s %s |", no_value, no_value, no_value);
} else {
trace_seq_printf(s, " ");
- trace_seq_printf(s, "%9llu ", summary->min_irq / params->output_divisor);
- trace_seq_printf(s, "%9llu ", (summary->sum_irq / summary->irq_count) / divisor);
- trace_seq_printf(s, "%9llu |", summary->max_irq / divisor);
+ trace_seq_printf(s, "%9llu ", summary->min_irq);
+ trace_seq_printf(s, "%9llu ", summary->sum_irq / summary->irq_count);
+ trace_seq_printf(s, "%9llu |", summary->max_irq);
}
if (!summary->thread_count) {
trace_seq_printf(s, "%s %s %s %s", no_value, no_value, no_value, no_value);
} else {
trace_seq_printf(s, " ");
- trace_seq_printf(s, "%9llu ", summary->min_thread / divisor);
+ trace_seq_printf(s, "%9llu ", summary->min_thread);
trace_seq_printf(s, "%9llu ",
- (summary->sum_thread / summary->thread_count) / divisor);
- trace_seq_printf(s, "%9llu", summary->max_thread / divisor);
+ summary->sum_thread / summary->thread_count);
+ trace_seq_printf(s, "%9llu", summary->max_thread);
}
- if (!params->user_top) {
+ if (!params->user_data) {
trace_seq_printf(s, "\n");
return;
}
@@ -384,10 +416,10 @@ timerlat_top_print_sum(struct osnoise_tool *top, struct timerlat_top_cpu *summar
trace_seq_printf(s, " %s %s %s |", no_value, no_value, no_value);
} else {
trace_seq_printf(s, " ");
- trace_seq_printf(s, "%9llu ", summary->min_user / divisor);
+ trace_seq_printf(s, "%9llu ", summary->min_user);
trace_seq_printf(s, "%9llu ",
- (summary->sum_user / summary->user_count) / divisor);
- trace_seq_printf(s, "%9llu\n", summary->max_user / divisor);
+ summary->sum_user / summary->user_count);
+ trace_seq_printf(s, "%9llu\n", summary->max_user);
}
}
@@ -404,7 +436,7 @@ static void clear_terminal(struct trace_seq *seq)
* timerlat_print_stats - print data for all cpus
*/
static void
-timerlat_print_stats(struct timerlat_top_params *params, struct osnoise_tool *top)
+timerlat_print_stats(struct timerlat_params *params, struct osnoise_tool *top)
{
struct trace_instance *trace = &top->trace;
struct timerlat_top_cpu summary;
@@ -435,6 +467,7 @@ timerlat_print_stats(struct timerlat_top_params *params, struct osnoise_tool *to
trace_seq_do_printf(trace->seq);
trace_seq_reset(trace->seq);
+ osnoise_report_missed_events(top);
}
/*
@@ -504,10 +537,10 @@ static void timerlat_top_usage(char *usage)
/*
* timerlat_top_parse_args - allocs, parse and fill the cmd line parameters
*/
-static struct timerlat_top_params
+static struct timerlat_params
*timerlat_top_parse_args(int argc, char **argv)
{
- struct timerlat_top_params *params;
+ struct timerlat_params *params;
struct trace_events *tevent;
long long auto_thresh;
int retval;
@@ -689,7 +722,7 @@ static struct timerlat_top_params
params->user_workload = true;
/* fallback: -u implies -U */
case 'U':
- params->user_top = true;
+ params->user_data = true;
break;
case '0': /* trigger */
if (params->events) {
@@ -764,100 +797,13 @@ static struct timerlat_top_params
* timerlat_top_apply_config - apply the top configs to the initialized tool
*/
static int
-timerlat_top_apply_config(struct osnoise_tool *top, struct timerlat_top_params *params)
+timerlat_top_apply_config(struct osnoise_tool *top, struct timerlat_params *params)
{
int retval;
- int i;
-
- if (!params->sleep_time)
- params->sleep_time = 1;
-
- if (params->cpus) {
- retval = osnoise_set_cpus(top->context, params->cpus);
- if (retval) {
- err_msg("Failed to apply CPUs config\n");
- goto out_err;
- }
- } else {
- for (i = 0; i < sysconf(_SC_NPROCESSORS_CONF); i++)
- CPU_SET(i, &params->monitored_cpus);
- }
-
- if (params->stop_us) {
- retval = osnoise_set_stop_us(top->context, params->stop_us);
- if (retval) {
- err_msg("Failed to set stop us\n");
- goto out_err;
- }
- }
- if (params->stop_total_us) {
- retval = osnoise_set_stop_total_us(top->context, params->stop_total_us);
- if (retval) {
- err_msg("Failed to set stop total us\n");
- goto out_err;
- }
- }
-
-
- if (params->timerlat_period_us) {
- retval = osnoise_set_timerlat_period_us(top->context, params->timerlat_period_us);
- if (retval) {
- err_msg("Failed to set timerlat period\n");
- goto out_err;
- }
- }
-
-
- if (params->print_stack) {
- retval = osnoise_set_print_stack(top->context, params->print_stack);
- if (retval) {
- err_msg("Failed to set print stack\n");
- goto out_err;
- }
- }
-
- if (params->hk_cpus) {
- retval = sched_setaffinity(getpid(), sizeof(params->hk_cpu_set),
- &params->hk_cpu_set);
- if (retval == -1) {
- err_msg("Failed to set rtla to the house keeping CPUs\n");
- goto out_err;
- }
- } else if (params->cpus) {
- /*
- * Even if the user do not set a house-keeping CPU, try to
- * move rtla to a CPU set different to the one where the user
- * set the workload to run.
- *
- * No need to check results as this is an automatic attempt.
- */
- auto_house_keeping(&params->monitored_cpus);
- }
-
- /*
- * If the user did not specify a type of thread, try user-threads first.
- * Fall back to kernel threads otherwise.
- */
- if (!params->kernel_workload && !params->user_top) {
- retval = tracefs_file_exists(NULL, "osnoise/per_cpu/cpu0/timerlat_fd");
- if (retval) {
- debug_msg("User-space interface detected, setting user-threads\n");
- params->user_workload = 1;
- params->user_top = 1;
- } else {
- debug_msg("User-space interface not detected, setting kernel-threads\n");
- params->kernel_workload = 1;
- }
- }
-
- if (params->user_top) {
- retval = osnoise_set_workload(top->context, 0);
- if (retval) {
- err_msg("Failed to set OSNOISE_WORKLOAD option\n");
- goto out_err;
- }
- }
+ retval = timerlat_apply_config(top, params);
+ if (retval)
+ goto out_err;
if (isatty(STDOUT_FILENO) && !params->quiet)
params->pretty_output = 1;
@@ -872,7 +818,7 @@ out_err:
* timerlat_init_top - initialize a timerlat top tool with parameters
*/
static struct osnoise_tool
-*timerlat_init_top(struct timerlat_top_params *params)
+*timerlat_init_top(struct timerlat_params *params)
{
struct osnoise_tool *top;
int nr_cpus;
@@ -900,16 +846,27 @@ out_err:
}
static int stop_tracing;
+static struct trace_instance *top_inst = NULL;
static void stop_top(int sig)
{
+ if (stop_tracing) {
+ /*
+ * Stop requested twice in a row; abort event processing and
+ * exit immediately
+ */
+ tracefs_iterate_stop(top_inst->inst);
+ return;
+ }
stop_tracing = 1;
+ if (top_inst)
+ trace_instance_stop(top_inst);
}
/*
* timerlat_top_set_signals - handles the signal to stop the tool
*/
static void
-timerlat_top_set_signals(struct timerlat_top_params *params)
+timerlat_top_set_signals(struct timerlat_params *params)
{
signal(SIGINT, stop_top);
if (params->duration) {
@@ -918,9 +875,114 @@ timerlat_top_set_signals(struct timerlat_top_params *params)
}
}
+/*
+ * timerlat_top_main_loop - main loop to process events
+ */
+static int
+timerlat_top_main_loop(struct osnoise_tool *top,
+ struct osnoise_tool *record,
+ struct timerlat_params *params,
+ struct timerlat_u_params *params_u)
+{
+ struct trace_instance *trace = &top->trace;
+ int retval;
+
+ while (!stop_tracing) {
+ sleep(params->sleep_time);
+
+ if (params->aa_only && !osnoise_trace_is_off(top, record))
+ continue;
+
+ retval = tracefs_iterate_raw_events(trace->tep,
+ trace->inst,
+ NULL,
+ 0,
+ collect_registered_events,
+ trace);
+ if (retval < 0) {
+ err_msg("Error iterating on events\n");
+ return retval;
+ }
+
+ if (!params->quiet)
+ timerlat_print_stats(params, top);
+
+ if (osnoise_trace_is_off(top, record))
+ break;
+
+ /* is there still any user-threads ? */
+ if (params->user_workload) {
+ if (params_u->stopped_running) {
+ debug_msg("timerlat user space threads stopped!\n");
+ break;
+ }
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * timerlat_top_bpf_main_loop - main loop to process events (BPF variant)
+ */
+static int
+timerlat_top_bpf_main_loop(struct osnoise_tool *top,
+ struct osnoise_tool *record,
+ struct timerlat_params *params,
+ struct timerlat_u_params *params_u)
+{
+ int retval, wait_retval;
+
+ if (params->aa_only) {
+ /* Auto-analysis only, just wait for stop tracing */
+ timerlat_bpf_wait(-1);
+ return 0;
+ }
+
+ if (params->quiet) {
+ /* Quiet mode: wait for stop and then, print results */
+ timerlat_bpf_wait(-1);
+
+ retval = timerlat_top_bpf_pull_data(top);
+ if (retval) {
+ err_msg("Error pulling BPF data\n");
+ return retval;
+ }
+
+ return 0;
+ }
+
+ /* Pull and display data in a loop */
+ while (!stop_tracing) {
+ wait_retval = timerlat_bpf_wait(params->sleep_time);
+
+ retval = timerlat_top_bpf_pull_data(top);
+ if (retval) {
+ err_msg("Error pulling BPF data\n");
+ return retval;
+ }
+
+ timerlat_print_stats(params, top);
+
+ if (wait_retval == 1)
+ /* Stopping requested by tracer */
+ break;
+
+ /* is there still any user-threads ? */
+ if (params->user_workload) {
+ if (params_u->stopped_running) {
+ debug_msg("timerlat user space threads stopped!\n");
+ break;
+ }
+ }
+ }
+
+ return 0;
+}
+
int timerlat_top_main(int argc, char *argv[])
{
- struct timerlat_top_params *params;
+ struct timerlat_params *params;
struct osnoise_tool *record = NULL;
struct timerlat_u_params params_u;
struct osnoise_tool *top = NULL;
@@ -932,6 +994,7 @@ int timerlat_top_main(int argc, char *argv[])
char *max_lat;
int retval;
int nr_cpus, i;
+ bool no_bpf = false;
params = timerlat_top_parse_args(argc, argv);
if (!params)
@@ -950,6 +1013,30 @@ int timerlat_top_main(int argc, char *argv[])
}
trace = &top->trace;
+ /*
+ * Save trace instance into global variable so that SIGINT can stop
+ * the timerlat tracer.
+ * Otherwise, rtla could loop indefinitely when overloaded.
+ */
+ top_inst = trace;
+
+ if (getenv("RTLA_NO_BPF") && strncmp(getenv("RTLA_NO_BPF"), "1", 2) == 0) {
+ debug_msg("RTLA_NO_BPF set, disabling BPF\n");
+ no_bpf = true;
+ }
+
+ if (!no_bpf && !tep_find_event_by_name(trace->tep, "osnoise", "timerlat_sample")) {
+ debug_msg("osnoise:timerlat_sample missing, disabling BPF\n");
+ no_bpf = true;
+ }
+
+ if (!no_bpf) {
+ retval = timerlat_bpf_init(params);
+ if (retval) {
+ debug_msg("Could not enable BPF\n");
+ no_bpf = true;
+ }
+ }
retval = enable_timerlat(trace);
if (retval) {
@@ -965,7 +1052,7 @@ int timerlat_top_main(int argc, char *argv[])
}
}
- if (params->cgroup && !params->user_top) {
+ if (params->cgroup && !params->user_data) {
retval = set_comm_cgroup("timerlat/", params->cgroup_name);
if (!retval) {
err_msg("Failed to move threads to cgroup\n");
@@ -1024,15 +1111,9 @@ int timerlat_top_main(int argc, char *argv[])
}
if (!params->no_aa) {
- if (params->aa_only) {
- /* as top is not used for display, use it for aa */
- aa = top;
- } else {
- /* otherwise, a new instance is needed */
- aa = osnoise_init_tool("timerlat_aa");
- if (!aa)
- goto out_top;
- }
+ aa = osnoise_init_tool("timerlat_aa");
+ if (!aa)
+ goto out_top;
retval = timerlat_aa_init(aa, params->dump_tasks);
if (retval) {
@@ -1083,44 +1164,31 @@ int timerlat_top_main(int argc, char *argv[])
*/
if (params->trace_output)
trace_instance_start(&record->trace);
- if (!params->no_aa && aa != top)
+ if (!params->no_aa)
trace_instance_start(&aa->trace);
- trace_instance_start(trace);
+ if (no_bpf) {
+ trace_instance_start(trace);
+ } else {
+ retval = timerlat_bpf_attach();
+ if (retval) {
+ err_msg("Error attaching BPF program\n");
+ goto out_top;
+ }
+ }
top->start_time = time(NULL);
timerlat_top_set_signals(params);
- while (!stop_tracing) {
- sleep(params->sleep_time);
+ if (no_bpf)
+ retval = timerlat_top_main_loop(top, record, params, &params_u);
+ else
+ retval = timerlat_top_bpf_main_loop(top, record, params, &params_u);
- if (params->aa_only && !trace_is_off(&top->trace, &record->trace))
- continue;
+ if (retval)
+ goto out_top;
- retval = tracefs_iterate_raw_events(trace->tep,
- trace->inst,
- NULL,
- 0,
- collect_registered_events,
- trace);
- if (retval < 0) {
- err_msg("Error iterating on events\n");
- goto out_top;
- }
-
- if (!params->quiet)
- timerlat_print_stats(params, top);
-
- if (trace_is_off(&top->trace, &record->trace))
- break;
-
- /* is there still any user-threads ? */
- if (params->user_workload) {
- if (params_u.stopped_running) {
- debug_msg("timerlat user space threads stopped!\n");
- break;
- }
- }
- }
+ if (!no_bpf)
+ timerlat_bpf_detach();
if (params->user_workload && !params_u.stopped_running) {
params_u.should_run = 0;
@@ -1131,16 +1199,14 @@ int timerlat_top_main(int argc, char *argv[])
return_value = 0;
- if (trace_is_off(&top->trace, &record->trace)) {
+ if (osnoise_trace_is_off(top, record) && !stop_tracing) {
printf("rtla timerlat hit stop tracing\n");
if (!params->no_aa)
timerlat_auto_analysis(params->stop_us, params->stop_total_us);
- if (params->trace_output) {
- printf(" Saving trace to %s\n", params->trace_output);
- save_trace_to_file(record->trace.inst, params->trace_output);
- }
+ save_trace_to_file(record ? record->trace.inst : NULL,
+ params->trace_output);
} else if (params->aa_only) {
/*
* If the trace did not stop with --aa-only, at least print the
diff --git a/tools/tracing/rtla/src/trace.c b/tools/tracing/rtla/src/trace.c
index 170a706248ab..69cbc48d53d3 100644
--- a/tools/tracing/rtla/src/trace.c
+++ b/tools/tracing/rtla/src/trace.c
@@ -75,12 +75,16 @@ int save_trace_to_file(struct tracefs_instance *inst, const char *filename)
int out_fd, in_fd;
int retval = -1;
+ if (!inst || !filename)
+ return 0;
+
in_fd = tracefs_instance_file_open(inst, file, O_RDONLY);
if (in_fd < 0) {
err_msg("Failed to open trace file\n");
return -1;
}
+ printf(" Saving trace to %s\n", filename);
out_fd = creat(filename, mode);
if (out_fd < 0) {
err_msg("Failed to create output file %s\n", filename);
@@ -118,6 +122,8 @@ collect_registered_events(struct tep_event *event, struct tep_record *record,
struct trace_instance *trace = context;
struct trace_seq *s = trace->seq;
+ trace->processed_events++;
+
if (!event->handler)
return 0;
@@ -127,6 +133,31 @@ collect_registered_events(struct tep_event *event, struct tep_record *record,
}
/*
+ * collect_missed_events - record number of missed events
+ *
+ * If rtla cannot keep up with events generated by tracer, events are going
+ * to fall out of the ring buffer.
+ * Collect how many events were missed so it can be reported to the user.
+ */
+static int
+collect_missed_events(struct tep_event *event, struct tep_record *record,
+ int cpu, void *context)
+{
+ struct trace_instance *trace = context;
+
+ if (trace->missed_events == UINT64_MAX)
+ return 0;
+
+ if (record->missed_events > 0)
+ trace->missed_events += record->missed_events;
+ else
+ /* Events missed but no data on how many */
+ trace->missed_events = UINT64_MAX;
+
+ return 0;
+}
+
+/*
* trace_instance_destroy - destroy and free a rtla trace instance
*/
void trace_instance_destroy(struct trace_instance *trace)
@@ -181,6 +212,17 @@ int trace_instance_init(struct trace_instance *trace, char *tool_name)
*/
tracefs_trace_off(trace->inst);
+ /*
+ * Collect the number of events missed due to tracefs buffer
+ * overflow.
+ */
+ trace->missed_events = 0;
+ tracefs_follow_missed_events(trace->inst,
+ collect_missed_events,
+ trace);
+
+ trace->processed_events = 0;
+
return 0;
out_err:
@@ -197,6 +239,14 @@ int trace_instance_start(struct trace_instance *trace)
}
/*
+ * trace_instance_stop - stop tracing a given rtla instance
+ */
+int trace_instance_stop(struct trace_instance *trace)
+{
+ return tracefs_trace_off(trace->inst);
+}
+
+/*
* trace_events_free - free a list of trace events
*/
static void trace_events_free(struct trace_events *events)
@@ -522,25 +572,6 @@ void trace_events_destroy(struct trace_instance *instance,
trace_events_free(events);
}
-int trace_is_off(struct trace_instance *tool, struct trace_instance *trace)
-{
- /*
- * The tool instance is always present, it is the one used to collect
- * data.
- */
- if (!tracefs_trace_is_on(tool->inst))
- return 1;
-
- /*
- * The trace instance is only enabled when -t is set. IOW, when the system
- * is tracing.
- */
- if (trace && !tracefs_trace_is_on(trace->inst))
- return 1;
-
- return 0;
-}
-
/*
* trace_set_buffer_size - set the per-cpu tracing buffer size.
*/
diff --git a/tools/tracing/rtla/src/trace.h b/tools/tracing/rtla/src/trace.h
index c7c92dc9a18a..3cd40dd3f06c 100644
--- a/tools/tracing/rtla/src/trace.h
+++ b/tools/tracing/rtla/src/trace.h
@@ -17,10 +17,13 @@ struct trace_instance {
struct tracefs_instance *inst;
struct tep_handle *tep;
struct trace_seq *seq;
+ unsigned long long missed_events;
+ unsigned long long processed_events;
};
int trace_instance_init(struct trace_instance *trace, char *tool_name);
int trace_instance_start(struct trace_instance *trace);
+int trace_instance_stop(struct trace_instance *trace);
void trace_instance_destroy(struct trace_instance *trace);
struct trace_seq *get_trace_seq(void);
@@ -47,5 +50,4 @@ int trace_events_enable(struct trace_instance *instance,
int trace_event_add_filter(struct trace_events *event, char *filter);
int trace_event_add_trigger(struct trace_events *event, char *trigger);
-int trace_is_off(struct trace_instance *tool, struct trace_instance *trace);
int trace_set_buffer_size(struct trace_instance *trace, int size);
diff --git a/tools/tracing/rtla/tests/engine.sh b/tools/tracing/rtla/tests/engine.sh
new file mode 100644
index 000000000000..b1697b3e3f52
--- /dev/null
+++ b/tools/tracing/rtla/tests/engine.sh
@@ -0,0 +1,114 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+test_begin() {
+ # Count tests to allow the test harness to double-check if all were
+ # included correctly.
+ ctr=0
+ [ -z "$RTLA" ] && RTLA="./rtla"
+ [ -n "$TEST_COUNT" ] && echo "1..$TEST_COUNT"
+}
+
+reset_osnoise() {
+ # Reset osnoise options to default and remove any dangling instances created
+ # by improperly exited rtla runs.
+ pushd /sys/kernel/tracing || return 1
+
+ # Remove dangling instances created by previous rtla run
+ echo 0 > tracing_thresh
+ cd instances
+ for i in osnoise_top osnoise_hist timerlat_top timerlat_hist
+ do
+ [ ! -d "$i" ] && continue
+ rmdir "$i"
+ done
+
+ # Reset options to default
+ # Note: those are copied from the default values of osnoise_data
+ # in kernel/trace/trace_osnoise.c
+ cd ../osnoise
+ echo all > cpus
+ echo DEFAULTS > options
+ echo 1000000 > period_us
+ echo 0 > print_stack
+ echo 1000000 > runtime_us
+ echo 0 > stop_tracing_total_us
+ echo 0 > stop_tracing_us
+ echo 1000 > timerlat_period_us
+
+ popd
+}
+
+check() {
+ # Simple check: run rtla with given arguments and test exit code.
+ # If TEST_COUNT is set, run the test. Otherwise, just count.
+ ctr=$(($ctr + 1))
+ if [ -n "$TEST_COUNT" ]
+ then
+ # Reset osnoise options before running test.
+ [ "$NO_RESET_OSNOISE" == 1 ] || reset_osnoise
+ # Run rtla; in case of failure, include its output as comment
+ # in the test results.
+ result=$(stdbuf -oL $TIMEOUT "$RTLA" $2 2>&1); exitcode=$?
+ if [ $exitcode -eq 0 ]
+ then
+ echo "ok $ctr - $1"
+ else
+ echo "not ok $ctr - $1"
+ # Add rtla output and exit code as comments in case of failure
+ echo "$result" | col -b | while read line; do echo "# $line"; done
+ printf "#\n# exit code %s\n" $exitcode
+ fi
+ fi
+}
+
+check_with_osnoise_options() {
+ # Do the same as "check", but with pre-set osnoise options.
+ # Note: rtla should reset the osnoise options, this is used to test
+ # if it indeed does so.
+ # Save original arguments
+ arg1=$1
+ arg2=$2
+
+ # Apply osnoise options (if not dry run)
+ if [ -n "$TEST_COUNT" ]
+ then
+ [ "$NO_RESET_OSNOISE" == 1 ] || reset_osnoise
+ shift
+ while shift
+ do
+ [ "$1" == "" ] && continue
+ option=$(echo $1 | cut -d '=' -f 1)
+ value=$(echo $1 | cut -d '=' -f 2)
+ echo "option: $option, value: $value"
+ echo "$value" > "/sys/kernel/tracing/osnoise/$option" || return 1
+ done
+ fi
+
+ NO_RESET_OSNOISE=1 check "$arg1" "$arg2"
+}
+
+set_timeout() {
+ TIMEOUT="timeout -v -k 15s $1"
+}
+
+unset_timeout() {
+ unset TIMEOUT
+}
+
+set_no_reset_osnoise() {
+ NO_RESET_OSNOISE=1
+}
+
+unset_no_reset_osnoise() {
+ unset NO_RESET_OSNOISE
+}
+
+test_end() {
+ # If running without TEST_COUNT, tests are not actually run, just
+ # counted. In that case, re-run the test with the correct count.
+ [ -z "$TEST_COUNT" ] && TEST_COUNT=$ctr exec bash $0 || true
+}
+
+# Avoid any environmental discrepancies
+export LC_ALL=C
+unset_timeout
diff --git a/tools/tracing/rtla/tests/hwnoise.t b/tools/tracing/rtla/tests/hwnoise.t
new file mode 100644
index 000000000000..bbed17580537
--- /dev/null
+++ b/tools/tracing/rtla/tests/hwnoise.t
@@ -0,0 +1,21 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+source tests/engine.sh
+test_begin
+
+set_timeout 2m
+
+check "verify help page" \
+ "hwnoise --help"
+check "detect noise higher than one microsecond" \
+ "hwnoise -c 0 -T 1 -d 5s -q"
+check "set the automatic trace mode" \
+ "hwnoise -a 5 -d 30s"
+check "set scheduling param to the osnoise tracer threads" \
+ "hwnoise -P F:1 -c 0 -r 900000 -d 1M -q"
+check "stop the trace if a single sample is higher than 1 us" \
+ "hwnoise -s 1 -T 1 -t -d 30s"
+check "enable a trace event trigger" \
+ "hwnoise -t -e osnoise:irq_noise trigger=\"hist:key=desc,duration:sort=desc,duration:vals=hitcount\" -d 1m"
+
+test_end
diff --git a/tools/tracing/rtla/tests/osnoise.t b/tools/tracing/rtla/tests/osnoise.t
new file mode 100644
index 000000000000..e5995c03c790
--- /dev/null
+++ b/tools/tracing/rtla/tests/osnoise.t
@@ -0,0 +1,25 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+source tests/engine.sh
+test_begin
+
+set_timeout 2m
+
+check "verify help page" \
+ "osnoise --help"
+check "verify the --priority/-P param" \
+ "osnoise top -P F:1 -c 0 -r 900000 -d 1M -q"
+check "verify the --stop/-s param" \
+ "osnoise top -s 30 -T 1 -t"
+check "verify the --trace param" \
+ "osnoise hist -s 30 -T 1 -t"
+check "verify the --entries/-E param" \
+ "osnoise hist -P F:1 -c 0 -r 900000 -d 1M -b 10 -E 25"
+
+# Test setting default period by putting an absurdly high period
+# and stopping on threshold.
+# If default period is not set, this will time out.
+check_with_osnoise_options "apply default period" \
+ "osnoise hist -s 1" period_us=600000000
+
+test_end
diff --git a/tools/tracing/rtla/tests/timerlat.t b/tools/tracing/rtla/tests/timerlat.t
new file mode 100644
index 000000000000..e939ff71d6be
--- /dev/null
+++ b/tools/tracing/rtla/tests/timerlat.t
@@ -0,0 +1,41 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+source tests/engine.sh
+test_begin
+
+set_timeout 2m
+timerlat_sample_event='/sys/kernel/tracing/events/osnoise/timerlat_sample'
+
+if ldd $RTLA | grep libbpf >/dev/null && [ -d "$timerlat_sample_event" ]
+then
+ # rtla build with BPF and system supports BPF mode
+ no_bpf_options='0 1'
+else
+ no_bpf_options='1'
+fi
+
+# Do every test with and without BPF
+for option in $no_bpf_options
+do
+export RTLA_NO_BPF=$option
+check "verify help page" \
+ "timerlat --help"
+check "verify -s/--stack" \
+ "timerlat top -s 3 -T 10 -t"
+check "verify -P/--priority" \
+ "timerlat top -P F:1 -c 0 -d 1M -q"
+check "test in nanoseconds" \
+ "timerlat top -i 2 -c 0 -n -d 30s"
+check "set the automatic trace mode" \
+ "timerlat top -a 5 --dump-tasks"
+check "print the auto-analysis if hits the stop tracing condition" \
+ "timerlat top --aa-only 5"
+check "disable auto-analysis" \
+ "timerlat top -s 3 -T 10 -t --no-aa"
+check "verify -c/--cpus" \
+ "timerlat hist -c 0 -d 30s"
+check "hist test in nanoseconds" \
+ "timerlat hist -i 2 -c 0 -n -d 30s"
+done
+
+test_end
diff --git a/tools/verification/dot2/automata.py b/tools/verification/dot2/automata.py
index bdeb98baa8b0..d9a3fe2b74bf 100644
--- a/tools/verification/dot2/automata.py
+++ b/tools/verification/dot2/automata.py
@@ -19,13 +19,14 @@ class Automata:
invalid_state_str = "INVALID_STATE"
- def __init__(self, file_path):
+ def __init__(self, file_path, model_name=None):
self.__dot_path = file_path
- self.name = self.__get_model_name()
+ self.name = model_name or self.__get_model_name()
self.__dot_lines = self.__open_dot()
self.states, self.initial_state, self.final_states = self.__get_state_variables()
self.events = self.__get_event_variables()
self.function = self.__create_matrix()
+ self.events_start, self.events_start_run = self.__store_init_events()
def __get_model_name(self):
basename = ntpath.basename(self.__dot_path)
@@ -172,3 +173,34 @@ class Automata:
cursor += 1
return matrix
+
+ def __store_init_events(self):
+ events_start = [False] * len(self.events)
+ events_start_run = [False] * len(self.events)
+ for i, _ in enumerate(self.events):
+ curr_event_will_init = 0
+ curr_event_from_init = False
+ curr_event_used = 0
+ for j, _ in enumerate(self.states):
+ if self.function[j][i] != self.invalid_state_str:
+ curr_event_used += 1
+ if self.function[j][i] == self.initial_state:
+ curr_event_will_init += 1
+ if self.function[0][i] != self.invalid_state_str:
+ curr_event_from_init = True
+ # this event always leads to init
+ if curr_event_will_init and curr_event_used == curr_event_will_init:
+ events_start[i] = True
+ # this event is only called from init
+ if curr_event_from_init and curr_event_used == 1:
+ events_start_run[i] = True
+ return events_start, events_start_run
+
+ def is_start_event(self, event):
+ return self.events_start[self.events.index(event)]
+
+ def is_start_run_event(self, event):
+ # prefer handle_start_event if there
+ if any(self.events_start):
+ return False
+ return self.events_start_run[self.events.index(event)]
diff --git a/tools/verification/dot2/dot2c.py b/tools/verification/dot2/dot2c.py
index 87d8a1e1470c..fa2816ac7b61 100644
--- a/tools/verification/dot2/dot2c.py
+++ b/tools/verification/dot2/dot2c.py
@@ -22,8 +22,8 @@ class Dot2c(Automata):
struct_automaton_def = "automaton"
var_automaton_def = "aut"
- def __init__(self, file_path):
- super().__init__(file_path)
+ def __init__(self, file_path, model_name=None):
+ super().__init__(file_path, model_name)
self.line_length = 100
def __buff_to_string(self, buff):
diff --git a/tools/verification/dot2/dot2k b/tools/verification/dot2/dot2k
index d4d7e52d549e..767064f415e7 100644
--- a/tools/verification/dot2/dot2k
+++ b/tools/verification/dot2/dot2k
@@ -11,35 +11,43 @@
if __name__ == '__main__':
from dot2.dot2k import dot2k
import argparse
- import ntpath
- import os
- import platform
import sys
+ def is_container():
+ """Should work even before parsing the arguments"""
+ return "-c" in sys.argv or "--container" in sys.argv
+
parser = argparse.ArgumentParser(description='transform .dot file into kernel rv monitor')
- parser.add_argument('-d', "--dot", dest="dot_file", required=True)
- parser.add_argument('-t', "--monitor_type", dest="monitor_type", required=True)
- parser.add_argument('-n', "--model_name", dest="model_name", required=False)
+ parser.add_argument('-d', "--dot", dest="dot_file", required=not is_container())
+ parser.add_argument('-t', "--monitor_type", dest="monitor_type", required=not is_container(),
+ help=f"Available options: {', '.join(dot2k.monitor_types.keys())}")
+ parser.add_argument('-n', "--model_name", dest="model_name", required=is_container())
parser.add_argument("-D", "--description", dest="description", required=False)
+ parser.add_argument("-a", "--auto_patch", dest="auto_patch",
+ action="store_true", required=False,
+ help="Patch the kernel in place")
+ parser.add_argument("-p", "--parent", dest="parent",
+ required=False, help="Create a monitor nested to parent")
+ parser.add_argument("-c", "--container", dest="container",
+ action="store_true", required=False,
+ help="Create an empty monitor to be used as a container")
params = parser.parse_args()
- print("Opening and parsing the dot file %s" % params.dot_file)
+ if not is_container():
+ print("Opening and parsing the dot file %s" % params.dot_file)
try:
- monitor=dot2k(params.dot_file, params.monitor_type)
+ monitor=dot2k(params.dot_file, params.monitor_type, vars(params))
except Exception as e:
print('Error: '+ str(e))
print("Sorry : :-(")
sys.exit(1)
- # easier than using argparse action.
- if params.model_name != None:
- print(params.model_name)
-
print("Writing the monitor into the directory %s" % monitor.name)
monitor.print_files()
print("Almost done, checklist")
- print(" - Edit the %s/%s.c to add the instrumentation" % (monitor.name, monitor.name))
- print(" - Edit include/trace/events/rv.h to add the tracepoint entry")
- print(" - Move it to the kernel's monitor directory")
- print(" - Edit kernel/trace/rv/Makefile")
- print(" - Edit kernel/trace/rv/Kconfig")
+ if not is_container():
+ print(" - Edit the %s/%s.c to add the instrumentation" % (monitor.name, monitor.name))
+ print(monitor.fill_tracepoint_tooltip())
+ print(monitor.fill_makefile_tooltip())
+ print(monitor.fill_kconfig_tooltip())
+ print(monitor.fill_monitor_tooltip())
diff --git a/tools/verification/dot2/dot2k.py b/tools/verification/dot2/dot2k.py
index 016550fccf1f..745d35a4a379 100644
--- a/tools/verification/dot2/dot2k.py
+++ b/tools/verification/dot2/dot2k.py
@@ -14,50 +14,98 @@ import os
class dot2k(Dot2c):
monitor_types = { "global" : 1, "per_cpu" : 2, "per_task" : 3 }
- monitor_templates_dir = "dot2k/rv_templates/"
+ monitor_templates_dir = "dot2/dot2k_templates/"
+ rv_dir = "kernel/trace/rv"
monitor_type = "per_cpu"
- def __init__(self, file_path, MonitorType):
- super().__init__(file_path)
-
- self.monitor_type = self.monitor_types.get(MonitorType)
- if self.monitor_type == None:
- raise Exception("Unknown monitor type: %s" % MonitorType)
-
- self.monitor_type = MonitorType
+ def __init__(self, file_path, MonitorType, extra_params={}):
+ self.container = extra_params.get("container")
+ self.parent = extra_params.get("parent")
self.__fill_rv_templates_dir()
- self.main_c = self.__open_file(self.monitor_templates_dir + "main_" + MonitorType + ".c")
+
+ if self.container:
+ if file_path:
+ raise ValueError("A container does not require a dot file")
+ if MonitorType:
+ raise ValueError("A container does not require a monitor type")
+ if self.parent:
+ raise ValueError("A container cannot have a parent")
+ self.name = extra_params.get("model_name")
+ self.events = []
+ self.states = []
+ self.main_c = self.__read_file(self.monitor_templates_dir + "main_container.c")
+ self.main_h = self.__read_file(self.monitor_templates_dir + "main_container.h")
+ else:
+ super().__init__(file_path, extra_params.get("model_name"))
+
+ self.monitor_type = self.monitor_types.get(MonitorType)
+ if self.monitor_type is None:
+ raise ValueError("Unknown monitor type: %s" % MonitorType)
+ self.monitor_type = MonitorType
+ self.main_c = self.__read_file(self.monitor_templates_dir + "main.c")
+ self.trace_h = self.__read_file(self.monitor_templates_dir + "trace.h")
+ self.kconfig = self.__read_file(self.monitor_templates_dir + "Kconfig")
self.enum_suffix = "_%s" % self.name
+ self.description = extra_params.get("description", self.name) or "auto-generated"
+ self.auto_patch = extra_params.get("auto_patch")
+ if self.auto_patch:
+ self.__fill_rv_kernel_dir()
def __fill_rv_templates_dir(self):
- if os.path.exists(self.monitor_templates_dir) == True:
+ if os.path.exists(self.monitor_templates_dir):
return
if platform.system() != "Linux":
- raise Exception("I can only run on Linux.")
+ raise OSError("I can only run on Linux.")
kernel_path = "/lib/modules/%s/build/tools/verification/dot2/dot2k_templates/" % (platform.release())
- if os.path.exists(kernel_path) == True:
+ if os.path.exists(kernel_path):
self.monitor_templates_dir = kernel_path
return
- if os.path.exists("/usr/share/dot2/dot2k_templates/") == True:
+ if os.path.exists("/usr/share/dot2/dot2k_templates/"):
self.monitor_templates_dir = "/usr/share/dot2/dot2k_templates/"
return
- raise Exception("Could not find the template directory, do you have the kernel source installed?")
+ raise FileNotFoundError("Could not find the template directory, do you have the kernel source installed?")
+ def __fill_rv_kernel_dir(self):
+
+ # first try if we are running in the kernel tree root
+ if os.path.exists(self.rv_dir):
+ return
- def __open_file(self, path):
+ # offset if we are running inside the kernel tree from verification/dot2
+ kernel_path = os.path.join("../..", self.rv_dir)
+
+ if os.path.exists(kernel_path):
+ self.rv_dir = kernel_path
+ return
+
+ if platform.system() != "Linux":
+ raise OSError("I can only run on Linux.")
+
+ kernel_path = os.path.join("/lib/modules/%s/build" % platform.release(), self.rv_dir)
+
+ # if the current kernel is from a distro this may not be a full kernel tree
+ # verify that one of the files we are going to modify is available
+ if os.path.exists(os.path.join(kernel_path, "rv_trace.h")):
+ self.rv_dir = kernel_path
+ return
+
+ raise FileNotFoundError("Could not find the rv directory, do you have the kernel source installed?")
+
+ def __read_file(self, path):
try:
- fd = open(path)
+ fd = open(path, 'r')
except OSError:
raise Exception("Cannot open the file: %s" % path)
content = fd.read()
+ fd.close()
return content
def __buff_to_string(self, buff):
@@ -69,16 +117,34 @@ class dot2k(Dot2c):
# cut off the last \n
return string[:-1]
+ def fill_monitor_type(self):
+ return self.monitor_type.upper()
+
+ def fill_parent(self):
+ return "&rv_%s" % self.parent if self.parent else "NULL"
+
+ def fill_include_parent(self):
+ if self.parent:
+ return "#include <monitors/%s/%s.h>\n" % (self.parent, self.parent)
+ return ""
+
def fill_tracepoint_handlers_skel(self):
buff = []
for event in self.events:
buff.append("static void handle_%s(void *data, /* XXX: fill header */)" % event)
buff.append("{")
+ handle = "handle_event"
+ if self.is_start_event(event):
+ buff.append("\t/* XXX: validate that this event always leads to the initial state */")
+ handle = "handle_start_event"
+ elif self.is_start_run_event(event):
+ buff.append("\t/* XXX: validate that this event is only valid in the initial state */")
+ handle = "handle_start_run_event"
if self.monitor_type == "per_task":
buff.append("\tstruct task_struct *p = /* XXX: how do I get p? */;");
- buff.append("\tda_handle_event_%s(p, %s%s);" % (self.name, event, self.enum_suffix));
+ buff.append("\tda_%s_%s(p, %s%s);" % (handle, self.name, event, self.enum_suffix));
else:
- buff.append("\tda_handle_event_%s(%s%s);" % (self.name, event, self.enum_suffix));
+ buff.append("\tda_%s_%s(%s%s);" % (handle, self.name, event, self.enum_suffix));
buff.append("}")
buff.append("")
return self.__buff_to_string(buff)
@@ -97,23 +163,31 @@ class dot2k(Dot2c):
def fill_main_c(self):
main_c = self.main_c
+ monitor_type = self.fill_monitor_type()
min_type = self.get_minimun_type()
- nr_events = self.events.__len__()
+ nr_events = len(self.events)
tracepoint_handlers = self.fill_tracepoint_handlers_skel()
tracepoint_attach = self.fill_tracepoint_attach_probe()
tracepoint_detach = self.fill_tracepoint_detach_helper()
-
- main_c = main_c.replace("MIN_TYPE", min_type)
- main_c = main_c.replace("MODEL_NAME", self.name)
- main_c = main_c.replace("NR_EVENTS", str(nr_events))
- main_c = main_c.replace("TRACEPOINT_HANDLERS_SKEL", tracepoint_handlers)
- main_c = main_c.replace("TRACEPOINT_ATTACH", tracepoint_attach)
- main_c = main_c.replace("TRACEPOINT_DETACH", tracepoint_detach)
+ parent = self.fill_parent()
+ parent_include = self.fill_include_parent()
+
+ main_c = main_c.replace("%%MONITOR_TYPE%%", monitor_type)
+ main_c = main_c.replace("%%MIN_TYPE%%", min_type)
+ main_c = main_c.replace("%%MODEL_NAME%%", self.name)
+ main_c = main_c.replace("%%NR_EVENTS%%", str(nr_events))
+ main_c = main_c.replace("%%TRACEPOINT_HANDLERS_SKEL%%", tracepoint_handlers)
+ main_c = main_c.replace("%%TRACEPOINT_ATTACH%%", tracepoint_attach)
+ main_c = main_c.replace("%%TRACEPOINT_DETACH%%", tracepoint_detach)
+ main_c = main_c.replace("%%DESCRIPTION%%", self.description)
+ main_c = main_c.replace("%%PARENT%%", parent)
+ main_c = main_c.replace("%%INCLUDE_PARENT%%", parent_include)
return main_c
def fill_model_h_header(self):
buff = []
+ buff.append("/* SPDX-License-Identifier: GPL-2.0 */")
buff.append("/*")
buff.append(" * Automatically generated C representation of %s automaton" % (self.name))
buff.append(" * For further information about this format, see kernel documentation:")
@@ -137,41 +211,179 @@ class dot2k(Dot2c):
return self.__buff_to_string(buff)
+ def fill_monitor_class_type(self):
+ if self.monitor_type == "per_task":
+ return "DA_MON_EVENTS_ID"
+ return "DA_MON_EVENTS_IMPLICIT"
+
+ def fill_monitor_class(self):
+ if self.monitor_type == "per_task":
+ return "da_monitor_id"
+ return "da_monitor"
+
+ def fill_tracepoint_args_skel(self, tp_type):
+ buff = []
+ tp_args_event = [
+ ("char *", "state"),
+ ("char *", "event"),
+ ("char *", "next_state"),
+ ("bool ", "final_state"),
+ ]
+ tp_args_error = [
+ ("char *", "state"),
+ ("char *", "event"),
+ ]
+ tp_args_id = ("int ", "id")
+ tp_args = tp_args_event if tp_type == "event" else tp_args_error
+ if self.monitor_type == "per_task":
+ tp_args.insert(0, tp_args_id)
+ tp_proto_c = ", ".join([a+b for a,b in tp_args])
+ tp_args_c = ", ".join([b for a,b in tp_args])
+ buff.append(" TP_PROTO(%s)," % tp_proto_c)
+ buff.append(" TP_ARGS(%s)" % tp_args_c)
+ return self.__buff_to_string(buff)
+
+ def fill_monitor_deps(self):
+ buff = []
+ buff.append(" # XXX: add dependencies if there")
+ if self.parent:
+ buff.append(" depends on RV_MON_%s" % self.parent.upper())
+ buff.append(" default y")
+ return self.__buff_to_string(buff)
+
+ def fill_trace_h(self):
+ trace_h = self.trace_h
+ monitor_class = self.fill_monitor_class()
+ monitor_class_type = self.fill_monitor_class_type()
+ tracepoint_args_skel_event = self.fill_tracepoint_args_skel("event")
+ tracepoint_args_skel_error = self.fill_tracepoint_args_skel("error")
+ trace_h = trace_h.replace("%%MODEL_NAME%%", self.name)
+ trace_h = trace_h.replace("%%MODEL_NAME_UP%%", self.name.upper())
+ trace_h = trace_h.replace("%%MONITOR_CLASS%%", monitor_class)
+ trace_h = trace_h.replace("%%MONITOR_CLASS_TYPE%%", monitor_class_type)
+ trace_h = trace_h.replace("%%TRACEPOINT_ARGS_SKEL_EVENT%%", tracepoint_args_skel_event)
+ trace_h = trace_h.replace("%%TRACEPOINT_ARGS_SKEL_ERROR%%", tracepoint_args_skel_error)
+ return trace_h
+
+ def fill_kconfig(self):
+ kconfig = self.kconfig
+ monitor_class_type = self.fill_monitor_class_type()
+ monitor_deps = self.fill_monitor_deps()
+ kconfig = kconfig.replace("%%MODEL_NAME%%", self.name)
+ kconfig = kconfig.replace("%%MODEL_NAME_UP%%", self.name.upper())
+ kconfig = kconfig.replace("%%MONITOR_CLASS_TYPE%%", monitor_class_type)
+ kconfig = kconfig.replace("%%DESCRIPTION%%", self.description)
+ kconfig = kconfig.replace("%%MONITOR_DEPS%%", monitor_deps)
+ return kconfig
+
+ def fill_main_container_h(self):
+ main_h = self.main_h
+ main_h = main_h.replace("%%MODEL_NAME%%", self.name)
+ return main_h
+
+ def __patch_file(self, file, marker, line):
+ file_to_patch = os.path.join(self.rv_dir, file)
+ content = self.__read_file(file_to_patch)
+ content = content.replace(marker, line + "\n" + marker)
+ self.__write_file(file_to_patch, content)
+
+ def fill_tracepoint_tooltip(self):
+ monitor_class_type = self.fill_monitor_class_type()
+ if self.auto_patch:
+ self.__patch_file("rv_trace.h",
+ "// Add new monitors based on CONFIG_%s here" % monitor_class_type,
+ "#include <monitors/%s/%s_trace.h>" % (self.name, self.name))
+ return " - Patching %s/rv_trace.h, double check the result" % self.rv_dir
+
+ return """ - Edit %s/rv_trace.h:
+Add this line where other tracepoints are included and %s is defined:
+#include <monitors/%s/%s_trace.h>
+""" % (self.rv_dir, monitor_class_type, self.name, self.name)
+
+ def fill_kconfig_tooltip(self):
+ if self.auto_patch:
+ self.__patch_file("Kconfig",
+ "# Add new monitors here",
+ "source \"kernel/trace/rv/monitors/%s/Kconfig\"" % (self.name))
+ return " - Patching %s/Kconfig, double check the result" % self.rv_dir
+
+ return """ - Edit %s/Kconfig:
+Add this line where other monitors are included:
+source \"kernel/trace/rv/monitors/%s/Kconfig\"
+""" % (self.rv_dir, self.name)
+
+ def fill_makefile_tooltip(self):
+ name = self.name
+ name_up = name.upper()
+ if self.auto_patch:
+ self.__patch_file("Makefile",
+ "# Add new monitors here",
+ "obj-$(CONFIG_RV_MON_%s) += monitors/%s/%s.o" % (name_up, name, name))
+ return " - Patching %s/Makefile, double check the result" % self.rv_dir
+
+ return """ - Edit %s/Makefile:
+Add this line where other monitors are included:
+obj-$(CONFIG_RV_MON_%s) += monitors/%s/%s.o
+""" % (self.rv_dir, name_up, name, name)
+
+ def fill_monitor_tooltip(self):
+ if self.auto_patch:
+ return " - Monitor created in %s/monitors/%s" % (self.rv_dir, self. name)
+ return " - Move %s/ to the kernel's monitor directory (%s/monitors)" % (self.name, self.rv_dir)
+
def __create_directory(self):
+ path = self.name
+ if self.auto_patch:
+ path = os.path.join(self.rv_dir, "monitors", path)
try:
- os.mkdir(self.name)
+ os.mkdir(path)
except FileExistsError:
return
except:
print("Fail creating the output dir: %s" % self.name)
- def __create_file(self, file_name, content):
- path = "%s/%s" % (self.name, file_name)
+ def __write_file(self, file_name, content):
try:
- file = open(path, 'w')
- except FileExistsError:
- return
+ file = open(file_name, 'w')
except:
- print("Fail creating file: %s" % path)
+ print("Fail writing to file: %s" % file_name)
file.write(content)
file.close()
+ def __create_file(self, file_name, content):
+ path = "%s/%s" % (self.name, file_name)
+ if self.auto_patch:
+ path = os.path.join(self.rv_dir, "monitors", path)
+ self.__write_file(path, content)
+
def __get_main_name(self):
path = "%s/%s" % (self.name, "main.c")
- if os.path.exists(path) == False:
- return "main.c"
+ if not os.path.exists(path):
+ return "main.c"
return "__main.c"
def print_files(self):
main_c = self.fill_main_c()
- model_h = self.fill_model_h()
self.__create_directory()
path = "%s.c" % self.name
self.__create_file(path, main_c)
- path = "%s.h" % self.name
- self.__create_file(path, model_h)
+ if self.container:
+ main_h = self.fill_main_container_h()
+ path = "%s.h" % self.name
+ self.__create_file(path, main_h)
+ else:
+ model_h = self.fill_model_h()
+ path = "%s.h" % self.name
+ self.__create_file(path, model_h)
+
+ trace_h = self.fill_trace_h()
+ path = "%s_trace.h" % self.name
+ self.__create_file(path, trace_h)
+
+ kconfig = self.fill_kconfig()
+ self.__create_file("Kconfig", kconfig)
diff --git a/tools/verification/dot2/dot2k_templates/Kconfig b/tools/verification/dot2/dot2k_templates/Kconfig
new file mode 100644
index 000000000000..291b29ea28db
--- /dev/null
+++ b/tools/verification/dot2/dot2k_templates/Kconfig
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+config RV_MON_%%MODEL_NAME_UP%%
+ depends on RV
+%%MONITOR_DEPS%%
+ select %%MONITOR_CLASS_TYPE%%
+ bool "%%MODEL_NAME%% monitor"
+ help
+ %%DESCRIPTION%%
diff --git a/tools/verification/dot2/dot2k_templates/main.c b/tools/verification/dot2/dot2k_templates/main.c
new file mode 100644
index 000000000000..83044a20c89a
--- /dev/null
+++ b/tools/verification/dot2/dot2k_templates/main.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/ftrace.h>
+#include <linux/tracepoint.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/rv.h>
+#include <rv/instrumentation.h>
+#include <rv/da_monitor.h>
+
+#define MODULE_NAME "%%MODEL_NAME%%"
+
+/*
+ * XXX: include required tracepoint headers, e.g.,
+ * #include <trace/events/sched.h>
+ */
+#include <rv_trace.h>
+%%INCLUDE_PARENT%%
+/*
+ * This is the self-generated part of the monitor. Generally, there is no need
+ * to touch this section.
+ */
+#include "%%MODEL_NAME%%.h"
+
+/*
+ * Declare the deterministic automata monitor.
+ *
+ * The rv monitor reference is needed for the monitor declaration.
+ */
+static struct rv_monitor rv_%%MODEL_NAME%%;
+DECLARE_DA_MON_%%MONITOR_TYPE%%(%%MODEL_NAME%%, %%MIN_TYPE%%);
+
+/*
+ * This is the instrumentation part of the monitor.
+ *
+ * This is the section where manual work is required. Here the kernel events
+ * are translated into model's event.
+ *
+ */
+%%TRACEPOINT_HANDLERS_SKEL%%
+static int enable_%%MODEL_NAME%%(void)
+{
+ int retval;
+
+ retval = da_monitor_init_%%MODEL_NAME%%();
+ if (retval)
+ return retval;
+
+%%TRACEPOINT_ATTACH%%
+
+ return 0;
+}
+
+static void disable_%%MODEL_NAME%%(void)
+{
+ rv_%%MODEL_NAME%%.enabled = 0;
+
+%%TRACEPOINT_DETACH%%
+
+ da_monitor_destroy_%%MODEL_NAME%%();
+}
+
+/*
+ * This is the monitor register section.
+ */
+static struct rv_monitor rv_%%MODEL_NAME%% = {
+ .name = "%%MODEL_NAME%%",
+ .description = "%%DESCRIPTION%%",
+ .enable = enable_%%MODEL_NAME%%,
+ .disable = disable_%%MODEL_NAME%%,
+ .reset = da_monitor_reset_all_%%MODEL_NAME%%,
+ .enabled = 0,
+};
+
+static int __init register_%%MODEL_NAME%%(void)
+{
+ rv_register_monitor(&rv_%%MODEL_NAME%%, %%PARENT%%);
+ return 0;
+}
+
+static void __exit unregister_%%MODEL_NAME%%(void)
+{
+ rv_unregister_monitor(&rv_%%MODEL_NAME%%);
+}
+
+module_init(register_%%MODEL_NAME%%);
+module_exit(unregister_%%MODEL_NAME%%);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("dot2k: auto-generated");
+MODULE_DESCRIPTION("%%MODEL_NAME%%: %%DESCRIPTION%%");
diff --git a/tools/verification/dot2/dot2k_templates/main_container.c b/tools/verification/dot2/dot2k_templates/main_container.c
new file mode 100644
index 000000000000..89fc17cf8958
--- /dev/null
+++ b/tools/verification/dot2/dot2k_templates/main_container.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/rv.h>
+
+#define MODULE_NAME "%%MODEL_NAME%%"
+
+#include "%%MODEL_NAME%%.h"
+
+struct rv_monitor rv_%%MODEL_NAME%%;
+
+struct rv_monitor rv_%%MODEL_NAME%% = {
+ .name = "%%MODEL_NAME%%",
+ .description = "%%DESCRIPTION%%",
+ .enable = NULL,
+ .disable = NULL,
+ .reset = NULL,
+ .enabled = 0,
+};
+
+static int __init register_%%MODEL_NAME%%(void)
+{
+ rv_register_monitor(&rv_%%MODEL_NAME%%, NULL);
+ return 0;
+}
+
+static void __exit unregister_%%MODEL_NAME%%(void)
+{
+ rv_unregister_monitor(&rv_%%MODEL_NAME%%);
+}
+
+module_init(register_%%MODEL_NAME%%);
+module_exit(unregister_%%MODEL_NAME%%);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("dot2k: auto-generated");
+MODULE_DESCRIPTION("%%MODEL_NAME%%: %%DESCRIPTION%%");
diff --git a/tools/verification/dot2/dot2k_templates/main_container.h b/tools/verification/dot2/dot2k_templates/main_container.h
new file mode 100644
index 000000000000..0f6883ab4bcc
--- /dev/null
+++ b/tools/verification/dot2/dot2k_templates/main_container.h
@@ -0,0 +1,3 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+extern struct rv_monitor rv_%%MODEL_NAME%%;
diff --git a/tools/verification/dot2/dot2k_templates/main_global.c b/tools/verification/dot2/dot2k_templates/main_global.c
deleted file mode 100644
index a5658bfb9044..000000000000
--- a/tools/verification/dot2/dot2k_templates/main_global.c
+++ /dev/null
@@ -1,91 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/ftrace.h>
-#include <linux/tracepoint.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/rv.h>
-#include <rv/instrumentation.h>
-#include <rv/da_monitor.h>
-
-#define MODULE_NAME "MODEL_NAME"
-
-/*
- * XXX: include required tracepoint headers, e.g.,
- * #include <trace/events/sched.h>
- */
-#include <trace/events/rv.h>
-
-/*
- * This is the self-generated part of the monitor. Generally, there is no need
- * to touch this section.
- */
-#include "MODEL_NAME.h"
-
-/*
- * Declare the deterministic automata monitor.
- *
- * The rv monitor reference is needed for the monitor declaration.
- */
-static struct rv_monitor rv_MODEL_NAME;
-DECLARE_DA_MON_GLOBAL(MODEL_NAME, MIN_TYPE);
-
-/*
- * This is the instrumentation part of the monitor.
- *
- * This is the section where manual work is required. Here the kernel events
- * are translated into model's event.
- *
- */
-TRACEPOINT_HANDLERS_SKEL
-static int enable_MODEL_NAME(void)
-{
- int retval;
-
- retval = da_monitor_init_MODEL_NAME();
- if (retval)
- return retval;
-
-TRACEPOINT_ATTACH
-
- return 0;
-}
-
-static void disable_MODEL_NAME(void)
-{
- rv_MODEL_NAME.enabled = 0;
-
-TRACEPOINT_DETACH
-
- da_monitor_destroy_MODEL_NAME();
-}
-
-/*
- * This is the monitor register section.
- */
-static struct rv_monitor rv_MODEL_NAME = {
- .name = "MODEL_NAME",
- .description = "auto-generated MODEL_NAME",
- .enable = enable_MODEL_NAME,
- .disable = disable_MODEL_NAME,
- .reset = da_monitor_reset_all_MODEL_NAME,
- .enabled = 0,
-};
-
-static int __init register_MODEL_NAME(void)
-{
- rv_register_monitor(&rv_MODEL_NAME);
- return 0;
-}
-
-static void __exit unregister_MODEL_NAME(void)
-{
- rv_unregister_monitor(&rv_MODEL_NAME);
-}
-
-module_init(register_MODEL_NAME);
-module_exit(unregister_MODEL_NAME);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("dot2k: auto-generated");
-MODULE_DESCRIPTION("MODEL_NAME");
diff --git a/tools/verification/dot2/dot2k_templates/main_per_cpu.c b/tools/verification/dot2/dot2k_templates/main_per_cpu.c
deleted file mode 100644
index 03539a97633f..000000000000
--- a/tools/verification/dot2/dot2k_templates/main_per_cpu.c
+++ /dev/null
@@ -1,91 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/ftrace.h>
-#include <linux/tracepoint.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/rv.h>
-#include <rv/instrumentation.h>
-#include <rv/da_monitor.h>
-
-#define MODULE_NAME "MODEL_NAME"
-
-/*
- * XXX: include required tracepoint headers, e.g.,
- * #include <linux/trace/events/sched.h>
- */
-#include <trace/events/rv.h>
-
-/*
- * This is the self-generated part of the monitor. Generally, there is no need
- * to touch this section.
- */
-#include "MODEL_NAME.h"
-
-/*
- * Declare the deterministic automata monitor.
- *
- * The rv monitor reference is needed for the monitor declaration.
- */
-static struct rv_monitor rv_MODEL_NAME;
-DECLARE_DA_MON_PER_CPU(MODEL_NAME, MIN_TYPE);
-
-/*
- * This is the instrumentation part of the monitor.
- *
- * This is the section where manual work is required. Here the kernel events
- * are translated into model's event.
- *
- */
-TRACEPOINT_HANDLERS_SKEL
-static int enable_MODEL_NAME(void)
-{
- int retval;
-
- retval = da_monitor_init_MODEL_NAME();
- if (retval)
- return retval;
-
-TRACEPOINT_ATTACH
-
- return 0;
-}
-
-static void disable_MODEL_NAME(void)
-{
- rv_MODEL_NAME.enabled = 0;
-
-TRACEPOINT_DETACH
-
- da_monitor_destroy_MODEL_NAME();
-}
-
-/*
- * This is the monitor register section.
- */
-static struct rv_monitor rv_MODEL_NAME = {
- .name = "MODEL_NAME",
- .description = "auto-generated MODEL_NAME",
- .enable = enable_MODEL_NAME,
- .disable = disable_MODEL_NAME,
- .reset = da_monitor_reset_all_MODEL_NAME,
- .enabled = 0,
-};
-
-static int __init register_MODEL_NAME(void)
-{
- rv_register_monitor(&rv_MODEL_NAME);
- return 0;
-}
-
-static void __exit unregister_MODEL_NAME(void)
-{
- rv_unregister_monitor(&rv_MODEL_NAME);
-}
-
-module_init(register_MODEL_NAME);
-module_exit(unregister_MODEL_NAME);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("dot2k: auto-generated");
-MODULE_DESCRIPTION("MODEL_NAME");
diff --git a/tools/verification/dot2/dot2k_templates/main_per_task.c b/tools/verification/dot2/dot2k_templates/main_per_task.c
deleted file mode 100644
index ffd92af87a86..000000000000
--- a/tools/verification/dot2/dot2k_templates/main_per_task.c
+++ /dev/null
@@ -1,91 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/ftrace.h>
-#include <linux/tracepoint.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/rv.h>
-#include <rv/instrumentation.h>
-#include <rv/da_monitor.h>
-
-#define MODULE_NAME "MODEL_NAME"
-
-/*
- * XXX: include required tracepoint headers, e.g.,
- * #include <linux/trace/events/sched.h>
- */
-#include <trace/events/rv.h>
-
-/*
- * This is the self-generated part of the monitor. Generally, there is no need
- * to touch this section.
- */
-#include "MODEL_NAME.h"
-
-/*
- * Declare the deterministic automata monitor.
- *
- * The rv monitor reference is needed for the monitor declaration.
- */
-static struct rv_monitor rv_MODEL_NAME;
-DECLARE_DA_MON_PER_TASK(MODEL_NAME, MIN_TYPE);
-
-/*
- * This is the instrumentation part of the monitor.
- *
- * This is the section where manual work is required. Here the kernel events
- * are translated into model's event.
- *
- */
-TRACEPOINT_HANDLERS_SKEL
-static int enable_MODEL_NAME(void)
-{
- int retval;
-
- retval = da_monitor_init_MODEL_NAME();
- if (retval)
- return retval;
-
-TRACEPOINT_ATTACH
-
- return 0;
-}
-
-static void disable_MODEL_NAME(void)
-{
- rv_MODEL_NAME.enabled = 0;
-
-TRACEPOINT_DETACH
-
- da_monitor_destroy_MODEL_NAME();
-}
-
-/*
- * This is the monitor register section.
- */
-static struct rv_monitor rv_MODEL_NAME = {
- .name = "MODEL_NAME",
- .description = "auto-generated MODEL_NAME",
- .enable = enable_MODEL_NAME,
- .disable = disable_MODEL_NAME,
- .reset = da_monitor_reset_all_MODEL_NAME,
- .enabled = 0,
-};
-
-static int __init register_MODEL_NAME(void)
-{
- rv_register_monitor(&rv_MODEL_NAME);
- return 0;
-}
-
-static void __exit unregister_MODEL_NAME(void)
-{
- rv_unregister_monitor(&rv_MODEL_NAME);
-}
-
-module_init(register_MODEL_NAME);
-module_exit(unregister_MODEL_NAME);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("dot2k: auto-generated");
-MODULE_DESCRIPTION("MODEL_NAME");
diff --git a/tools/verification/dot2/dot2k_templates/trace.h b/tools/verification/dot2/dot2k_templates/trace.h
new file mode 100644
index 000000000000..87d3a1308926
--- /dev/null
+++ b/tools/verification/dot2/dot2k_templates/trace.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Snippet to be included in rv_trace.h
+ */
+
+#ifdef CONFIG_RV_MON_%%MODEL_NAME_UP%%
+DEFINE_EVENT(event_%%MONITOR_CLASS%%, event_%%MODEL_NAME%%,
+%%TRACEPOINT_ARGS_SKEL_EVENT%%);
+
+DEFINE_EVENT(error_%%MONITOR_CLASS%%, error_%%MODEL_NAME%%,
+%%TRACEPOINT_ARGS_SKEL_ERROR%%);
+#endif /* CONFIG_RV_MON_%%MODEL_NAME_UP%% */
diff --git a/tools/verification/models/sched/sco.dot b/tools/verification/models/sched/sco.dot
new file mode 100644
index 000000000000..20b0e3b449a6
--- /dev/null
+++ b/tools/verification/models/sched/sco.dot
@@ -0,0 +1,18 @@
+digraph state_automaton {
+ center = true;
+ size = "7,11";
+ {node [shape = plaintext] "scheduling_context"};
+ {node [shape = plaintext, style=invis, label=""] "__init_thread_context"};
+ {node [shape = ellipse] "thread_context"};
+ {node [shape = plaintext] "thread_context"};
+ "__init_thread_context" -> "thread_context";
+ "scheduling_context" [label = "scheduling_context"];
+ "scheduling_context" -> "thread_context" [ label = "schedule_exit" ];
+ "thread_context" [label = "thread_context", color = green3];
+ "thread_context" -> "scheduling_context" [ label = "schedule_entry" ];
+ "thread_context" -> "thread_context" [ label = "sched_set_state" ];
+ { rank = min ;
+ "__init_thread_context";
+ "thread_context";
+ }
+}
diff --git a/tools/verification/models/sched/scpd.dot b/tools/verification/models/sched/scpd.dot
new file mode 100644
index 000000000000..340413896765
--- /dev/null
+++ b/tools/verification/models/sched/scpd.dot
@@ -0,0 +1,18 @@
+digraph state_automaton {
+ center = true;
+ size = "7,11";
+ {node [shape = plaintext] "can_sched"};
+ {node [shape = plaintext, style=invis, label=""] "__init_cant_sched"};
+ {node [shape = ellipse] "cant_sched"};
+ {node [shape = plaintext] "cant_sched"};
+ "__init_cant_sched" -> "cant_sched";
+ "can_sched" [label = "can_sched"];
+ "can_sched" -> "can_sched" [ label = "schedule_entry\nschedule_exit" ];
+ "can_sched" -> "cant_sched" [ label = "preempt_enable" ];
+ "cant_sched" [label = "cant_sched", color = green3];
+ "cant_sched" -> "can_sched" [ label = "preempt_disable" ];
+ { rank = min ;
+ "__init_cant_sched";
+ "cant_sched";
+ }
+}
diff --git a/tools/verification/models/sched/sncid.dot b/tools/verification/models/sched/sncid.dot
new file mode 100644
index 000000000000..072851721b50
--- /dev/null
+++ b/tools/verification/models/sched/sncid.dot
@@ -0,0 +1,18 @@
+digraph state_automaton {
+ center = true;
+ size = "7,11";
+ {node [shape = plaintext, style=invis, label=""] "__init_can_sched"};
+ {node [shape = ellipse] "can_sched"};
+ {node [shape = plaintext] "can_sched"};
+ {node [shape = plaintext] "cant_sched"};
+ "__init_can_sched" -> "can_sched";
+ "can_sched" [label = "can_sched", color = green3];
+ "can_sched" -> "can_sched" [ label = "schedule_entry\nschedule_exit" ];
+ "can_sched" -> "cant_sched" [ label = "irq_disable" ];
+ "cant_sched" [label = "cant_sched"];
+ "cant_sched" -> "can_sched" [ label = "irq_enable" ];
+ { rank = min ;
+ "__init_can_sched";
+ "can_sched";
+ }
+}
diff --git a/tools/verification/models/sched/snep.dot b/tools/verification/models/sched/snep.dot
new file mode 100644
index 000000000000..fe1300e93f21
--- /dev/null
+++ b/tools/verification/models/sched/snep.dot
@@ -0,0 +1,18 @@
+digraph state_automaton {
+ center = true;
+ size = "7,11";
+ {node [shape = plaintext, style=invis, label=""] "__init_non_scheduling_context"};
+ {node [shape = ellipse] "non_scheduling_context"};
+ {node [shape = plaintext] "non_scheduling_context"};
+ {node [shape = plaintext] "scheduling_contex"};
+ "__init_non_scheduling_context" -> "non_scheduling_context";
+ "non_scheduling_context" [label = "non_scheduling_context", color = green3];
+ "non_scheduling_context" -> "non_scheduling_context" [ label = "preempt_disable\npreempt_enable" ];
+ "non_scheduling_context" -> "scheduling_contex" [ label = "schedule_entry" ];
+ "scheduling_contex" [label = "scheduling_contex"];
+ "scheduling_contex" -> "non_scheduling_context" [ label = "schedule_exit" ];
+ { rank = min ;
+ "__init_non_scheduling_context";
+ "non_scheduling_context";
+ }
+}
diff --git a/tools/verification/models/sched/snroc.dot b/tools/verification/models/sched/snroc.dot
new file mode 100644
index 000000000000..8b71c32d4dca
--- /dev/null
+++ b/tools/verification/models/sched/snroc.dot
@@ -0,0 +1,18 @@
+digraph state_automaton {
+ center = true;
+ size = "7,11";
+ {node [shape = plaintext, style=invis, label=""] "__init_other_context"};
+ {node [shape = ellipse] "other_context"};
+ {node [shape = plaintext] "other_context"};
+ {node [shape = plaintext] "own_context"};
+ "__init_other_context" -> "other_context";
+ "other_context" [label = "other_context", color = green3];
+ "other_context" -> "own_context" [ label = "sched_switch_in" ];
+ "own_context" [label = "own_context"];
+ "own_context" -> "other_context" [ label = "sched_switch_out" ];
+ "own_context" -> "own_context" [ label = "sched_set_state" ];
+ { rank = min ;
+ "__init_other_context";
+ "other_context";
+ }
+}
diff --git a/tools/verification/models/sched/tss.dot b/tools/verification/models/sched/tss.dot
new file mode 100644
index 000000000000..7dfa1d9121bb
--- /dev/null
+++ b/tools/verification/models/sched/tss.dot
@@ -0,0 +1,18 @@
+digraph state_automaton {
+ center = true;
+ size = "7,11";
+ {node [shape = plaintext] "sched"};
+ {node [shape = plaintext, style=invis, label=""] "__init_thread"};
+ {node [shape = ellipse] "thread"};
+ {node [shape = plaintext] "thread"};
+ "__init_thread" -> "thread";
+ "sched" [label = "sched"];
+ "sched" -> "sched" [ label = "sched_switch" ];
+ "sched" -> "thread" [ label = "schedule_exit" ];
+ "thread" [label = "thread", color = green3];
+ "thread" -> "sched" [ label = "schedule_entry" ];
+ { rank = min ;
+ "__init_thread";
+ "thread";
+ }
+}
diff --git a/tools/verification/rv/Makefile b/tools/verification/rv/Makefile
index 411d62b3d8eb..5b898360ba48 100644
--- a/tools/verification/rv/Makefile
+++ b/tools/verification/rv/Makefile
@@ -35,12 +35,6 @@ FEATURE_TESTS += libtracefs
FEATURE_DISPLAY := libtraceevent
FEATURE_DISPLAY += libtracefs
-ifeq ($(V),1)
- Q =
-else
- Q = @
-endif
-
all: $(RV)
include $(srctree)/tools/build/Makefile.include
diff --git a/tools/verification/rv/Makefile.rv b/tools/verification/rv/Makefile.rv
index 161baa29eb86..2497fb96c83d 100644
--- a/tools/verification/rv/Makefile.rv
+++ b/tools/verification/rv/Makefile.rv
@@ -27,7 +27,7 @@ endif
INCLUDE := -Iinclude/
CFLAGS := -g -DVERSION=\"$(VERSION)\" $(FOPTS) $(WOPTS) $(EXTRA_CFLAGS) $(INCLUDE)
-LDFLAGS := -ggdb $(EXTRA_LDFLAGS)
+LDFLAGS := -ggdb $(LDFLAGS) $(EXTRA_LDFLAGS)
INSTALL := install
MKDIR := mkdir
diff --git a/tools/verification/rv/include/in_kernel.h b/tools/verification/rv/include/in_kernel.h
index 3090638c8d71..f3bfd3b9895f 100644
--- a/tools/verification/rv/include/in_kernel.h
+++ b/tools/verification/rv/include/in_kernel.h
@@ -1,3 +1,3 @@
// SPDX-License-Identifier: GPL-2.0
-int ikm_list_monitors(void);
+int ikm_list_monitors(char *container);
int ikm_run_monitor(char *monitor, int argc, char **argv);
diff --git a/tools/verification/rv/include/rv.h b/tools/verification/rv/include/rv.h
index 770fd6da3610..6f668eb266cb 100644
--- a/tools/verification/rv/include/rv.h
+++ b/tools/verification/rv/include/rv.h
@@ -1,12 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
#define MAX_DESCRIPTION 1024
-#define MAX_DA_NAME_LEN 24
+#define MAX_DA_NAME_LEN 32
struct monitor {
char name[MAX_DA_NAME_LEN];
char desc[MAX_DESCRIPTION];
int enabled;
+ int nested;
};
int should_stop(void);
diff --git a/tools/verification/rv/src/in_kernel.c b/tools/verification/rv/src/in_kernel.c
index f2bbc75a76f4..c0dcee795c0d 100644
--- a/tools/verification/rv/src/in_kernel.c
+++ b/tools/verification/rv/src/in_kernel.c
@@ -6,15 +6,18 @@
*/
#include <getopt.h>
#include <stdlib.h>
+#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
+#include <dirent.h>
#include <trace.h>
#include <utils.h>
#include <rv.h>
static int config_has_id;
+static int config_is_container;
static int config_my_pid;
static int config_trace;
@@ -45,6 +48,51 @@ static int __ikm_read_enable(char *monitor_name)
}
/*
+ * __ikm_find_monitor - find the full name of a possibly nested module
+ *
+ * __does not log errors.
+ *
+ * Returns 1 if we found the monitor, -1 on error and 0 if it does not exist.
+ * The string out_name is populated with the full name, which can be
+ * equal to monitor_name or container/monitor_name if nested
+ */
+static int __ikm_find_monitor_name(char *monitor_name, char *out_name)
+{
+ char *available_monitors, container[MAX_DA_NAME_LEN+1], *cursor, *end;
+ int retval = 1;
+
+ available_monitors = tracefs_instance_file_read(NULL, "rv/available_monitors", NULL);
+ if (!available_monitors)
+ return -1;
+
+ cursor = strstr(available_monitors, monitor_name);
+ if (!cursor) {
+ retval = 0;
+ goto out_free;
+ }
+
+ for (; cursor > available_monitors; cursor--)
+ if (*(cursor-1) == '\n')
+ break;
+ end = strstr(cursor, "\n");
+ memcpy(out_name, cursor, end-cursor);
+ out_name[end-cursor] = '\0';
+
+ cursor = strstr(out_name, ":");
+ if (cursor)
+ *cursor = '/';
+ else {
+ sprintf(container, "%s:", monitor_name);
+ if (strstr(available_monitors, container))
+ config_is_container = 1;
+ }
+
+out_free:
+ free(available_monitors);
+ return retval;
+}
+
+/*
* ikm_read_enable - reads monitor's enable status
*
* Returns the current status, or -1 on error.
@@ -132,12 +180,28 @@ static char *ikm_read_desc(char *monitor_name)
/*
* ikm_fill_monitor_definition - fill monitor's definition
*
- * Returns -1 on error, 0 otherwise.
+ * Returns -1 on error, 1 if the monitor does not belong in the container, 0 otherwise.
+ * container can be NULL
*/
-static int ikm_fill_monitor_definition(char *name, struct monitor *ikm)
+static int ikm_fill_monitor_definition(char *name, struct monitor *ikm, char *container)
{
int enabled;
- char *desc;
+ char *desc, *nested_name;
+
+ nested_name = strstr(name, ":");
+ if (nested_name) {
+ /* it belongs in container if it starts with "container:" */
+ if (container && strstr(name, container) != name)
+ return 1;
+ *nested_name = '/';
+ ++nested_name;
+ ikm->nested = 1;
+ } else {
+ if (container)
+ return 1;
+ nested_name = name;
+ ikm->nested = 0;
+ }
enabled = ikm_read_enable(name);
if (enabled < 0) {
@@ -151,7 +215,7 @@ static int ikm_fill_monitor_definition(char *name, struct monitor *ikm)
return -1;
}
- strncpy(ikm->name, name, MAX_DA_NAME_LEN);
+ strncpy(ikm->name, nested_name, MAX_DA_NAME_LEN);
ikm->enabled = enabled;
strncpy(ikm->desc, desc, MAX_DESCRIPTION);
@@ -270,12 +334,12 @@ static int ikm_has_id(char *monitor_name)
*
* Returns 0 on success, -1 otherwise.
*/
-int ikm_list_monitors(void)
+int ikm_list_monitors(char *container)
{
char *available_monitors;
- struct monitor ikm;
+ struct monitor ikm = {0};
char *curr, *next;
- int retval;
+ int retval, list_monitor = 0;
available_monitors = tracefs_instance_file_read(NULL, "rv/available_monitors", NULL);
@@ -289,15 +353,29 @@ int ikm_list_monitors(void)
next = strstr(curr, "\n");
*next = '\0';
- retval = ikm_fill_monitor_definition(curr, &ikm);
- if (retval)
+ retval = ikm_fill_monitor_definition(curr, &ikm, container);
+ if (retval < 0)
err_msg("ikm: error reading %d in kernel monitor, skipping\n", curr);
- printf("%-24s %s %s\n", ikm.name, ikm.desc, ikm.enabled ? "[ON]" : "[OFF]");
+ if (!retval) {
+ int indent = ikm.nested && !container;
+
+ list_monitor = 1;
+ printf("%s%-*s %s %s\n", indent ? " - " : "",
+ indent ? MAX_DA_NAME_LEN - 3 : MAX_DA_NAME_LEN,
+ ikm.name, ikm.desc, ikm.enabled ? "[ON]" : "[OFF]");
+ }
curr = ++next;
} while (strlen(curr));
+ if (!list_monitor) {
+ if (container)
+ printf("-- No monitor found in container %s --\n", container);
+ else
+ printf("-- No monitor found --\n");
+ }
+
free(available_monitors);
return 0;
@@ -343,11 +421,11 @@ ikm_event_handler(struct trace_seq *s, struct tep_record *record,
unsigned long long final_state;
unsigned long long pid;
unsigned long long id;
- int cpu = record->cpu;
int val;
+ bool missing_id;
if (config_has_id)
- tep_get_field_val(s, trace_event, "id", record, &id, 1);
+ missing_id = tep_get_field_val(s, trace_event, "id", record, &id, 1);
tep_get_common_field_val(s, trace_event, "common_pid", record, &pid, 1);
@@ -356,12 +434,21 @@ ikm_event_handler(struct trace_seq *s, struct tep_record *record,
else if (config_my_pid && (config_my_pid == pid))
return 0;
- tep_print_event(trace_event->tep, s, record, "%16s-%-8d ", TEP_PRINT_COMM, TEP_PRINT_PID);
+ tep_print_event(trace_event->tep, s, record, "%16s-%-8d [%.3d] ",
+ TEP_PRINT_COMM, TEP_PRINT_PID, TEP_PRINT_CPU);
- trace_seq_printf(s, "[%.3d] event ", cpu);
+ if (config_is_container)
+ tep_print_event(trace_event->tep, s, record, "%s ", TEP_PRINT_NAME);
+ else
+ trace_seq_printf(s, "event ");
- if (config_has_id)
- trace_seq_printf(s, "%8llu ", id);
+ if (config_has_id) {
+ if (missing_id)
+ /* placeholder if we are dealing with a mixed-type container*/
+ trace_seq_printf(s, " ");
+ else
+ trace_seq_printf(s, "%8llu ", id);
+ }
state = tep_get_field_raw(s, trace_event, "state", record, &val, 0);
event = tep_get_field_raw(s, trace_event, "event", record, &val, 0);
@@ -394,9 +481,10 @@ ikm_error_handler(struct trace_seq *s, struct tep_record *record,
int cpu = record->cpu;
char *state, *event;
int val;
+ bool missing_id;
if (config_has_id)
- tep_get_field_val(s, trace_event, "id", record, &id, 1);
+ missing_id = tep_get_field_val(s, trace_event, "id", record, &id, 1);
tep_get_common_field_val(s, trace_event, "common_pid", record, &pid, 1);
@@ -405,10 +493,20 @@ ikm_error_handler(struct trace_seq *s, struct tep_record *record,
else if (config_my_pid == pid)
return 0;
- trace_seq_printf(s, "%8lld [%03d] error ", pid, cpu);
+ trace_seq_printf(s, "%8lld [%03d] ", pid, cpu);
- if (config_has_id)
- trace_seq_printf(s, "%8llu ", id);
+ if (config_is_container)
+ tep_print_event(trace_event->tep, s, record, "%s ", TEP_PRINT_NAME);
+ else
+ trace_seq_printf(s, "error ");
+
+ if (config_has_id) {
+ if (missing_id)
+ /* placeholder if we are dealing with a mixed-type container*/
+ trace_seq_printf(s, " ");
+ else
+ trace_seq_printf(s, "%8llu ", id);
+ }
state = tep_get_field_raw(s, trace_event, "state", record, &val, 0);
event = tep_get_field_raw(s, trace_event, "event", record, &val, 0);
@@ -421,6 +519,64 @@ ikm_error_handler(struct trace_seq *s, struct tep_record *record,
return 0;
}
+static int ikm_enable_trace_events(char *monitor_name, struct trace_instance *inst)
+{
+ char event[MAX_DA_NAME_LEN + 7]; /* max(error_,event_) + '0' = 7 */
+ int retval;
+
+ snprintf(event, sizeof(event), "event_%s", monitor_name);
+ retval = tracefs_event_enable(inst->inst, "rv", event);
+ if (retval)
+ return -1;
+
+ tep_register_event_handler(inst->tep, -1, "rv", event,
+ ikm_event_handler, NULL);
+
+ snprintf(event, sizeof(event), "error_%s", monitor_name);
+ retval = tracefs_event_enable(inst->inst, "rv", event);
+ if (retval)
+ return -1;
+
+ tep_register_event_handler(inst->tep, -1, "rv", event,
+ ikm_error_handler, NULL);
+
+ /* set if at least 1 monitor has id in case of a container */
+ config_has_id = ikm_has_id(monitor_name);
+ if (config_has_id < 0)
+ return -1;
+
+
+ return 0;
+}
+
+static int ikm_enable_trace_container(char *monitor_name,
+ struct trace_instance *inst)
+{
+ DIR *dp;
+ char *abs_path, rv_path[MAX_PATH];
+ struct dirent *ep;
+ int retval = 0;
+
+ snprintf(rv_path, MAX_PATH, "rv/monitors/%s", monitor_name);
+ abs_path = tracefs_instance_get_file(NULL, rv_path);
+ if (!abs_path)
+ return -1;
+ dp = opendir(abs_path);
+ if (!dp)
+ goto out_free;
+
+ while (!retval && (ep = readdir(dp))) {
+ if (ep->d_type != DT_DIR || ep->d_name[0] == '.')
+ continue;
+ retval = ikm_enable_trace_events(ep->d_name, inst);
+ }
+
+ closedir(dp);
+out_free:
+ free(abs_path);
+ return retval;
+}
+
/*
* ikm_setup_trace_instance - set up a tracing instance to collect data
*
@@ -430,19 +586,12 @@ ikm_error_handler(struct trace_seq *s, struct tep_record *record,
*/
static struct trace_instance *ikm_setup_trace_instance(char *monitor_name)
{
- char event[MAX_DA_NAME_LEN + 7]; /* max(error_,event_) + '0' = 7 */
struct trace_instance *inst;
int retval;
if (!config_trace)
return NULL;
- config_has_id = ikm_has_id(monitor_name);
- if (config_has_id < 0) {
- err_msg("ikm: failed to read monitor %s event format\n", monitor_name);
- goto out_err;
- }
-
/* alloc data */
inst = calloc(1, sizeof(*inst));
if (!inst) {
@@ -454,23 +603,13 @@ static struct trace_instance *ikm_setup_trace_instance(char *monitor_name)
if (retval)
goto out_free;
- /* enable events */
- snprintf(event, sizeof(event), "event_%s", monitor_name);
- retval = tracefs_event_enable(inst->inst, "rv", event);
+ if (config_is_container)
+ retval = ikm_enable_trace_container(monitor_name, inst);
+ else
+ retval = ikm_enable_trace_events(monitor_name, inst);
if (retval)
goto out_inst;
- tep_register_event_handler(inst->tep, -1, "rv", event,
- ikm_event_handler, NULL);
-
- snprintf(event, sizeof(event), "error_%s", monitor_name);
- retval = tracefs_event_enable(inst->inst, "rv", event);
- if (retval)
- goto out_inst;
-
- tep_register_event_handler(inst->tep, -1, "rv", event,
- ikm_error_handler, NULL);
-
/* ready to enable */
tracefs_trace_on(inst->inst);
@@ -633,32 +772,41 @@ static int parse_arguments(char *monitor_name, int argc, char **argv)
int ikm_run_monitor(char *monitor_name, int argc, char **argv)
{
struct trace_instance *inst = NULL;
+ char *nested_name, full_name[2*MAX_DA_NAME_LEN];
int retval;
- /*
- * Check if monitor exists by seeing it is enabled.
- */
- retval = __ikm_read_enable(monitor_name);
- if (retval < 0)
+ nested_name = strstr(monitor_name, ":");
+ if (nested_name)
+ ++nested_name;
+ else
+ nested_name = monitor_name;
+
+ retval = __ikm_find_monitor_name(monitor_name, full_name);
+ if (!retval)
return 0;
+ if (retval < 0) {
+ err_msg("ikm: error finding monitor %s\n", nested_name);
+ return -1;
+ }
+ retval = __ikm_read_enable(full_name);
if (retval) {
- err_msg("ikm: monitor %s (in-kernel) is already enabled\n", monitor_name);
+ err_msg("ikm: monitor %s (in-kernel) is already enabled\n", nested_name);
return -1;
}
/* we should be good to go */
- retval = parse_arguments(monitor_name, argc, argv);
+ retval = parse_arguments(full_name, argc, argv);
if (retval)
- ikm_usage(1, monitor_name, "ikm: failed parsing arguments");
+ ikm_usage(1, nested_name, "ikm: failed parsing arguments");
if (config_trace) {
- inst = ikm_setup_trace_instance(monitor_name);
+ inst = ikm_setup_trace_instance(nested_name);
if (!inst)
return -1;
}
- retval = ikm_enable(monitor_name);
+ retval = ikm_enable(full_name);
if (retval < 0)
goto out_free_instance;
@@ -682,17 +830,17 @@ int ikm_run_monitor(char *monitor_name, int argc, char **argv)
sleep(1);
}
- ikm_disable(monitor_name);
+ ikm_disable(full_name);
ikm_destroy_trace_instance(inst);
if (config_reactor && config_initial_reactor)
- ikm_write_reactor(monitor_name, config_initial_reactor);
+ ikm_write_reactor(full_name, config_initial_reactor);
return 1;
out_free_instance:
ikm_destroy_trace_instance(inst);
if (config_reactor && config_initial_reactor)
- ikm_write_reactor(monitor_name, config_initial_reactor);
+ ikm_write_reactor(full_name, config_initial_reactor);
return -1;
}
diff --git a/tools/verification/rv/src/rv.c b/tools/verification/rv/src/rv.c
index 1ddb85532816..239de054d1e0 100644
--- a/tools/verification/rv/src/rv.c
+++ b/tools/verification/rv/src/rv.c
@@ -41,30 +41,42 @@ static void rv_list(int argc, char **argv)
{
static const char *const usage[] = {
"",
- " usage: rv list [-h]",
+ " usage: rv list [-h] [container]",
"",
" list all available monitors",
"",
" -h/--help: print this menu",
+ "",
+ " [container]: list only monitors in this container",
NULL,
};
- int i;
-
- if (argc > 1) {
+ int i, print_help = 0, retval = 0;
+ char *container = NULL;
+
+ if (argc == 2) {
+ if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
+ print_help = 1;
+ retval = 0;
+ } else if (argv[1][0] == '-') {
+ /* assume invalid option */
+ print_help = 1;
+ retval = 1;
+ } else
+ container = argv[1];
+ } else if (argc > 2) {
+ /* more than 2 is always usage */
+ print_help = 1;
+ retval = 1;
+ }
+ if (print_help) {
fprintf(stderr, "rv version %s\n", VERSION);
-
- /* more than 1 is always usage */
for (i = 0; usage[i]; i++)
fprintf(stderr, "%s\n", usage[i]);
-
- /* but only -h is valid */
- if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help"))
- exit(0);
- else
- exit(1);
+ exit(retval);
}
- ikm_list_monitors();
+ ikm_list_monitors(container);
+
exit(0);
}