diff options
Diffstat (limited to 'tools/testing')
403 files changed, 21650 insertions, 5101 deletions
diff --git a/tools/testing/crypto/chacha20-s390/test-cipher.c b/tools/testing/crypto/chacha20-s390/test-cipher.c index 8141d45df51a..35ea65c54ffa 100644 --- a/tools/testing/crypto/chacha20-s390/test-cipher.c +++ b/tools/testing/crypto/chacha20-s390/test-cipher.c @@ -66,7 +66,7 @@ static int test_lib_chacha(u8 *revert, u8 *cipher, u8 *plain) } /* Encrypt */ - chacha_init_arch(chacha_state, (u32*)key, iv); + chacha_init(chacha_state, (u32 *)key, iv); start = ktime_get_ns(); chacha_crypt_arch(chacha_state, cipher, plain, data_size, 20); @@ -81,7 +81,7 @@ static int test_lib_chacha(u8 *revert, u8 *cipher, u8 *plain) pr_info("lib encryption took: %lld nsec", end - start); /* Decrypt */ - chacha_init_arch(chacha_state, (u32 *)key, iv); + chacha_init(chacha_state, (u32 *)key, iv); start = ktime_get_ns(); chacha_crypt_arch(chacha_state, revert, cipher, data_size, 20); diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index b1256fee3567..0a6572ab6f37 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -63,6 +63,7 @@ cxl_core-y += $(CXL_CORE_SRC)/pmu.o cxl_core-y += $(CXL_CORE_SRC)/cdat.o cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o +cxl_core-$(CONFIG_CXL_FEATURES) += $(CXL_CORE_SRC)/features.o cxl_core-y += config_check.o cxl_core-y += cxl_core_test.o cxl_core-y += cxl_core_exports.o diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index 8d731bd63988..9495dbcc03a7 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -45,6 +45,18 @@ static struct cxl_cel_entry mock_cel[] = { .effect = CXL_CMD_EFFECT_NONE, }, { + .opcode = cpu_to_le16(CXL_MBOX_OP_GET_SUPPORTED_FEATURES), + .effect = CXL_CMD_EFFECT_NONE, + }, + { + .opcode = cpu_to_le16(CXL_MBOX_OP_GET_FEATURE), + .effect = CXL_CMD_EFFECT_NONE, + }, + { + .opcode = cpu_to_le16(CXL_MBOX_OP_SET_FEATURE), + .effect = cpu_to_le16(EFFECT(CONF_CHANGE_IMMEDIATE)), + }, + { .opcode = cpu_to_le16(CXL_MBOX_OP_IDENTIFY), .effect = CXL_CMD_EFFECT_NONE, }, @@ -145,6 +157,10 @@ struct mock_event_store { u32 ev_status; }; +struct vendor_test_feat { + __le32 data; +} __packed; + struct cxl_mockmem_data { void *lsa; void *fw; @@ -161,6 +177,7 @@ struct cxl_mockmem_data { u8 event_buf[SZ_4K]; u64 timestamp; unsigned long sanitize_timeout; + struct vendor_test_feat test_feat; }; static struct mock_event_log *event_find_log(struct device *dev, int log_type) @@ -1354,6 +1371,151 @@ static int mock_activate_fw(struct cxl_mockmem_data *mdata, return -EINVAL; } +#define CXL_VENDOR_FEATURE_TEST \ + UUID_INIT(0xffffffff, 0xffff, 0xffff, 0xff, 0xff, 0xff, 0xff, 0xff, \ + 0xff, 0xff, 0xff) + +static void fill_feature_vendor_test(struct cxl_feat_entry *feat) +{ + feat->uuid = CXL_VENDOR_FEATURE_TEST; + feat->id = 0; + feat->get_feat_size = cpu_to_le16(0x4); + feat->set_feat_size = cpu_to_le16(0x4); + feat->flags = cpu_to_le32(CXL_FEATURE_F_CHANGEABLE | + CXL_FEATURE_F_DEFAULT_SEL | + CXL_FEATURE_F_SAVED_SEL); + feat->get_feat_ver = 1; + feat->set_feat_ver = 1; + feat->effects = cpu_to_le16(CXL_CMD_CONFIG_CHANGE_COLD_RESET | + CXL_CMD_EFFECTS_VALID); +} + +#define MAX_CXL_TEST_FEATS 1 + +static int mock_get_test_feature(struct cxl_mockmem_data *mdata, + struct cxl_mbox_cmd *cmd) +{ + struct vendor_test_feat *output = cmd->payload_out; + struct cxl_mbox_get_feat_in *input = cmd->payload_in; + u16 offset = le16_to_cpu(input->offset); + u16 count = le16_to_cpu(input->count); + u8 *ptr; + + if (offset > sizeof(*output)) { + cmd->return_code = CXL_MBOX_CMD_RC_INPUT; + return -EINVAL; + } + + if (offset + count > sizeof(*output)) { + cmd->return_code = CXL_MBOX_CMD_RC_INPUT; + return -EINVAL; + } + + ptr = (u8 *)&mdata->test_feat + offset; + memcpy((u8 *)output + offset, ptr, count); + + return 0; +} + +static int mock_get_feature(struct cxl_mockmem_data *mdata, + struct cxl_mbox_cmd *cmd) +{ + struct cxl_mbox_get_feat_in *input = cmd->payload_in; + + if (uuid_equal(&input->uuid, &CXL_VENDOR_FEATURE_TEST)) + return mock_get_test_feature(mdata, cmd); + + cmd->return_code = CXL_MBOX_CMD_RC_UNSUPPORTED; + + return -EOPNOTSUPP; +} + +static int mock_set_test_feature(struct cxl_mockmem_data *mdata, + struct cxl_mbox_cmd *cmd) +{ + struct cxl_mbox_set_feat_in *input = cmd->payload_in; + struct vendor_test_feat *test = + (struct vendor_test_feat *)input->feat_data; + u32 action; + + action = FIELD_GET(CXL_SET_FEAT_FLAG_DATA_TRANSFER_MASK, + le32_to_cpu(input->hdr.flags)); + /* + * While it is spec compliant to support other set actions, it is not + * necessary to add the complication in the emulation currently. Reject + * anything besides full xfer. + */ + if (action != CXL_SET_FEAT_FLAG_FULL_DATA_TRANSFER) { + cmd->return_code = CXL_MBOX_CMD_RC_INPUT; + return -EINVAL; + } + + /* Offset should be reserved when doing full transfer */ + if (input->hdr.offset) { + cmd->return_code = CXL_MBOX_CMD_RC_INPUT; + return -EINVAL; + } + + memcpy(&mdata->test_feat.data, &test->data, sizeof(u32)); + + return 0; +} + +static int mock_set_feature(struct cxl_mockmem_data *mdata, + struct cxl_mbox_cmd *cmd) +{ + struct cxl_mbox_set_feat_in *input = cmd->payload_in; + + if (uuid_equal(&input->hdr.uuid, &CXL_VENDOR_FEATURE_TEST)) + return mock_set_test_feature(mdata, cmd); + + cmd->return_code = CXL_MBOX_CMD_RC_UNSUPPORTED; + + return -EOPNOTSUPP; +} + +static int mock_get_supported_features(struct cxl_mockmem_data *mdata, + struct cxl_mbox_cmd *cmd) +{ + struct cxl_mbox_get_sup_feats_in *in = cmd->payload_in; + struct cxl_mbox_get_sup_feats_out *out = cmd->payload_out; + struct cxl_feat_entry *feat; + u16 start_idx, count; + + if (cmd->size_out < sizeof(*out)) { + cmd->return_code = CXL_MBOX_CMD_RC_PAYLOADLEN; + return -EINVAL; + } + + /* + * Current emulation only supports 1 feature + */ + start_idx = le16_to_cpu(in->start_idx); + if (start_idx != 0) { + cmd->return_code = CXL_MBOX_CMD_RC_INPUT; + return -EINVAL; + } + + count = le16_to_cpu(in->count); + if (count < struct_size(out, ents, 0)) { + cmd->return_code = CXL_MBOX_CMD_RC_PAYLOADLEN; + return -EINVAL; + } + + out->supported_feats = cpu_to_le16(MAX_CXL_TEST_FEATS); + cmd->return_code = 0; + if (count < struct_size(out, ents, MAX_CXL_TEST_FEATS)) { + out->num_entries = 0; + return 0; + } + + out->num_entries = cpu_to_le16(MAX_CXL_TEST_FEATS); + feat = out->ents; + fill_feature_vendor_test(feat); + + return 0; +} + static int cxl_mock_mbox_send(struct cxl_mailbox *cxl_mbox, struct cxl_mbox_cmd *cmd) { @@ -1439,6 +1601,15 @@ static int cxl_mock_mbox_send(struct cxl_mailbox *cxl_mbox, case CXL_MBOX_OP_ACTIVATE_FW: rc = mock_activate_fw(mdata, cmd); break; + case CXL_MBOX_OP_GET_SUPPORTED_FEATURES: + rc = mock_get_supported_features(mdata, cmd); + break; + case CXL_MBOX_OP_GET_FEATURE: + rc = mock_get_feature(mdata, cmd); + break; + case CXL_MBOX_OP_SET_FEATURE: + rc = mock_set_feature(mdata, cmd); + break; default: break; } @@ -1486,6 +1657,11 @@ static int cxl_mock_mailbox_create(struct cxl_dev_state *cxlds) return 0; } +static void cxl_mock_test_feat_init(struct cxl_mockmem_data *mdata) +{ + mdata->test_feat.data = cpu_to_le32(0xdeadbeef); +} + static int cxl_mock_mem_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -1558,6 +1734,10 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) if (rc) return rc; + rc = devm_cxl_setup_features(cxlds); + if (rc) + dev_dbg(dev, "No CXL Features discovered\n"); + cxl_mock_add_event_logs(&mdata->mes); cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlds); @@ -1572,7 +1752,12 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) if (rc) return rc; + rc = devm_cxl_setup_fwctl(cxlmd); + if (rc) + dev_dbg(dev, "No CXL FWCTL setup\n"); + cxl_mem_get_event_records(mds, CXLDEV_EVENT_STATUS_ALL); + cxl_mock_test_feat_init(mdata); return 0; } diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 8c8da966c641..a5f7fdd0c1fb 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -4303,6 +4303,14 @@ if (defined($opt{"LOG_FILE"})) { if ($opt{"CLEAR_LOG"}) { unlink $opt{"LOG_FILE"}; } + + if (! -e $opt{"LOG_FILE"} && $opt{"LOG_FILE"} =~ m,^(.*/),) { + my $dir = $1; + if (! -d $dir) { + mkpath($dir) or die "Failed to create directories '$dir': $!"; + print "\nThe log directory $dir did not exist, so it was created.\n"; + } + } open(LOG, ">> $opt{LOG_FILE}") or die "Can't write to $opt{LOG_FILE}"; LOG->autoflush(1); } diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config index b0049be00c70..cdd9782f9646 100644 --- a/tools/testing/kunit/configs/all_tests.config +++ b/tools/testing/kunit/configs/all_tests.config @@ -41,6 +41,8 @@ CONFIG_DAMON_PADDR=y CONFIG_REGMAP_BUILD=y +CONFIG_AUDIT=y + CONFIG_SECURITY=y CONFIG_SECURITY_APPARMOR=y CONFIG_SECURITY_LANDLOCK=y diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index d30f90eae9a4..d3f39bc1ceec 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -72,8 +72,8 @@ class LinuxSourceTreeOperations: raise ConfigError(e.output.decode()) def make(self, jobs: int, build_dir: str, make_options: Optional[List[str]]) -> None: - command = ['make', 'all', 'compile_commands.json', 'ARCH=' + self._linux_arch, - 'O=' + build_dir, '--jobs=' + str(jobs)] + command = ['make', 'all', 'compile_commands.json', 'scripts_gdb', + 'ARCH=' + self._linux_arch, 'O=' + build_dir, '--jobs=' + str(jobs)] if make_options: command.extend(make_options) if self._cross_compile: diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index 29fc27e8949b..da53a709773a 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -759,7 +759,7 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: # If parsing the main/top-level test, parse KTAP version line and # test plan test.name = "main" - ktap_line = parse_ktap_header(lines, test, printer) + parse_ktap_header(lines, test, printer) test.log.extend(parse_diagnostic(lines)) parse_test_plan(lines, test) parent_test = True @@ -768,13 +768,12 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: # the KTAP version line and/or subtest header line ktap_line = parse_ktap_header(lines, test, printer) subtest_line = parse_test_header(lines, test) + test.log.extend(parse_diagnostic(lines)) + parse_test_plan(lines, test) parent_test = (ktap_line or subtest_line) if parent_test: - # If KTAP version line and/or subtest header is found, attempt - # to parse test plan and print test header - test.log.extend(parse_diagnostic(lines)) - parse_test_plan(lines, test) print_test_header(test, printer) + expected_count = test.expected_count subtests = [] test_num = 1 diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index 0bcb0cc002f8..5ff4f6ffd873 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -363,6 +363,17 @@ class KUnitParserTest(unittest.TestCase): self.print_mock.assert_any_call(StrContains(' Indented more.')) self.noPrintCallContains('not ok 1 test1') + def test_parse_late_test_plan(self): + output = """ + TAP version 13 + ok 4 test4 + 1..4 + """ + result = kunit_parser.parse_run_tests(output.splitlines(), stdout) + # Missing test results after test plan should alert a suspected test crash. + self.assertEqual(kunit_parser.TestStatus.TEST_CRASHED, result.status) + self.assertEqual(result.counts, kunit_parser.TestCounts(passed=1, crashed=1, errors=1)) + def line_stream_from_strs(strs: Iterable[str]) -> kunit_parser.LineStream: return kunit_parser.LineStream(enumerate(strs, start=1)) diff --git a/tools/testing/kunit/qemu_configs/sparc.py b/tools/testing/kunit/qemu_configs/sparc.py index e975c4331a7c..256d9573b446 100644 --- a/tools/testing/kunit/qemu_configs/sparc.py +++ b/tools/testing/kunit/qemu_configs/sparc.py @@ -2,8 +2,9 @@ from ..qemu_config import QemuArchParams QEMU_ARCH = QemuArchParams(linux_arch='sparc', kconfig=''' -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y''', +CONFIG_SERIAL_SUNZILOG=y +CONFIG_SERIAL_SUNZILOG_CONSOLE=y +''', qemu_arch='sparc', kernel_path='arch/sparc/boot/zImage', kernel_command_line='console=ttyS0 mem=256M', diff --git a/tools/testing/kunit/qemu_configs/x86_64.py b/tools/testing/kunit/qemu_configs/x86_64.py index dc7949076863..4a6bf4e048f5 100644 --- a/tools/testing/kunit/qemu_configs/x86_64.py +++ b/tools/testing/kunit/qemu_configs/x86_64.py @@ -7,4 +7,6 @@ CONFIG_SERIAL_8250_CONSOLE=y''', qemu_arch='x86_64', kernel_path='arch/x86/boot/bzImage', kernel_command_line='console=ttyS0', - extra_qemu_params=[]) + # qboot is faster than SeaBIOS and doesn't mess up + # the terminal. + extra_qemu_params=['-bios', 'qboot.rom']) diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 2ebaf5e6942e..2694344274bf 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -114,6 +114,7 @@ endif TARGETS += tmpfs TARGETS += tpm2 TARGETS += tty +TARGETS += ublk TARGETS += uevent TARGETS += user_events TARGETS += vDSO diff --git a/tools/testing/selftests/arm64/fp/kernel-test.c b/tools/testing/selftests/arm64/fp/kernel-test.c index 348e8bef62c7..e3cec3723ffa 100644 --- a/tools/testing/selftests/arm64/fp/kernel-test.c +++ b/tools/testing/selftests/arm64/fp/kernel-test.c @@ -46,7 +46,6 @@ static void handle_kick_signal(int sig, siginfo_t *info, void *context) } static char *drivers[] = { - "crct10dif-arm64", "sha1-ce", "sha224-arm64", "sha224-arm64-neon", diff --git a/tools/testing/selftests/arm64/mte/check_hugetlb_options.c b/tools/testing/selftests/arm64/mte/check_hugetlb_options.c index 303260a6dc65..3bfcd3848432 100644 --- a/tools/testing/selftests/arm64/mte/check_hugetlb_options.c +++ b/tools/testing/selftests/arm64/mte/check_hugetlb_options.c @@ -227,6 +227,8 @@ static int check_child_hugetlb_memory_mapping(int mem_type, int mode, int mappin int main(int argc, char *argv[]) { int err; + void *map_ptr; + unsigned long map_size; err = mte_default_setup(); if (err) @@ -243,6 +245,15 @@ int main(int argc, char *argv[]) return KSFT_FAIL; } + /* Check if MTE supports hugetlb mappings */ + map_size = default_huge_page_size(); + map_ptr = mmap(NULL, map_size, PROT_READ | PROT_MTE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0); + if (map_ptr == MAP_FAILED) + ksft_exit_skip("PROT_MTE not supported with MAP_HUGETLB mappings\n"); + else + munmap(map_ptr, map_size); + /* Set test plan */ ksft_set_plan(12); @@ -270,13 +281,13 @@ int main(int argc, char *argv[]) "Check clear PROT_MTE flags with private mapping and sync error mode and mmap/mprotect memory\n"); evaluate_test(check_child_hugetlb_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE | MAP_HUGETLB), - "Check child hugetlb memory with private mapping, precise mode and mmap memory\n"); + "Check child hugetlb memory with private mapping, sync error mode and mmap memory\n"); evaluate_test(check_child_hugetlb_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE | MAP_HUGETLB), - "Check child hugetlb memory with private mapping, precise mode and mmap memory\n"); + "Check child hugetlb memory with private mapping, async error mode and mmap memory\n"); evaluate_test(check_child_hugetlb_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE | MAP_HUGETLB), - "Check child hugetlb memory with private mapping, precise mode and mmap/mprotect memory\n"); + "Check child hugetlb memory with private mapping, sync error mode and mmap/mprotect memory\n"); evaluate_test(check_child_hugetlb_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE | MAP_HUGETLB), - "Check child hugetlb memory with private mapping, precise mode and mmap/mprotect memory\n"); + "Check child hugetlb memory with private mapping, async error mode and mmap/mprotect memory\n"); mte_restore_setup(); free_hugetlb(); diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64 index 901349da680f..6d8feda27ce9 100644 --- a/tools/testing/selftests/bpf/DENYLIST.aarch64 +++ b/tools/testing/selftests/bpf/DENYLIST.aarch64 @@ -1,12 +1,3 @@ -bpf_cookie/multi_kprobe_attach_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3 -bpf_cookie/multi_kprobe_link_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3 -kprobe_multi_bench_attach # needs CONFIG_FPROBE -kprobe_multi_test # needs CONFIG_FPROBE -module_attach # prog 'kprobe_multi': failed to auto-attach: -95 fentry_test/fentry_many_args # fentry_many_args:FAIL:fentry_many_args_attach unexpected error: -524 fexit_test/fexit_many_args # fexit_many_args:FAIL:fexit_many_args_attach unexpected error: -524 tracing_struct/struct_many_args # struct_many_args:FAIL:tracing_struct_many_args__attach unexpected error: -524 -fill_link_info/kprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95 -fill_link_info/kretprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95 -fill_link_info/kprobe_multi_invalid_ubuff # bpf_program__attach_kprobe_multi_opts unexpected error: -95 -missed/kprobe_recursion # missed_kprobe_recursion__attach unexpected error: -95 (errno 95) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 87551628e112..66bb50356be0 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -95,18 +95,12 @@ TEST_GEN_PROGS += test_progs-cpuv4 TEST_INST_SUBDIRS += cpuv4 endif -TEST_GEN_FILES = test_lwt_ip_encap.bpf.o test_tc_edt.bpf.o +TEST_GEN_FILES = test_tc_edt.bpf.o TEST_FILES = xsk_prereqs.sh $(wildcard progs/btf_dump_test_case_*.c) # Order correspond to 'make run_tests' order TEST_PROGS := test_kmod.sh \ - test_xdp_redirect_multi.sh \ - test_tunnel.sh \ - test_lwt_seg6local.sh \ test_lirc_mode2.sh \ - test_xdp_vlan_mode_generic.sh \ - test_xdp_vlan_mode_native.sh \ - test_lwt_ip_encap.sh \ test_tc_tunnel.sh \ test_tc_edt.sh \ test_xdping.sh \ @@ -117,9 +111,9 @@ TEST_PROGS := test_kmod.sh \ test_xsk.sh \ test_xdp_features.sh -TEST_PROGS_EXTENDED := with_addr.sh \ - with_tunnels.sh ima_setup.sh verify_sig_setup.sh \ - test_xdp_vlan.sh test_bpftool.py +TEST_PROGS_EXTENDED := \ + ima_setup.sh verify_sig_setup.sh \ + test_bpftool.py TEST_KMODS := bpf_testmod.ko bpf_test_no_cfi.ko bpf_test_modorder_x.ko \ bpf_test_modorder_y.ko @@ -135,7 +129,6 @@ TEST_GEN_PROGS_EXTENDED = \ veristat \ xdp_features \ xdp_hw_metadata \ - xdp_redirect_multi \ xdp_synproxy \ xdping \ xskxceiver @@ -184,9 +177,14 @@ ifeq ($(feature-llvm),1) LLVM_CONFIG_LIB_COMPONENTS := mcdisassembler all-targets # both llvm-config and lib.mk add -D_GNU_SOURCE, which ends up as conflict LLVM_CFLAGS += $(filter-out -D_GNU_SOURCE,$(shell $(LLVM_CONFIG) --cflags)) - LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-static --libs $(LLVM_CONFIG_LIB_COMPONENTS)) - LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-static --system-libs $(LLVM_CONFIG_LIB_COMPONENTS)) - LLVM_LDLIBS += -lstdc++ + # Prefer linking statically if it's available, otherwise fallback to shared + ifeq ($(shell $(LLVM_CONFIG) --link-static --libs >/dev/null 2>&1 && echo static),static) + LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-static --libs $(LLVM_CONFIG_LIB_COMPONENTS)) + LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-static --system-libs $(LLVM_CONFIG_LIB_COMPONENTS)) + LLVM_LDLIBS += -lstdc++ + else + LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-shared --libs $(LLVM_CONFIG_LIB_COMPONENTS)) + endif LLVM_LDFLAGS += $(shell $(LLVM_CONFIG) --ldflags) endif @@ -306,6 +304,7 @@ $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(RUNQSLOWER_OUTPUT) BPFTOOL_OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \ BPFOBJ_OUTPUT=$(BUILD_DIR)/libbpf/ \ BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) \ + BPF_TARGET_ENDIAN=$(BPF_TARGET_ENDIAN) \ EXTRA_CFLAGS='-g $(OPT_FLAGS) $(SAN_CFLAGS) $(EXTRA_CFLAGS)' \ EXTRA_LDFLAGS='$(SAN_LDFLAGS) $(EXTRA_LDFLAGS)' && \ cp $(RUNQSLOWER_OUTPUT)runqslower $@ @@ -684,6 +683,7 @@ $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \ $(TRUNNER_EXTRA_OBJS) $$(BPFOBJ) \ $(RESOLVE_BTFIDS) \ $(TRUNNER_BPFTOOL) \ + $(OUTPUT)/veristat \ | $(TRUNNER_BINARY)-extras $$(call msg,BINARY,,$$@) $(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) $$(LDFLAGS) -o $$@ diff --git a/tools/testing/selftests/bpf/bpf_arena_spin_lock.h b/tools/testing/selftests/bpf/bpf_arena_spin_lock.h new file mode 100644 index 000000000000..fb8dc0768999 --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_arena_spin_lock.h @@ -0,0 +1,533 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#ifndef BPF_ARENA_SPIN_LOCK_H +#define BPF_ARENA_SPIN_LOCK_H + +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include "bpf_atomic.h" + +#define arch_mcs_spin_lock_contended_label(l, label) smp_cond_load_acquire_label(l, VAL, label) +#define arch_mcs_spin_unlock_contended(l) smp_store_release((l), 1) + +#if defined(ENABLE_ATOMICS_TESTS) && defined(__BPF_FEATURE_ADDR_SPACE_CAST) + +#define EBUSY 16 +#define EOPNOTSUPP 95 +#define ETIMEDOUT 110 + +#ifndef __arena +#define __arena __attribute__((address_space(1))) +#endif + +extern unsigned long CONFIG_NR_CPUS __kconfig; + +/* + * Typically, we'd just rely on the definition in vmlinux.h for qspinlock, but + * PowerPC overrides the definition to define lock->val as u32 instead of + * atomic_t, leading to compilation errors. Import a local definition below so + * that we don't depend on the vmlinux.h version. + */ + +struct __qspinlock { + union { + atomic_t val; + struct { + u8 locked; + u8 pending; + }; + struct { + u16 locked_pending; + u16 tail; + }; + }; +}; + +#define arena_spinlock_t struct __qspinlock +/* FIXME: Using typedef causes CO-RE relocation error */ +/* typedef struct qspinlock arena_spinlock_t; */ + +struct arena_mcs_spinlock { + struct arena_mcs_spinlock __arena *next; + int locked; + int count; +}; + +struct arena_qnode { + struct arena_mcs_spinlock mcs; +}; + +#define _Q_MAX_NODES 4 +#define _Q_PENDING_LOOPS 1 + +/* + * Bitfields in the atomic value: + * + * 0- 7: locked byte + * 8: pending + * 9-15: not used + * 16-17: tail index + * 18-31: tail cpu (+1) + */ +#define _Q_MAX_CPUS 1024 + +#define _Q_SET_MASK(type) (((1U << _Q_ ## type ## _BITS) - 1)\ + << _Q_ ## type ## _OFFSET) +#define _Q_LOCKED_OFFSET 0 +#define _Q_LOCKED_BITS 8 +#define _Q_LOCKED_MASK _Q_SET_MASK(LOCKED) + +#define _Q_PENDING_OFFSET (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS) +#define _Q_PENDING_BITS 8 +#define _Q_PENDING_MASK _Q_SET_MASK(PENDING) + +#define _Q_TAIL_IDX_OFFSET (_Q_PENDING_OFFSET + _Q_PENDING_BITS) +#define _Q_TAIL_IDX_BITS 2 +#define _Q_TAIL_IDX_MASK _Q_SET_MASK(TAIL_IDX) + +#define _Q_TAIL_CPU_OFFSET (_Q_TAIL_IDX_OFFSET + _Q_TAIL_IDX_BITS) +#define _Q_TAIL_CPU_BITS (32 - _Q_TAIL_CPU_OFFSET) +#define _Q_TAIL_CPU_MASK _Q_SET_MASK(TAIL_CPU) + +#define _Q_TAIL_OFFSET _Q_TAIL_IDX_OFFSET +#define _Q_TAIL_MASK (_Q_TAIL_IDX_MASK | _Q_TAIL_CPU_MASK) + +#define _Q_LOCKED_VAL (1U << _Q_LOCKED_OFFSET) +#define _Q_PENDING_VAL (1U << _Q_PENDING_OFFSET) + +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) + +struct arena_qnode __arena qnodes[_Q_MAX_CPUS][_Q_MAX_NODES]; + +static inline u32 encode_tail(int cpu, int idx) +{ + u32 tail; + + tail = (cpu + 1) << _Q_TAIL_CPU_OFFSET; + tail |= idx << _Q_TAIL_IDX_OFFSET; /* assume < 4 */ + + return tail; +} + +static inline struct arena_mcs_spinlock __arena *decode_tail(u32 tail) +{ + u32 cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1; + u32 idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET; + + return &qnodes[cpu][idx].mcs; +} + +static inline +struct arena_mcs_spinlock __arena *grab_mcs_node(struct arena_mcs_spinlock __arena *base, int idx) +{ + return &((struct arena_qnode __arena *)base + idx)->mcs; +} + +#define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK) + +/** + * xchg_tail - Put in the new queue tail code word & retrieve previous one + * @lock : Pointer to queued spinlock structure + * @tail : The new queue tail code word + * Return: The previous queue tail code word + * + * xchg(lock, tail) + * + * p,*,* -> n,*,* ; prev = xchg(lock, node) + */ +static __always_inline u32 xchg_tail(arena_spinlock_t __arena *lock, u32 tail) +{ + u32 old, new; + + old = atomic_read(&lock->val); + do { + new = (old & _Q_LOCKED_PENDING_MASK) | tail; + /* + * We can use relaxed semantics since the caller ensures that + * the MCS node is properly initialized before updating the + * tail. + */ + /* These loops are not expected to stall, but we still need to + * prove to the verifier they will terminate eventually. + */ + cond_break_label(out); + } while (!atomic_try_cmpxchg_relaxed(&lock->val, &old, new)); + + return old; +out: + bpf_printk("RUNTIME ERROR: %s unexpected cond_break exit!!!", __func__); + return old; +} + +/** + * clear_pending - clear the pending bit. + * @lock: Pointer to queued spinlock structure + * + * *,1,* -> *,0,* + */ +static __always_inline void clear_pending(arena_spinlock_t __arena *lock) +{ + WRITE_ONCE(lock->pending, 0); +} + +/** + * clear_pending_set_locked - take ownership and clear the pending bit. + * @lock: Pointer to queued spinlock structure + * + * *,1,0 -> *,0,1 + * + * Lock stealing is not allowed if this function is used. + */ +static __always_inline void clear_pending_set_locked(arena_spinlock_t __arena *lock) +{ + WRITE_ONCE(lock->locked_pending, _Q_LOCKED_VAL); +} + +/** + * set_locked - Set the lock bit and own the lock + * @lock: Pointer to queued spinlock structure + * + * *,*,0 -> *,0,1 + */ +static __always_inline void set_locked(arena_spinlock_t __arena *lock) +{ + WRITE_ONCE(lock->locked, _Q_LOCKED_VAL); +} + +static __always_inline +u32 arena_fetch_set_pending_acquire(arena_spinlock_t __arena *lock) +{ + u32 old, new; + + old = atomic_read(&lock->val); + do { + new = old | _Q_PENDING_VAL; + /* + * These loops are not expected to stall, but we still need to + * prove to the verifier they will terminate eventually. + */ + cond_break_label(out); + } while (!atomic_try_cmpxchg_acquire(&lock->val, &old, new)); + + return old; +out: + bpf_printk("RUNTIME ERROR: %s unexpected cond_break exit!!!", __func__); + return old; +} + +/** + * arena_spin_trylock - try to acquire the queued spinlock + * @lock : Pointer to queued spinlock structure + * Return: 1 if lock acquired, 0 if failed + */ +static __always_inline int arena_spin_trylock(arena_spinlock_t __arena *lock) +{ + int val = atomic_read(&lock->val); + + if (unlikely(val)) + return 0; + + return likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL)); +} + +__noinline +int arena_spin_lock_slowpath(arena_spinlock_t __arena __arg_arena *lock, u32 val) +{ + struct arena_mcs_spinlock __arena *prev, *next, *node0, *node; + int ret = -ETIMEDOUT; + u32 old, tail; + int idx; + + /* + * Wait for in-progress pending->locked hand-overs with a bounded + * number of spins so that we guarantee forward progress. + * + * 0,1,0 -> 0,0,1 + */ + if (val == _Q_PENDING_VAL) { + int cnt = _Q_PENDING_LOOPS; + val = atomic_cond_read_relaxed_label(&lock->val, + (VAL != _Q_PENDING_VAL) || !cnt--, + release_err); + } + + /* + * If we observe any contention; queue. + */ + if (val & ~_Q_LOCKED_MASK) + goto queue; + + /* + * trylock || pending + * + * 0,0,* -> 0,1,* -> 0,0,1 pending, trylock + */ + val = arena_fetch_set_pending_acquire(lock); + + /* + * If we observe contention, there is a concurrent locker. + * + * Undo and queue; our setting of PENDING might have made the + * n,0,0 -> 0,0,0 transition fail and it will now be waiting + * on @next to become !NULL. + */ + if (unlikely(val & ~_Q_LOCKED_MASK)) { + + /* Undo PENDING if we set it. */ + if (!(val & _Q_PENDING_MASK)) + clear_pending(lock); + + goto queue; + } + + /* + * We're pending, wait for the owner to go away. + * + * 0,1,1 -> *,1,0 + * + * this wait loop must be a load-acquire such that we match the + * store-release that clears the locked bit and create lock + * sequentiality; this is because not all + * clear_pending_set_locked() implementations imply full + * barriers. + */ + if (val & _Q_LOCKED_MASK) + smp_cond_load_acquire_label(&lock->locked, !VAL, release_err); + + /* + * take ownership and clear the pending bit. + * + * 0,1,0 -> 0,0,1 + */ + clear_pending_set_locked(lock); + return 0; + + /* + * End of pending bit optimistic spinning and beginning of MCS + * queuing. + */ +queue: + node0 = &(qnodes[bpf_get_smp_processor_id()])[0].mcs; + idx = node0->count++; + tail = encode_tail(bpf_get_smp_processor_id(), idx); + + /* + * 4 nodes are allocated based on the assumption that there will not be + * nested NMIs taking spinlocks. That may not be true in some + * architectures even though the chance of needing more than 4 nodes + * will still be extremely unlikely. When that happens, we simply return + * an error. Original qspinlock has a trylock fallback in this case. + */ + if (unlikely(idx >= _Q_MAX_NODES)) { + ret = -EBUSY; + goto release_node_err; + } + + node = grab_mcs_node(node0, idx); + + /* + * Ensure that we increment the head node->count before initialising + * the actual node. If the compiler is kind enough to reorder these + * stores, then an IRQ could overwrite our assignments. + */ + barrier(); + + node->locked = 0; + node->next = NULL; + + /* + * We touched a (possibly) cold cacheline in the per-cpu queue node; + * attempt the trylock once more in the hope someone let go while we + * weren't watching. + */ + if (arena_spin_trylock(lock)) + goto release; + + /* + * Ensure that the initialisation of @node is complete before we + * publish the updated tail via xchg_tail() and potentially link + * @node into the waitqueue via WRITE_ONCE(prev->next, node) below. + */ + smp_wmb(); + + /* + * Publish the updated tail. + * We have already touched the queueing cacheline; don't bother with + * pending stuff. + * + * p,*,* -> n,*,* + */ + old = xchg_tail(lock, tail); + next = NULL; + + /* + * if there was a previous node; link it and wait until reaching the + * head of the waitqueue. + */ + if (old & _Q_TAIL_MASK) { + prev = decode_tail(old); + + /* Link @node into the waitqueue. */ + WRITE_ONCE(prev->next, node); + + arch_mcs_spin_lock_contended_label(&node->locked, release_node_err); + + /* + * While waiting for the MCS lock, the next pointer may have + * been set by another lock waiter. We cannot prefetch here + * due to lack of equivalent instruction in BPF ISA. + */ + next = READ_ONCE(node->next); + } + + /* + * we're at the head of the waitqueue, wait for the owner & pending to + * go away. + * + * *,x,y -> *,0,0 + * + * this wait loop must use a load-acquire such that we match the + * store-release that clears the locked bit and create lock + * sequentiality; this is because the set_locked() function below + * does not imply a full barrier. + */ + val = atomic_cond_read_acquire_label(&lock->val, !(VAL & _Q_LOCKED_PENDING_MASK), + release_node_err); + + /* + * claim the lock: + * + * n,0,0 -> 0,0,1 : lock, uncontended + * *,*,0 -> *,*,1 : lock, contended + * + * If the queue head is the only one in the queue (lock value == tail) + * and nobody is pending, clear the tail code and grab the lock. + * Otherwise, we only need to grab the lock. + */ + + /* + * In the PV case we might already have _Q_LOCKED_VAL set, because + * of lock stealing; therefore we must also allow: + * + * n,0,1 -> 0,0,1 + * + * Note: at this point: (val & _Q_PENDING_MASK) == 0, because of the + * above wait condition, therefore any concurrent setting of + * PENDING will make the uncontended transition fail. + */ + if ((val & _Q_TAIL_MASK) == tail) { + if (atomic_try_cmpxchg_relaxed(&lock->val, &val, _Q_LOCKED_VAL)) + goto release; /* No contention */ + } + + /* + * Either somebody is queued behind us or _Q_PENDING_VAL got set + * which will then detect the remaining tail and queue behind us + * ensuring we'll see a @next. + */ + set_locked(lock); + + /* + * contended path; wait for next if not observed yet, release. + */ + if (!next) + next = smp_cond_load_relaxed_label(&node->next, (VAL), release_node_err); + + arch_mcs_spin_unlock_contended(&next->locked); + +release:; + /* + * release the node + * + * Doing a normal dec vs this_cpu_dec is fine. An upper context always + * decrements count it incremented before returning, thus we're fine. + * For contexts interrupting us, they either observe our dec or not. + * Just ensure the compiler doesn't reorder this statement, as a + * this_cpu_dec implicitly implied that. + */ + barrier(); + node0->count--; + return 0; +release_node_err: + barrier(); + node0->count--; + goto release_err; +release_err: + return ret; +} + +/** + * arena_spin_lock - acquire a queued spinlock + * @lock: Pointer to queued spinlock structure + * + * On error, returned value will be negative. + * On success, zero is returned. + * + * The return value _must_ be tested against zero for success, + * instead of checking it against negative, for passing the + * BPF verifier. + * + * The user should do: + * if (arena_spin_lock(...) != 0) // failure + * or + * if (arena_spin_lock(...) == 0) // success + * or + * if (arena_spin_lock(...)) // failure + * or + * if (!arena_spin_lock(...)) // success + * instead of: + * if (arena_spin_lock(...) < 0) // failure + * + * The return value can still be inspected later. + */ +static __always_inline int arena_spin_lock(arena_spinlock_t __arena *lock) +{ + int val = 0; + + if (CONFIG_NR_CPUS > 1024) + return -EOPNOTSUPP; + + bpf_preempt_disable(); + if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL))) + return 0; + + val = arena_spin_lock_slowpath(lock, val); + /* FIXME: bpf_assert_range(-MAX_ERRNO, 0) once we have it working for all cases. */ + if (val) + bpf_preempt_enable(); + return val; +} + +/** + * arena_spin_unlock - release a queued spinlock + * @lock : Pointer to queued spinlock structure + */ +static __always_inline void arena_spin_unlock(arena_spinlock_t __arena *lock) +{ + /* + * unlock() needs release semantics: + */ + smp_store_release(&lock->locked, 0); + bpf_preempt_enable(); +} + +#define arena_spin_lock_irqsave(lock, flags) \ + ({ \ + int __ret; \ + bpf_local_irq_save(&(flags)); \ + __ret = arena_spin_lock((lock)); \ + if (__ret) \ + bpf_local_irq_restore(&(flags)); \ + (__ret); \ + }) + +#define arena_spin_unlock_irqrestore(lock, flags) \ + ({ \ + arena_spin_unlock((lock)); \ + bpf_local_irq_restore(&(flags)); \ + }) + +#endif + +#endif /* BPF_ARENA_SPIN_LOCK_H */ diff --git a/tools/testing/selftests/bpf/bpf_atomic.h b/tools/testing/selftests/bpf/bpf_atomic.h new file mode 100644 index 000000000000..a9674e544322 --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_atomic.h @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#ifndef BPF_ATOMIC_H +#define BPF_ATOMIC_H + +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include "bpf_experimental.h" + +extern bool CONFIG_X86_64 __kconfig __weak; + +/* + * __unqual_typeof(x) - Declare an unqualified scalar type, leaving + * non-scalar types unchanged, + * + * Prefer C11 _Generic for better compile-times and simpler code. Note: 'char' + * is not type-compatible with 'signed char', and we define a separate case. + * + * This is copied verbatim from kernel's include/linux/compiler_types.h, but + * with default expression (for pointers) changed from (x) to (typeof(x)0). + * + * This is because LLVM has a bug where for lvalue (x), it does not get rid of + * an extra address_space qualifier, but does in case of rvalue (typeof(x)0). + * Hence, for pointers, we need to create an rvalue expression to get the + * desired type. See https://github.com/llvm/llvm-project/issues/53400. + */ +#define __scalar_type_to_expr_cases(type) \ + unsigned type : (unsigned type)0, signed type : (signed type)0 + +#define __unqual_typeof(x) \ + typeof(_Generic((x), \ + char: (char)0, \ + __scalar_type_to_expr_cases(char), \ + __scalar_type_to_expr_cases(short), \ + __scalar_type_to_expr_cases(int), \ + __scalar_type_to_expr_cases(long), \ + __scalar_type_to_expr_cases(long long), \ + default: (typeof(x))0)) + +/* No-op for BPF */ +#define cpu_relax() ({}) + +#define READ_ONCE(x) (*(volatile typeof(x) *)&(x)) + +#define WRITE_ONCE(x, val) ((*(volatile typeof(x) *)&(x)) = (val)) + +#define cmpxchg(p, old, new) __sync_val_compare_and_swap((p), old, new) + +#define try_cmpxchg(p, pold, new) \ + ({ \ + __unqual_typeof(*(pold)) __o = *(pold); \ + __unqual_typeof(*(p)) __r = cmpxchg(p, __o, new); \ + if (__r != __o) \ + *(pold) = __r; \ + __r == __o; \ + }) + +#define try_cmpxchg_relaxed(p, pold, new) try_cmpxchg(p, pold, new) + +#define try_cmpxchg_acquire(p, pold, new) try_cmpxchg(p, pold, new) + +#define smp_mb() \ + ({ \ + unsigned long __val; \ + __sync_fetch_and_add(&__val, 0); \ + }) + +#define smp_rmb() \ + ({ \ + if (!CONFIG_X86_64) \ + smp_mb(); \ + else \ + barrier(); \ + }) + +#define smp_wmb() \ + ({ \ + if (!CONFIG_X86_64) \ + smp_mb(); \ + else \ + barrier(); \ + }) + +/* Control dependency provides LOAD->STORE, provide LOAD->LOAD */ +#define smp_acquire__after_ctrl_dep() ({ smp_rmb(); }) + +#define smp_load_acquire(p) \ + ({ \ + __unqual_typeof(*(p)) __v = READ_ONCE(*(p)); \ + if (!CONFIG_X86_64) \ + smp_mb(); \ + barrier(); \ + __v; \ + }) + +#define smp_store_release(p, val) \ + ({ \ + if (!CONFIG_X86_64) \ + smp_mb(); \ + barrier(); \ + WRITE_ONCE(*(p), val); \ + }) + +#define smp_cond_load_relaxed_label(p, cond_expr, label) \ + ({ \ + typeof(p) __ptr = (p); \ + __unqual_typeof(*(p)) VAL; \ + for (;;) { \ + VAL = (__unqual_typeof(*(p)))READ_ONCE(*__ptr); \ + if (cond_expr) \ + break; \ + cond_break_label(label); \ + cpu_relax(); \ + } \ + (typeof(*(p)))VAL; \ + }) + +#define smp_cond_load_acquire_label(p, cond_expr, label) \ + ({ \ + __unqual_typeof(*p) __val = \ + smp_cond_load_relaxed_label(p, cond_expr, label); \ + smp_acquire__after_ctrl_dep(); \ + (typeof(*(p)))__val; \ + }) + +#define atomic_read(p) READ_ONCE((p)->counter) + +#define atomic_cond_read_relaxed_label(p, cond_expr, label) \ + smp_cond_load_relaxed_label(&(p)->counter, cond_expr, label) + +#define atomic_cond_read_acquire_label(p, cond_expr, label) \ + smp_cond_load_acquire_label(&(p)->counter, cond_expr, label) + +#define atomic_try_cmpxchg_relaxed(p, pold, new) \ + try_cmpxchg_relaxed(&(p)->counter, pold, new) + +#define atomic_try_cmpxchg_acquire(p, pold, new) \ + try_cmpxchg_acquire(&(p)->counter, pold, new) + +#endif /* BPF_ATOMIC_H */ diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h index cd8ecd39c3f3..6535c8ae3c46 100644 --- a/tools/testing/selftests/bpf/bpf_experimental.h +++ b/tools/testing/selftests/bpf/bpf_experimental.h @@ -368,12 +368,12 @@ l_true: \ ret; \ }) -#define cond_break \ +#define __cond_break(expr) \ ({ __label__ l_break, l_continue; \ asm volatile goto("may_goto %l[l_break]" \ :::: l_break); \ goto l_continue; \ - l_break: break; \ + l_break: expr; \ l_continue:; \ }) #else @@ -392,7 +392,7 @@ l_true: \ ret; \ }) -#define cond_break \ +#define __cond_break(expr) \ ({ __label__ l_break, l_continue; \ asm volatile goto("1:.byte 0xe5; \ .byte 0; \ @@ -400,7 +400,7 @@ l_true: \ .short 0" \ :::: l_break); \ goto l_continue; \ - l_break: break; \ + l_break: expr; \ l_continue:; \ }) #else @@ -418,7 +418,7 @@ l_true: \ ret; \ }) -#define cond_break \ +#define __cond_break(expr) \ ({ __label__ l_break, l_continue; \ asm volatile goto("1:.byte 0xe5; \ .byte 0; \ @@ -426,12 +426,15 @@ l_true: \ .short 0" \ :::: l_break); \ goto l_continue; \ - l_break: break; \ + l_break: expr; \ l_continue:; \ }) #endif #endif +#define cond_break __cond_break(break) +#define cond_break_label(label) __cond_break(goto label) + #ifndef bpf_nop_mov #define bpf_nop_mov(var) \ asm volatile("%[reg]=%[reg]"::[reg]"r"((short)var)) diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h index 2eb3483f2fb0..8215c9b3115e 100644 --- a/tools/testing/selftests/bpf/bpf_kfuncs.h +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h @@ -87,4 +87,9 @@ struct dentry; */ extern int bpf_get_dentry_xattr(struct dentry *dentry, const char *name, struct bpf_dynptr *value_ptr) __ksym __weak; + +extern int bpf_set_dentry_xattr(struct dentry *dentry, const char *name__str, + const struct bpf_dynptr *value_p, int flags) __ksym __weak; +extern int bpf_remove_dentry_xattr(struct dentry *dentry, const char *name__str) __ksym __weak; + #endif diff --git a/tools/testing/selftests/bpf/cap_helpers.c b/tools/testing/selftests/bpf/cap_helpers.c index d5ac507401d7..98f840c3a38f 100644 --- a/tools/testing/selftests/bpf/cap_helpers.c +++ b/tools/testing/selftests/bpf/cap_helpers.c @@ -19,7 +19,7 @@ int cap_enable_effective(__u64 caps, __u64 *old_caps) err = capget(&hdr, data); if (err) - return err; + return -errno; if (old_caps) *old_caps = (__u64)(data[1].effective) << 32 | data[0].effective; @@ -32,7 +32,7 @@ int cap_enable_effective(__u64 caps, __u64 *old_caps) data[1].effective |= cap1; err = capset(&hdr, data); if (err) - return err; + return -errno; return 0; } @@ -49,7 +49,7 @@ int cap_disable_effective(__u64 caps, __u64 *old_caps) err = capget(&hdr, data); if (err) - return err; + return -errno; if (old_caps) *old_caps = (__u64)(data[1].effective) << 32 | data[0].effective; @@ -61,7 +61,7 @@ int cap_disable_effective(__u64 caps, __u64 *old_caps) data[1].effective &= ~cap1; err = capset(&hdr, data); if (err) - return err; + return -errno; return 0; } diff --git a/tools/testing/selftests/bpf/cap_helpers.h b/tools/testing/selftests/bpf/cap_helpers.h index 6d163530cb0f..8dcb28557f76 100644 --- a/tools/testing/selftests/bpf/cap_helpers.h +++ b/tools/testing/selftests/bpf/cap_helpers.h @@ -4,6 +4,7 @@ #include <linux/types.h> #include <linux/capability.h> +#include <errno.h> #ifndef CAP_PERFMON #define CAP_PERFMON 38 diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index 80844a5fb1fe..72b5c174ab3b 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -446,6 +446,23 @@ char *ping_command(int family) return "ping"; } +int append_tid(char *str, size_t sz) +{ + size_t end; + + if (!str) + return -1; + + end = strlen(str); + if (end + 8 > sz) + return -1; + + sprintf(&str[end], "%07d", gettid()); + str[end + 7] = '\0'; + + return 0; +} + int remove_netns(const char *name) { char *cmd; @@ -548,6 +565,34 @@ void close_netns(struct nstoken *token) free(token); } +int open_tuntap(const char *dev_name, bool need_mac) +{ + int err = 0; + struct ifreq ifr; + int fd = open("/dev/net/tun", O_RDWR); + + if (!ASSERT_GE(fd, 0, "open(/dev/net/tun)")) + return -1; + + ifr.ifr_flags = IFF_NO_PI | (need_mac ? IFF_TAP : IFF_TUN); + strncpy(ifr.ifr_name, dev_name, IFNAMSIZ - 1); + ifr.ifr_name[IFNAMSIZ - 1] = '\0'; + + err = ioctl(fd, TUNSETIFF, &ifr); + if (!ASSERT_OK(err, "ioctl(TUNSETIFF)")) { + close(fd); + return -1; + } + + err = fcntl(fd, F_SETFL, O_NONBLOCK); + if (!ASSERT_OK(err, "fcntl(O_NONBLOCK)")) { + close(fd); + return -1; + } + + return fd; +} + int get_socket_local_port(int sock_fd) { struct sockaddr_storage addr; @@ -733,6 +778,36 @@ struct tmonitor_ctx { int pcap_fd; }; +static int __base_pr(const char *format, va_list args) +{ + return vfprintf(stdout, format, args); +} + +static tm_print_fn_t __tm_pr = __base_pr; + +tm_print_fn_t traffic_monitor_set_print(tm_print_fn_t fn) +{ + tm_print_fn_t old_print_fn; + + old_print_fn = __atomic_exchange_n(&__tm_pr, fn, __ATOMIC_RELAXED); + + return old_print_fn; +} + +void tm_print(const char *format, ...) +{ + tm_print_fn_t print_fn; + va_list args; + + print_fn = __atomic_load_n(&__tm_pr, __ATOMIC_RELAXED); + if (!print_fn) + return; + + va_start(args, format); + print_fn(format, args); + va_end(args); +} + /* Is this packet captured with a Ethernet protocol type? */ static bool is_ethernet(const u_char *packet) { @@ -750,7 +825,7 @@ static bool is_ethernet(const u_char *packet) case 770: /* ARPHRD_FRAD */ case 778: /* ARPHDR_IPGRE */ case 803: /* ARPHRD_IEEE80211_RADIOTAP */ - printf("Packet captured: arphdr_type=%d\n", arphdr_type); + tm_print("Packet captured: arphdr_type=%d\n", arphdr_type); return false; } return true; @@ -771,12 +846,13 @@ static const char *pkt_type_str(u16 pkt_type) return "Unknown"; } +#define MAX_FLAGS_STRLEN 21 /* Show the information of the transport layer in the packet */ static void show_transport(const u_char *packet, u16 len, u32 ifindex, const char *src_addr, const char *dst_addr, u16 proto, bool ipv6, u8 pkt_type) { - char *ifname, _ifname[IF_NAMESIZE]; + char *ifname, _ifname[IF_NAMESIZE], flags[MAX_FLAGS_STRLEN] = ""; const char *transport_str; u16 src_port, dst_port; struct udphdr *udp; @@ -799,47 +875,39 @@ static void show_transport(const u_char *packet, u16 len, u32 ifindex, dst_port = ntohs(tcp->dest); transport_str = "TCP"; } else if (proto == IPPROTO_ICMP) { - printf("%-7s %-3s IPv4 %s > %s: ICMP, length %d, type %d, code %d\n", - ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len, - packet[0], packet[1]); + tm_print("%-7s %-3s IPv4 %s > %s: ICMP, length %d, type %d, code %d\n", + ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len, + packet[0], packet[1]); return; } else if (proto == IPPROTO_ICMPV6) { - printf("%-7s %-3s IPv6 %s > %s: ICMPv6, length %d, type %d, code %d\n", - ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len, - packet[0], packet[1]); + tm_print("%-7s %-3s IPv6 %s > %s: ICMPv6, length %d, type %d, code %d\n", + ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len, + packet[0], packet[1]); return; } else { - printf("%-7s %-3s %s %s > %s: protocol %d\n", - ifname, pkt_type_str(pkt_type), ipv6 ? "IPv6" : "IPv4", - src_addr, dst_addr, proto); + tm_print("%-7s %-3s %s %s > %s: protocol %d\n", + ifname, pkt_type_str(pkt_type), ipv6 ? "IPv6" : "IPv4", + src_addr, dst_addr, proto); return; } /* TCP or UDP*/ - flockfile(stdout); + if (proto == IPPROTO_TCP) + snprintf(flags, MAX_FLAGS_STRLEN, "%s%s%s%s", + tcp->fin ? ", FIN" : "", + tcp->syn ? ", SYN" : "", + tcp->rst ? ", RST" : "", + tcp->ack ? ", ACK" : ""); + if (ipv6) - printf("%-7s %-3s IPv6 %s.%d > %s.%d: %s, length %d", - ifname, pkt_type_str(pkt_type), src_addr, src_port, - dst_addr, dst_port, transport_str, len); + tm_print("%-7s %-3s IPv6 %s.%d > %s.%d: %s, length %d%s\n", + ifname, pkt_type_str(pkt_type), src_addr, src_port, + dst_addr, dst_port, transport_str, len, flags); else - printf("%-7s %-3s IPv4 %s:%d > %s:%d: %s, length %d", - ifname, pkt_type_str(pkt_type), src_addr, src_port, - dst_addr, dst_port, transport_str, len); - - if (proto == IPPROTO_TCP) { - if (tcp->fin) - printf(", FIN"); - if (tcp->syn) - printf(", SYN"); - if (tcp->rst) - printf(", RST"); - if (tcp->ack) - printf(", ACK"); - } - - printf("\n"); - funlockfile(stdout); + tm_print("%-7s %-3s IPv4 %s:%d > %s:%d: %s, length %d%s\n", + ifname, pkt_type_str(pkt_type), src_addr, src_port, + dst_addr, dst_port, transport_str, len, flags); } static void show_ipv6_packet(const u_char *packet, u32 ifindex, u8 pkt_type) @@ -954,8 +1022,8 @@ static void *traffic_monitor_thread(void *arg) ifname = _ifname; } - printf("%-7s %-3s Unknown network protocol type 0x%x\n", - ifname, pkt_type_str(ptype), proto); + tm_print("%-7s %-3s Unknown network protocol type 0x%x\n", + ifname, pkt_type_str(ptype), proto); } } @@ -1155,8 +1223,9 @@ void traffic_monitor_stop(struct tmonitor_ctx *ctx) write(ctx->wake_fd, &w, sizeof(w)); pthread_join(ctx->thread, NULL); - printf("Packet file: %s\n", strrchr(ctx->pkt_fname, '/') + 1); + tm_print("Packet file: %s\n", strrchr(ctx->pkt_fname, '/') + 1); traffic_monitor_release(ctx); } + #endif /* TRAFFIC_MONITOR */ diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index ebec8a8d6f81..ef208eefd571 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -8,6 +8,7 @@ typedef __u16 __sum16; #include <linux/if_ether.h> #include <linux/if_packet.h> +#include <linux/if_tun.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/ethtool.h> @@ -17,6 +18,7 @@ typedef __u16 __sum16; #include <netinet/udp.h> #include <bpf/bpf_endian.h> #include <net/if.h> +#include <stdio.h> #define MAGIC_VAL 0x1234 #define NUM_ITER 100000 @@ -85,6 +87,8 @@ int get_socket_local_port(int sock_fd); int get_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param); int set_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param); +int open_tuntap(const char *dev_name, bool need_mac); + struct nstoken; /** * open_netns() - Switch to specified network namespace by name. @@ -98,6 +102,18 @@ int send_recv_data(int lfd, int fd, uint32_t total_bytes); int make_netns(const char *name); int remove_netns(const char *name); +/** + * append_tid() - Append thread ID to the given string. + * + * @str: string to extend + * @sz: string's size + * + * 8 characters are used to append the thread ID (7 digits + '\0') + * + * Returns -1 on errors, 0 otherwise + */ +int append_tid(char *str, size_t sz); + static __u16 csum_fold(__u32 csum) { csum = (csum & 0xffff) + (csum >> 16); @@ -237,10 +253,13 @@ static inline __sum16 build_udp_v6_csum(const struct ipv6hdr *ip6h, struct tmonitor_ctx; +typedef int (*tm_print_fn_t)(const char *format, va_list args); + #ifdef TRAFFIC_MONITOR struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name, const char *subtest_name); void traffic_monitor_stop(struct tmonitor_ctx *ctx); +tm_print_fn_t traffic_monitor_set_print(tm_print_fn_t fn); #else static inline struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name, const char *subtest_name) @@ -251,6 +270,11 @@ static inline struct tmonitor_ctx *traffic_monitor_start(const char *netns, cons static inline void traffic_monitor_stop(struct tmonitor_ctx *ctx) { } + +static inline tm_print_fn_t traffic_monitor_set_print(tm_print_fn_t fn) +{ + return NULL; +} #endif #endif diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c index 4ebd0da898f5..1d53a8561ee2 100644 --- a/tools/testing/selftests/bpf/prog_tests/align.c +++ b/tools/testing/selftests/bpf/prog_tests/align.c @@ -610,9 +610,11 @@ static int do_test_single(struct bpf_align_test *test) .log_size = sizeof(bpf_vlog), .log_level = 2, ); + const char *main_pass_start = "0: R1=ctx() R10=fp0"; const char *line_ptr; int cur_line = -1; int prog_len, i; + char *start; int fd_prog; int ret; @@ -632,7 +634,13 @@ static int do_test_single(struct bpf_align_test *test) ret = 0; /* We make a local copy so that we can strtok() it */ strncpy(bpf_vlog_copy, bpf_vlog, sizeof(bpf_vlog_copy)); - line_ptr = strtok(bpf_vlog_copy, "\n"); + start = strstr(bpf_vlog_copy, main_pass_start); + if (!start) { + ret = 1; + printf("Can't find initial line '%s'\n", main_pass_start); + goto out; + } + line_ptr = strtok(start, "\n"); for (i = 0; i < MAX_MATCHES; i++) { struct bpf_reg_match m = test->matches[i]; const char *p; @@ -682,6 +690,7 @@ static int do_test_single(struct bpf_align_test *test) break; } } +out: if (fd_prog >= 0) close(fd_prog); } diff --git a/tools/testing/selftests/bpf/prog_tests/arena_atomics.c b/tools/testing/selftests/bpf/prog_tests/arena_atomics.c index 26e7c06c6cb4..d98577a6babc 100644 --- a/tools/testing/selftests/bpf/prog_tests/arena_atomics.c +++ b/tools/testing/selftests/bpf/prog_tests/arena_atomics.c @@ -162,6 +162,66 @@ static void test_uaf(struct arena_atomics *skel) ASSERT_EQ(skel->arena->uaf_recovery_fails, 0, "uaf_recovery_fails"); } +static void test_load_acquire(struct arena_atomics *skel) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + int err, prog_fd; + + if (skel->data->skip_lacq_srel_tests) { + printf("%s:SKIP: ENABLE_ATOMICS_TESTS not defined, Clang doesn't support addr_space_cast, and/or JIT doesn't support load-acquire\n", + __func__); + test__skip(); + return; + } + + /* No need to attach it, just run it directly */ + prog_fd = bpf_program__fd(skel->progs.load_acquire); + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err")) + return; + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) + return; + + ASSERT_EQ(skel->arena->load_acquire8_result, 0x12, + "load_acquire8_result"); + ASSERT_EQ(skel->arena->load_acquire16_result, 0x1234, + "load_acquire16_result"); + ASSERT_EQ(skel->arena->load_acquire32_result, 0x12345678, + "load_acquire32_result"); + ASSERT_EQ(skel->arena->load_acquire64_result, 0x1234567890abcdef, + "load_acquire64_result"); +} + +static void test_store_release(struct arena_atomics *skel) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + int err, prog_fd; + + if (skel->data->skip_lacq_srel_tests) { + printf("%s:SKIP: ENABLE_ATOMICS_TESTS not defined, Clang doesn't support addr_space_cast, and/or JIT doesn't support store-release\n", + __func__); + test__skip(); + return; + } + + /* No need to attach it, just run it directly */ + prog_fd = bpf_program__fd(skel->progs.store_release); + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err")) + return; + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) + return; + + ASSERT_EQ(skel->arena->store_release8_result, 0x12, + "store_release8_result"); + ASSERT_EQ(skel->arena->store_release16_result, 0x1234, + "store_release16_result"); + ASSERT_EQ(skel->arena->store_release32_result, 0x12345678, + "store_release32_result"); + ASSERT_EQ(skel->arena->store_release64_result, 0x1234567890abcdef, + "store_release64_result"); +} + void test_arena_atomics(void) { struct arena_atomics *skel; @@ -171,7 +231,7 @@ void test_arena_atomics(void) if (!ASSERT_OK_PTR(skel, "arena atomics skeleton open")) return; - if (skel->data->skip_tests) { + if (skel->data->skip_all_tests) { printf("%s:SKIP:no ENABLE_ATOMICS_TESTS or no addr_space_cast support in clang", __func__); test__skip(); @@ -198,6 +258,10 @@ void test_arena_atomics(void) test_xchg(skel); if (test__start_subtest("uaf")) test_uaf(skel); + if (test__start_subtest("load_acquire")) + test_load_acquire(skel); + if (test__start_subtest("store_release")) + test_store_release(skel); cleanup: arena_atomics__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c b/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c new file mode 100644 index 000000000000..7565fc7690c2 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include <test_progs.h> +#include <network_helpers.h> +#include <sys/sysinfo.h> + +struct __qspinlock { int val; }; +typedef struct __qspinlock arena_spinlock_t; + +struct arena_qnode { + unsigned long next; + int count; + int locked; +}; + +#include "arena_spin_lock.skel.h" + +static long cpu; +static int repeat; + +pthread_barrier_t barrier; + +static void *spin_lock_thread(void *arg) +{ + int err, prog_fd = *(u32 *)arg; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = repeat, + ); + cpu_set_t cpuset; + + CPU_ZERO(&cpuset); + CPU_SET(__sync_fetch_and_add(&cpu, 1), &cpuset); + ASSERT_OK(pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset), "cpu affinity"); + + err = pthread_barrier_wait(&barrier); + if (err != PTHREAD_BARRIER_SERIAL_THREAD && err != 0) + ASSERT_FALSE(true, "pthread_barrier"); + + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run err"); + ASSERT_EQ((int)topts.retval, 0, "test_run retval"); + + pthread_exit(arg); +} + +static void test_arena_spin_lock_size(int size) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + struct arena_spin_lock *skel; + pthread_t thread_id[16]; + int prog_fd, i, err; + void *ret; + + if (get_nprocs() < 2) { + test__skip(); + return; + } + + skel = arena_spin_lock__open_and_load(); + if (!ASSERT_OK_PTR(skel, "arena_spin_lock__open_and_load")) + return; + if (skel->data->test_skip == 2) { + test__skip(); + goto end; + } + skel->bss->cs_count = size; + skel->bss->limit = repeat * 16; + + ASSERT_OK(pthread_barrier_init(&barrier, NULL, 16), "barrier init"); + + prog_fd = bpf_program__fd(skel->progs.prog); + for (i = 0; i < 16; i++) { + err = pthread_create(&thread_id[i], NULL, &spin_lock_thread, &prog_fd); + if (!ASSERT_OK(err, "pthread_create")) + goto end_barrier; + } + + for (i = 0; i < 16; i++) { + if (!ASSERT_OK(pthread_join(thread_id[i], &ret), "pthread_join")) + goto end_barrier; + if (!ASSERT_EQ(ret, &prog_fd, "ret == prog_fd")) + goto end_barrier; + } + + ASSERT_EQ(skel->bss->counter, repeat * 16, "check counter value"); + +end_barrier: + pthread_barrier_destroy(&barrier); +end: + arena_spin_lock__destroy(skel); + return; +} + +void test_arena_spin_lock(void) +{ + repeat = 1000; + if (test__start_subtest("arena_spin_lock_1")) + test_arena_spin_lock_size(1); + cpu = 0; + if (test__start_subtest("arena_spin_lock_1000")) + test_arena_spin_lock_size(1000); + cpu = 0; + repeat = 100; + if (test__start_subtest("arena_spin_lock_50000")) + test_arena_spin_lock_size(50000); +} diff --git a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c index cc184e4420f6..67557cda2208 100644 --- a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c +++ b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c @@ -6,6 +6,10 @@ #include <test_progs.h> #include "bloom_filter_map.skel.h" +#ifndef NUMA_NO_NODE +#define NUMA_NO_NODE (-1) +#endif + static void test_fail_cases(void) { LIBBPF_OPTS(bpf_map_create_opts, opts); @@ -69,6 +73,7 @@ static void test_success_cases(void) /* Create a map */ opts.map_flags = BPF_F_ZERO_SEED | BPF_F_NUMA_NODE; + opts.numa_node = NUMA_NO_NODE; fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, sizeof(value), 100, &opts); if (!ASSERT_GE(fd, 0, "bpf_map_create bloom filter success case")) return; diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 6f1bfacd7375..add4a18c33bd 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -323,19 +323,87 @@ static void test_task_pidfd(void) static void test_task_sleepable(void) { struct bpf_iter_tasks *skel; + int pid, status, err, data_pipe[2], finish_pipe[2], c; + char *test_data = NULL; + char *test_data_long = NULL; + char *data[2]; + + if (!ASSERT_OK(pipe(data_pipe), "data_pipe") || + !ASSERT_OK(pipe(finish_pipe), "finish_pipe")) + return; skel = bpf_iter_tasks__open_and_load(); if (!ASSERT_OK_PTR(skel, "bpf_iter_tasks__open_and_load")) return; + pid = fork(); + if (!ASSERT_GE(pid, 0, "fork")) + return; + + if (pid == 0) { + /* child */ + close(data_pipe[0]); + close(finish_pipe[1]); + + test_data = malloc(sizeof(char) * 10); + strncpy(test_data, "test_data", 10); + test_data[9] = '\0'; + + test_data_long = malloc(sizeof(char) * 5000); + for (int i = 0; i < 5000; ++i) { + if (i % 2 == 0) + test_data_long[i] = 'b'; + else + test_data_long[i] = 'a'; + } + test_data_long[4999] = '\0'; + + data[0] = test_data; + data[1] = test_data_long; + + write(data_pipe[1], &data, sizeof(data)); + + /* keep child alive until after the test */ + err = read(finish_pipe[0], &c, 1); + if (err != 1) + exit(-1); + + close(data_pipe[1]); + close(finish_pipe[0]); + _exit(0); + } + + /* parent */ + close(data_pipe[1]); + close(finish_pipe[0]); + + err = read(data_pipe[0], &data, sizeof(data)); + ASSERT_EQ(err, sizeof(data), "read_check"); + + skel->bss->user_ptr = data[0]; + skel->bss->user_ptr_long = data[1]; + skel->bss->pid = pid; + do_dummy_read(skel->progs.dump_task_sleepable); ASSERT_GT(skel->bss->num_expected_failure_copy_from_user_task, 0, "num_expected_failure_copy_from_user_task"); ASSERT_GT(skel->bss->num_success_copy_from_user_task, 0, "num_success_copy_from_user_task"); + ASSERT_GT(skel->bss->num_expected_failure_copy_from_user_task_str, 0, + "num_expected_failure_copy_from_user_task_str"); + ASSERT_GT(skel->bss->num_success_copy_from_user_task_str, 0, + "num_success_copy_from_user_task_str"); bpf_iter_tasks__destroy(skel); + + write(finish_pipe[1], &c, 1); + err = waitpid(pid, &status, 0); + ASSERT_EQ(err, pid, "waitpid"); + ASSERT_EQ(status, 0, "zero_child_exit"); + + close(data_pipe[0]); + close(finish_pipe[1]); } static void test_task_stack(void) diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c index a4a1f93878d4..dbd13f8e42a7 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c @@ -72,11 +72,14 @@ static void test_bpf_nf_ct(int mode) if (!ASSERT_OK(system(cmd), cmd)) goto end; - srv_port = (mode == TEST_XDP) ? 5005 : 5006; - srv_fd = start_server(AF_INET, SOCK_STREAM, "127.0.0.1", srv_port, TIMEOUT_MS); + srv_fd = start_server(AF_INET, SOCK_STREAM, "127.0.0.1", 0, TIMEOUT_MS); if (!ASSERT_GE(srv_fd, 0, "start_server")) goto end; + srv_port = get_socket_local_port(srv_fd); + if (!ASSERT_GE(srv_port, 0, "get_sock_local_port")) + goto end; + client_fd = connect_to_server(srv_fd); if (!ASSERT_GE(client_fd, 0, "connect_to_server")) goto end; @@ -91,7 +94,7 @@ static void test_bpf_nf_ct(int mode) skel->bss->saddr = peer_addr.sin_addr.s_addr; skel->bss->sport = peer_addr.sin_port; skel->bss->daddr = peer_addr.sin_addr.s_addr; - skel->bss->dport = htons(srv_port); + skel->bss->dport = srv_port; if (mode == TEST_XDP) prog_fd = bpf_program__fd(skel->progs.nf_xdp_ct_test); diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index e63d74ce046f..8a9ba4292109 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -3866,11 +3866,11 @@ static struct btf_raw_test raw_tests[] = { .err_str = "vlen != 0", }, { - .descr = "decl_tag test #8, invalid kflag", + .descr = "decl_tag test #8, tag with kflag", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 1, 0), 2), (-1), + BTF_DECL_ATTR_ENC(NAME_TBD, 2, -1), BTF_END_RAW, }, BTF_STR_SEC("\0local\0tag1"), @@ -3881,8 +3881,6 @@ static struct btf_raw_test raw_tests[] = { .key_type_id = 1, .value_type_id = 1, .max_entries = 1, - .btf_load_err = true, - .err_str = "Invalid btf_info kind_flag", }, { .descr = "decl_tag test #9, var, invalid component_idx", @@ -4207,6 +4205,23 @@ static struct btf_raw_test raw_tests[] = { .err_str = "Type tags don't precede modifiers", }, { + .descr = "type_tag test #7, tag with kflag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ATTR_ENC(NAME_TBD, 1), /* [2] */ + BTF_PTR_ENC(2), /* [3] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0tag"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "tag_type_check_btf", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 1, +}, +{ .descr = "enum64 test #1, unsigned, size 8", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c index b293b8501fd6..c0a776feec23 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c @@ -126,26 +126,69 @@ done: return err; } -static char *dump_buf; -static size_t dump_buf_sz; -static FILE *dump_buf_file; +struct test_ctx { + struct btf *btf; + struct btf_dump *d; + char *dump_buf; + size_t dump_buf_sz; + FILE *dump_buf_file; +}; -static void test_btf_dump_incremental(void) +static void test_ctx__free(struct test_ctx *t) { - struct btf *btf = NULL; - struct btf_dump *d = NULL; - int id, err, i; + fclose(t->dump_buf_file); + free(t->dump_buf); + btf_dump__free(t->d); + btf__free(t->btf); +} - dump_buf_file = open_memstream(&dump_buf, &dump_buf_sz); - if (!ASSERT_OK_PTR(dump_buf_file, "dump_memstream")) - return; - btf = btf__new_empty(); - if (!ASSERT_OK_PTR(btf, "new_empty")) +static int test_ctx__init(struct test_ctx *t) +{ + t->dump_buf_file = open_memstream(&t->dump_buf, &t->dump_buf_sz); + if (!ASSERT_OK_PTR(t->dump_buf_file, "dump_memstream")) + return -1; + t->btf = btf__new_empty(); + if (!ASSERT_OK_PTR(t->btf, "new_empty")) goto err_out; - d = btf_dump__new(btf, btf_dump_printf, dump_buf_file, NULL); - if (!ASSERT_OK(libbpf_get_error(d), "btf_dump__new")) + t->d = btf_dump__new(t->btf, btf_dump_printf, t->dump_buf_file, NULL); + if (!ASSERT_OK(libbpf_get_error(t->d), "btf_dump__new")) goto err_out; + return 0; + +err_out: + test_ctx__free(t); + return -1; +} + +static void test_ctx__dump_and_compare(struct test_ctx *t, + const char *expected_output, + const char *message) +{ + int i, err; + + for (i = 1; i < btf__type_cnt(t->btf); i++) { + err = btf_dump__dump_type(t->d, i); + ASSERT_OK(err, "dump_type_ok"); + } + + fflush(t->dump_buf_file); + t->dump_buf[t->dump_buf_sz] = 0; /* some libc implementations don't do this */ + + ASSERT_STREQ(t->dump_buf, expected_output, message); +} + +static void test_btf_dump_incremental(void) +{ + struct test_ctx t = {}; + struct btf *btf; + int id, err; + + if (test_ctx__init(&t)) + return; + + btf = t.btf; + /* First, generate BTF corresponding to the following C code: * * enum x; @@ -182,15 +225,7 @@ static void test_btf_dump_incremental(void) err = btf__add_field(btf, "x", 4, 0, 0); ASSERT_OK(err, "field_ok"); - for (i = 1; i < btf__type_cnt(btf); i++) { - err = btf_dump__dump_type(d, i); - ASSERT_OK(err, "dump_type_ok"); - } - - fflush(dump_buf_file); - dump_buf[dump_buf_sz] = 0; /* some libc implementations don't do this */ - - ASSERT_STREQ(dump_buf, + test_ctx__dump_and_compare(&t, "enum x;\n" "\n" "enum x {\n" @@ -221,7 +256,7 @@ static void test_btf_dump_incremental(void) * enum values don't conflict; * */ - fseek(dump_buf_file, 0, SEEK_SET); + fseek(t.dump_buf_file, 0, SEEK_SET); id = btf__add_struct(btf, "s", 4); ASSERT_EQ(id, 7, "struct_id"); @@ -232,14 +267,7 @@ static void test_btf_dump_incremental(void) err = btf__add_field(btf, "s", 6, 64, 0); ASSERT_OK(err, "field_ok"); - for (i = 1; i < btf__type_cnt(btf); i++) { - err = btf_dump__dump_type(d, i); - ASSERT_OK(err, "dump_type_ok"); - } - - fflush(dump_buf_file); - dump_buf[dump_buf_sz] = 0; /* some libc implementations don't do this */ - ASSERT_STREQ(dump_buf, + test_ctx__dump_and_compare(&t, "struct s___2 {\n" " enum x x;\n" " enum {\n" @@ -248,11 +276,53 @@ static void test_btf_dump_incremental(void) " struct s s;\n" "};\n\n" , "c_dump1"); -err_out: - fclose(dump_buf_file); - free(dump_buf); - btf_dump__free(d); - btf__free(btf); + test_ctx__free(&t); +} + +static void test_btf_dump_type_tags(void) +{ + struct test_ctx t = {}; + struct btf *btf; + int id, err; + + if (test_ctx__init(&t)) + return; + + btf = t.btf; + + /* Generate BTF corresponding to the following C code: + * + * struct s { + * void __attribute__((btf_type_tag(\"void_tag\"))) *p1; + * void __attribute__((void_attr)) *p2; + * }; + * + */ + + id = btf__add_type_tag(btf, "void_tag", 0); + ASSERT_EQ(id, 1, "type_tag_id"); + id = btf__add_ptr(btf, id); + ASSERT_EQ(id, 2, "void_ptr_id1"); + + id = btf__add_type_attr(btf, "void_attr", 0); + ASSERT_EQ(id, 3, "type_attr_id"); + id = btf__add_ptr(btf, id); + ASSERT_EQ(id, 4, "void_ptr_id2"); + + id = btf__add_struct(btf, "s", 8); + ASSERT_EQ(id, 5, "struct_id"); + err = btf__add_field(btf, "p1", 2, 0, 0); + ASSERT_OK(err, "field_ok1"); + err = btf__add_field(btf, "p2", 4, 0, 0); + ASSERT_OK(err, "field_ok2"); + + test_ctx__dump_and_compare(&t, +"struct s {\n" +" void __attribute__((btf_type_tag(\"void_tag\"))) *p1;\n" +" void __attribute__((void_attr)) *p2;\n" +"};\n\n", "dump_and_compare"); + + test_ctx__free(&t); } #define STRSIZE 4096 @@ -874,6 +944,9 @@ void test_btf_dump() { if (test__start_subtest("btf_dump: incremental")) test_btf_dump_incremental(); + if (test__start_subtest("btf_dump: type_tags")) + test_btf_dump_type_tags(); + btf = libbpf_find_kernel_btf(); if (!ASSERT_OK_PTR(btf, "no kernel BTF found")) return; diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c b/tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c new file mode 100644 index 000000000000..d4d583872fa2 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include <test_progs.h> +#include "cgroup_helpers.h" +#include "cgroup_preorder.skel.h" + +static int run_getsockopt_test(int cg_parent, int cg_child, int sock_fd, bool all_preorder) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opts); + enum bpf_attach_type prog_c_atype, prog_c2_atype, prog_p_atype, prog_p2_atype; + int prog_c_fd, prog_c2_fd, prog_p_fd, prog_p2_fd; + struct cgroup_preorder *skel = NULL; + struct bpf_program *prog; + __u8 *result, buf; + socklen_t optlen; + int err = 0; + + skel = cgroup_preorder__open_and_load(); + if (!ASSERT_OK_PTR(skel, "cgroup_preorder__open_and_load")) + return 0; + + buf = 0x00; + err = setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1); + if (!ASSERT_OK(err, "setsockopt")) + goto close_skel; + + opts.flags = BPF_F_ALLOW_MULTI; + if (all_preorder) + opts.flags |= BPF_F_PREORDER; + prog = skel->progs.child; + prog_c_fd = bpf_program__fd(prog); + prog_c_atype = bpf_program__expected_attach_type(prog); + err = bpf_prog_attach_opts(prog_c_fd, cg_child, prog_c_atype, &opts); + if (!ASSERT_OK(err, "bpf_prog_attach_opts-child")) + goto close_skel; + + opts.flags = BPF_F_ALLOW_MULTI | BPF_F_PREORDER; + prog = skel->progs.child_2; + prog_c2_fd = bpf_program__fd(prog); + prog_c2_atype = bpf_program__expected_attach_type(prog); + err = bpf_prog_attach_opts(prog_c2_fd, cg_child, prog_c2_atype, &opts); + if (!ASSERT_OK(err, "bpf_prog_attach_opts-child_2")) + goto detach_child; + + optlen = 1; + err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen); + if (!ASSERT_OK(err, "getsockopt")) + goto detach_child_2; + + result = skel->bss->result; + if (all_preorder) + ASSERT_TRUE(result[0] == 1 && result[1] == 2, "child only"); + else + ASSERT_TRUE(result[0] == 2 && result[1] == 1, "child only"); + + skel->bss->idx = 0; + memset(result, 0, 4); + + opts.flags = BPF_F_ALLOW_MULTI; + if (all_preorder) + opts.flags |= BPF_F_PREORDER; + prog = skel->progs.parent; + prog_p_fd = bpf_program__fd(prog); + prog_p_atype = bpf_program__expected_attach_type(prog); + err = bpf_prog_attach_opts(prog_p_fd, cg_parent, prog_p_atype, &opts); + if (!ASSERT_OK(err, "bpf_prog_attach_opts-parent")) + goto detach_child_2; + + opts.flags = BPF_F_ALLOW_MULTI | BPF_F_PREORDER; + prog = skel->progs.parent_2; + prog_p2_fd = bpf_program__fd(prog); + prog_p2_atype = bpf_program__expected_attach_type(prog); + err = bpf_prog_attach_opts(prog_p2_fd, cg_parent, prog_p2_atype, &opts); + if (!ASSERT_OK(err, "bpf_prog_attach_opts-parent_2")) + goto detach_parent; + + err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen); + if (!ASSERT_OK(err, "getsockopt")) + goto detach_parent_2; + + if (all_preorder) + ASSERT_TRUE(result[0] == 3 && result[1] == 4 && result[2] == 1 && result[3] == 2, + "parent and child"); + else + ASSERT_TRUE(result[0] == 4 && result[1] == 2 && result[2] == 1 && result[3] == 3, + "parent and child"); + +detach_parent_2: + ASSERT_OK(bpf_prog_detach2(prog_p2_fd, cg_parent, prog_p2_atype), + "bpf_prog_detach2-parent_2"); +detach_parent: + ASSERT_OK(bpf_prog_detach2(prog_p_fd, cg_parent, prog_p_atype), + "bpf_prog_detach2-parent"); +detach_child_2: + ASSERT_OK(bpf_prog_detach2(prog_c2_fd, cg_child, prog_c2_atype), + "bpf_prog_detach2-child_2"); +detach_child: + ASSERT_OK(bpf_prog_detach2(prog_c_fd, cg_child, prog_c_atype), + "bpf_prog_detach2-child"); +close_skel: + cgroup_preorder__destroy(skel); + return err; +} + +void test_cgroup_preorder(void) +{ + int cg_parent = -1, cg_child = -1, sock_fd = -1; + + cg_parent = test__join_cgroup("/parent"); + if (!ASSERT_GE(cg_parent, 0, "join_cgroup /parent")) + goto out; + + cg_child = test__join_cgroup("/parent/child"); + if (!ASSERT_GE(cg_child, 0, "join_cgroup /parent/child")) + goto out; + + sock_fd = socket(AF_INET, SOCK_STREAM, 0); + if (!ASSERT_GE(sock_fd, 0, "socket")) + goto out; + + ASSERT_OK(run_getsockopt_test(cg_parent, cg_child, sock_fd, false), "getsockopt_test_1"); + ASSERT_OK(run_getsockopt_test(cg_parent, cg_child, sock_fd, true), "getsockopt_test_2"); + +out: + close(sock_fd); + close(cg_child); + close(cg_parent); +} diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c index 64abba72ac10..37c1cc52ed98 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c @@ -10,12 +10,18 @@ static int run_test(int cgroup_fd, int server_fd, bool classid) { struct connect4_dropper *skel; - int fd, err = 0; + int fd, err = 0, port; skel = connect4_dropper__open_and_load(); if (!ASSERT_OK_PTR(skel, "skel_open")) return -1; + port = get_socket_local_port(server_fd); + if (!ASSERT_GE(port, 0, "get_socket_local_port")) + return -1; + + skel->bss->port = ntohs(port); + skel->links.connect_v4_dropper = bpf_program__attach_cgroup(skel->progs.connect_v4_dropper, cgroup_fd); @@ -48,10 +54,9 @@ void test_cgroup_v1v2(void) { struct network_helper_opts opts = {}; int server_fd, client_fd, cgroup_fd; - static const int port = 60120; /* Step 1: Check base connectivity works without any BPF. */ - server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0); + server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0); if (!ASSERT_GE(server_fd, 0, "server_fd")) return; client_fd = connect_to_fd_opts(server_fd, &opts); @@ -66,7 +71,7 @@ void test_cgroup_v1v2(void) cgroup_fd = test__join_cgroup("/connect_dropper"); if (!ASSERT_GE(cgroup_fd, 0, "cgroup_fd")) return; - server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0); + server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0); if (!ASSERT_GE(server_fd, 0, "server_fd")) { close(cgroup_fd); return; diff --git a/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c b/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c deleted file mode 100644 index 7526de379081..000000000000 --- a/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c +++ /dev/null @@ -1,107 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include "bpf/libbpf.h" -#include "changes_pkt_data_freplace.skel.h" -#include "changes_pkt_data.skel.h" -#include <test_progs.h> - -static void print_verifier_log(const char *log) -{ - if (env.verbosity >= VERBOSE_VERY) - fprintf(stdout, "VERIFIER LOG:\n=============\n%s=============\n", log); -} - -static void test_aux(const char *main_prog_name, - const char *to_be_replaced, - const char *replacement, - bool expect_load) -{ - struct changes_pkt_data_freplace *freplace = NULL; - struct bpf_program *freplace_prog = NULL; - struct bpf_program *main_prog = NULL; - LIBBPF_OPTS(bpf_object_open_opts, opts); - struct changes_pkt_data *main = NULL; - char log[16*1024]; - int err; - - opts.kernel_log_buf = log; - opts.kernel_log_size = sizeof(log); - if (env.verbosity >= VERBOSE_SUPER) - opts.kernel_log_level = 1 | 2 | 4; - main = changes_pkt_data__open_opts(&opts); - if (!ASSERT_OK_PTR(main, "changes_pkt_data__open")) - goto out; - main_prog = bpf_object__find_program_by_name(main->obj, main_prog_name); - if (!ASSERT_OK_PTR(main_prog, "main_prog")) - goto out; - bpf_program__set_autoload(main_prog, true); - err = changes_pkt_data__load(main); - print_verifier_log(log); - if (!ASSERT_OK(err, "changes_pkt_data__load")) - goto out; - freplace = changes_pkt_data_freplace__open_opts(&opts); - if (!ASSERT_OK_PTR(freplace, "changes_pkt_data_freplace__open")) - goto out; - freplace_prog = bpf_object__find_program_by_name(freplace->obj, replacement); - if (!ASSERT_OK_PTR(freplace_prog, "freplace_prog")) - goto out; - bpf_program__set_autoload(freplace_prog, true); - bpf_program__set_autoattach(freplace_prog, true); - bpf_program__set_attach_target(freplace_prog, - bpf_program__fd(main_prog), - to_be_replaced); - err = changes_pkt_data_freplace__load(freplace); - print_verifier_log(log); - if (expect_load) { - ASSERT_OK(err, "changes_pkt_data_freplace__load"); - } else { - ASSERT_ERR(err, "changes_pkt_data_freplace__load"); - ASSERT_HAS_SUBSTR(log, "Extension program changes packet data", "error log"); - } - -out: - changes_pkt_data_freplace__destroy(freplace); - changes_pkt_data__destroy(main); -} - -/* There are two global subprograms in both changes_pkt_data.skel.h: - * - one changes packet data; - * - another does not. - * It is ok to freplace subprograms that change packet data with those - * that either do or do not. It is only ok to freplace subprograms - * that do not change packet data with those that do not as well. - * The below tests check outcomes for each combination of such freplace. - * Also test a case when main subprogram itself is replaced and is a single - * subprogram in a program. - */ -void test_changes_pkt_data_freplace(void) -{ - struct { - const char *main; - const char *to_be_replaced; - bool changes; - } mains[] = { - { "main_with_subprogs", "changes_pkt_data", true }, - { "main_with_subprogs", "does_not_change_pkt_data", false }, - { "main_changes", "main_changes", true }, - { "main_does_not_change", "main_does_not_change", false }, - }; - struct { - const char *func; - bool changes; - } replacements[] = { - { "changes_pkt_data", true }, - { "does_not_change_pkt_data", false } - }; - char buf[64]; - - for (int i = 0; i < ARRAY_SIZE(mains); ++i) { - for (int j = 0; j < ARRAY_SIZE(replacements); ++j) { - snprintf(buf, sizeof(buf), "%s_with_%s", - mains[i].to_be_replaced, replacements[j].func); - if (!test__start_subtest(buf)) - continue; - test_aux(mains[i].main, mains[i].to_be_replaced, replacements[j].func, - mains[i].changes || !replacements[j].changes); - } - } -} diff --git a/tools/testing/selftests/bpf/prog_tests/compute_live_registers.c b/tools/testing/selftests/bpf/prog_tests/compute_live_registers.c new file mode 100644 index 000000000000..285f20241fe1 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/compute_live_registers.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "compute_live_registers.skel.h" +#include "test_progs.h" + +void test_compute_live_registers(void) +{ + RUN_TESTS(compute_live_registers); +} diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index e10ea92c3fe2..08963c82f30b 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -85,11 +85,11 @@ static int duration = 0; #define NESTING_ERR_CASE(name) { \ NESTING_CASE_COMMON(name), \ .fails = true, \ - .run_btfgen_fails = true, \ + .run_btfgen_fails = true, \ } #define ARRAYS_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \ - .a = { [2] = 1 }, \ + .a = { [2] = 1, [3] = 11 }, \ .b = { [1] = { [2] = { [3] = 2 } } }, \ .c = { [1] = { .c = 3 } }, \ .d = { [0] = { [0] = { .d = 4 } } }, \ @@ -108,6 +108,7 @@ static int duration = 0; .input_len = sizeof(struct core_reloc_##name), \ .output = STRUCT_TO_CHAR_PTR(core_reloc_arrays_output) { \ .a2 = 1, \ + .a3 = 12, \ .b123 = 2, \ .c1c = 3, \ .d00d = 4, \ @@ -602,6 +603,7 @@ static const struct core_reloc_test_case test_cases[] = { ARRAYS_ERR_CASE(arrays___err_non_array), ARRAYS_ERR_CASE(arrays___err_wrong_val_type), ARRAYS_ERR_CASE(arrays___err_bad_zero_sz_arr), + ARRAYS_ERR_CASE(arrays___err_bad_signed_arr_elem_sz), /* enum/ptr/int handling scenarios */ PRIMITIVES_CASE(primitives), diff --git a/tools/testing/selftests/bpf/prog_tests/cpumask.c b/tools/testing/selftests/bpf/prog_tests/cpumask.c index e58a04654238..6c45330a5ca3 100644 --- a/tools/testing/selftests/bpf/prog_tests/cpumask.c +++ b/tools/testing/selftests/bpf/prog_tests/cpumask.c @@ -25,6 +25,10 @@ static const char * const cpumask_success_testcases[] = { "test_global_mask_nested_deep_rcu", "test_global_mask_nested_deep_array_rcu", "test_cpumask_weight", + "test_refcount_null_tracking", + "test_populate_reject_small_mask", + "test_populate_reject_unaligned", + "test_populate", }; static void verify_success(const char *prog_name) @@ -78,6 +82,5 @@ void test_cpumask(void) verify_success(cpumask_success_testcases[i]); } - RUN_TESTS(cpumask_success); RUN_TESTS(cpumask_failure); } diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c index b614a5272dfd..e29cc16124c2 100644 --- a/tools/testing/selftests/bpf/prog_tests/dynptr.c +++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c @@ -10,6 +10,7 @@ enum test_setup_type { SETUP_SYSCALL_SLEEP, SETUP_SKB_PROG, SETUP_SKB_PROG_TP, + SETUP_XDP_PROG, }; static struct { @@ -18,6 +19,8 @@ static struct { } success_tests[] = { {"test_read_write", SETUP_SYSCALL_SLEEP}, {"test_dynptr_data", SETUP_SYSCALL_SLEEP}, + {"test_dynptr_copy", SETUP_SYSCALL_SLEEP}, + {"test_dynptr_copy_xdp", SETUP_XDP_PROG}, {"test_ringbuf", SETUP_SYSCALL_SLEEP}, {"test_skb_readonly", SETUP_SKB_PROG}, {"test_dynptr_skb_data", SETUP_SKB_PROG}, @@ -120,6 +123,24 @@ static void verify_success(const char *prog_name, enum test_setup_type setup_typ break; } + case SETUP_XDP_PROG: + { + char data[5000]; + int err, prog_fd; + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &data, + .data_size_in = sizeof(data), + .repeat = 1, + ); + + prog_fd = bpf_program__fd(prog); + err = bpf_prog_test_run_opts(prog_fd, &opts); + + if (!ASSERT_OK(err, "test_run")) + goto cleanup; + + break; + } } ASSERT_EQ(skel->bss->err, 0, "err"); diff --git a/tools/testing/selftests/bpf/prog_tests/fd_array.c b/tools/testing/selftests/bpf/prog_tests/fd_array.c index a1d52e73fb16..9add890c2d37 100644 --- a/tools/testing/selftests/bpf/prog_tests/fd_array.c +++ b/tools/testing/selftests/bpf/prog_tests/fd_array.c @@ -83,8 +83,8 @@ static inline int bpf_prog_get_map_ids(int prog_fd, __u32 *nr_map_ids, __u32 *ma int err; memset(&info, 0, len); - info.nr_map_ids = *nr_map_ids, - info.map_ids = ptr_to_u64(map_ids), + info.nr_map_ids = *nr_map_ids; + info.map_ids = ptr_to_u64(map_ids); err = bpf_prog_get_info_by_fd(prog_fd, &info, &len); if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd")) diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_noreturns.c b/tools/testing/selftests/bpf/prog_tests/fexit_noreturns.c new file mode 100644 index 000000000000..568d3aa48a78 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/fexit_noreturns.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> +#include "fexit_noreturns.skel.h" + +void test_fexit_noreturns(void) +{ + RUN_TESTS(fexit_noreturns); +} diff --git a/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c b/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c index 5a0b51157451..43a26ec69a8e 100644 --- a/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c +++ b/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c @@ -8,11 +8,12 @@ #include <unistd.h> #include <test_progs.h> #include "test_get_xattr.skel.h" +#include "test_set_remove_xattr.skel.h" #include "test_fsverity.skel.h" static const char testfile[] = "/tmp/test_progs_fs_kfuncs"; -static void test_xattr(void) +static void test_get_xattr(const char *name, const char *value, bool allow_access) { struct test_get_xattr *skel = NULL; int fd = -1, err; @@ -25,7 +26,7 @@ static void test_xattr(void) close(fd); fd = -1; - err = setxattr(testfile, "user.kfuncs", "hello", sizeof("hello"), 0); + err = setxattr(testfile, name, value, strlen(value) + 1, 0); if (err && errno == EOPNOTSUPP) { printf("%s:SKIP:local fs doesn't support xattr (%d)\n" "To run this test, make sure /tmp filesystem supports xattr.\n", @@ -48,16 +49,23 @@ static void test_xattr(void) goto out; fd = open(testfile, O_RDONLY, 0644); + if (!ASSERT_GE(fd, 0, "open_file")) goto out; - ASSERT_EQ(skel->bss->found_xattr_from_file, 1, "found_xattr_from_file"); - /* Trigger security_inode_getxattr */ - err = getxattr(testfile, "user.kfuncs", v, sizeof(v)); - ASSERT_EQ(err, -1, "getxattr_return"); - ASSERT_EQ(errno, EINVAL, "getxattr_errno"); - ASSERT_EQ(skel->bss->found_xattr_from_dentry, 1, "found_xattr_from_dentry"); + err = getxattr(testfile, name, v, sizeof(v)); + + if (allow_access) { + ASSERT_EQ(err, -1, "getxattr_return"); + ASSERT_EQ(errno, EINVAL, "getxattr_errno"); + ASSERT_EQ(skel->bss->found_xattr_from_file, 1, "found_xattr_from_file"); + ASSERT_EQ(skel->bss->found_xattr_from_dentry, 1, "found_xattr_from_dentry"); + } else { + ASSERT_EQ(err, strlen(value) + 1, "getxattr_return"); + ASSERT_EQ(skel->bss->found_xattr_from_file, 0, "found_xattr_from_file"); + ASSERT_EQ(skel->bss->found_xattr_from_dentry, 0, "found_xattr_from_dentry"); + } out: close(fd); @@ -65,6 +73,127 @@ out: remove(testfile); } +/* xattr value we will set to security.bpf.foo */ +static const char value_foo[] = "hello"; + +static void read_and_validate_foo(struct test_set_remove_xattr *skel) +{ + char value_out[32]; + int err; + + err = getxattr(testfile, skel->rodata->xattr_foo, value_out, sizeof(value_out)); + ASSERT_EQ(err, sizeof(value_foo), "getxattr size foo"); + ASSERT_EQ(strncmp(value_out, value_foo, sizeof(value_foo)), 0, "strncmp value_foo"); +} + +static void set_foo(struct test_set_remove_xattr *skel) +{ + ASSERT_OK(setxattr(testfile, skel->rodata->xattr_foo, value_foo, strlen(value_foo) + 1, 0), + "setxattr foo"); +} + +static void validate_bar_match(struct test_set_remove_xattr *skel) +{ + char value_out[32]; + int err; + + err = getxattr(testfile, skel->rodata->xattr_bar, value_out, sizeof(value_out)); + ASSERT_EQ(err, sizeof(skel->data->value_bar), "getxattr size bar"); + ASSERT_EQ(strncmp(value_out, skel->data->value_bar, sizeof(skel->data->value_bar)), 0, + "strncmp value_bar"); +} + +static void validate_bar_removed(struct test_set_remove_xattr *skel) +{ + char value_out[32]; + int err; + + err = getxattr(testfile, skel->rodata->xattr_bar, value_out, sizeof(value_out)); + ASSERT_LT(err, 0, "getxattr size bar should fail"); +} + +static void test_set_remove_xattr(void) +{ + struct test_set_remove_xattr *skel = NULL; + int fd = -1, err; + + fd = open(testfile, O_CREAT | O_RDONLY, 0644); + if (!ASSERT_GE(fd, 0, "create_file")) + return; + + close(fd); + fd = -1; + + skel = test_set_remove_xattr__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_set_remove_xattr__open_and_load")) + return; + + /* Set security.bpf.foo to "hello" */ + err = setxattr(testfile, skel->rodata->xattr_foo, value_foo, strlen(value_foo) + 1, 0); + if (err && errno == EOPNOTSUPP) { + printf("%s:SKIP:local fs doesn't support xattr (%d)\n" + "To run this test, make sure /tmp filesystem supports xattr.\n", + __func__, errno); + test__skip(); + goto out; + } + + if (!ASSERT_OK(err, "setxattr")) + goto out; + + skel->bss->monitored_pid = getpid(); + err = test_set_remove_xattr__attach(skel); + if (!ASSERT_OK(err, "test_set_remove_xattr__attach")) + goto out; + + /* First, test not _locked version of the kfuncs with getxattr. */ + + /* Read security.bpf.foo and trigger test_inode_getxattr. This + * bpf program will set security.bpf.bar to "world". + */ + read_and_validate_foo(skel); + validate_bar_match(skel); + + /* Read security.bpf.foo and trigger test_inode_getxattr again. + * This will remove xattr security.bpf.bar. + */ + read_and_validate_foo(skel); + validate_bar_removed(skel); + + ASSERT_TRUE(skel->bss->set_security_bpf_bar_success, "set_security_bpf_bar_success"); + ASSERT_TRUE(skel->bss->remove_security_bpf_bar_success, "remove_security_bpf_bar_success"); + ASSERT_TRUE(skel->bss->set_security_selinux_fail, "set_security_selinux_fail"); + ASSERT_TRUE(skel->bss->remove_security_selinux_fail, "remove_security_selinux_fail"); + + /* Second, test _locked version of the kfuncs, with setxattr */ + + /* Set security.bpf.foo and trigger test_inode_setxattr. This + * bpf program will set security.bpf.bar to "world". + */ + set_foo(skel); + validate_bar_match(skel); + + /* Set security.bpf.foo and trigger test_inode_setxattr again. + * This will remove xattr security.bpf.bar. + */ + set_foo(skel); + validate_bar_removed(skel); + + ASSERT_TRUE(skel->bss->locked_set_security_bpf_bar_success, + "locked_set_security_bpf_bar_success"); + ASSERT_TRUE(skel->bss->locked_remove_security_bpf_bar_success, + "locked_remove_security_bpf_bar_success"); + ASSERT_TRUE(skel->bss->locked_set_security_selinux_fail, + "locked_set_security_selinux_fail"); + ASSERT_TRUE(skel->bss->locked_remove_security_selinux_fail, + "locked_remove_security_selinux_fail"); + +out: + close(fd); + test_set_remove_xattr__destroy(skel); + remove(testfile); +} + #ifndef SHA256_DIGEST_SIZE #define SHA256_DIGEST_SIZE 32 #endif @@ -141,8 +270,21 @@ out: void test_fs_kfuncs(void) { - if (test__start_subtest("xattr")) - test_xattr(); + /* Matches xattr_names in progs/test_get_xattr.c */ + if (test__start_subtest("user_xattr")) + test_get_xattr("user.kfuncs", "hello", true); + + if (test__start_subtest("security_bpf_xattr")) + test_get_xattr("security.bpf.xxx", "hello", true); + + if (test__start_subtest("security_bpf_xattr_error")) + test_get_xattr("security.bpf", "hello", false); + + if (test__start_subtest("security_selinux_xattr_error")) + test_get_xattr("security.selinux", "hello", false); + + if (test__start_subtest("set_remove_xattr")) + test_set_remove_xattr(); if (test__start_subtest("fsverity")) test_fsverity(); diff --git a/tools/testing/selftests/bpf/prog_tests/kernel_flag.c b/tools/testing/selftests/bpf/prog_tests/kernel_flag.c new file mode 100644 index 000000000000..a133354ac9bc --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/kernel_flag.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Microsoft */ +#include <test_progs.h> +#include "kfunc_call_test.skel.h" +#include "kfunc_call_test.lskel.h" +#include "test_kernel_flag.skel.h" + +void test_kernel_flag(void) +{ + struct test_kernel_flag *lsm_skel; + struct kfunc_call_test *skel = NULL; + struct kfunc_call_test_lskel *lskel = NULL; + int ret; + + lsm_skel = test_kernel_flag__open_and_load(); + if (!ASSERT_OK_PTR(lsm_skel, "lsm_skel")) + return; + + lsm_skel->bss->monitored_tid = gettid(); + + ret = test_kernel_flag__attach(lsm_skel); + if (!ASSERT_OK(ret, "test_kernel_flag__attach")) + goto close_prog; + + /* Test with skel. This should pass the gatekeeper */ + skel = kfunc_call_test__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel")) + goto close_prog; + + /* Test with lskel. This should fail due to blocking kernel-based bpf() invocations */ + lskel = kfunc_call_test_lskel__open_and_load(); + if (!ASSERT_ERR_PTR(lskel, "lskel")) + goto close_prog; + +close_prog: + if (skel) + kfunc_call_test__destroy(skel); + if (lskel) + kfunc_call_test_lskel__destroy(lskel); + + lsm_skel->bss->monitored_tid = 0; + test_kernel_flag__destroy(lsm_skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h b/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h index fb1eb8c67361..ccec0fcdabc1 100644 --- a/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h +++ b/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h @@ -5,7 +5,6 @@ #include <time.h> #include <net/if.h> -#include <linux/if_tun.h> #include <linux/icmp.h> #include "test_progs.h" @@ -37,34 +36,6 @@ static inline int netns_delete(void) return system("ip netns del " NETNS ">/dev/null 2>&1"); } -static int open_tuntap(const char *dev_name, bool need_mac) -{ - int err = 0; - struct ifreq ifr; - int fd = open("/dev/net/tun", O_RDWR); - - if (!ASSERT_GT(fd, 0, "open(/dev/net/tun)")) - return -1; - - ifr.ifr_flags = IFF_NO_PI | (need_mac ? IFF_TAP : IFF_TUN); - strncpy(ifr.ifr_name, dev_name, IFNAMSIZ - 1); - ifr.ifr_name[IFNAMSIZ - 1] = '\0'; - - err = ioctl(fd, TUNSETIFF, &ifr); - if (!ASSERT_OK(err, "ioctl(TUNSETIFF)")) { - close(fd); - return -1; - } - - err = fcntl(fd, F_SETFL, O_NONBLOCK); - if (!ASSERT_OK(err, "fcntl(O_NONBLOCK)")) { - close(fd); - return -1; - } - - return fd; -} - #define ICMP_PAYLOAD_SIZE 100 /* Match an ICMP packet with payload len ICMP_PAYLOAD_SIZE */ diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_ip_encap.c b/tools/testing/selftests/bpf/prog_tests/lwt_ip_encap.c new file mode 100644 index 000000000000..b6391af5f6f9 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/lwt_ip_encap.c @@ -0,0 +1,540 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <netinet/in.h> + +#include "network_helpers.h" +#include "test_progs.h" + +#define BPF_FILE "test_lwt_ip_encap.bpf.o" + +#define NETNS_NAME_SIZE 32 +#define NETNS_BASE "ns-lwt-ip-encap" + +#define IP4_ADDR_1 "172.16.1.100" +#define IP4_ADDR_2 "172.16.2.100" +#define IP4_ADDR_3 "172.16.3.100" +#define IP4_ADDR_4 "172.16.4.100" +#define IP4_ADDR_5 "172.16.5.100" +#define IP4_ADDR_6 "172.16.6.100" +#define IP4_ADDR_7 "172.16.7.100" +#define IP4_ADDR_8 "172.16.8.100" +#define IP4_ADDR_GRE "172.16.16.100" + +#define IP4_ADDR_SRC IP4_ADDR_1 +#define IP4_ADDR_DST IP4_ADDR_4 + +#define IP6_ADDR_1 "fb01::1" +#define IP6_ADDR_2 "fb02::1" +#define IP6_ADDR_3 "fb03::1" +#define IP6_ADDR_4 "fb04::1" +#define IP6_ADDR_5 "fb05::1" +#define IP6_ADDR_6 "fb06::1" +#define IP6_ADDR_7 "fb07::1" +#define IP6_ADDR_8 "fb08::1" +#define IP6_ADDR_GRE "fb10::1" + +#define IP6_ADDR_SRC IP6_ADDR_1 +#define IP6_ADDR_DST IP6_ADDR_4 + +/* Setup/topology: + * + * NS1 NS2 NS3 + * veth1 <---> veth2 veth3 <---> veth4 (the top route) + * veth5 <---> veth6 veth7 <---> veth8 (the bottom route) + * + * Each vethN gets IP[4|6]_ADDR_N address. + * + * IP*_ADDR_SRC = IP*_ADDR_1 + * IP*_ADDR_DST = IP*_ADDR_4 + * + * All tests test pings from IP*_ADDR__SRC to IP*_ADDR_DST. + * + * By default, routes are configured to allow packets to go + * IP*_ADDR_1 <=> IP*_ADDR_2 <=> IP*_ADDR_3 <=> IP*_ADDR_4 (the top route). + * + * A GRE device is installed in NS3 with IP*_ADDR_GRE, and + * NS1/NS2 are configured to route packets to IP*_ADDR_GRE via IP*_ADDR_8 + * (the bottom route). + * + * Tests: + * + * 1. Routes NS2->IP*_ADDR_DST are brought down, so the only way a ping + * from IP*_ADDR_SRC to IP*_ADDR_DST can work is via IP*_ADDR_GRE. + * + * 2a. In an egress test, a bpf LWT_XMIT program is installed on veth1 + * that encaps the packets with an IP/GRE header to route to IP*_ADDR_GRE. + * + * ping: SRC->[encap at veth1:egress]->GRE:decap->DST + * ping replies go DST->SRC directly + * + * 2b. In an ingress test, a bpf LWT_IN program is installed on veth2 + * that encaps the packets with an IP/GRE header to route to IP*_ADDR_GRE. + * + * ping: SRC->[encap at veth2:ingress]->GRE:decap->DST + * ping replies go DST->SRC directly + */ + +static int create_ns(char *name, size_t name_sz) +{ + if (!name) + goto fail; + + if (!ASSERT_OK(append_tid(name, name_sz), "append TID")) + goto fail; + + SYS(fail, "ip netns add %s", name); + + /* rp_filter gets confused by what these tests are doing, so disable it */ + SYS(fail, "ip netns exec %s sysctl -wq net.ipv4.conf.all.rp_filter=0", name); + SYS(fail, "ip netns exec %s sysctl -wq net.ipv4.conf.default.rp_filter=0", name); + /* Disable IPv6 DAD because it sometimes takes too long and fails tests */ + SYS(fail, "ip netns exec %s sysctl -wq net.ipv6.conf.all.accept_dad=0", name); + SYS(fail, "ip netns exec %s sysctl -wq net.ipv6.conf.default.accept_dad=0", name); + + return 0; +fail: + return -1; +} + +static int set_top_addr(const char *ns1, const char *ns2, const char *ns3) +{ + SYS(fail, "ip -n %s a add %s/24 dev veth1", ns1, IP4_ADDR_1); + SYS(fail, "ip -n %s a add %s/24 dev veth2", ns2, IP4_ADDR_2); + SYS(fail, "ip -n %s a add %s/24 dev veth3", ns2, IP4_ADDR_3); + SYS(fail, "ip -n %s a add %s/24 dev veth4", ns3, IP4_ADDR_4); + SYS(fail, "ip -n %s -6 a add %s/128 dev veth1", ns1, IP6_ADDR_1); + SYS(fail, "ip -n %s -6 a add %s/128 dev veth2", ns2, IP6_ADDR_2); + SYS(fail, "ip -n %s -6 a add %s/128 dev veth3", ns2, IP6_ADDR_3); + SYS(fail, "ip -n %s -6 a add %s/128 dev veth4", ns3, IP6_ADDR_4); + + SYS(fail, "ip -n %s link set dev veth1 up", ns1); + SYS(fail, "ip -n %s link set dev veth2 up", ns2); + SYS(fail, "ip -n %s link set dev veth3 up", ns2); + SYS(fail, "ip -n %s link set dev veth4 up", ns3); + + return 0; +fail: + return 1; +} + +static int set_bottom_addr(const char *ns1, const char *ns2, const char *ns3) +{ + SYS(fail, "ip -n %s a add %s/24 dev veth5", ns1, IP4_ADDR_5); + SYS(fail, "ip -n %s a add %s/24 dev veth6", ns2, IP4_ADDR_6); + SYS(fail, "ip -n %s a add %s/24 dev veth7", ns2, IP4_ADDR_7); + SYS(fail, "ip -n %s a add %s/24 dev veth8", ns3, IP4_ADDR_8); + SYS(fail, "ip -n %s -6 a add %s/128 dev veth5", ns1, IP6_ADDR_5); + SYS(fail, "ip -n %s -6 a add %s/128 dev veth6", ns2, IP6_ADDR_6); + SYS(fail, "ip -n %s -6 a add %s/128 dev veth7", ns2, IP6_ADDR_7); + SYS(fail, "ip -n %s -6 a add %s/128 dev veth8", ns3, IP6_ADDR_8); + + SYS(fail, "ip -n %s link set dev veth5 up", ns1); + SYS(fail, "ip -n %s link set dev veth6 up", ns2); + SYS(fail, "ip -n %s link set dev veth7 up", ns2); + SYS(fail, "ip -n %s link set dev veth8 up", ns3); + + return 0; +fail: + return 1; +} + +static int configure_vrf(const char *ns1, const char *ns2) +{ + if (!ns1 || !ns2) + goto fail; + + SYS(fail, "ip -n %s link add red type vrf table 1001", ns1); + SYS(fail, "ip -n %s link set red up", ns1); + SYS(fail, "ip -n %s route add table 1001 unreachable default metric 8192", ns1); + SYS(fail, "ip -n %s -6 route add table 1001 unreachable default metric 8192", ns1); + SYS(fail, "ip -n %s link set veth1 vrf red", ns1); + SYS(fail, "ip -n %s link set veth5 vrf red", ns1); + + SYS(fail, "ip -n %s link add red type vrf table 1001", ns2); + SYS(fail, "ip -n %s link set red up", ns2); + SYS(fail, "ip -n %s route add table 1001 unreachable default metric 8192", ns2); + SYS(fail, "ip -n %s -6 route add table 1001 unreachable default metric 8192", ns2); + SYS(fail, "ip -n %s link set veth2 vrf red", ns2); + SYS(fail, "ip -n %s link set veth3 vrf red", ns2); + SYS(fail, "ip -n %s link set veth6 vrf red", ns2); + SYS(fail, "ip -n %s link set veth7 vrf red", ns2); + + return 0; +fail: + return -1; +} + +static int configure_ns1(const char *ns1, const char *vrf) +{ + struct nstoken *nstoken = NULL; + + if (!ns1 || !vrf) + goto fail; + + nstoken = open_netns(ns1); + if (!ASSERT_OK_PTR(nstoken, "open ns1")) + goto fail; + + /* Top route */ + SYS(fail, "ip route add %s/32 dev veth1 %s", IP4_ADDR_2, vrf); + SYS(fail, "ip route add default dev veth1 via %s %s", IP4_ADDR_2, vrf); + SYS(fail, "ip -6 route add %s/128 dev veth1 %s", IP6_ADDR_2, vrf); + SYS(fail, "ip -6 route add default dev veth1 via %s %s", IP6_ADDR_2, vrf); + /* Bottom route */ + SYS(fail, "ip route add %s/32 dev veth5 %s", IP4_ADDR_6, vrf); + SYS(fail, "ip route add %s/32 dev veth5 via %s %s", IP4_ADDR_7, IP4_ADDR_6, vrf); + SYS(fail, "ip route add %s/32 dev veth5 via %s %s", IP4_ADDR_8, IP4_ADDR_6, vrf); + SYS(fail, "ip -6 route add %s/128 dev veth5 %s", IP6_ADDR_6, vrf); + SYS(fail, "ip -6 route add %s/128 dev veth5 via %s %s", IP6_ADDR_7, IP6_ADDR_6, vrf); + SYS(fail, "ip -6 route add %s/128 dev veth5 via %s %s", IP6_ADDR_8, IP6_ADDR_6, vrf); + + close_netns(nstoken); + return 0; +fail: + close_netns(nstoken); + return -1; +} + +static int configure_ns2(const char *ns2, const char *vrf) +{ + struct nstoken *nstoken = NULL; + + if (!ns2 || !vrf) + goto fail; + + nstoken = open_netns(ns2); + if (!ASSERT_OK_PTR(nstoken, "open ns2")) + goto fail; + + SYS(fail, "ip netns exec %s sysctl -wq net.ipv4.ip_forward=1", ns2); + SYS(fail, "ip netns exec %s sysctl -wq net.ipv6.conf.all.forwarding=1", ns2); + + /* Top route */ + SYS(fail, "ip route add %s/32 dev veth2 %s", IP4_ADDR_1, vrf); + SYS(fail, "ip route add %s/32 dev veth3 %s", IP4_ADDR_4, vrf); + SYS(fail, "ip -6 route add %s/128 dev veth2 %s", IP6_ADDR_1, vrf); + SYS(fail, "ip -6 route add %s/128 dev veth3 %s", IP6_ADDR_4, vrf); + /* Bottom route */ + SYS(fail, "ip route add %s/32 dev veth6 %s", IP4_ADDR_5, vrf); + SYS(fail, "ip route add %s/32 dev veth7 %s", IP4_ADDR_8, vrf); + SYS(fail, "ip -6 route add %s/128 dev veth6 %s", IP6_ADDR_5, vrf); + SYS(fail, "ip -6 route add %s/128 dev veth7 %s", IP6_ADDR_8, vrf); + + close_netns(nstoken); + return 0; +fail: + close_netns(nstoken); + return -1; +} + +static int configure_ns3(const char *ns3) +{ + struct nstoken *nstoken = NULL; + + if (!ns3) + goto fail; + + nstoken = open_netns(ns3); + if (!ASSERT_OK_PTR(nstoken, "open ns3")) + goto fail; + + /* Top route */ + SYS(fail, "ip route add %s/32 dev veth4", IP4_ADDR_3); + SYS(fail, "ip route add %s/32 dev veth4 via %s", IP4_ADDR_1, IP4_ADDR_3); + SYS(fail, "ip route add %s/32 dev veth4 via %s", IP4_ADDR_2, IP4_ADDR_3); + SYS(fail, "ip -6 route add %s/128 dev veth4", IP6_ADDR_3); + SYS(fail, "ip -6 route add %s/128 dev veth4 via %s", IP6_ADDR_1, IP6_ADDR_3); + SYS(fail, "ip -6 route add %s/128 dev veth4 via %s", IP6_ADDR_2, IP6_ADDR_3); + /* Bottom route */ + SYS(fail, "ip route add %s/32 dev veth8", IP4_ADDR_7); + SYS(fail, "ip route add %s/32 dev veth8 via %s", IP4_ADDR_5, IP4_ADDR_7); + SYS(fail, "ip route add %s/32 dev veth8 via %s", IP4_ADDR_6, IP4_ADDR_7); + SYS(fail, "ip -6 route add %s/128 dev veth8", IP6_ADDR_7); + SYS(fail, "ip -6 route add %s/128 dev veth8 via %s", IP6_ADDR_5, IP6_ADDR_7); + SYS(fail, "ip -6 route add %s/128 dev veth8 via %s", IP6_ADDR_6, IP6_ADDR_7); + + /* Configure IPv4 GRE device */ + SYS(fail, "ip tunnel add gre_dev mode gre remote %s local %s ttl 255", + IP4_ADDR_1, IP4_ADDR_GRE); + SYS(fail, "ip link set gre_dev up"); + SYS(fail, "ip a add %s dev gre_dev", IP4_ADDR_GRE); + + /* Configure IPv6 GRE device */ + SYS(fail, "ip tunnel add gre6_dev mode ip6gre remote %s local %s ttl 255", + IP6_ADDR_1, IP6_ADDR_GRE); + SYS(fail, "ip link set gre6_dev up"); + SYS(fail, "ip a add %s dev gre6_dev", IP6_ADDR_GRE); + + close_netns(nstoken); + return 0; +fail: + close_netns(nstoken); + return -1; +} + +static int setup_network(char *ns1, char *ns2, char *ns3, const char *vrf) +{ + if (!ns1 || !ns2 || !ns3 || !vrf) + goto fail; + + SYS(fail, "ip -n %s link add veth1 type veth peer name veth2 netns %s", ns1, ns2); + SYS(fail, "ip -n %s link add veth3 type veth peer name veth4 netns %s", ns2, ns3); + SYS(fail, "ip -n %s link add veth5 type veth peer name veth6 netns %s", ns1, ns2); + SYS(fail, "ip -n %s link add veth7 type veth peer name veth8 netns %s", ns2, ns3); + + if (vrf[0]) { + if (!ASSERT_OK(configure_vrf(ns1, ns2), "configure vrf")) + goto fail; + } + if (!ASSERT_OK(set_top_addr(ns1, ns2, ns3), "set top addresses")) + goto fail; + + if (!ASSERT_OK(set_bottom_addr(ns1, ns2, ns3), "set bottom addresses")) + goto fail; + + if (!ASSERT_OK(configure_ns1(ns1, vrf), "configure ns1 routes")) + goto fail; + + if (!ASSERT_OK(configure_ns2(ns2, vrf), "configure ns2 routes")) + goto fail; + + if (!ASSERT_OK(configure_ns3(ns3), "configure ns3 routes")) + goto fail; + + /* Link bottom route to the GRE tunnels */ + SYS(fail, "ip -n %s route add %s/32 dev veth5 via %s %s", + ns1, IP4_ADDR_GRE, IP4_ADDR_6, vrf); + SYS(fail, "ip -n %s route add %s/32 dev veth7 via %s %s", + ns2, IP4_ADDR_GRE, IP4_ADDR_8, vrf); + SYS(fail, "ip -n %s -6 route add %s/128 dev veth5 via %s %s", + ns1, IP6_ADDR_GRE, IP6_ADDR_6, vrf); + SYS(fail, "ip -n %s -6 route add %s/128 dev veth7 via %s %s", + ns2, IP6_ADDR_GRE, IP6_ADDR_8, vrf); + + return 0; +fail: + return -1; +} + +static int remove_routes_to_gredev(const char *ns1, const char *ns2, const char *vrf) +{ + SYS(fail, "ip -n %s route del %s dev veth5 %s", ns1, IP4_ADDR_GRE, vrf); + SYS(fail, "ip -n %s route del %s dev veth7 %s", ns2, IP4_ADDR_GRE, vrf); + SYS(fail, "ip -n %s -6 route del %s/128 dev veth5 %s", ns1, IP6_ADDR_GRE, vrf); + SYS(fail, "ip -n %s -6 route del %s/128 dev veth7 %s", ns2, IP6_ADDR_GRE, vrf); + + return 0; +fail: + return -1; +} + +static int add_unreachable_routes_to_gredev(const char *ns1, const char *ns2, const char *vrf) +{ + SYS(fail, "ip -n %s route add unreachable %s/32 %s", ns1, IP4_ADDR_GRE, vrf); + SYS(fail, "ip -n %s route add unreachable %s/32 %s", ns2, IP4_ADDR_GRE, vrf); + SYS(fail, "ip -n %s -6 route add unreachable %s/128 %s", ns1, IP6_ADDR_GRE, vrf); + SYS(fail, "ip -n %s -6 route add unreachable %s/128 %s", ns2, IP6_ADDR_GRE, vrf); + + return 0; +fail: + return -1; +} + +#define GSO_SIZE 5000 +#define GSO_TCP_PORT 9000 +/* This tests the fix from commit ea0371f78799 ("net: fix GSO in bpf_lwt_push_ip_encap") */ +static int test_gso_fix(const char *ns1, const char *ns3, int family) +{ + const char *ip_addr = family == AF_INET ? IP4_ADDR_DST : IP6_ADDR_DST; + char gso_packet[GSO_SIZE] = {}; + struct nstoken *nstoken = NULL; + int sfd, cfd, afd; + ssize_t bytes; + int ret = -1; + + if (!ns1 || !ns3) + return ret; + + nstoken = open_netns(ns3); + if (!ASSERT_OK_PTR(nstoken, "open ns3")) + return ret; + + sfd = start_server_str(family, SOCK_STREAM, ip_addr, GSO_TCP_PORT, NULL); + if (!ASSERT_OK_FD(sfd, "start server")) + goto close_netns; + + close_netns(nstoken); + + nstoken = open_netns(ns1); + if (!ASSERT_OK_PTR(nstoken, "open ns1")) + goto close_server; + + cfd = connect_to_addr_str(family, SOCK_STREAM, ip_addr, GSO_TCP_PORT, NULL); + if (!ASSERT_OK_FD(cfd, "connect to server")) + goto close_server; + + close_netns(nstoken); + nstoken = NULL; + + afd = accept(sfd, NULL, NULL); + if (!ASSERT_OK_FD(afd, "accept")) + goto close_client; + + /* Send a packet larger than MTU */ + bytes = send(cfd, gso_packet, GSO_SIZE, 0); + if (!ASSERT_EQ(bytes, GSO_SIZE, "send packet")) + goto close_accept; + + /* Verify we received all expected bytes */ + bytes = read(afd, gso_packet, GSO_SIZE); + if (!ASSERT_EQ(bytes, GSO_SIZE, "receive packet")) + goto close_accept; + + ret = 0; + +close_accept: + close(afd); +close_client: + close(cfd); +close_server: + close(sfd); +close_netns: + close_netns(nstoken); + + return ret; +} + +static int check_ping_ok(const char *ns1) +{ + SYS(fail, "ip netns exec %s ping -c 1 -W1 -I veth1 %s > /dev/null", ns1, IP4_ADDR_DST); + SYS(fail, "ip netns exec %s ping6 -c 1 -W1 -I veth1 %s > /dev/null", ns1, IP6_ADDR_DST); + return 0; +fail: + return -1; +} + +static int check_ping_fails(const char *ns1) +{ + int ret; + + ret = SYS_NOFAIL("ip netns exec %s ping -c 1 -W1 -I veth1 %s", ns1, IP4_ADDR_DST); + if (!ret) + return -1; + + ret = SYS_NOFAIL("ip netns exec %s ping6 -c 1 -W1 -I veth1 %s", ns1, IP6_ADDR_DST); + if (!ret) + return -1; + + return 0; +} + +#define EGRESS true +#define INGRESS false +#define IPV4_ENCAP true +#define IPV6_ENCAP false +static void lwt_ip_encap(bool ipv4_encap, bool egress, const char *vrf) +{ + char ns1[NETNS_NAME_SIZE] = NETNS_BASE "-1-"; + char ns2[NETNS_NAME_SIZE] = NETNS_BASE "-2-"; + char ns3[NETNS_NAME_SIZE] = NETNS_BASE "-3-"; + char *sec = ipv4_encap ? "encap_gre" : "encap_gre6"; + + if (!vrf) + return; + + if (!ASSERT_OK(create_ns(ns1, NETNS_NAME_SIZE), "create ns1")) + goto out; + if (!ASSERT_OK(create_ns(ns2, NETNS_NAME_SIZE), "create ns2")) + goto out; + if (!ASSERT_OK(create_ns(ns3, NETNS_NAME_SIZE), "create ns3")) + goto out; + + if (!ASSERT_OK(setup_network(ns1, ns2, ns3, vrf), "setup network")) + goto out; + + /* By default, pings work */ + if (!ASSERT_OK(check_ping_ok(ns1), "ping OK")) + goto out; + + /* Remove NS2->DST routes, ping fails */ + SYS(out, "ip -n %s route del %s/32 dev veth3 %s", ns2, IP4_ADDR_DST, vrf); + SYS(out, "ip -n %s -6 route del %s/128 dev veth3 %s", ns2, IP6_ADDR_DST, vrf); + if (!ASSERT_OK(check_ping_fails(ns1), "ping expected fail")) + goto out; + + /* Install replacement routes (LWT/eBPF), pings succeed */ + if (egress) { + SYS(out, "ip -n %s route add %s encap bpf xmit obj %s sec %s dev veth1 %s", + ns1, IP4_ADDR_DST, BPF_FILE, sec, vrf); + SYS(out, "ip -n %s -6 route add %s encap bpf xmit obj %s sec %s dev veth1 %s", + ns1, IP6_ADDR_DST, BPF_FILE, sec, vrf); + } else { + SYS(out, "ip -n %s route add %s encap bpf in obj %s sec %s dev veth2 %s", + ns2, IP4_ADDR_DST, BPF_FILE, sec, vrf); + SYS(out, "ip -n %s -6 route add %s encap bpf in obj %s sec %s dev veth2 %s", + ns2, IP6_ADDR_DST, BPF_FILE, sec, vrf); + } + + if (!ASSERT_OK(check_ping_ok(ns1), "ping OK")) + goto out; + + /* Skip GSO tests with VRF: VRF routing needs properly assigned + * source IP/device, which is easy to do with ping but hard with TCP. + */ + if (egress && !vrf[0]) { + if (!ASSERT_OK(test_gso_fix(ns1, ns3, AF_INET), "test GSO")) + goto out; + } + + /* Negative test: remove routes to GRE devices: ping fails */ + if (!ASSERT_OK(remove_routes_to_gredev(ns1, ns2, vrf), "remove routes to gredev")) + goto out; + if (!ASSERT_OK(check_ping_fails(ns1), "ping expected fail")) + goto out; + + /* Another negative test */ + if (!ASSERT_OK(add_unreachable_routes_to_gredev(ns1, ns2, vrf), + "add unreachable routes")) + goto out; + ASSERT_OK(check_ping_fails(ns1), "ping expected fail"); + +out: + SYS_NOFAIL("ip netns del %s", ns1); + SYS_NOFAIL("ip netns del %s", ns2); + SYS_NOFAIL("ip netns del %s", ns3); +} + +void test_lwt_ip_encap_vrf_ipv6(void) +{ + if (test__start_subtest("egress")) + lwt_ip_encap(IPV6_ENCAP, EGRESS, "vrf red"); + + if (test__start_subtest("ingress")) + lwt_ip_encap(IPV6_ENCAP, INGRESS, "vrf red"); +} + +void test_lwt_ip_encap_vrf_ipv4(void) +{ + if (test__start_subtest("egress")) + lwt_ip_encap(IPV4_ENCAP, EGRESS, "vrf red"); + + if (test__start_subtest("ingress")) + lwt_ip_encap(IPV4_ENCAP, INGRESS, "vrf red"); +} + +void test_lwt_ip_encap_ipv6(void) +{ + if (test__start_subtest("egress")) + lwt_ip_encap(IPV6_ENCAP, EGRESS, ""); + + if (test__start_subtest("ingress")) + lwt_ip_encap(IPV6_ENCAP, INGRESS, ""); +} + +void test_lwt_ip_encap_ipv4(void) +{ + if (test__start_subtest("egress")) + lwt_ip_encap(IPV4_ENCAP, EGRESS, ""); + + if (test__start_subtest("ingress")) + lwt_ip_encap(IPV4_ENCAP, INGRESS, ""); +} diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_seg6local.c b/tools/testing/selftests/bpf/prog_tests/lwt_seg6local.c new file mode 100644 index 000000000000..3bc730b7c7fa --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/lwt_seg6local.c @@ -0,0 +1,176 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* Connects 6 network namespaces through veths. + * Each NS may have different IPv6 global scope addresses : + * + * NS1 NS2 NS3 NS4 NS5 NS6 + * lo veth1 <-> veth2 veth3 <-> veth4 veth5 <-> veth6 lo veth7 <-> veth8 veth9 <-> veth10 lo + * fb00 ::1 ::12 ::21 ::34 ::43 ::56 ::65 ::78 ::87 ::910 ::109 ::6 + * fd00 ::4 + * fc42 ::1 + * + * All IPv6 packets going to fb00::/16 through NS2 will be encapsulated in a + * IPv6 header with a Segment Routing Header, with segments : + * fd00::1 -> fd00::2 -> fd00::3 -> fd00::4 + * + * 3 fd00::/16 IPv6 addresses are binded to seg6local End.BPF actions : + * - fd00::1 : add a TLV, change the flags and apply a End.X action to fc42::1 + * - fd00::2 : remove the TLV, change the flags, add a tag + * - fd00::3 : apply an End.T action to fd00::4, through routing table 117 + * + * fd00::4 is a simple Segment Routing node decapsulating the inner IPv6 packet. + * Each End.BPF action will validate the operations applied on the SRH by the + * previous BPF program in the chain, otherwise the packet is dropped. + * + * An UDP datagram is sent from fb00::1 to fb00::6. The test succeeds if this + * datagram can be read on NS6 when binding to fb00::6. + */ + +#include "network_helpers.h" +#include "test_progs.h" + +#define NETNS_BASE "lwt-seg6local-" +#define BPF_FILE "test_lwt_seg6local.bpf.o" + +static void cleanup(void) +{ + int ns; + + for (ns = 1; ns < 7; ns++) + SYS_NOFAIL("ip netns del %s%d", NETNS_BASE, ns); +} + +static int setup(void) +{ + int ns; + + for (ns = 1; ns < 7; ns++) + SYS(fail, "ip netns add %s%d", NETNS_BASE, ns); + + SYS(fail, "ip -n %s6 link set dev lo up", NETNS_BASE); + + for (ns = 1; ns < 6; ns++) { + int local_id = ns * 2 - 1; + int peer_id = ns * 2; + int next_ns = ns + 1; + + SYS(fail, "ip -n %s%d link add veth%d type veth peer name veth%d netns %s%d", + NETNS_BASE, ns, local_id, peer_id, NETNS_BASE, next_ns); + + SYS(fail, "ip -n %s%d link set dev veth%d up", NETNS_BASE, ns, local_id); + SYS(fail, "ip -n %s%d link set dev veth%d up", NETNS_BASE, next_ns, peer_id); + + /* All link scope addresses to veths */ + SYS(fail, "ip -n %s%d -6 addr add fb00::%d%d/16 dev veth%d scope link", + NETNS_BASE, ns, local_id, peer_id, local_id); + SYS(fail, "ip -n %s%d -6 addr add fb00::%d%d/16 dev veth%d scope link", + NETNS_BASE, next_ns, peer_id, local_id, peer_id); + } + + + SYS(fail, "ip -n %s5 -6 route add fb00::109 table 117 dev veth9 scope link", NETNS_BASE); + + SYS(fail, "ip -n %s1 -6 addr add fb00::1/16 dev lo", NETNS_BASE); + SYS(fail, "ip -n %s1 -6 route add fb00::6 dev veth1 via fb00::21", NETNS_BASE); + + SYS(fail, "ip -n %s2 -6 route add fb00::6 encap bpf in obj %s sec encap_srh dev veth2", + NETNS_BASE, BPF_FILE); + SYS(fail, "ip -n %s2 -6 route add fd00::1 dev veth3 via fb00::43 scope link", NETNS_BASE); + + SYS(fail, "ip -n %s3 -6 route add fc42::1 dev veth5 via fb00::65", NETNS_BASE); + SYS(fail, + "ip -n %s3 -6 route add fd00::1 encap seg6local action End.BPF endpoint obj %s sec add_egr_x dev veth4", + NETNS_BASE, BPF_FILE); + + SYS(fail, + "ip -n %s4 -6 route add fd00::2 encap seg6local action End.BPF endpoint obj %s sec pop_egr dev veth6", + NETNS_BASE, BPF_FILE); + SYS(fail, "ip -n %s4 -6 addr add fc42::1 dev lo", NETNS_BASE); + SYS(fail, "ip -n %s4 -6 route add fd00::3 dev veth7 via fb00::87", NETNS_BASE); + + SYS(fail, "ip -n %s5 -6 route add fd00::4 table 117 dev veth9 via fb00::109", NETNS_BASE); + SYS(fail, + "ip -n %s5 -6 route add fd00::3 encap seg6local action End.BPF endpoint obj %s sec inspect_t dev veth8", + NETNS_BASE, BPF_FILE); + + SYS(fail, "ip -n %s6 -6 addr add fb00::6/16 dev lo", NETNS_BASE); + SYS(fail, "ip -n %s6 -6 addr add fd00::4/16 dev lo", NETNS_BASE); + + for (ns = 1; ns < 6; ns++) + SYS(fail, "ip netns exec %s%d sysctl -wq net.ipv6.conf.all.forwarding=1", + NETNS_BASE, ns); + + SYS(fail, "ip netns exec %s6 sysctl -wq net.ipv6.conf.all.seg6_enabled=1", NETNS_BASE); + SYS(fail, "ip netns exec %s6 sysctl -wq net.ipv6.conf.lo.seg6_enabled=1", NETNS_BASE); + SYS(fail, "ip netns exec %s6 sysctl -wq net.ipv6.conf.veth10.seg6_enabled=1", NETNS_BASE); + + return 0; +fail: + return -1; +} + +#define SERVER_PORT 7330 +#define CLIENT_PORT 2121 +void test_lwt_seg6local(void) +{ + struct sockaddr_in6 server_addr = {}; + const char *ns1 = NETNS_BASE "1"; + const char *ns6 = NETNS_BASE "6"; + struct nstoken *nstoken = NULL; + const char *foobar = "foobar"; + ssize_t bytes; + int sfd, cfd; + char buf[7]; + + if (!ASSERT_OK(setup(), "setup")) + goto out; + + nstoken = open_netns(ns6); + if (!ASSERT_OK_PTR(nstoken, "open ns6")) + goto out; + + sfd = start_server_str(AF_INET6, SOCK_DGRAM, "fb00::6", SERVER_PORT, NULL); + if (!ASSERT_OK_FD(sfd, "start server")) + goto close_netns; + + close_netns(nstoken); + + nstoken = open_netns(ns1); + if (!ASSERT_OK_PTR(nstoken, "open ns1")) + goto close_server; + + cfd = start_server_str(AF_INET6, SOCK_DGRAM, "fb00::1", CLIENT_PORT, NULL); + if (!ASSERT_OK_FD(cfd, "start client")) + goto close_server; + + close_netns(nstoken); + nstoken = NULL; + + /* Send a packet larger than MTU */ + server_addr.sin6_family = AF_INET6; + server_addr.sin6_port = htons(SERVER_PORT); + if (!ASSERT_EQ(inet_pton(AF_INET6, "fb00::6", &server_addr.sin6_addr), 1, + "build target addr")) + goto close_client; + + bytes = sendto(cfd, foobar, sizeof(foobar), 0, + (struct sockaddr *)&server_addr, sizeof(server_addr)); + if (!ASSERT_EQ(bytes, sizeof(foobar), "send packet")) + goto close_client; + + /* Verify we received all expected bytes */ + bytes = read(sfd, buf, sizeof(buf)); + if (!ASSERT_EQ(bytes, sizeof(buf), "receive packet")) + goto close_client; + ASSERT_STREQ(buf, foobar, "check udp packet"); + +close_client: + close(cfd); +close_server: + close(sfd); +close_netns: + close_netns(nstoken); + +out: + cleanup(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/net_timestamping.c b/tools/testing/selftests/bpf/prog_tests/net_timestamping.c new file mode 100644 index 000000000000..dbfd87499b6b --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/net_timestamping.c @@ -0,0 +1,239 @@ +#include <linux/net_tstamp.h> +#include <sys/time.h> +#include <linux/errqueue.h> +#include "test_progs.h" +#include "network_helpers.h" +#include "net_timestamping.skel.h" + +#define CG_NAME "/net-timestamping-test" +#define NSEC_PER_SEC 1000000000LL + +static const char addr4_str[] = "127.0.0.1"; +static const char addr6_str[] = "::1"; +static struct net_timestamping *skel; +static const int cfg_payload_len = 30; +static struct timespec usr_ts; +static u64 delay_tolerance_nsec = 10000000000; /* 10 seconds */ +int SK_TS_SCHED; +int SK_TS_TXSW; +int SK_TS_ACK; + +static int64_t timespec_to_ns64(struct timespec *ts) +{ + return ts->tv_sec * NSEC_PER_SEC + ts->tv_nsec; +} + +static void validate_key(int tskey, int tstype) +{ + static int expected_tskey = -1; + + if (tstype == SCM_TSTAMP_SCHED) + expected_tskey = cfg_payload_len - 1; + + ASSERT_EQ(expected_tskey, tskey, "tskey mismatch"); + + expected_tskey = tskey; +} + +static void validate_timestamp(struct timespec *cur, struct timespec *prev) +{ + int64_t cur_ns, prev_ns; + + cur_ns = timespec_to_ns64(cur); + prev_ns = timespec_to_ns64(prev); + + ASSERT_LT(cur_ns - prev_ns, delay_tolerance_nsec, "latency"); +} + +static void test_socket_timestamp(struct scm_timestamping *tss, int tstype, + int tskey) +{ + static struct timespec prev_ts; + + validate_key(tskey, tstype); + + switch (tstype) { + case SCM_TSTAMP_SCHED: + validate_timestamp(&tss->ts[0], &usr_ts); + SK_TS_SCHED += 1; + break; + case SCM_TSTAMP_SND: + validate_timestamp(&tss->ts[0], &prev_ts); + SK_TS_TXSW += 1; + break; + case SCM_TSTAMP_ACK: + validate_timestamp(&tss->ts[0], &prev_ts); + SK_TS_ACK += 1; + break; + } + + prev_ts = tss->ts[0]; +} + +static void test_recv_errmsg_cmsg(struct msghdr *msg) +{ + struct sock_extended_err *serr = NULL; + struct scm_timestamping *tss = NULL; + struct cmsghdr *cm; + + for (cm = CMSG_FIRSTHDR(msg); + cm && cm->cmsg_len; + cm = CMSG_NXTHDR(msg, cm)) { + if (cm->cmsg_level == SOL_SOCKET && + cm->cmsg_type == SCM_TIMESTAMPING) { + tss = (void *)CMSG_DATA(cm); + } else if ((cm->cmsg_level == SOL_IP && + cm->cmsg_type == IP_RECVERR) || + (cm->cmsg_level == SOL_IPV6 && + cm->cmsg_type == IPV6_RECVERR) || + (cm->cmsg_level == SOL_PACKET && + cm->cmsg_type == PACKET_TX_TIMESTAMP)) { + serr = (void *)CMSG_DATA(cm); + ASSERT_EQ(serr->ee_origin, SO_EE_ORIGIN_TIMESTAMPING, + "cmsg type"); + } + + if (serr && tss) + test_socket_timestamp(tss, serr->ee_info, + serr->ee_data); + } +} + +static bool socket_recv_errmsg(int fd) +{ + static char ctrl[1024 /* overprovision*/]; + char data[cfg_payload_len]; + static struct msghdr msg; + struct iovec entry; + int n = 0; + + memset(&msg, 0, sizeof(msg)); + memset(&entry, 0, sizeof(entry)); + memset(ctrl, 0, sizeof(ctrl)); + + entry.iov_base = data; + entry.iov_len = cfg_payload_len; + msg.msg_iov = &entry; + msg.msg_iovlen = 1; + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_control = ctrl; + msg.msg_controllen = sizeof(ctrl); + + n = recvmsg(fd, &msg, MSG_ERRQUEUE); + if (n == -1) + ASSERT_EQ(errno, EAGAIN, "recvmsg MSG_ERRQUEUE"); + + if (n >= 0) + test_recv_errmsg_cmsg(&msg); + + return n == -1; +} + +static void test_socket_timestamping(int fd) +{ + while (!socket_recv_errmsg(fd)); + + ASSERT_EQ(SK_TS_SCHED, 1, "SCM_TSTAMP_SCHED"); + ASSERT_EQ(SK_TS_TXSW, 1, "SCM_TSTAMP_SND"); + ASSERT_EQ(SK_TS_ACK, 1, "SCM_TSTAMP_ACK"); + + SK_TS_SCHED = 0; + SK_TS_TXSW = 0; + SK_TS_ACK = 0; +} + +static void test_tcp(int family, bool enable_socket_timestamping) +{ + struct net_timestamping__bss *bss; + char buf[cfg_payload_len]; + int sfd = -1, cfd = -1; + unsigned int sock_opt; + struct netns_obj *ns; + int cg_fd; + int ret; + + cg_fd = test__join_cgroup(CG_NAME); + if (!ASSERT_OK_FD(cg_fd, "join cgroup")) + return; + + ns = netns_new("net_timestamping_ns", true); + if (!ASSERT_OK_PTR(ns, "create ns")) + goto out; + + skel = net_timestamping__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open and load skel")) + goto out; + + if (!ASSERT_OK(net_timestamping__attach(skel), "attach skel")) + goto out; + + skel->links.skops_sockopt = + bpf_program__attach_cgroup(skel->progs.skops_sockopt, cg_fd); + if (!ASSERT_OK_PTR(skel->links.skops_sockopt, "attach cgroup")) + goto out; + + bss = skel->bss; + memset(bss, 0, sizeof(*bss)); + + skel->bss->monitored_pid = getpid(); + + sfd = start_server(family, SOCK_STREAM, + family == AF_INET6 ? addr6_str : addr4_str, 0, 0); + if (!ASSERT_OK_FD(sfd, "start_server")) + goto out; + + cfd = connect_to_fd(sfd, 0); + if (!ASSERT_OK_FD(cfd, "connect_to_fd_server")) + goto out; + + if (enable_socket_timestamping) { + sock_opt = SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_OPT_ID | + SOF_TIMESTAMPING_TX_SCHED | + SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_TX_ACK; + ret = setsockopt(cfd, SOL_SOCKET, SO_TIMESTAMPING, + (char *) &sock_opt, sizeof(sock_opt)); + if (!ASSERT_OK(ret, "setsockopt SO_TIMESTAMPING")) + goto out; + + ret = clock_gettime(CLOCK_REALTIME, &usr_ts); + if (!ASSERT_OK(ret, "get user time")) + goto out; + } + + ret = write(cfd, buf, sizeof(buf)); + if (!ASSERT_EQ(ret, sizeof(buf), "send to server")) + goto out; + + if (enable_socket_timestamping) + test_socket_timestamping(cfd); + + ASSERT_EQ(bss->nr_active, 1, "nr_active"); + ASSERT_EQ(bss->nr_snd, 2, "nr_snd"); + ASSERT_EQ(bss->nr_sched, 1, "nr_sched"); + ASSERT_EQ(bss->nr_txsw, 1, "nr_txsw"); + ASSERT_EQ(bss->nr_ack, 1, "nr_ack"); + +out: + if (sfd >= 0) + close(sfd); + if (cfd >= 0) + close(cfd); + net_timestamping__destroy(skel); + netns_free(ns); + close(cg_fd); +} + +void test_net_timestamping(void) +{ + if (test__start_subtest("INET4: bpf timestamping")) + test_tcp(AF_INET, false); + if (test__start_subtest("INET4: bpf and socket timestamping")) + test_tcp(AF_INET, true); + if (test__start_subtest("INET6: bpf timestamping")) + test_tcp(AF_INET6, false); + if (test__start_subtest("INET6: bpf and socket timestamping")) + test_tcp(AF_INET6, true); +} diff --git a/tools/testing/selftests/bpf/prog_tests/netns_cookie.c b/tools/testing/selftests/bpf/prog_tests/netns_cookie.c index ac3c3c097c0e..e00cd34586dd 100644 --- a/tools/testing/selftests/bpf/prog_tests/netns_cookie.c +++ b/tools/testing/selftests/bpf/prog_tests/netns_cookie.c @@ -33,20 +33,25 @@ void test_netns_cookie(void) skel->links.get_netns_cookie_sockops = bpf_program__attach_cgroup( skel->progs.get_netns_cookie_sockops, cgroup_fd); - if (!ASSERT_OK_PTR(skel->links.get_netns_cookie_sockops, "prog_attach")) + if (!ASSERT_OK_PTR(skel->links.get_netns_cookie_sockops, "prog_attach_sockops")) goto done; verdict = bpf_program__fd(skel->progs.get_netns_cookie_sk_msg); map = bpf_map__fd(skel->maps.sock_map); err = bpf_prog_attach(verdict, map, BPF_SK_MSG_VERDICT, 0); - if (!ASSERT_OK(err, "prog_attach")) + if (!ASSERT_OK(err, "prog_attach_sk_msg")) goto done; tc_fd = bpf_program__fd(skel->progs.get_netns_cookie_tcx); err = bpf_prog_attach_opts(tc_fd, loopback, BPF_TCX_INGRESS, &opta); - if (!ASSERT_OK(err, "prog_attach")) + if (!ASSERT_OK(err, "prog_attach_tcx")) goto done; + skel->links.get_netns_cookie_cgroup_skb = bpf_program__attach_cgroup( + skel->progs.get_netns_cookie_cgroup_skb, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.get_netns_cookie_cgroup_skb, "prog_attach_cgroup_skb")) + goto cleanup_tc; + server_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); if (CHECK(server_fd < 0, "start_server", "errno %d\n", errno)) goto cleanup_tc; @@ -69,16 +74,18 @@ void test_netns_cookie(void) if (!ASSERT_OK(err, "getsockopt")) goto cleanup_tc; - ASSERT_EQ(val, cookie_expected_value, "cookie_value"); + ASSERT_EQ(val, cookie_expected_value, "cookie_value_sockops"); err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.sk_msg_netns_cookies), &client_fd, &val); if (!ASSERT_OK(err, "map_lookup(sk_msg_netns_cookies)")) goto cleanup_tc; - ASSERT_EQ(val, cookie_expected_value, "cookie_value"); - ASSERT_EQ(skel->bss->tcx_init_netns_cookie, cookie_expected_value, "cookie_value"); - ASSERT_EQ(skel->bss->tcx_netns_cookie, cookie_expected_value, "cookie_value"); + ASSERT_EQ(val, cookie_expected_value, "cookie_value_sk_msg"); + ASSERT_EQ(skel->bss->tcx_init_netns_cookie, cookie_expected_value, "cookie_value_init_tcx"); + ASSERT_EQ(skel->bss->tcx_netns_cookie, cookie_expected_value, "cookie_value_tcx"); + ASSERT_EQ(skel->bss->cgroup_skb_init_netns_cookie, cookie_expected_value, "cookie_value_init_cgroup_skb"); + ASSERT_EQ(skel->bss->cgroup_skb_netns_cookie, cookie_expected_value, "cookie_value_cgroup_skb"); cleanup_tc: err = bpf_prog_detach_opts(tc_fd, loopback, BPF_TCX_INGRESS, &optd); diff --git a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c index 761ce24bce38..99c953f2be21 100644 --- a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c +++ b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c @@ -200,41 +200,28 @@ static void test_ns_current_pid_tgid_new_ns(int (*fn)(void *), void *arg) return; } -static void test_in_netns(int (*fn)(void *), void *arg) -{ - struct nstoken *nstoken = NULL; - - SYS(cleanup, "ip netns add ns_current_pid_tgid"); - SYS(cleanup, "ip -net ns_current_pid_tgid link set dev lo up"); - - nstoken = open_netns("ns_current_pid_tgid"); - if (!ASSERT_OK_PTR(nstoken, "open_netns")) - goto cleanup; - - test_ns_current_pid_tgid_new_ns(fn, arg); - -cleanup: - if (nstoken) - close_netns(nstoken); - SYS_NOFAIL("ip netns del ns_current_pid_tgid"); -} - /* TODO: use a different tracepoint */ -void serial_test_ns_current_pid_tgid(void) +void serial_test_current_pid_tgid(void) { if (test__start_subtest("root_ns_tp")) test_current_pid_tgid_tp(NULL); if (test__start_subtest("new_ns_tp")) test_ns_current_pid_tgid_new_ns(test_current_pid_tgid_tp, NULL); - if (test__start_subtest("new_ns_cgrp")) { - int cgroup_fd = -1; - - cgroup_fd = test__join_cgroup("/sock_addr"); - if (ASSERT_GE(cgroup_fd, 0, "join_cgroup")) { - test_in_netns(test_current_pid_tgid_cgrp, &cgroup_fd); - close(cgroup_fd); - } +} + +void test_ns_current_pid_tgid_cgrp(void) +{ + int cgroup_fd = test__join_cgroup("/sock_addr"); + + if (ASSERT_OK_FD(cgroup_fd, "join_cgroup")) { + test_ns_current_pid_tgid_new_ns(test_current_pid_tgid_cgrp, &cgroup_fd); + close(cgroup_fd); } - if (test__start_subtest("new_ns_sk_msg")) - test_in_netns(test_current_pid_tgid_sk_msg, NULL); } + +void test_ns_current_pid_tgid_sk_msg(void) +{ + test_ns_current_pid_tgid_new_ns(test_current_pid_tgid_sk_msg, NULL); +} + + diff --git a/tools/testing/selftests/bpf/prog_tests/prepare.c b/tools/testing/selftests/bpf/prog_tests/prepare.c new file mode 100644 index 000000000000..fb5cdad97116 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/prepare.c @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta */ + +#include <test_progs.h> +#include <network_helpers.h> +#include "prepare.skel.h" + +static bool check_prepared(struct bpf_object *obj) +{ + bool is_prepared = true; + const struct bpf_map *map; + + bpf_object__for_each_map(map, obj) { + if (bpf_map__fd(map) < 0) + is_prepared = false; + } + + return is_prepared; +} + +static void test_prepare_no_load(void) +{ + struct prepare *skel; + int err; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + ); + + skel = prepare__open(); + if (!ASSERT_OK_PTR(skel, "prepare__open")) + return; + + if (!ASSERT_FALSE(check_prepared(skel->obj), "not check_prepared")) + goto cleanup; + + err = bpf_object__prepare(skel->obj); + + if (!ASSERT_TRUE(check_prepared(skel->obj), "check_prepared")) + goto cleanup; + + if (!ASSERT_OK(err, "bpf_object__prepare")) + goto cleanup; + +cleanup: + prepare__destroy(skel); +} + +static void test_prepare_load(void) +{ + struct prepare *skel; + int err, prog_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + ); + + skel = prepare__open(); + if (!ASSERT_OK_PTR(skel, "prepare__open")) + return; + + if (!ASSERT_FALSE(check_prepared(skel->obj), "not check_prepared")) + goto cleanup; + + err = bpf_object__prepare(skel->obj); + if (!ASSERT_OK(err, "bpf_object__prepare")) + goto cleanup; + + err = prepare__load(skel); + if (!ASSERT_OK(err, "prepare__load")) + goto cleanup; + + if (!ASSERT_TRUE(check_prepared(skel->obj), "check_prepared")) + goto cleanup; + + prog_fd = bpf_program__fd(skel->progs.program); + if (!ASSERT_GE(prog_fd, 0, "prog_fd")) + goto cleanup; + + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err")) + goto cleanup; + + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) + goto cleanup; + + ASSERT_EQ(skel->bss->err, 0, "err"); + +cleanup: + prepare__destroy(skel); +} + +void test_prepare(void) +{ + if (test__start_subtest("prepare_load")) + test_prepare_load(); + if (test__start_subtest("prepare_no_load")) + test_prepare_no_load(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/pro_epilogue.c b/tools/testing/selftests/bpf/prog_tests/pro_epilogue.c index 509883e6823a..5d3c00a08a88 100644 --- a/tools/testing/selftests/bpf/prog_tests/pro_epilogue.c +++ b/tools/testing/selftests/bpf/prog_tests/pro_epilogue.c @@ -6,6 +6,7 @@ #include "epilogue_tailcall.skel.h" #include "pro_epilogue_goto_start.skel.h" #include "epilogue_exit.skel.h" +#include "pro_epilogue_with_kfunc.skel.h" struct st_ops_args { __u64 a; @@ -55,6 +56,7 @@ void test_pro_epilogue(void) RUN_TESTS(pro_epilogue); RUN_TESTS(pro_epilogue_goto_start); RUN_TESTS(epilogue_exit); + RUN_TESTS(pro_epilogue_with_kfunc); if (test__start_subtest("tailcall")) test_tailcall(); } diff --git a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c index ebe0c12b5536..c9f855e5da24 100644 --- a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c @@ -81,6 +81,9 @@ static const char * const inproper_region_tests[] = { "nested_rcu_region", "rcu_read_lock_global_subprog_lock", "rcu_read_lock_global_subprog_unlock", + "rcu_read_lock_sleepable_helper_global_subprog", + "rcu_read_lock_sleepable_kfunc_global_subprog", + "rcu_read_lock_sleepable_global_subprog_indirect", }; static void test_inproper_region(void) diff --git a/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c index c7b9ba8b1d06..a8d1eaa67020 100644 --- a/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c +++ b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c @@ -24,6 +24,7 @@ struct read_ret_desc { { .name = "copy_from_user", .ret = -EFAULT }, { .name = "copy_from_user_task", .ret = -EFAULT }, { .name = "copy_from_user_str", .ret = -EFAULT }, + { .name = "copy_from_user_task_str", .ret = -EFAULT }, }; void test_read_vsyscall(void) diff --git a/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c b/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c new file mode 100644 index 000000000000..115287ba441b --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024-2025 Meta Platforms, Inc. and affiliates. */ +#include <test_progs.h> +#include <network_helpers.h> +#include <sys/sysinfo.h> + +#include "res_spin_lock.skel.h" +#include "res_spin_lock_fail.skel.h" + +void test_res_spin_lock_failure(void) +{ + RUN_TESTS(res_spin_lock_fail); +} + +static volatile int skip; + +static void *spin_lock_thread(void *arg) +{ + int err, prog_fd = *(u32 *) arg; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 10000, + ); + + while (!READ_ONCE(skip)) { + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_OK(topts.retval, "test_run retval"); + } + pthread_exit(arg); +} + +void test_res_spin_lock_success(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct res_spin_lock *skel; + pthread_t thread_id[16]; + int prog_fd, i, err; + void *ret; + + if (get_nprocs() < 2) { + test__skip(); + return; + } + + skel = res_spin_lock__open_and_load(); + if (!ASSERT_OK_PTR(skel, "res_spin_lock__open_and_load")) + return; + /* AA deadlock */ + prog_fd = bpf_program__fd(skel->progs.res_spin_lock_test); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "error"); + ASSERT_OK(topts.retval, "retval"); + + prog_fd = bpf_program__fd(skel->progs.res_spin_lock_test_held_lock_max); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "error"); + ASSERT_OK(topts.retval, "retval"); + + /* Multi-threaded ABBA deadlock. */ + + prog_fd = bpf_program__fd(skel->progs.res_spin_lock_test_AB); + for (i = 0; i < 16; i++) { + int err; + + err = pthread_create(&thread_id[i], NULL, &spin_lock_thread, &prog_fd); + if (!ASSERT_OK(err, "pthread_create")) + goto end; + } + + topts.retval = 0; + topts.repeat = 1000; + int fd = bpf_program__fd(skel->progs.res_spin_lock_test_BA); + while (!topts.retval && !err && !READ_ONCE(skel->bss->err)) { + err = bpf_prog_test_run_opts(fd, &topts); + } + + WRITE_ONCE(skip, true); + + for (i = 0; i < 16; i++) { + if (!ASSERT_OK(pthread_join(thread_id[i], &ret), "pthread_join")) + goto end; + if (!ASSERT_EQ(ret, &prog_fd, "ret == prog_fd")) + goto end; + } + + ASSERT_EQ(READ_ONCE(skel->bss->err), -EDEADLK, "timeout err"); + ASSERT_OK(err, "err"); + ASSERT_EQ(topts.retval, -EDEADLK, "timeout"); +end: + res_spin_lock__destroy(skel); + return; +} diff --git a/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c b/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c index e12255121c15..e4dac529d424 100644 --- a/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c +++ b/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c @@ -202,7 +202,7 @@ err_out: void test_setget_sockopt(void) { cg_fd = test__join_cgroup(CG_NAME); - if (cg_fd < 0) + if (!ASSERT_OK_FD(cg_fd, "join cgroup")) return; if (create_netns()) diff --git a/tools/testing/selftests/bpf/prog_tests/spin_lock.c b/tools/testing/selftests/bpf/prog_tests/spin_lock.c index 2b0068742ef9..e3ea5dc2f697 100644 --- a/tools/testing/selftests/bpf/prog_tests/spin_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/spin_lock.c @@ -50,6 +50,9 @@ static struct { { "lock_id_mismatch_innermapval_mapval", "bpf_spin_unlock of different lock" }, { "lock_global_subprog_call1", "global function calls are not allowed while holding a lock" }, { "lock_global_subprog_call2", "global function calls are not allowed while holding a lock" }, + { "lock_global_sleepable_helper_subprog", "global function calls are not allowed while holding a lock" }, + { "lock_global_sleepable_kfunc_subprog", "global function calls are not allowed while holding a lock" }, + { "lock_global_sleepable_subprog_indirect", "global function calls are not allowed while holding a lock" }, }; static int match_regex(const char *pattern, const char *string) diff --git a/tools/testing/selftests/bpf/prog_tests/summarization.c b/tools/testing/selftests/bpf/prog_tests/summarization.c new file mode 100644 index 000000000000..5dd6c120a838 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/summarization.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "bpf/libbpf.h" +#include "summarization_freplace.skel.h" +#include "summarization.skel.h" +#include <test_progs.h> + +static void print_verifier_log(const char *log) +{ + if (env.verbosity >= VERBOSE_VERY) + fprintf(stdout, "VERIFIER LOG:\n=============\n%s=============\n", log); +} + +static void test_aux(const char *main_prog_name, + const char *to_be_replaced, + const char *replacement, + bool expect_load, + const char *err_msg) +{ + struct summarization_freplace *freplace = NULL; + struct bpf_program *freplace_prog = NULL; + struct bpf_program *main_prog = NULL; + LIBBPF_OPTS(bpf_object_open_opts, opts); + struct summarization *main = NULL; + char log[16*1024]; + int err; + + opts.kernel_log_buf = log; + opts.kernel_log_size = sizeof(log); + if (env.verbosity >= VERBOSE_SUPER) + opts.kernel_log_level = 1 | 2 | 4; + main = summarization__open_opts(&opts); + if (!ASSERT_OK_PTR(main, "summarization__open")) + goto out; + main_prog = bpf_object__find_program_by_name(main->obj, main_prog_name); + if (!ASSERT_OK_PTR(main_prog, "main_prog")) + goto out; + bpf_program__set_autoload(main_prog, true); + err = summarization__load(main); + print_verifier_log(log); + if (!ASSERT_OK(err, "summarization__load")) + goto out; + freplace = summarization_freplace__open_opts(&opts); + if (!ASSERT_OK_PTR(freplace, "summarization_freplace__open")) + goto out; + freplace_prog = bpf_object__find_program_by_name(freplace->obj, replacement); + if (!ASSERT_OK_PTR(freplace_prog, "freplace_prog")) + goto out; + bpf_program__set_autoload(freplace_prog, true); + bpf_program__set_autoattach(freplace_prog, true); + bpf_program__set_attach_target(freplace_prog, + bpf_program__fd(main_prog), + to_be_replaced); + err = summarization_freplace__load(freplace); + print_verifier_log(log); + + /* The might_sleep extension doesn't work yet as sleepable calls are not + * allowed, but preserve the check in case it's supported later and then + * this particular combination can be enabled. + */ + if (!strcmp("might_sleep", replacement) && err) { + ASSERT_HAS_SUBSTR(log, "helper call might sleep in a non-sleepable prog", "error log"); + ASSERT_EQ(err, -EINVAL, "err"); + test__skip(); + goto out; + } + + if (expect_load) { + ASSERT_OK(err, "summarization_freplace__load"); + } else { + ASSERT_ERR(err, "summarization_freplace__load"); + ASSERT_HAS_SUBSTR(log, err_msg, "error log"); + } + +out: + summarization_freplace__destroy(freplace); + summarization__destroy(main); +} + +/* There are two global subprograms in both summarization.skel.h: + * - one changes packet data; + * - another does not. + * It is ok to freplace subprograms that change packet data with those + * that either do or do not. It is only ok to freplace subprograms + * that do not change packet data with those that do not as well. + * The below tests check outcomes for each combination of such freplace. + * Also test a case when main subprogram itself is replaced and is a single + * subprogram in a program. + * + * This holds for might_sleep programs. It is ok to replace might_sleep with + * might_sleep and with does_not_sleep, but does_not_sleep cannot be replaced + * with might_sleep. + */ +void test_summarization_freplace(void) +{ + struct { + const char *main; + const char *to_be_replaced; + bool has_side_effect; + } mains[2][4] = { + { + { "main_changes_with_subprogs", "changes_pkt_data", true }, + { "main_changes_with_subprogs", "does_not_change_pkt_data", false }, + { "main_changes", "main_changes", true }, + { "main_does_not_change", "main_does_not_change", false }, + }, + { + { "main_might_sleep_with_subprogs", "might_sleep", true }, + { "main_might_sleep_with_subprogs", "does_not_sleep", false }, + { "main_might_sleep", "main_might_sleep", true }, + { "main_does_not_sleep", "main_does_not_sleep", false }, + }, + }; + const char *pkt_err = "Extension program changes packet data"; + const char *slp_err = "Extension program may sleep"; + struct { + const char *func; + bool has_side_effect; + const char *err_msg; + } replacements[2][2] = { + { + { "changes_pkt_data", true, pkt_err }, + { "does_not_change_pkt_data", false, pkt_err }, + }, + { + { "might_sleep", true, slp_err }, + { "does_not_sleep", false, slp_err }, + }, + }; + char buf[64]; + + for (int t = 0; t < 2; t++) { + for (int i = 0; i < ARRAY_SIZE(mains); ++i) { + for (int j = 0; j < ARRAY_SIZE(replacements); ++j) { + snprintf(buf, sizeof(buf), "%s_with_%s", + mains[t][i].to_be_replaced, replacements[t][j].func); + if (!test__start_subtest(buf)) + continue; + test_aux(mains[t][i].main, mains[t][i].to_be_replaced, replacements[t][j].func, + mains[t][i].has_side_effect || !replacements[t][j].has_side_effect, + replacements[t][j].err_msg); + } + } + } +} diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c index 544144620ca6..66a900327f91 100644 --- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c +++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c @@ -1600,6 +1600,7 @@ static void test_tailcall_bpf2bpf_freplace(void) goto out; err = bpf_link__destroy(freplace_link); + freplace_link = NULL; if (!ASSERT_OK(err, "destroy link")) goto out; diff --git a/tools/testing/selftests/bpf/prog_tests/tc_links.c b/tools/testing/selftests/bpf/prog_tests/tc_links.c index 1af9ec1149aa..2186a24e7d8a 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_links.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_links.c @@ -13,7 +13,7 @@ #include "netlink_helpers.h" #include "tc_helpers.h" -void serial_test_tc_links_basic(void) +void test_ns_tc_links_basic(void) { LIBBPF_OPTS(bpf_prog_query_opts, optq); LIBBPF_OPTS(bpf_tcx_opts, optl); @@ -260,7 +260,7 @@ cleanup: assert_mprog_count(target, 0); } -void serial_test_tc_links_before(void) +void test_ns_tc_links_before(void) { test_tc_links_before_target(BPF_TCX_INGRESS); test_tc_links_before_target(BPF_TCX_EGRESS); @@ -414,7 +414,7 @@ cleanup: assert_mprog_count(target, 0); } -void serial_test_tc_links_after(void) +void test_ns_tc_links_after(void) { test_tc_links_after_target(BPF_TCX_INGRESS); test_tc_links_after_target(BPF_TCX_EGRESS); @@ -514,7 +514,7 @@ cleanup: assert_mprog_count(target, 0); } -void serial_test_tc_links_revision(void) +void test_ns_tc_links_revision(void) { test_tc_links_revision_target(BPF_TCX_INGRESS); test_tc_links_revision_target(BPF_TCX_EGRESS); @@ -618,7 +618,7 @@ cleanup: assert_mprog_count(target, 0); } -void serial_test_tc_links_chain_classic(void) +void test_ns_tc_links_chain_classic(void) { test_tc_chain_classic(BPF_TCX_INGRESS, false); test_tc_chain_classic(BPF_TCX_EGRESS, false); @@ -846,7 +846,7 @@ cleanup: assert_mprog_count(target, 0); } -void serial_test_tc_links_replace(void) +void test_ns_tc_links_replace(void) { test_tc_links_replace_target(BPF_TCX_INGRESS); test_tc_links_replace_target(BPF_TCX_EGRESS); @@ -1158,7 +1158,7 @@ cleanup: assert_mprog_count(target, 0); } -void serial_test_tc_links_invalid(void) +void test_ns_tc_links_invalid(void) { test_tc_links_invalid_target(BPF_TCX_INGRESS); test_tc_links_invalid_target(BPF_TCX_EGRESS); @@ -1314,7 +1314,7 @@ cleanup: assert_mprog_count(target, 0); } -void serial_test_tc_links_prepend(void) +void test_ns_tc_links_prepend(void) { test_tc_links_prepend_target(BPF_TCX_INGRESS); test_tc_links_prepend_target(BPF_TCX_EGRESS); @@ -1470,7 +1470,7 @@ cleanup: assert_mprog_count(target, 0); } -void serial_test_tc_links_append(void) +void test_ns_tc_links_append(void) { test_tc_links_append_target(BPF_TCX_INGRESS); test_tc_links_append_target(BPF_TCX_EGRESS); @@ -1568,7 +1568,7 @@ cleanup: ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed"); } -void serial_test_tc_links_dev_cleanup(void) +void test_ns_tc_links_dev_cleanup(void) { test_tc_links_dev_cleanup_target(BPF_TCX_INGRESS); test_tc_links_dev_cleanup_target(BPF_TCX_EGRESS); @@ -1672,7 +1672,7 @@ cleanup: test_tc_link__destroy(skel); } -void serial_test_tc_links_chain_mixed(void) +void test_ns_tc_links_chain_mixed(void) { test_tc_chain_mixed(BPF_TCX_INGRESS); test_tc_chain_mixed(BPF_TCX_EGRESS); @@ -1782,7 +1782,7 @@ cleanup: assert_mprog_count(target, 0); } -void serial_test_tc_links_ingress(void) +void test_ns_tc_links_ingress(void) { test_tc_links_ingress(BPF_TCX_INGRESS, true, true); test_tc_links_ingress(BPF_TCX_INGRESS, true, false); @@ -1823,7 +1823,7 @@ static int qdisc_replace(int ifindex, const char *kind, bool block) return err; } -void serial_test_tc_links_dev_chain0(void) +void test_ns_tc_links_dev_chain0(void) { int err, ifindex; @@ -1955,7 +1955,7 @@ cleanup: ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed"); } -void serial_test_tc_links_dev_mixed(void) +void test_ns_tc_links_dev_mixed(void) { test_tc_links_dev_mixed(BPF_TCX_INGRESS); test_tc_links_dev_mixed(BPF_TCX_EGRESS); diff --git a/tools/testing/selftests/bpf/prog_tests/tc_opts.c b/tools/testing/selftests/bpf/prog_tests/tc_opts.c index f77f604389aa..dd7a138d8c3d 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_opts.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_opts.c @@ -10,7 +10,7 @@ #include "test_tc_link.skel.h" #include "tc_helpers.h" -void serial_test_tc_opts_basic(void) +void test_ns_tc_opts_basic(void) { LIBBPF_OPTS(bpf_prog_attach_opts, opta); LIBBPF_OPTS(bpf_prog_detach_opts, optd); @@ -254,7 +254,7 @@ cleanup: test_tc_link__destroy(skel); } -void serial_test_tc_opts_before(void) +void test_ns_tc_opts_before(void) { test_tc_opts_before_target(BPF_TCX_INGRESS); test_tc_opts_before_target(BPF_TCX_EGRESS); @@ -445,7 +445,7 @@ cleanup: test_tc_link__destroy(skel); } -void serial_test_tc_opts_after(void) +void test_ns_tc_opts_after(void) { test_tc_opts_after_target(BPF_TCX_INGRESS); test_tc_opts_after_target(BPF_TCX_EGRESS); @@ -554,7 +554,7 @@ cleanup: test_tc_link__destroy(skel); } -void serial_test_tc_opts_revision(void) +void test_ns_tc_opts_revision(void) { test_tc_opts_revision_target(BPF_TCX_INGRESS); test_tc_opts_revision_target(BPF_TCX_EGRESS); @@ -655,7 +655,7 @@ cleanup: assert_mprog_count(target, 0); } -void serial_test_tc_opts_chain_classic(void) +void test_ns_tc_opts_chain_classic(void) { test_tc_chain_classic(BPF_TCX_INGRESS, false); test_tc_chain_classic(BPF_TCX_EGRESS, false); @@ -864,7 +864,7 @@ cleanup: test_tc_link__destroy(skel); } -void serial_test_tc_opts_replace(void) +void test_ns_tc_opts_replace(void) { test_tc_opts_replace_target(BPF_TCX_INGRESS); test_tc_opts_replace_target(BPF_TCX_EGRESS); @@ -1017,7 +1017,7 @@ cleanup: test_tc_link__destroy(skel); } -void serial_test_tc_opts_invalid(void) +void test_ns_tc_opts_invalid(void) { test_tc_opts_invalid_target(BPF_TCX_INGRESS); test_tc_opts_invalid_target(BPF_TCX_EGRESS); @@ -1157,7 +1157,7 @@ cleanup: test_tc_link__destroy(skel); } -void serial_test_tc_opts_prepend(void) +void test_ns_tc_opts_prepend(void) { test_tc_opts_prepend_target(BPF_TCX_INGRESS); test_tc_opts_prepend_target(BPF_TCX_EGRESS); @@ -1297,7 +1297,7 @@ cleanup: test_tc_link__destroy(skel); } -void serial_test_tc_opts_append(void) +void test_ns_tc_opts_append(void) { test_tc_opts_append_target(BPF_TCX_INGRESS); test_tc_opts_append_target(BPF_TCX_EGRESS); @@ -1387,7 +1387,7 @@ cleanup: ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed"); } -void serial_test_tc_opts_dev_cleanup(void) +void test_ns_tc_opts_dev_cleanup(void) { test_tc_opts_dev_cleanup_target(BPF_TCX_INGRESS); test_tc_opts_dev_cleanup_target(BPF_TCX_EGRESS); @@ -1563,7 +1563,7 @@ cleanup: assert_mprog_count(target, 0); } -void serial_test_tc_opts_mixed(void) +void test_ns_tc_opts_mixed(void) { test_tc_opts_mixed_target(BPF_TCX_INGRESS); test_tc_opts_mixed_target(BPF_TCX_EGRESS); @@ -1642,7 +1642,7 @@ cleanup: assert_mprog_count(target, 0); } -void serial_test_tc_opts_demixed(void) +void test_ns_tc_opts_demixed(void) { test_tc_opts_demixed_target(BPF_TCX_INGRESS); test_tc_opts_demixed_target(BPF_TCX_EGRESS); @@ -1813,7 +1813,7 @@ cleanup: test_tc_link__destroy(skel); } -void serial_test_tc_opts_detach(void) +void test_ns_tc_opts_detach(void) { test_tc_opts_detach_target(BPF_TCX_INGRESS); test_tc_opts_detach_target(BPF_TCX_EGRESS); @@ -2020,7 +2020,7 @@ cleanup: test_tc_link__destroy(skel); } -void serial_test_tc_opts_detach_before(void) +void test_ns_tc_opts_detach_before(void) { test_tc_opts_detach_before_target(BPF_TCX_INGRESS); test_tc_opts_detach_before_target(BPF_TCX_EGRESS); @@ -2236,7 +2236,7 @@ cleanup: test_tc_link__destroy(skel); } -void serial_test_tc_opts_detach_after(void) +void test_ns_tc_opts_detach_after(void) { test_tc_opts_detach_after_target(BPF_TCX_INGRESS); test_tc_opts_detach_after_target(BPF_TCX_EGRESS); @@ -2265,7 +2265,7 @@ static void test_tc_opts_delete_empty(int target, bool chain_tc_old) assert_mprog_count(target, 0); } -void serial_test_tc_opts_delete_empty(void) +void test_ns_tc_opts_delete_empty(void) { test_tc_opts_delete_empty(BPF_TCX_INGRESS, false); test_tc_opts_delete_empty(BPF_TCX_EGRESS, false); @@ -2372,7 +2372,7 @@ cleanup: test_tc_link__destroy(skel); } -void serial_test_tc_opts_chain_mixed(void) +void test_ns_tc_opts_chain_mixed(void) { test_tc_chain_mixed(BPF_TCX_INGRESS); test_tc_chain_mixed(BPF_TCX_EGRESS); @@ -2446,7 +2446,7 @@ cleanup: ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed"); } -void serial_test_tc_opts_max(void) +void test_ns_tc_opts_max(void) { test_tc_opts_max_target(BPF_TCX_INGRESS, 0, false); test_tc_opts_max_target(BPF_TCX_EGRESS, 0, false); @@ -2748,7 +2748,7 @@ cleanup: test_tc_link__destroy(skel); } -void serial_test_tc_opts_query(void) +void test_ns_tc_opts_query(void) { test_tc_opts_query_target(BPF_TCX_INGRESS); test_tc_opts_query_target(BPF_TCX_EGRESS); @@ -2807,7 +2807,7 @@ cleanup: test_tc_link__destroy(skel); } -void serial_test_tc_opts_query_attach(void) +void test_ns_tc_opts_query_attach(void) { test_tc_opts_query_attach_target(BPF_TCX_INGRESS); test_tc_opts_query_attach_target(BPF_TCX_EGRESS); diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_kptr_return.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_kptr_return.c new file mode 100644 index 000000000000..467cc72a3588 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_kptr_return.c @@ -0,0 +1,16 @@ +#include <test_progs.h> + +#include "struct_ops_kptr_return.skel.h" +#include "struct_ops_kptr_return_fail__wrong_type.skel.h" +#include "struct_ops_kptr_return_fail__invalid_scalar.skel.h" +#include "struct_ops_kptr_return_fail__nonzero_offset.skel.h" +#include "struct_ops_kptr_return_fail__local_kptr.skel.h" + +void test_struct_ops_kptr_return(void) +{ + RUN_TESTS(struct_ops_kptr_return); + RUN_TESTS(struct_ops_kptr_return_fail__wrong_type); + RUN_TESTS(struct_ops_kptr_return_fail__invalid_scalar); + RUN_TESTS(struct_ops_kptr_return_fail__nonzero_offset); + RUN_TESTS(struct_ops_kptr_return_fail__local_kptr); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_refcounted.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_refcounted.c new file mode 100644 index 000000000000..da60c715fc59 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_refcounted.c @@ -0,0 +1,14 @@ +#include <test_progs.h> + +#include "struct_ops_refcounted.skel.h" +#include "struct_ops_refcounted_fail__ref_leak.skel.h" +#include "struct_ops_refcounted_fail__global_subprog.skel.h" +#include "struct_ops_refcounted_fail__tail_call.skel.h" + +void test_struct_ops_refcounted(void) +{ + RUN_TESTS(struct_ops_refcounted); + RUN_TESTS(struct_ops_refcounted_fail__ref_leak); + RUN_TESTS(struct_ops_refcounted_fail__global_subprog); + RUN_TESTS(struct_ops_refcounted_fail__tail_call); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c index cec746e77cd3..bae0e9de277d 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c +++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c @@ -71,6 +71,8 @@ #define IP4_ADDR2_VETH1 "172.16.1.20" #define IP4_ADDR_TUNL_DEV0 "10.1.1.100" #define IP4_ADDR_TUNL_DEV1 "10.1.1.200" +#define IP6_ADDR_TUNL_DEV0 "fc80::100" +#define IP6_ADDR_TUNL_DEV1 "fc80::200" #define IP6_ADDR_VETH0 "::11" #define IP6_ADDR1_VETH1 "::22" @@ -98,6 +100,27 @@ #define XFRM_SPI_IN_TO_OUT 0x1 #define XFRM_SPI_OUT_TO_IN 0x2 +#define GRE_TUNL_DEV0 "gre00" +#define GRE_TUNL_DEV1 "gre11" + +#define IP6GRE_TUNL_DEV0 "ip6gre00" +#define IP6GRE_TUNL_DEV1 "ip6gre11" + +#define ERSPAN_TUNL_DEV0 "erspan00" +#define ERSPAN_TUNL_DEV1 "erspan11" + +#define IP6ERSPAN_TUNL_DEV0 "ip6erspan00" +#define IP6ERSPAN_TUNL_DEV1 "ip6erspan11" + +#define GENEVE_TUNL_DEV0 "geneve00" +#define GENEVE_TUNL_DEV1 "geneve11" + +#define IP6GENEVE_TUNL_DEV0 "ip6geneve00" +#define IP6GENEVE_TUNL_DEV1 "ip6geneve11" + +#define IP6TNL_TUNL_DEV0 "ip6tnl00" +#define IP6TNL_TUNL_DEV1 "ip6tnl11" + #define PING_ARGS "-i 0.01 -c 3 -w 10 -q" static int config_device(void) @@ -216,6 +239,18 @@ fail: return -1; } +static int set_ipv4_addr(const char *dev0, const char *dev1) +{ + SYS(fail, "ip -n at_ns0 link set dev %s up", dev0); + SYS(fail, "ip -n at_ns0 addr add dev %s %s/24", dev0, IP4_ADDR_TUNL_DEV0); + SYS(fail, "ip link set dev %s up", dev1); + SYS(fail, "ip addr add dev %s %s/24", dev1, IP4_ADDR_TUNL_DEV1); + + return 0; +fail: + return 1; +} + static int add_ipip_tunnel(enum ipip_encap encap) { int err; @@ -356,6 +391,99 @@ static void delete_xfrm_tunnel(void) IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN); } +static int add_ipv4_tunnel(const char *dev0, const char *dev1, + const char *type, const char *opt) +{ + if (!type || !opt || !dev0 || !dev1) + return -1; + + SYS(fail, "ip -n at_ns0 link add dev %s type %s %s local %s remote %s", + dev0, type, opt, IP4_ADDR_VETH0, IP4_ADDR1_VETH1); + + SYS(fail, "ip link add dev %s type %s external", dev1, type); + + return set_ipv4_addr(dev0, dev1); +fail: + return -1; +} + +static void delete_tunnel(const char *dev0, const char *dev1) +{ + if (!dev0 || !dev1) + return; + + SYS_NOFAIL("ip netns exec at_ns0 ip link delete dev %s", dev0); + SYS_NOFAIL("ip link delete dev %s", dev1); +} + +static int set_ipv6_addr(const char *dev0, const char *dev1) +{ + /* disable IPv6 DAD because it might take too long and fail tests */ + SYS(fail, "ip -n at_ns0 addr add %s/96 dev veth0 nodad", IP6_ADDR_VETH0); + SYS(fail, "ip -n at_ns0 link set dev veth0 up"); + SYS(fail, "ip addr add %s/96 dev veth1 nodad", IP6_ADDR1_VETH1); + SYS(fail, "ip link set dev veth1 up"); + + SYS(fail, "ip -n at_ns0 addr add dev %s %s/24", dev0, IP4_ADDR_TUNL_DEV0); + SYS(fail, "ip -n at_ns0 addr add dev %s %s/96 nodad", dev0, IP6_ADDR_TUNL_DEV0); + SYS(fail, "ip -n at_ns0 link set dev %s up", dev0); + + SYS(fail, "ip addr add dev %s %s/24", dev1, IP4_ADDR_TUNL_DEV1); + SYS(fail, "ip addr add dev %s %s/96 nodad", dev1, IP6_ADDR_TUNL_DEV1); + SYS(fail, "ip link set dev %s up", dev1); + return 0; +fail: + return 1; +} + +static int add_ipv6_tunnel(const char *dev0, const char *dev1, + const char *type, const char *opt) +{ + if (!type || !opt || !dev0 || !dev1) + return -1; + + SYS(fail, "ip -n at_ns0 link add dev %s type %s %s local %s remote %s", + dev0, type, opt, IP6_ADDR_VETH0, IP6_ADDR1_VETH1); + + SYS(fail, "ip link add dev %s type %s external", dev1, type); + + return set_ipv6_addr(dev0, dev1); +fail: + return -1; +} + +static int add_geneve_tunnel(const char *dev0, const char *dev1, + const char *type, const char *opt) +{ + if (!type || !opt || !dev0 || !dev1) + return -1; + + SYS(fail, "ip -n at_ns0 link add dev %s type %s id 2 %s remote %s", + dev0, type, opt, IP4_ADDR1_VETH1); + + SYS(fail, "ip link add dev %s type %s %s external", dev1, type, opt); + + return set_ipv4_addr(dev0, dev1); +fail: + return -1; +} + +static int add_ip6geneve_tunnel(const char *dev0, const char *dev1, + const char *type, const char *opt) +{ + if (!type || !opt || !dev0 || !dev1) + return -1; + + SYS(fail, "ip -n at_ns0 link add dev %s type %s id 22 %s remote %s", + dev0, type, opt, IP6_ADDR1_VETH1); + + SYS(fail, "ip link add dev %s type %s %s external", dev1, type, opt); + + return set_ipv6_addr(dev0, dev1); +fail: + return -1; +} + static int test_ping(int family, const char *addr) { SYS(fail, "%s %s %s > /dev/null", ping_command(family), PING_ARGS, addr); @@ -364,32 +492,76 @@ fail: return -1; } -static int attach_tc_prog(struct bpf_tc_hook *hook, int igr_fd, int egr_fd) +static void ping_dev0(void) { + /* ping from root namespace test */ + test_ping(AF_INET, IP4_ADDR_TUNL_DEV0); +} + +static void ping_dev1(void) +{ + struct nstoken *nstoken; + + /* ping from at_ns0 namespace test */ + nstoken = open_netns("at_ns0"); + if (!ASSERT_OK_PTR(nstoken, "setns")) + return; + + test_ping(AF_INET, IP4_ADDR_TUNL_DEV1); + close_netns(nstoken); +} + +static void ping6_veth0(void) +{ + test_ping(AF_INET6, IP6_ADDR_VETH0); +} + +static void ping6_dev0(void) +{ + test_ping(AF_INET6, IP6_ADDR_TUNL_DEV0); +} + +static void ping6_dev1(void) +{ + struct nstoken *nstoken; + + /* ping from at_ns0 namespace test */ + nstoken = open_netns("at_ns0"); + if (!ASSERT_OK_PTR(nstoken, "setns")) + return; + + test_ping(AF_INET, IP6_ADDR_TUNL_DEV1); + close_netns(nstoken); +} + +static int attach_tc_prog(int ifindex, int igr_fd, int egr_fd) +{ + DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = ifindex, + .attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS); DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1, .priority = 1, .prog_fd = igr_fd); DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts2, .handle = 1, .priority = 1, .prog_fd = egr_fd); int ret; - ret = bpf_tc_hook_create(hook); + ret = bpf_tc_hook_create(&hook); if (!ASSERT_OK(ret, "create tc hook")) return ret; if (igr_fd >= 0) { - hook->attach_point = BPF_TC_INGRESS; - ret = bpf_tc_attach(hook, &opts1); + hook.attach_point = BPF_TC_INGRESS; + ret = bpf_tc_attach(&hook, &opts1); if (!ASSERT_OK(ret, "bpf_tc_attach")) { - bpf_tc_hook_destroy(hook); + bpf_tc_hook_destroy(&hook); return ret; } } if (egr_fd >= 0) { - hook->attach_point = BPF_TC_EGRESS; - ret = bpf_tc_attach(hook, &opts2); + hook.attach_point = BPF_TC_EGRESS; + ret = bpf_tc_attach(&hook, &opts2); if (!ASSERT_OK(ret, "bpf_tc_attach")) { - bpf_tc_hook_destroy(hook); + bpf_tc_hook_destroy(&hook); return ret; } } @@ -397,6 +569,50 @@ static int attach_tc_prog(struct bpf_tc_hook *hook, int igr_fd, int egr_fd) return 0; } +static int generic_attach(const char *dev, int igr_fd, int egr_fd) +{ + int ifindex; + + if (!ASSERT_OK_FD(igr_fd, "check ingress fd")) + return -1; + if (!ASSERT_OK_FD(egr_fd, "check egress fd")) + return -1; + + ifindex = if_nametoindex(dev); + if (!ASSERT_NEQ(ifindex, 0, "get ifindex")) + return -1; + + return attach_tc_prog(ifindex, igr_fd, egr_fd); +} + +static int generic_attach_igr(const char *dev, int igr_fd) +{ + int ifindex; + + if (!ASSERT_OK_FD(igr_fd, "check ingress fd")) + return -1; + + ifindex = if_nametoindex(dev); + if (!ASSERT_NEQ(ifindex, 0, "get ifindex")) + return -1; + + return attach_tc_prog(ifindex, igr_fd, -1); +} + +static int generic_attach_egr(const char *dev, int egr_fd) +{ + int ifindex; + + if (!ASSERT_OK_FD(egr_fd, "check egress fd")) + return -1; + + ifindex = if_nametoindex(dev); + if (!ASSERT_NEQ(ifindex, 0, "get ifindex")) + return -1; + + return attach_tc_prog(ifindex, -1, egr_fd); +} + static void test_vxlan_tunnel(void) { struct test_tunnel_kern *skel = NULL; @@ -404,11 +620,9 @@ static void test_vxlan_tunnel(void) int local_ip_map_fd = -1; int set_src_prog_fd, get_src_prog_fd; int set_dst_prog_fd; - int key = 0, ifindex = -1; + int key = 0; uint local_ip; int err; - DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook, - .attach_point = BPF_TC_INGRESS); /* add vxlan tunnel */ err = add_vxlan_tunnel(); @@ -419,42 +633,22 @@ static void test_vxlan_tunnel(void) skel = test_tunnel_kern__open_and_load(); if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) goto done; - ifindex = if_nametoindex(VXLAN_TUNL_DEV1); - if (!ASSERT_NEQ(ifindex, 0, "vxlan11 ifindex")) - goto done; - tc_hook.ifindex = ifindex; get_src_prog_fd = bpf_program__fd(skel->progs.vxlan_get_tunnel_src); set_src_prog_fd = bpf_program__fd(skel->progs.vxlan_set_tunnel_src); - if (!ASSERT_GE(get_src_prog_fd, 0, "bpf_program__fd")) - goto done; - if (!ASSERT_GE(set_src_prog_fd, 0, "bpf_program__fd")) - goto done; - if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd)) + if (generic_attach(VXLAN_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd)) goto done; /* load and attach bpf prog to veth dev tc hook point */ - ifindex = if_nametoindex("veth1"); - if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex")) - goto done; - tc_hook.ifindex = ifindex; set_dst_prog_fd = bpf_program__fd(skel->progs.veth_set_outer_dst); - if (!ASSERT_GE(set_dst_prog_fd, 0, "bpf_program__fd")) - goto done; - if (attach_tc_prog(&tc_hook, set_dst_prog_fd, -1)) + if (generic_attach_igr("veth1", set_dst_prog_fd)) goto done; /* load and attach prog set_md to tunnel dev tc hook point at_ns0 */ nstoken = open_netns("at_ns0"); if (!ASSERT_OK_PTR(nstoken, "setns src")) goto done; - ifindex = if_nametoindex(VXLAN_TUNL_DEV0); - if (!ASSERT_NEQ(ifindex, 0, "vxlan00 ifindex")) - goto done; - tc_hook.ifindex = ifindex; set_dst_prog_fd = bpf_program__fd(skel->progs.vxlan_set_tunnel_dst); - if (!ASSERT_GE(set_dst_prog_fd, 0, "bpf_program__fd")) - goto done; - if (attach_tc_prog(&tc_hook, -1, set_dst_prog_fd)) + if (generic_attach_egr(VXLAN_TUNL_DEV0, set_dst_prog_fd)) goto done; close_netns(nstoken); @@ -468,9 +662,7 @@ static void test_vxlan_tunnel(void) goto done; /* ping test */ - err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV0); - if (!ASSERT_OK(err, "test_ping")) - goto done; + ping_dev0(); done: /* delete vxlan tunnel */ @@ -488,11 +680,9 @@ static void test_ip6vxlan_tunnel(void) int local_ip_map_fd = -1; int set_src_prog_fd, get_src_prog_fd; int set_dst_prog_fd; - int key = 0, ifindex = -1; + int key = 0; uint local_ip; int err; - DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook, - .attach_point = BPF_TC_INGRESS); /* add vxlan tunnel */ err = add_ip6vxlan_tunnel(); @@ -503,31 +693,17 @@ static void test_ip6vxlan_tunnel(void) skel = test_tunnel_kern__open_and_load(); if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) goto done; - ifindex = if_nametoindex(IP6VXLAN_TUNL_DEV1); - if (!ASSERT_NEQ(ifindex, 0, "ip6vxlan11 ifindex")) - goto done; - tc_hook.ifindex = ifindex; get_src_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_get_tunnel_src); set_src_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_set_tunnel_src); - if (!ASSERT_GE(set_src_prog_fd, 0, "bpf_program__fd")) - goto done; - if (!ASSERT_GE(get_src_prog_fd, 0, "bpf_program__fd")) - goto done; - if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd)) + if (generic_attach(IP6VXLAN_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd)) goto done; /* load and attach prog set_md to tunnel dev tc hook point at_ns0 */ nstoken = open_netns("at_ns0"); if (!ASSERT_OK_PTR(nstoken, "setns src")) goto done; - ifindex = if_nametoindex(IP6VXLAN_TUNL_DEV0); - if (!ASSERT_NEQ(ifindex, 0, "ip6vxlan00 ifindex")) - goto done; - tc_hook.ifindex = ifindex; set_dst_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_set_tunnel_dst); - if (!ASSERT_GE(set_dst_prog_fd, 0, "bpf_program__fd")) - goto done; - if (attach_tc_prog(&tc_hook, -1, set_dst_prog_fd)) + if (generic_attach_egr(IP6VXLAN_TUNL_DEV0, set_dst_prog_fd)) goto done; close_netns(nstoken); @@ -541,9 +717,7 @@ static void test_ip6vxlan_tunnel(void) goto done; /* ping test */ - err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV0); - if (!ASSERT_OK(err, "test_ping")) - goto done; + ping_dev0(); done: /* delete ipv6 vxlan tunnel */ @@ -557,12 +731,8 @@ done: static void test_ipip_tunnel(enum ipip_encap encap) { struct test_tunnel_kern *skel = NULL; - struct nstoken *nstoken; int set_src_prog_fd, get_src_prog_fd; - int ifindex = -1; int err; - DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook, - .attach_point = BPF_TC_INGRESS); /* add ipip tunnel */ err = add_ipip_tunnel(encap); @@ -573,10 +743,6 @@ static void test_ipip_tunnel(enum ipip_encap encap) skel = test_tunnel_kern__open_and_load(); if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) goto done; - ifindex = if_nametoindex(IPIP_TUNL_DEV1); - if (!ASSERT_NEQ(ifindex, 0, "ipip11 ifindex")) - goto done; - tc_hook.ifindex = ifindex; switch (encap) { case FOU: @@ -598,26 +764,11 @@ static void test_ipip_tunnel(enum ipip_encap encap) skel->progs.ipip_set_tunnel); } - if (!ASSERT_GE(set_src_prog_fd, 0, "bpf_program__fd")) - goto done; - if (!ASSERT_GE(get_src_prog_fd, 0, "bpf_program__fd")) - goto done; - if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd)) + if (generic_attach(IPIP_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd)) goto done; - /* ping from root namespace test */ - err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV0); - if (!ASSERT_OK(err, "test_ping")) - goto done; - - /* ping from at_ns0 namespace test */ - nstoken = open_netns("at_ns0"); - if (!ASSERT_OK_PTR(nstoken, "setns")) - goto done; - err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV1); - if (!ASSERT_OK(err, "test_ping")) - goto done; - close_netns(nstoken); + ping_dev0(); + ping_dev1(); done: /* delete ipip tunnel */ @@ -628,11 +779,8 @@ done: static void test_xfrm_tunnel(void) { - DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook, - .attach_point = BPF_TC_INGRESS); LIBBPF_OPTS(bpf_xdp_attach_opts, opts); struct test_tunnel_kern *skel = NULL; - struct nstoken *nstoken; int xdp_prog_fd; int tc_prog_fd; int ifindex; @@ -646,19 +794,16 @@ static void test_xfrm_tunnel(void) if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) goto done; - ifindex = if_nametoindex("veth1"); - if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex")) - goto done; /* attach tc prog to tunnel dev */ - tc_hook.ifindex = ifindex; tc_prog_fd = bpf_program__fd(skel->progs.xfrm_get_state); - if (!ASSERT_GE(tc_prog_fd, 0, "bpf_program__fd")) - goto done; - if (attach_tc_prog(&tc_hook, tc_prog_fd, -1)) + if (generic_attach_igr("veth1", tc_prog_fd)) goto done; /* attach xdp prog to tunnel dev */ + ifindex = if_nametoindex("veth1"); + if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex")) + goto done; xdp_prog_fd = bpf_program__fd(skel->progs.xfrm_get_state_xdp); if (!ASSERT_GE(xdp_prog_fd, 0, "bpf_program__fd")) goto done; @@ -666,14 +811,7 @@ static void test_xfrm_tunnel(void) if (!ASSERT_OK(err, "bpf_xdp_attach")) goto done; - /* ping from at_ns0 namespace test */ - nstoken = open_netns("at_ns0"); - if (!ASSERT_OK_PTR(nstoken, "setns")) - goto done; - err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV1); - close_netns(nstoken); - if (!ASSERT_OK(err, "test_ping")) - goto done; + ping_dev1(); if (!ASSERT_EQ(skel->bss->xfrm_reqid, 1, "req_id")) goto done; @@ -690,6 +828,281 @@ done: test_tunnel_kern__destroy(skel); } +enum gre_test { + GRE, + GRE_NOKEY, + GRETAP, + GRETAP_NOKEY, +}; + +static void test_gre_tunnel(enum gre_test test) +{ + struct test_tunnel_kern *skel; + int set_fd, get_fd; + int err; + + skel = test_tunnel_kern__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) + return; + + switch (test) { + case GRE: + err = add_ipv4_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1, "gre", "seq"); + set_fd = bpf_program__fd(skel->progs.gre_set_tunnel_no_key); + get_fd = bpf_program__fd(skel->progs.gre_get_tunnel); + break; + case GRE_NOKEY: + err = add_ipv4_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1, "gre", "seq key 2"); + set_fd = bpf_program__fd(skel->progs.gre_set_tunnel); + get_fd = bpf_program__fd(skel->progs.gre_get_tunnel); + break; + case GRETAP: + err = add_ipv4_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1, "gretap", "seq"); + set_fd = bpf_program__fd(skel->progs.gre_set_tunnel_no_key); + get_fd = bpf_program__fd(skel->progs.gre_get_tunnel); + break; + case GRETAP_NOKEY: + err = add_ipv4_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1, "gretap", "seq key 2"); + set_fd = bpf_program__fd(skel->progs.gre_set_tunnel); + get_fd = bpf_program__fd(skel->progs.gre_get_tunnel); + break; + } + if (!ASSERT_OK(err, "add tunnel")) + goto done; + + if (generic_attach(GRE_TUNL_DEV1, get_fd, set_fd)) + goto done; + + ping_dev0(); + ping_dev1(); + +done: + delete_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1); + test_tunnel_kern__destroy(skel); +} + +enum ip6gre_test { + IP6GRE, + IP6GRETAP +}; + +static void test_ip6gre_tunnel(enum ip6gre_test test) +{ + struct test_tunnel_kern *skel; + int set_fd, get_fd; + int err; + + skel = test_tunnel_kern__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) + return; + + switch (test) { + case IP6GRE: + err = add_ipv6_tunnel(IP6GRE_TUNL_DEV0, IP6GRE_TUNL_DEV1, + "ip6gre", "flowlabel 0xbcdef key 2"); + break; + case IP6GRETAP: + err = add_ipv6_tunnel(IP6GRE_TUNL_DEV0, IP6GRE_TUNL_DEV1, + "ip6gretap", "flowlabel 0xbcdef key 2"); + break; + } + if (!ASSERT_OK(err, "add tunnel")) + goto done; + + set_fd = bpf_program__fd(skel->progs.ip6gretap_set_tunnel); + get_fd = bpf_program__fd(skel->progs.ip6gretap_get_tunnel); + if (generic_attach(IP6GRE_TUNL_DEV1, get_fd, set_fd)) + goto done; + + ping6_veth0(); + ping6_dev1(); + ping_dev0(); + ping_dev1(); +done: + delete_tunnel(IP6GRE_TUNL_DEV0, IP6GRE_TUNL_DEV1); + test_tunnel_kern__destroy(skel); +} + +enum erspan_test { + V1, + V2 +}; + +static void test_erspan_tunnel(enum erspan_test test) +{ + struct test_tunnel_kern *skel; + int set_fd, get_fd; + int err; + + skel = test_tunnel_kern__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) + return; + + switch (test) { + case V1: + err = add_ipv4_tunnel(ERSPAN_TUNL_DEV0, ERSPAN_TUNL_DEV1, + "erspan", "seq key 2 erspan_ver 1 erspan 123"); + break; + case V2: + err = add_ipv4_tunnel(ERSPAN_TUNL_DEV0, ERSPAN_TUNL_DEV1, + "erspan", + "seq key 2 erspan_ver 2 erspan_dir egress erspan_hwid 3"); + break; + } + if (!ASSERT_OK(err, "add tunnel")) + goto done; + + set_fd = bpf_program__fd(skel->progs.erspan_set_tunnel); + get_fd = bpf_program__fd(skel->progs.erspan_get_tunnel); + if (generic_attach(ERSPAN_TUNL_DEV1, get_fd, set_fd)) + goto done; + + ping_dev0(); + ping_dev1(); +done: + delete_tunnel(ERSPAN_TUNL_DEV0, ERSPAN_TUNL_DEV1); + test_tunnel_kern__destroy(skel); +} + +static void test_ip6erspan_tunnel(enum erspan_test test) +{ + struct test_tunnel_kern *skel; + int set_fd, get_fd; + int err; + + skel = test_tunnel_kern__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) + return; + + switch (test) { + case V1: + err = add_ipv6_tunnel(IP6ERSPAN_TUNL_DEV0, IP6ERSPAN_TUNL_DEV1, + "ip6erspan", "seq key 2 erspan_ver 1 erspan 123"); + break; + case V2: + err = add_ipv6_tunnel(IP6ERSPAN_TUNL_DEV0, IP6ERSPAN_TUNL_DEV1, + "ip6erspan", + "seq key 2 erspan_ver 2 erspan_dir egress erspan_hwid 7"); + break; + } + if (!ASSERT_OK(err, "add tunnel")) + goto done; + + set_fd = bpf_program__fd(skel->progs.ip4ip6erspan_set_tunnel); + get_fd = bpf_program__fd(skel->progs.ip4ip6erspan_get_tunnel); + if (generic_attach(IP6ERSPAN_TUNL_DEV1, get_fd, set_fd)) + goto done; + + ping6_veth0(); + ping_dev1(); +done: + delete_tunnel(IP6ERSPAN_TUNL_DEV0, IP6ERSPAN_TUNL_DEV1); + test_tunnel_kern__destroy(skel); +} + +static void test_geneve_tunnel(void) +{ + struct test_tunnel_kern *skel; + int set_fd, get_fd; + int err; + + skel = test_tunnel_kern__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) + return; + + err = add_geneve_tunnel(GENEVE_TUNL_DEV0, GENEVE_TUNL_DEV1, + "geneve", "dstport 6081"); + if (!ASSERT_OK(err, "add tunnel")) + goto done; + + set_fd = bpf_program__fd(skel->progs.geneve_set_tunnel); + get_fd = bpf_program__fd(skel->progs.geneve_get_tunnel); + if (generic_attach(GENEVE_TUNL_DEV1, get_fd, set_fd)) + goto done; + + ping_dev0(); + ping_dev1(); +done: + delete_tunnel(GENEVE_TUNL_DEV0, GENEVE_TUNL_DEV1); + test_tunnel_kern__destroy(skel); +} + +static void test_ip6geneve_tunnel(void) +{ + struct test_tunnel_kern *skel; + int set_fd, get_fd; + int err; + + skel = test_tunnel_kern__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) + return; + + err = add_ip6geneve_tunnel(IP6GENEVE_TUNL_DEV0, IP6GENEVE_TUNL_DEV1, + "geneve", ""); + if (!ASSERT_OK(err, "add tunnel")) + goto done; + + set_fd = bpf_program__fd(skel->progs.ip6geneve_set_tunnel); + get_fd = bpf_program__fd(skel->progs.ip6geneve_get_tunnel); + if (generic_attach(IP6GENEVE_TUNL_DEV1, get_fd, set_fd)) + goto done; + + ping_dev0(); + ping_dev1(); +done: + delete_tunnel(IP6GENEVE_TUNL_DEV0, IP6GENEVE_TUNL_DEV1); + test_tunnel_kern__destroy(skel); +} + +enum ip6tnl_test { + IPIP6, + IP6IP6 +}; + +static void test_ip6tnl_tunnel(enum ip6tnl_test test) +{ + struct test_tunnel_kern *skel; + int set_fd, get_fd; + int err; + + skel = test_tunnel_kern__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) + return; + + err = add_ipv6_tunnel(IP6TNL_TUNL_DEV0, IP6TNL_TUNL_DEV1, "ip6tnl", ""); + if (!ASSERT_OK(err, "add tunnel")) + goto done; + + switch (test) { + case IPIP6: + set_fd = bpf_program__fd(skel->progs.ipip6_set_tunnel); + get_fd = bpf_program__fd(skel->progs.ipip6_get_tunnel); + break; + case IP6IP6: + set_fd = bpf_program__fd(skel->progs.ip6ip6_set_tunnel); + get_fd = bpf_program__fd(skel->progs.ip6ip6_get_tunnel); + break; + } + if (generic_attach(IP6TNL_TUNL_DEV1, get_fd, set_fd)) + goto done; + + ping6_veth0(); + switch (test) { + case IPIP6: + ping_dev0(); + ping_dev1(); + break; + case IP6IP6: + ping6_dev0(); + ping6_dev1(); + break; + } + +done: + delete_tunnel(IP6TNL_TUNL_DEV0, IP6TNL_TUNL_DEV1); + test_tunnel_kern__destroy(skel); +} + #define RUN_TEST(name, ...) \ ({ \ if (test__start_subtest(#name)) { \ @@ -707,6 +1120,20 @@ static void *test_tunnel_run_tests(void *arg) RUN_TEST(ipip_tunnel, FOU); RUN_TEST(ipip_tunnel, GUE); RUN_TEST(xfrm_tunnel); + RUN_TEST(gre_tunnel, GRE); + RUN_TEST(gre_tunnel, GRE_NOKEY); + RUN_TEST(gre_tunnel, GRETAP); + RUN_TEST(gre_tunnel, GRETAP_NOKEY); + RUN_TEST(ip6gre_tunnel, IP6GRE); + RUN_TEST(ip6gre_tunnel, IP6GRETAP); + RUN_TEST(erspan_tunnel, V1); + RUN_TEST(erspan_tunnel, V2); + RUN_TEST(ip6erspan_tunnel, V1); + RUN_TEST(ip6erspan_tunnel, V2); + RUN_TEST(geneve_tunnel); + RUN_TEST(ip6geneve_tunnel); + RUN_TEST(ip6tnl_tunnel, IPIP6); + RUN_TEST(ip6tnl_tunnel, IP6IP6); return NULL; } diff --git a/tools/testing/selftests/bpf/prog_tests/test_veristat.c b/tools/testing/selftests/bpf/prog_tests/test_veristat.c new file mode 100644 index 000000000000..a95b42bf744a --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_veristat.c @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include <test_progs.h> +#include <string.h> +#include <stdio.h> + +#define __CHECK_STR(str, name) \ + do { \ + if (!ASSERT_HAS_SUBSTR(fix->output, (str), (name))) \ + goto out; \ + } while (0) + +struct fixture { + char tmpfile[80]; + int fd; + char *output; + size_t sz; + char veristat[80]; +}; + +static struct fixture *init_fixture(void) +{ + struct fixture *fix = malloc(sizeof(struct fixture)); + + /* for no_alu32 and cpuv4 veristat is in parent folder */ + if (access("./veristat", F_OK) == 0) + strcpy(fix->veristat, "./veristat"); + else if (access("../veristat", F_OK) == 0) + strcpy(fix->veristat, "../veristat"); + else + PRINT_FAIL("Can't find veristat binary"); + + snprintf(fix->tmpfile, sizeof(fix->tmpfile), "/tmp/test_veristat.XXXXXX"); + fix->fd = mkstemp(fix->tmpfile); + fix->sz = 1000000; + fix->output = malloc(fix->sz); + return fix; +} + +static void teardown_fixture(struct fixture *fix) +{ + free(fix->output); + close(fix->fd); + remove(fix->tmpfile); + free(fix); +} + +static void test_set_global_vars_succeeds(void) +{ + struct fixture *fix = init_fixture(); + + SYS(out, + "%s set_global_vars.bpf.o"\ + " -G \"var_s64 = 0xf000000000000001\" "\ + " -G \"var_u64 = 0xfedcba9876543210\" "\ + " -G \"var_s32 = -0x80000000\" "\ + " -G \"var_u32 = 0x76543210\" "\ + " -G \"var_s16 = -32768\" "\ + " -G \"var_u16 = 60652\" "\ + " -G \"var_s8 = -128\" "\ + " -G \"var_u8 = 255\" "\ + " -G \"var_ea = EA2\" "\ + " -G \"var_eb = EB2\" "\ + " -G \"var_ec = EC2\" "\ + " -G \"var_b = 1\" "\ + "-vl2 > %s", fix->veristat, fix->tmpfile); + + read(fix->fd, fix->output, fix->sz); + __CHECK_STR("_w=0xf000000000000001 ", "var_s64 = 0xf000000000000001"); + __CHECK_STR("_w=0xfedcba9876543210 ", "var_u64 = 0xfedcba9876543210"); + __CHECK_STR("_w=0x80000000 ", "var_s32 = -0x80000000"); + __CHECK_STR("_w=0x76543210 ", "var_u32 = 0x76543210"); + __CHECK_STR("_w=0x8000 ", "var_s16 = -32768"); + __CHECK_STR("_w=0xecec ", "var_u16 = 60652"); + __CHECK_STR("_w=128 ", "var_s8 = -128"); + __CHECK_STR("_w=255 ", "var_u8 = 255"); + __CHECK_STR("_w=11 ", "var_ea = EA2"); + __CHECK_STR("_w=12 ", "var_eb = EB2"); + __CHECK_STR("_w=13 ", "var_ec = EC2"); + __CHECK_STR("_w=1 ", "var_b = 1"); + +out: + teardown_fixture(fix); +} + +static void test_set_global_vars_from_file_succeeds(void) +{ + struct fixture *fix = init_fixture(); + char input_file[80]; + const char *vars = "var_s16 = -32768\nvar_u16 = 60652"; + int fd; + + snprintf(input_file, sizeof(input_file), "/tmp/veristat_input.XXXXXX"); + fd = mkstemp(input_file); + if (!ASSERT_GE(fd, 0, "valid fd")) + goto out; + + write(fd, vars, strlen(vars)); + syncfs(fd); + SYS(out, "%s set_global_vars.bpf.o -G \"@%s\" -vl2 > %s", + fix->veristat, input_file, fix->tmpfile); + read(fix->fd, fix->output, fix->sz); + __CHECK_STR("_w=0x8000 ", "var_s16 = -32768"); + __CHECK_STR("_w=0xecec ", "var_u16 = 60652"); + +out: + close(fd); + remove(input_file); + teardown_fixture(fix); +} + +static void test_set_global_vars_out_of_range(void) +{ + struct fixture *fix = init_fixture(); + + SYS_FAIL(out, + "%s set_global_vars.bpf.o -G \"var_s32 = 2147483648\" -vl2 2> %s", + fix->veristat, fix->tmpfile); + + read(fix->fd, fix->output, fix->sz); + __CHECK_STR("is out of range [-2147483648; 2147483647]", "out of range"); + +out: + teardown_fixture(fix); +} + +void test_veristat(void) +{ + if (test__start_subtest("set_global_vars_succeeds")) + test_set_global_vars_succeeds(); + + if (test__start_subtest("set_global_vars_out_of_range")) + test_set_global_vars_out_of_range(); + + if (test__start_subtest("set_global_vars_from_file_succeeds")) + test_set_global_vars_from_file_succeeds(); +} + +#undef __CHECK_STR diff --git a/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c b/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c index 8d75424fe6bc..3e98a1665936 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c +++ b/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c @@ -3,17 +3,50 @@ /* Create 3 namespaces with 3 veth peers, and forward packets in-between using * native XDP * - * XDP_TX - * NS1(veth11) NS2(veth22) NS3(veth33) - * | | | - * | | | - * (veth1, (veth2, (veth3, - * id:111) id:122) id:133) - * ^ | ^ | ^ | - * | | XDP_REDIRECT | | XDP_REDIRECT | | - * | ------------------ ------------------ | - * ----------------------------------------- - * XDP_REDIRECT + * Network topology: + * ---------- ---------- ---------- + * | NS1 | | NS2 | | NS3 | + * | veth11 | | veth22 | | veth33 | + * ----|----- -----|---- -----|---- + * | | | + * ----|------------------|----------------|---- + * | veth1 veth2 veth3 | + * | | + * | NSO | + * --------------------------------------------- + * + * Test cases: + * - [test_xdp_veth_redirect] : ping veth33 from veth11 + * + * veth11 veth22 veth33 + * (XDP_PASS) (XDP_TX) (XDP_PASS) + * | | | + * | | | + * veth1 veth2 veth3 + * (XDP_REDIRECT) (XDP_REDIRECT) (XDP_REDIRECT) + * ^ | ^ | ^ | + * | | | | | | + * | ------------------ ------------------ | + * ----------------------------------------- + * + * - [test_xdp_veth_broadcast_redirect]: broadcast from veth11 + * - IPv4 ping : BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS + * -> echo request received by all except veth11 + * - IPv4 ping : BPF_F_BROADCAST + * -> echo request received by all veth + * - [test_xdp_veth_egress]: + * - all src mac should be the magic mac + * + * veth11 veth22 veth33 + * (XDP_PASS) (XDP_PASS) (XDP_PASS) + * | | | + * | | | + * veth1 veth2 veth3 + * (XDP_REDIRECT) (XDP_REDIRECT) (XDP_REDIRECT) + * | ^ ^ + * | | | + * ---------------------------------------- + * */ #define _GNU_SOURCE @@ -22,192 +55,545 @@ #include "network_helpers.h" #include "xdp_dummy.skel.h" #include "xdp_redirect_map.skel.h" +#include "xdp_redirect_multi_kern.skel.h" #include "xdp_tx.skel.h" +#include <uapi/linux/if_link.h> #define VETH_PAIRS_COUNT 3 -#define NS_SUFFIX_LEN 6 -#define VETH_NAME_MAX_LEN 16 +#define VETH_NAME_MAX_LEN 32 +#define IP_MAX_LEN 16 #define IP_SRC "10.1.1.11" #define IP_DST "10.1.1.33" -#define IP_CMD_MAX_LEN 128 - -struct skeletons { - struct xdp_dummy *xdp_dummy; - struct xdp_tx *xdp_tx; - struct xdp_redirect_map *xdp_redirect_maps; -}; +#define IP_NEIGH "10.1.1.253" +#define PROG_NAME_MAX_LEN 128 +#define NS_NAME_MAX_LEN 32 struct veth_configuration { char local_veth[VETH_NAME_MAX_LEN]; /* Interface in main namespace */ char remote_veth[VETH_NAME_MAX_LEN]; /* Peer interface in dedicated namespace*/ - const char *namespace; /* Namespace for the remote veth */ - char next_veth[VETH_NAME_MAX_LEN]; /* Local interface to redirect traffic to */ - char *remote_addr; /* IP address of the remote veth */ + char namespace[NS_NAME_MAX_LEN]; /* Namespace for the remote veth */ + int next_veth; /* Local interface to redirect traffic to */ + char remote_addr[IP_MAX_LEN]; /* IP address of the remote veth */ }; -static struct veth_configuration config[VETH_PAIRS_COUNT] = { - { - .local_veth = "veth1", - .remote_veth = "veth11", - .next_veth = "veth2", - .remote_addr = IP_SRC, - .namespace = "ns-veth11" - }, - { - .local_veth = "veth2", - .remote_veth = "veth22", - .next_veth = "veth3", - .remote_addr = NULL, - .namespace = "ns-veth22" - }, +struct net_configuration { + char ns0_name[NS_NAME_MAX_LEN]; + struct veth_configuration veth_cfg[VETH_PAIRS_COUNT]; +}; + +static const struct net_configuration default_config = { + .ns0_name = "ns0-", { - .local_veth = "veth3", - .remote_veth = "veth33", - .next_veth = "veth1", - .remote_addr = IP_DST, - .namespace = "ns-veth33" + { + .local_veth = "veth1-", + .remote_veth = "veth11", + .next_veth = 1, + .remote_addr = IP_SRC, + .namespace = "ns-veth11-" + }, + { + .local_veth = "veth2-", + .remote_veth = "veth22", + .next_veth = 2, + .remote_addr = "", + .namespace = "ns-veth22-" + }, + { + .local_veth = "veth3-", + .remote_veth = "veth33", + .next_veth = 0, + .remote_addr = IP_DST, + .namespace = "ns-veth33-" + } } }; -static int attach_programs_to_veth_pair(struct skeletons *skeletons, int index) +struct prog_configuration { + char local_name[PROG_NAME_MAX_LEN]; /* BPF prog to attach to local_veth */ + char remote_name[PROG_NAME_MAX_LEN]; /* BPF prog to attach to remote_veth */ + u32 local_flags; /* XDP flags to use on local_veth */ + u32 remote_flags; /* XDP flags to use on remote_veth */ +}; + +static int attach_programs_to_veth_pair(struct bpf_object **objs, size_t nb_obj, + struct net_configuration *net_config, + struct prog_configuration *prog, int index) { struct bpf_program *local_prog, *remote_prog; - struct bpf_link **local_link, **remote_link; struct nstoken *nstoken; - struct bpf_link *link; - int interface; - - switch (index) { - case 0: - local_prog = skeletons->xdp_redirect_maps->progs.xdp_redirect_map_0; - local_link = &skeletons->xdp_redirect_maps->links.xdp_redirect_map_0; - remote_prog = skeletons->xdp_dummy->progs.xdp_dummy_prog; - remote_link = &skeletons->xdp_dummy->links.xdp_dummy_prog; - break; - case 1: - local_prog = skeletons->xdp_redirect_maps->progs.xdp_redirect_map_1; - local_link = &skeletons->xdp_redirect_maps->links.xdp_redirect_map_1; - remote_prog = skeletons->xdp_tx->progs.xdp_tx; - remote_link = &skeletons->xdp_tx->links.xdp_tx; - break; - case 2: - local_prog = skeletons->xdp_redirect_maps->progs.xdp_redirect_map_2; - local_link = &skeletons->xdp_redirect_maps->links.xdp_redirect_map_2; - remote_prog = skeletons->xdp_dummy->progs.xdp_dummy_prog; - remote_link = &skeletons->xdp_dummy->links.xdp_dummy_prog; - break; + int interface, ret, i; + + for (i = 0; i < nb_obj; i++) { + local_prog = bpf_object__find_program_by_name(objs[i], prog[index].local_name); + if (local_prog) + break; } - interface = if_nametoindex(config[index].local_veth); + if (!ASSERT_OK_PTR(local_prog, "find local program")) + return -1; + + for (i = 0; i < nb_obj; i++) { + remote_prog = bpf_object__find_program_by_name(objs[i], prog[index].remote_name); + if (remote_prog) + break; + } + if (!ASSERT_OK_PTR(remote_prog, "find remote program")) + return -1; + + interface = if_nametoindex(net_config->veth_cfg[index].local_veth); if (!ASSERT_NEQ(interface, 0, "non zero interface index")) return -1; - link = bpf_program__attach_xdp(local_prog, interface); - if (!ASSERT_OK_PTR(link, "attach xdp program to local veth")) + + ret = bpf_xdp_attach(interface, bpf_program__fd(local_prog), + prog[index].local_flags, NULL); + if (!ASSERT_OK(ret, "attach xdp program to local veth")) return -1; - *local_link = link; - nstoken = open_netns(config[index].namespace); + + nstoken = open_netns(net_config->veth_cfg[index].namespace); if (!ASSERT_OK_PTR(nstoken, "switch to remote veth namespace")) return -1; - interface = if_nametoindex(config[index].remote_veth); + + interface = if_nametoindex(net_config->veth_cfg[index].remote_veth); if (!ASSERT_NEQ(interface, 0, "non zero interface index")) { close_netns(nstoken); return -1; } - link = bpf_program__attach_xdp(remote_prog, interface); - *remote_link = link; - close_netns(nstoken); - if (!ASSERT_OK_PTR(link, "attach xdp program to remote veth")) + + ret = bpf_xdp_attach(interface, bpf_program__fd(remote_prog), + prog[index].remote_flags, NULL); + if (!ASSERT_OK(ret, "attach xdp program to remote veth")) { + close_netns(nstoken); return -1; + } + close_netns(nstoken); return 0; } -static int configure_network(struct skeletons *skeletons) +static int create_network(struct net_configuration *net_config) { - int interface_id; - int map_fd; - int err; - int i = 0; + struct nstoken *nstoken = NULL; + int i, err; + + memcpy(net_config, &default_config, sizeof(struct net_configuration)); + + /* Create unique namespaces */ + err = append_tid(net_config->ns0_name, NS_NAME_MAX_LEN); + if (!ASSERT_OK(err, "append TID to ns0 name")) + goto fail; + SYS(fail, "ip netns add %s", net_config->ns0_name); - /* First create and configure all interfaces */ for (i = 0; i < VETH_PAIRS_COUNT; i++) { - SYS(fail, "ip netns add %s", config[i].namespace); - SYS(fail, "ip link add %s type veth peer name %s netns %s", - config[i].local_veth, config[i].remote_veth, config[i].namespace); - SYS(fail, "ip link set dev %s up", config[i].local_veth); - if (config[i].remote_addr) - SYS(fail, "ip -n %s addr add %s/24 dev %s", config[i].namespace, - config[i].remote_addr, config[i].remote_veth); - SYS(fail, "ip -n %s link set dev %s up", config[i].namespace, - config[i].remote_veth); + err = append_tid(net_config->veth_cfg[i].namespace, NS_NAME_MAX_LEN); + if (!ASSERT_OK(err, "append TID to ns name")) + goto fail; + SYS(fail, "ip netns add %s", net_config->veth_cfg[i].namespace); } - /* Then configure the redirect map and attach programs to interfaces */ - map_fd = bpf_map__fd(skeletons->xdp_redirect_maps->maps.tx_port); - if (!ASSERT_GE(map_fd, 0, "open redirect map")) + /* Create interfaces */ + nstoken = open_netns(net_config->ns0_name); + if (!nstoken) goto fail; + for (i = 0; i < VETH_PAIRS_COUNT; i++) { - interface_id = if_nametoindex(config[i].next_veth); - if (!ASSERT_NEQ(interface_id, 0, "non zero interface index")) - goto fail; - err = bpf_map_update_elem(map_fd, &i, &interface_id, BPF_ANY); - if (!ASSERT_OK(err, "configure interface redirection through map")) - goto fail; - if (attach_programs_to_veth_pair(skeletons, i)) - goto fail; + SYS(fail, "ip link add %s type veth peer name %s netns %s", + net_config->veth_cfg[i].local_veth, net_config->veth_cfg[i].remote_veth, + net_config->veth_cfg[i].namespace); + SYS(fail, "ip link set dev %s up", net_config->veth_cfg[i].local_veth); + if (net_config->veth_cfg[i].remote_addr[0]) + SYS(fail, "ip -n %s addr add %s/24 dev %s", + net_config->veth_cfg[i].namespace, + net_config->veth_cfg[i].remote_addr, + net_config->veth_cfg[i].remote_veth); + SYS(fail, "ip -n %s link set dev %s up", net_config->veth_cfg[i].namespace, + net_config->veth_cfg[i].remote_veth); } + close_netns(nstoken); return 0; fail: + close_netns(nstoken); return -1; } -static void cleanup_network(void) +static void cleanup_network(struct net_configuration *net_config) { int i; - /* Deleting namespaces is enough to automatically remove veth pairs as well - */ + SYS_NOFAIL("ip netns del %s", net_config->ns0_name); for (i = 0; i < VETH_PAIRS_COUNT; i++) - SYS_NOFAIL("ip netns del %s", config[i].namespace); + SYS_NOFAIL("ip netns del %s", net_config->veth_cfg[i].namespace); } -static int check_ping(struct skeletons *skeletons) +#define VETH_REDIRECT_SKEL_NB 3 +static void xdp_veth_redirect(u32 flags) { + struct prog_configuration ping_config[VETH_PAIRS_COUNT] = { + { + .local_name = "xdp_redirect_map_0", + .remote_name = "xdp_dummy_prog", + .local_flags = flags, + .remote_flags = flags, + }, + { + .local_name = "xdp_redirect_map_1", + .remote_name = "xdp_tx", + .local_flags = flags, + .remote_flags = flags, + }, + { + .local_name = "xdp_redirect_map_2", + .remote_name = "xdp_dummy_prog", + .local_flags = flags, + .remote_flags = flags, + } + }; + struct bpf_object *bpf_objs[VETH_REDIRECT_SKEL_NB]; + struct xdp_redirect_map *xdp_redirect_map; + struct net_configuration net_config; + struct nstoken *nstoken = NULL; + struct xdp_dummy *xdp_dummy; + struct xdp_tx *xdp_tx; + int map_fd; + int i; + + xdp_dummy = xdp_dummy__open_and_load(); + if (!ASSERT_OK_PTR(xdp_dummy, "xdp_dummy__open_and_load")) + return; + + xdp_tx = xdp_tx__open_and_load(); + if (!ASSERT_OK_PTR(xdp_tx, "xdp_tx__open_and_load")) + goto destroy_xdp_dummy; + + xdp_redirect_map = xdp_redirect_map__open_and_load(); + if (!ASSERT_OK_PTR(xdp_redirect_map, "xdp_redirect_map__open_and_load")) + goto destroy_xdp_tx; + + if (!ASSERT_OK(create_network(&net_config), "create network")) + goto destroy_xdp_redirect_map; + + /* Then configure the redirect map and attach programs to interfaces */ + map_fd = bpf_map__fd(xdp_redirect_map->maps.tx_port); + if (!ASSERT_OK_FD(map_fd, "open redirect map")) + goto destroy_xdp_redirect_map; + + bpf_objs[0] = xdp_dummy->obj; + bpf_objs[1] = xdp_tx->obj; + bpf_objs[2] = xdp_redirect_map->obj; + + nstoken = open_netns(net_config.ns0_name); + if (!ASSERT_OK_PTR(nstoken, "open NS0")) + goto destroy_xdp_redirect_map; + + for (i = 0; i < VETH_PAIRS_COUNT; i++) { + int next_veth = net_config.veth_cfg[i].next_veth; + int interface_id; + int err; + + interface_id = if_nametoindex(net_config.veth_cfg[next_veth].local_veth); + if (!ASSERT_NEQ(interface_id, 0, "non zero interface index")) + goto destroy_xdp_redirect_map; + err = bpf_map_update_elem(map_fd, &i, &interface_id, BPF_ANY); + if (!ASSERT_OK(err, "configure interface redirection through map")) + goto destroy_xdp_redirect_map; + if (attach_programs_to_veth_pair(bpf_objs, VETH_REDIRECT_SKEL_NB, + &net_config, ping_config, i)) + goto destroy_xdp_redirect_map; + } + /* Test: if all interfaces are properly configured, we must be able to ping * veth33 from veth11 */ - return SYS_NOFAIL("ip netns exec %s ping -c 1 -W 1 %s > /dev/null", - config[0].namespace, IP_DST); + ASSERT_OK(SYS_NOFAIL("ip netns exec %s ping -c 1 -W 1 %s > /dev/null", + net_config.veth_cfg[0].namespace, IP_DST), "ping"); + +destroy_xdp_redirect_map: + close_netns(nstoken); + xdp_redirect_map__destroy(xdp_redirect_map); +destroy_xdp_tx: + xdp_tx__destroy(xdp_tx); +destroy_xdp_dummy: + xdp_dummy__destroy(xdp_dummy); + + cleanup_network(&net_config); } -void test_xdp_veth_redirect(void) +#define BROADCAST_REDIRECT_SKEL_NB 2 +static void xdp_veth_broadcast_redirect(u32 attach_flags, u64 redirect_flags) { - struct skeletons skeletons = {}; + struct prog_configuration prog_cfg[VETH_PAIRS_COUNT] = { + { + .local_name = "xdp_redirect_map_multi_prog", + .remote_name = "xdp_count_0", + .local_flags = attach_flags, + .remote_flags = attach_flags, + }, + { + .local_name = "xdp_redirect_map_multi_prog", + .remote_name = "xdp_count_1", + .local_flags = attach_flags, + .remote_flags = attach_flags, + }, + { + .local_name = "xdp_redirect_map_multi_prog", + .remote_name = "xdp_count_2", + .local_flags = attach_flags, + .remote_flags = attach_flags, + } + }; + struct bpf_object *bpf_objs[BROADCAST_REDIRECT_SKEL_NB]; + struct xdp_redirect_multi_kern *xdp_redirect_multi_kern; + struct xdp_redirect_map *xdp_redirect_map; + struct bpf_devmap_val devmap_val = {}; + struct net_configuration net_config; + struct nstoken *nstoken = NULL; + u16 protocol = ETH_P_IP; + int group_map; + int flags_map; + int cnt_map; + u64 cnt = 0; + int i, err; - skeletons.xdp_dummy = xdp_dummy__open_and_load(); - if (!ASSERT_OK_PTR(skeletons.xdp_dummy, "xdp_dummy__open_and_load")) + xdp_redirect_multi_kern = xdp_redirect_multi_kern__open_and_load(); + if (!ASSERT_OK_PTR(xdp_redirect_multi_kern, "xdp_redirect_multi_kern__open_and_load")) return; - skeletons.xdp_tx = xdp_tx__open_and_load(); - if (!ASSERT_OK_PTR(skeletons.xdp_tx, "xdp_tx__open_and_load")) + xdp_redirect_map = xdp_redirect_map__open_and_load(); + if (!ASSERT_OK_PTR(xdp_redirect_map, "xdp_redirect_map__open_and_load")) + goto destroy_xdp_redirect_multi_kern; + + if (!ASSERT_OK(create_network(&net_config), "create network")) + goto destroy_xdp_redirect_map; + + group_map = bpf_map__fd(xdp_redirect_multi_kern->maps.map_all); + if (!ASSERT_OK_FD(group_map, "open map_all")) + goto destroy_xdp_redirect_map; + + flags_map = bpf_map__fd(xdp_redirect_multi_kern->maps.redirect_flags); + if (!ASSERT_OK_FD(group_map, "open map_all")) + goto destroy_xdp_redirect_map; + + err = bpf_map_update_elem(flags_map, &protocol, &redirect_flags, BPF_NOEXIST); + if (!ASSERT_OK(err, "init IP count")) + goto destroy_xdp_redirect_map; + + cnt_map = bpf_map__fd(xdp_redirect_map->maps.rxcnt); + if (!ASSERT_OK_FD(cnt_map, "open rxcnt map")) + goto destroy_xdp_redirect_map; + + bpf_objs[0] = xdp_redirect_multi_kern->obj; + bpf_objs[1] = xdp_redirect_map->obj; + + nstoken = open_netns(net_config.ns0_name); + if (!ASSERT_OK_PTR(nstoken, "open NS0")) + goto destroy_xdp_redirect_map; + + for (i = 0; i < VETH_PAIRS_COUNT; i++) { + int ifindex = if_nametoindex(net_config.veth_cfg[i].local_veth); + + if (attach_programs_to_veth_pair(bpf_objs, BROADCAST_REDIRECT_SKEL_NB, + &net_config, prog_cfg, i)) + goto destroy_xdp_redirect_map; + + SYS(destroy_xdp_redirect_map, + "ip -n %s neigh add %s lladdr 00:00:00:00:00:01 dev %s", + net_config.veth_cfg[i].namespace, IP_NEIGH, net_config.veth_cfg[i].remote_veth); + + devmap_val.ifindex = ifindex; + err = bpf_map_update_elem(group_map, &ifindex, &devmap_val, 0); + if (!ASSERT_OK(err, "bpf_map_update_elem")) + goto destroy_xdp_redirect_map; + + } + + SYS_NOFAIL("ip netns exec %s ping %s -i 0.1 -c 4 -W1 > /dev/null ", + net_config.veth_cfg[0].namespace, IP_NEIGH); + + for (i = 0; i < VETH_PAIRS_COUNT; i++) { + err = bpf_map_lookup_elem(cnt_map, &i, &cnt); + if (!ASSERT_OK(err, "get IP cnt")) + goto destroy_xdp_redirect_map; + + if (redirect_flags & BPF_F_EXCLUDE_INGRESS) + /* veth11 shouldn't receive the ICMP requests; + * others should + */ + ASSERT_EQ(cnt, i ? 4 : 0, "compare IP cnt"); + else + /* All remote veth should receive the ICMP requests */ + ASSERT_EQ(cnt, 4, "compare IP cnt"); + } + +destroy_xdp_redirect_map: + close_netns(nstoken); + xdp_redirect_map__destroy(xdp_redirect_map); +destroy_xdp_redirect_multi_kern: + xdp_redirect_multi_kern__destroy(xdp_redirect_multi_kern); + + cleanup_network(&net_config); +} + +#define VETH_EGRESS_SKEL_NB 3 +static void xdp_veth_egress(u32 flags) +{ + struct prog_configuration prog_cfg[VETH_PAIRS_COUNT] = { + { + .local_name = "xdp_redirect_map_all_prog", + .remote_name = "xdp_dummy_prog", + .local_flags = flags, + .remote_flags = flags, + }, + { + .local_name = "xdp_redirect_map_all_prog", + .remote_name = "store_mac_1", + .local_flags = flags, + .remote_flags = flags, + }, + { + .local_name = "xdp_redirect_map_all_prog", + .remote_name = "store_mac_2", + .local_flags = flags, + .remote_flags = flags, + } + }; + const char magic_mac[6] = { 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF}; + struct xdp_redirect_multi_kern *xdp_redirect_multi_kern; + struct bpf_object *bpf_objs[VETH_EGRESS_SKEL_NB]; + struct xdp_redirect_map *xdp_redirect_map; + struct bpf_devmap_val devmap_val = {}; + struct net_configuration net_config; + int mac_map, egress_map, res_map; + struct nstoken *nstoken = NULL; + struct xdp_dummy *xdp_dummy; + int err; + int i; + + xdp_dummy = xdp_dummy__open_and_load(); + if (!ASSERT_OK_PTR(xdp_dummy, "xdp_dummy__open_and_load")) + return; + + xdp_redirect_multi_kern = xdp_redirect_multi_kern__open_and_load(); + if (!ASSERT_OK_PTR(xdp_redirect_multi_kern, "xdp_redirect_multi_kern__open_and_load")) goto destroy_xdp_dummy; - skeletons.xdp_redirect_maps = xdp_redirect_map__open_and_load(); - if (!ASSERT_OK_PTR(skeletons.xdp_redirect_maps, "xdp_redirect_map__open_and_load")) - goto destroy_xdp_tx; + xdp_redirect_map = xdp_redirect_map__open_and_load(); + if (!ASSERT_OK_PTR(xdp_redirect_map, "xdp_redirect_map__open_and_load")) + goto destroy_xdp_redirect_multi_kern; - if (configure_network(&skeletons)) + if (!ASSERT_OK(create_network(&net_config), "create network")) goto destroy_xdp_redirect_map; - ASSERT_OK(check_ping(&skeletons), "ping"); + mac_map = bpf_map__fd(xdp_redirect_multi_kern->maps.mac_map); + if (!ASSERT_OK_FD(mac_map, "open mac_map")) + goto destroy_xdp_redirect_map; + + egress_map = bpf_map__fd(xdp_redirect_multi_kern->maps.map_egress); + if (!ASSERT_OK_FD(egress_map, "open map_egress")) + goto destroy_xdp_redirect_map; + + devmap_val.bpf_prog.fd = bpf_program__fd(xdp_redirect_multi_kern->progs.xdp_devmap_prog); + + bpf_objs[0] = xdp_dummy->obj; + bpf_objs[1] = xdp_redirect_multi_kern->obj; + bpf_objs[2] = xdp_redirect_map->obj; + + nstoken = open_netns(net_config.ns0_name); + if (!ASSERT_OK_PTR(nstoken, "open NS0")) + goto destroy_xdp_redirect_map; + + for (i = 0; i < VETH_PAIRS_COUNT; i++) { + int ifindex = if_nametoindex(net_config.veth_cfg[i].local_veth); + + SYS(destroy_xdp_redirect_map, + "ip -n %s neigh add %s lladdr 00:00:00:00:00:01 dev %s", + net_config.veth_cfg[i].namespace, IP_NEIGH, net_config.veth_cfg[i].remote_veth); + + if (attach_programs_to_veth_pair(bpf_objs, VETH_REDIRECT_SKEL_NB, + &net_config, prog_cfg, i)) + goto destroy_xdp_redirect_map; + + err = bpf_map_update_elem(mac_map, &ifindex, magic_mac, 0); + if (!ASSERT_OK(err, "bpf_map_update_elem")) + goto destroy_xdp_redirect_map; + + devmap_val.ifindex = ifindex; + err = bpf_map_update_elem(egress_map, &ifindex, &devmap_val, 0); + if (!ASSERT_OK(err, "bpf_map_update_elem")) + goto destroy_xdp_redirect_map; + } + + SYS_NOFAIL("ip netns exec %s ping %s -i 0.1 -c 4 -W1 > /dev/null ", + net_config.veth_cfg[0].namespace, IP_NEIGH); + + res_map = bpf_map__fd(xdp_redirect_map->maps.rx_mac); + if (!ASSERT_OK_FD(res_map, "open rx_map")) + goto destroy_xdp_redirect_map; + + for (i = 0; i < 2; i++) { + u32 key = i; + u64 res; + + err = bpf_map_lookup_elem(res_map, &key, &res); + if (!ASSERT_OK(err, "get MAC res")) + goto destroy_xdp_redirect_map; + + ASSERT_STRNEQ((const char *)&res, magic_mac, ETH_ALEN, "compare mac"); + } destroy_xdp_redirect_map: - xdp_redirect_map__destroy(skeletons.xdp_redirect_maps); -destroy_xdp_tx: - xdp_tx__destroy(skeletons.xdp_tx); + close_netns(nstoken); + xdp_redirect_map__destroy(xdp_redirect_map); +destroy_xdp_redirect_multi_kern: + xdp_redirect_multi_kern__destroy(xdp_redirect_multi_kern); destroy_xdp_dummy: - xdp_dummy__destroy(skeletons.xdp_dummy); + xdp_dummy__destroy(xdp_dummy); + + cleanup_network(&net_config); +} + +void test_xdp_veth_redirect(void) +{ + if (test__start_subtest("0")) + xdp_veth_redirect(0); + + if (test__start_subtest("DRV_MODE")) + xdp_veth_redirect(XDP_FLAGS_DRV_MODE); + + if (test__start_subtest("SKB_MODE")) + xdp_veth_redirect(XDP_FLAGS_SKB_MODE); +} + +void test_xdp_veth_broadcast_redirect(void) +{ + if (test__start_subtest("0/BROADCAST")) + xdp_veth_broadcast_redirect(0, BPF_F_BROADCAST); + + if (test__start_subtest("0/(BROADCAST | EXCLUDE_INGRESS)")) + xdp_veth_broadcast_redirect(0, BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS); + + if (test__start_subtest("DRV_MODE/BROADCAST")) + xdp_veth_broadcast_redirect(XDP_FLAGS_DRV_MODE, BPF_F_BROADCAST); + + if (test__start_subtest("DRV_MODE/(BROADCAST | EXCLUDE_INGRESS)")) + xdp_veth_broadcast_redirect(XDP_FLAGS_DRV_MODE, + BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS); + + if (test__start_subtest("SKB_MODE/BROADCAST")) + xdp_veth_broadcast_redirect(XDP_FLAGS_SKB_MODE, BPF_F_BROADCAST); + + if (test__start_subtest("SKB_MODE/(BROADCAST | EXCLUDE_INGRESS)")) + xdp_veth_broadcast_redirect(XDP_FLAGS_SKB_MODE, + BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS); +} + +void test_xdp_veth_egress(void) +{ + if (test__start_subtest("0/egress")) + xdp_veth_egress(0); + + if (test__start_subtest("DRV_MODE/egress")) + xdp_veth_egress(XDP_FLAGS_DRV_MODE); - cleanup_network(); + if (test__start_subtest("SKB_MODE/egress")) + xdp_veth_egress(XDP_FLAGS_SKB_MODE); } diff --git a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c index c3ab9b6fb069..f9392df23f8a 100644 --- a/tools/testing/selftests/bpf/prog_tests/token.c +++ b/tools/testing/selftests/bpf/prog_tests/token.c @@ -19,6 +19,7 @@ #include "priv_prog.skel.h" #include "dummy_st_ops_success.skel.h" #include "token_lsm.skel.h" +#include "priv_freplace_prog.skel.h" static inline int sys_mount(const char *dev_name, const char *dir_name, const char *type, unsigned long flags, @@ -788,6 +789,84 @@ static int userns_obj_priv_prog(int mnt_fd, struct token_lsm *lsm_skel) return 0; } +static int userns_obj_priv_freplace_setup(int mnt_fd, struct priv_freplace_prog **fr_skel, + struct priv_prog **skel, int *tgt_fd) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + int err; + char buf[256]; + + /* use bpf_token_path to provide BPF FS path */ + snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd); + opts.bpf_token_path = buf; + *skel = priv_prog__open_opts(&opts); + if (!ASSERT_OK_PTR(*skel, "priv_prog__open_opts")) + return -EINVAL; + err = priv_prog__load(*skel); + if (!ASSERT_OK(err, "priv_prog__load")) + return -EINVAL; + + *fr_skel = priv_freplace_prog__open_opts(&opts); + if (!ASSERT_OK_PTR(*skel, "priv_freplace_prog__open_opts")) + return -EINVAL; + + *tgt_fd = bpf_program__fd((*skel)->progs.xdp_prog1); + return 0; +} + +/* Verify that freplace works from user namespace, because bpf token is loaded + * in bpf_object__prepare + */ +static int userns_obj_priv_freplace_prog(int mnt_fd, struct token_lsm *lsm_skel) +{ + struct priv_freplace_prog *fr_skel = NULL; + struct priv_prog *skel = NULL; + int err, tgt_fd; + + err = userns_obj_priv_freplace_setup(mnt_fd, &fr_skel, &skel, &tgt_fd); + if (!ASSERT_OK(err, "setup")) + goto out; + + err = bpf_object__prepare(fr_skel->obj); + if (!ASSERT_OK(err, "freplace__prepare")) + goto out; + + err = bpf_program__set_attach_target(fr_skel->progs.new_xdp_prog2, tgt_fd, "xdp_prog1"); + if (!ASSERT_OK(err, "set_attach_target")) + goto out; + + err = priv_freplace_prog__load(fr_skel); + ASSERT_OK(err, "priv_freplace_prog__load"); + +out: + priv_freplace_prog__destroy(fr_skel); + priv_prog__destroy(skel); + return err; +} + +/* Verify that replace fails to set attach target from user namespace without bpf token */ +static int userns_obj_priv_freplace_prog_fail(int mnt_fd, struct token_lsm *lsm_skel) +{ + struct priv_freplace_prog *fr_skel = NULL; + struct priv_prog *skel = NULL; + int err, tgt_fd; + + err = userns_obj_priv_freplace_setup(mnt_fd, &fr_skel, &skel, &tgt_fd); + if (!ASSERT_OK(err, "setup")) + goto out; + + err = bpf_program__set_attach_target(fr_skel->progs.new_xdp_prog2, tgt_fd, "xdp_prog1"); + if (ASSERT_ERR(err, "attach fails")) + err = 0; + else + err = -EINVAL; + +out: + priv_freplace_prog__destroy(fr_skel); + priv_prog__destroy(skel); + return err; +} + /* this test is called with BPF FS that doesn't delegate BPF_BTF_LOAD command, * which should cause struct_ops application to fail, as BTF won't be uploaded * into the kernel, even if STRUCT_OPS programs themselves are allowed @@ -1004,12 +1083,28 @@ void test_token(void) if (test__start_subtest("obj_priv_prog")) { struct bpffs_opts opts = { .cmds = bit(BPF_PROG_LOAD), - .progs = bit(BPF_PROG_TYPE_KPROBE), + .progs = bit(BPF_PROG_TYPE_XDP), .attachs = ~0ULL, }; subtest_userns(&opts, userns_obj_priv_prog); } + if (test__start_subtest("obj_priv_freplace_prog")) { + struct bpffs_opts opts = { + .cmds = bit(BPF_BTF_LOAD) | bit(BPF_PROG_LOAD) | bit(BPF_BTF_GET_FD_BY_ID), + .progs = bit(BPF_PROG_TYPE_EXT) | bit(BPF_PROG_TYPE_XDP), + .attachs = ~0ULL, + }; + subtest_userns(&opts, userns_obj_priv_freplace_prog); + } + if (test__start_subtest("obj_priv_freplace_prog_fail")) { + struct bpffs_opts opts = { + .cmds = bit(BPF_BTF_LOAD) | bit(BPF_PROG_LOAD) | bit(BPF_BTF_GET_FD_BY_ID), + .progs = bit(BPF_PROG_TYPE_EXT) | bit(BPF_PROG_TYPE_XDP), + .attachs = ~0ULL, + }; + subtest_userns(&opts, userns_obj_priv_freplace_prog_fail); + } if (test__start_subtest("obj_priv_btf_fail")) { struct bpffs_opts opts = { /* disallow BTF loading */ diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c index 56ed1eb9b527..495d66414b57 100644 --- a/tools/testing/selftests/bpf/prog_tests/usdt.c +++ b/tools/testing/selftests/bpf/prog_tests/usdt.c @@ -45,7 +45,7 @@ static void subtest_basic_usdt(void) LIBBPF_OPTS(bpf_usdt_opts, opts); struct test_usdt *skel; struct test_usdt__bss *bss; - int err; + int err, i; skel = test_usdt__open_and_load(); if (!ASSERT_OK_PTR(skel, "skel_open")) @@ -75,6 +75,7 @@ static void subtest_basic_usdt(void) ASSERT_EQ(bss->usdt0_cookie, 0xcafedeadbeeffeed, "usdt0_cookie"); ASSERT_EQ(bss->usdt0_arg_cnt, 0, "usdt0_arg_cnt"); ASSERT_EQ(bss->usdt0_arg_ret, -ENOENT, "usdt0_arg_ret"); + ASSERT_EQ(bss->usdt0_arg_size, -ENOENT, "usdt0_arg_size"); /* auto-attached usdt3 gets default zero cookie value */ ASSERT_EQ(bss->usdt3_cookie, 0, "usdt3_cookie"); @@ -86,6 +87,9 @@ static void subtest_basic_usdt(void) ASSERT_EQ(bss->usdt3_args[0], 1, "usdt3_arg1"); ASSERT_EQ(bss->usdt3_args[1], 42, "usdt3_arg2"); ASSERT_EQ(bss->usdt3_args[2], (uintptr_t)&bla, "usdt3_arg3"); + ASSERT_EQ(bss->usdt3_arg_sizes[0], 4, "usdt3_arg1_size"); + ASSERT_EQ(bss->usdt3_arg_sizes[1], 8, "usdt3_arg2_size"); + ASSERT_EQ(bss->usdt3_arg_sizes[2], 8, "usdt3_arg3_size"); /* auto-attached usdt12 gets default zero cookie value */ ASSERT_EQ(bss->usdt12_cookie, 0, "usdt12_cookie"); @@ -104,6 +108,11 @@ static void subtest_basic_usdt(void) ASSERT_EQ(bss->usdt12_args[10], nums[idx], "usdt12_arg11"); ASSERT_EQ(bss->usdt12_args[11], t1.y, "usdt12_arg12"); + int usdt12_expected_arg_sizes[12] = { 4, 4, 8, 8, 4, 8, 8, 8, 4, 2, 2, 1 }; + + for (i = 0; i < 12; i++) + ASSERT_EQ(bss->usdt12_arg_sizes[i], usdt12_expected_arg_sizes[i], "usdt12_arg_size"); + /* trigger_func() is marked __always_inline, so USDT invocations will be * inlined in two different places, meaning that each USDT will have * at least 2 different places to be attached to. This verifies that diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index 8a0e1ff8a2dc..e66a57970d28 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -45,6 +45,7 @@ #include "verifier_ldsx.skel.h" #include "verifier_leak_ptr.skel.h" #include "verifier_linked_scalars.skel.h" +#include "verifier_load_acquire.skel.h" #include "verifier_loops1.skel.h" #include "verifier_lwt.skel.h" #include "verifier_map_in_map.skel.h" @@ -80,6 +81,7 @@ #include "verifier_spill_fill.skel.h" #include "verifier_spin_lock.skel.h" #include "verifier_stack_ptr.skel.h" +#include "verifier_store_release.skel.h" #include "verifier_subprog_precision.skel.h" #include "verifier_subreg.skel.h" #include "verifier_tailcall_jit.skel.h" @@ -121,7 +123,7 @@ static void run_tests_aux(const char *skel_name, /* test_verifier tests are executed w/o CAP_SYS_ADMIN, do the same here */ err = cap_disable_effective(1ULL << CAP_SYS_ADMIN, &old_caps); if (err) { - PRINT_FAIL("failed to drop CAP_SYS_ADMIN: %i, %s\n", err, strerror(err)); + PRINT_FAIL("failed to drop CAP_SYS_ADMIN: %i, %s\n", err, strerror(-err)); return; } @@ -131,7 +133,7 @@ static void run_tests_aux(const char *skel_name, err = cap_enable_effective(old_caps, NULL); if (err) - PRINT_FAIL("failed to restore CAP_SYS_ADMIN: %i, %s\n", err, strerror(err)); + PRINT_FAIL("failed to restore CAP_SYS_ADMIN: %i, %s\n", err, strerror(-err)); } #define RUN(skel) run_tests_aux(#skel, skel##__elf_bytes, NULL) @@ -173,6 +175,7 @@ void test_verifier_int_ptr(void) { RUN(verifier_int_ptr); } void test_verifier_iterating_callbacks(void) { RUN(verifier_iterating_callbacks); } void test_verifier_jeq_infer_not_null(void) { RUN(verifier_jeq_infer_not_null); } void test_verifier_jit_convergence(void) { RUN(verifier_jit_convergence); } +void test_verifier_load_acquire(void) { RUN(verifier_load_acquire); } void test_verifier_ld_ind(void) { RUN(verifier_ld_ind); } void test_verifier_ldsx(void) { RUN(verifier_ldsx); } void test_verifier_leak_ptr(void) { RUN(verifier_leak_ptr); } @@ -211,6 +214,7 @@ void test_verifier_sockmap_mutate(void) { RUN(verifier_sockmap_mutate); } void test_verifier_spill_fill(void) { RUN(verifier_spill_fill); } void test_verifier_spin_lock(void) { RUN(verifier_spin_lock); } void test_verifier_stack_ptr(void) { RUN(verifier_stack_ptr); } +void test_verifier_store_release(void) { RUN(verifier_store_release); } void test_verifier_subprog_precision(void) { RUN(verifier_subprog_precision); } void test_verifier_subreg(void) { RUN(verifier_subreg); } void test_verifier_tailcall_jit(void) { RUN(verifier_tailcall_jit); } diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index 937da9b7532a..b9d9f0a502ce 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -4,12 +4,20 @@ #include "test_xdp_context_test_run.skel.h" #include "test_xdp_meta.skel.h" -#define TX_ADDR "10.0.0.1" -#define RX_ADDR "10.0.0.2" #define RX_NAME "veth0" #define TX_NAME "veth1" #define TX_NETNS "xdp_context_tx" #define RX_NETNS "xdp_context_rx" +#define TAP_NAME "tap0" +#define TAP_NETNS "xdp_context_tuntap" + +#define TEST_PAYLOAD_LEN 32 +static const __u8 test_payload[TEST_PAYLOAD_LEN] = { + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, + 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, + 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, + 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, +}; void test_xdp_context_error(int prog_fd, struct bpf_test_run_opts opts, __u32 data_meta, __u32 data, __u32 data_end, @@ -112,7 +120,59 @@ void test_xdp_context_test_run(void) test_xdp_context_test_run__destroy(skel); } -void test_xdp_context_functional(void) +static int send_test_packet(int ifindex) +{ + int n, sock = -1; + __u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN]; + + /* The ethernet header is not relevant for this test and doesn't need to + * be meaningful. + */ + struct ethhdr eth = { 0 }; + + memcpy(packet, ð, sizeof(eth)); + memcpy(packet + sizeof(eth), test_payload, TEST_PAYLOAD_LEN); + + sock = socket(AF_PACKET, SOCK_RAW, IPPROTO_RAW); + if (!ASSERT_GE(sock, 0, "socket")) + goto err; + + struct sockaddr_ll saddr = { + .sll_family = PF_PACKET, + .sll_ifindex = ifindex, + .sll_halen = ETH_ALEN + }; + n = sendto(sock, packet, sizeof(packet), 0, (struct sockaddr *)&saddr, + sizeof(saddr)); + if (!ASSERT_EQ(n, sizeof(packet), "sendto")) + goto err; + + close(sock); + return 0; + +err: + if (sock >= 0) + close(sock); + return -1; +} + +static void assert_test_result(struct test_xdp_meta *skel) +{ + int err; + __u32 map_key = 0; + __u8 map_value[TEST_PAYLOAD_LEN]; + + err = bpf_map__lookup_elem(skel->maps.test_result, &map_key, + sizeof(map_key), &map_value, + TEST_PAYLOAD_LEN, BPF_ANY); + if (!ASSERT_OK(err, "lookup test_result")) + return; + + ASSERT_MEMEQ(&map_value, &test_payload, TEST_PAYLOAD_LEN, + "test_result map contains test payload"); +} + +void test_xdp_context_veth(void) { LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); @@ -120,7 +180,7 @@ void test_xdp_context_functional(void) struct bpf_program *tc_prog, *xdp_prog; struct test_xdp_meta *skel = NULL; struct nstoken *nstoken = NULL; - int rx_ifindex; + int rx_ifindex, tx_ifindex; int ret; tx_ns = netns_new(TX_NETNS, false); @@ -138,7 +198,6 @@ void test_xdp_context_functional(void) if (!ASSERT_OK_PTR(nstoken, "setns rx_ns")) goto close; - SYS(close, "ip addr add " RX_ADDR "/24 dev " RX_NAME); SYS(close, "ip link set dev " RX_NAME " up"); skel = test_xdp_meta__open_and_load(); @@ -179,9 +238,17 @@ void test_xdp_context_functional(void) if (!ASSERT_OK_PTR(nstoken, "setns tx_ns")) goto close; - SYS(close, "ip addr add " TX_ADDR "/24 dev " TX_NAME); SYS(close, "ip link set dev " TX_NAME " up"); - ASSERT_OK(SYS_NOFAIL("ping -c 1 " RX_ADDR), "ping"); + + tx_ifindex = if_nametoindex(TX_NAME); + if (!ASSERT_GE(tx_ifindex, 0, "if_nametoindex tx")) + goto close; + + ret = send_test_packet(tx_ifindex); + if (!ASSERT_OK(ret, "send_test_packet")) + goto close; + + assert_test_result(skel); close: close_netns(nstoken); @@ -190,3 +257,67 @@ close: netns_free(tx_ns); } +void test_xdp_context_tuntap(void) +{ + LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); + LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); + struct netns_obj *ns = NULL; + struct test_xdp_meta *skel = NULL; + __u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN]; + int tap_fd = -1; + int tap_ifindex; + int ret; + + ns = netns_new(TAP_NETNS, true); + if (!ASSERT_OK_PTR(ns, "create and open ns")) + return; + + tap_fd = open_tuntap(TAP_NAME, true); + if (!ASSERT_GE(tap_fd, 0, "open_tuntap")) + goto close; + + SYS(close, "ip link set dev " TAP_NAME " up"); + + skel = test_xdp_meta__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open and load skeleton")) + goto close; + + tap_ifindex = if_nametoindex(TAP_NAME); + if (!ASSERT_GE(tap_ifindex, 0, "if_nametoindex")) + goto close; + + tc_hook.ifindex = tap_ifindex; + ret = bpf_tc_hook_create(&tc_hook); + if (!ASSERT_OK(ret, "bpf_tc_hook_create")) + goto close; + + tc_opts.prog_fd = bpf_program__fd(skel->progs.ing_cls); + ret = bpf_tc_attach(&tc_hook, &tc_opts); + if (!ASSERT_OK(ret, "bpf_tc_attach")) + goto close; + + ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(skel->progs.ing_xdp), + 0, NULL); + if (!ASSERT_GE(ret, 0, "bpf_xdp_attach")) + goto close; + + /* The ethernet header is not relevant for this test and doesn't need to + * be meaningful. + */ + struct ethhdr eth = { 0 }; + + memcpy(packet, ð, sizeof(eth)); + memcpy(packet + sizeof(eth), test_payload, TEST_PAYLOAD_LEN); + + ret = write(tap_fd, packet, sizeof(packet)); + if (!ASSERT_EQ(ret, sizeof(packet), "write packet")) + goto close; + + assert_test_result(skel); + +close: + if (tap_fd >= 0) + close(tap_fd); + test_xdp_meta__destroy(skel); + netns_free(ns); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_vlan.c b/tools/testing/selftests/bpf/prog_tests/xdp_vlan.c new file mode 100644 index 000000000000..18dd25344de7 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/xdp_vlan.c @@ -0,0 +1,175 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Network topology: + * ----------- ----------- + * | NS1 | | NS2 | + * | veth0 -|--------|- veth0 | + * ----------- ----------- + * + */ + +#define _GNU_SOURCE +#include <net/if.h> +#include <uapi/linux/if_link.h> + +#include "network_helpers.h" +#include "test_progs.h" +#include "test_xdp_vlan.skel.h" + + +#define VETH_NAME "veth0" +#define NS_MAX_SIZE 32 +#define NS1_NAME "ns-xdp-vlan-1-" +#define NS2_NAME "ns-xdp-vlan-2-" +#define NS1_IP_ADDR "100.64.10.1" +#define NS2_IP_ADDR "100.64.10.2" +#define VLAN_ID 4011 + +static int setup_network(char *ns1, char *ns2) +{ + if (!ASSERT_OK(append_tid(ns1, NS_MAX_SIZE), "create ns1 name")) + goto fail; + if (!ASSERT_OK(append_tid(ns2, NS_MAX_SIZE), "create ns2 name")) + goto fail; + + SYS(fail, "ip netns add %s", ns1); + SYS(fail, "ip netns add %s", ns2); + SYS(fail, "ip -n %s link add %s type veth peer name %s netns %s", + ns1, VETH_NAME, VETH_NAME, ns2); + + /* NOTICE: XDP require VLAN header inside packet payload + * - Thus, disable VLAN offloading driver features + */ + SYS(fail, "ip netns exec %s ethtool -K %s rxvlan off txvlan off", ns1, VETH_NAME); + SYS(fail, "ip netns exec %s ethtool -K %s rxvlan off txvlan off", ns2, VETH_NAME); + + /* NS1 configuration */ + SYS(fail, "ip -n %s addr add %s/24 dev %s", ns1, NS1_IP_ADDR, VETH_NAME); + SYS(fail, "ip -n %s link set %s up", ns1, VETH_NAME); + + /* NS2 configuration */ + SYS(fail, "ip -n %s link add link %s name %s.%d type vlan id %d", + ns2, VETH_NAME, VETH_NAME, VLAN_ID, VLAN_ID); + SYS(fail, "ip -n %s addr add %s/24 dev %s.%d", ns2, NS2_IP_ADDR, VETH_NAME, VLAN_ID); + SYS(fail, "ip -n %s link set %s up", ns2, VETH_NAME); + SYS(fail, "ip -n %s link set %s.%d up", ns2, VETH_NAME, VLAN_ID); + + /* At this point ping should fail because VLAN tags are only used by NS2 */ + return !SYS_NOFAIL("ip netns exec %s ping -W 1 -c1 %s", ns2, NS1_IP_ADDR); + +fail: + return -1; +} + +static void cleanup_network(const char *ns1, const char *ns2) +{ + SYS_NOFAIL("ip netns del %s", ns1); + SYS_NOFAIL("ip netns del %s", ns2); +} + +static void xdp_vlan(struct bpf_program *xdp, struct bpf_program *tc, u32 flags) +{ + LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_EGRESS); + LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); + char ns1[NS_MAX_SIZE] = NS1_NAME; + char ns2[NS_MAX_SIZE] = NS2_NAME; + struct nstoken *nstoken = NULL; + int interface; + int ret; + + if (!ASSERT_OK(setup_network(ns1, ns2), "setup network")) + goto cleanup; + + nstoken = open_netns(ns1); + if (!ASSERT_OK_PTR(nstoken, "open NS1")) + goto cleanup; + + interface = if_nametoindex(VETH_NAME); + if (!ASSERT_NEQ(interface, 0, "get interface index")) + goto cleanup; + + ret = bpf_xdp_attach(interface, bpf_program__fd(xdp), flags, NULL); + if (!ASSERT_OK(ret, "attach xdp_vlan_change")) + goto cleanup; + + tc_hook.ifindex = interface; + ret = bpf_tc_hook_create(&tc_hook); + if (!ASSERT_OK(ret, "bpf_tc_hook_create")) + goto detach_xdp; + + /* Now we'll use BPF programs to pop/push the VLAN tags */ + tc_opts.prog_fd = bpf_program__fd(tc); + ret = bpf_tc_attach(&tc_hook, &tc_opts); + if (!ASSERT_OK(ret, "bpf_tc_attach")) + goto detach_xdp; + + close_netns(nstoken); + nstoken = NULL; + + /* Now the namespaces can reach each-other, test with pings */ + SYS(detach_tc, "ip netns exec %s ping -i 0.2 -W 2 -c 2 %s > /dev/null", ns1, NS2_IP_ADDR); + SYS(detach_tc, "ip netns exec %s ping -i 0.2 -W 2 -c 2 %s > /dev/null", ns2, NS1_IP_ADDR); + + +detach_tc: + bpf_tc_detach(&tc_hook, &tc_opts); +detach_xdp: + bpf_xdp_detach(interface, flags, NULL); +cleanup: + close_netns(nstoken); + cleanup_network(ns1, ns2); +} + +/* First test: Remove VLAN by setting VLAN ID 0, using "xdp_vlan_change" + * egress use TC to add back VLAN tag 4011 + */ +void test_xdp_vlan_change(void) +{ + struct test_xdp_vlan *skel; + + skel = test_xdp_vlan__open_and_load(); + if (!ASSERT_OK_PTR(skel, "xdp_vlan__open_and_load")) + return; + + if (test__start_subtest("0")) + xdp_vlan(skel->progs.xdp_vlan_change, skel->progs.tc_vlan_push, 0); + + if (test__start_subtest("DRV_MODE")) + xdp_vlan(skel->progs.xdp_vlan_change, skel->progs.tc_vlan_push, + XDP_FLAGS_DRV_MODE); + + if (test__start_subtest("SKB_MODE")) + xdp_vlan(skel->progs.xdp_vlan_change, skel->progs.tc_vlan_push, + XDP_FLAGS_SKB_MODE); + + test_xdp_vlan__destroy(skel); +} + +/* Second test: XDP prog fully remove vlan header + * + * Catch kernel bug for generic-XDP, that doesn't allow us to + * remove a VLAN header, because skb->protocol still contain VLAN + * ETH_P_8021Q indication, and this cause overwriting of our changes. + */ +void test_xdp_vlan_remove(void) +{ + struct test_xdp_vlan *skel; + + skel = test_xdp_vlan__open_and_load(); + if (!ASSERT_OK_PTR(skel, "xdp_vlan__open_and_load")) + return; + + if (test__start_subtest("0")) + xdp_vlan(skel->progs.xdp_vlan_remove_outer2, skel->progs.tc_vlan_push, 0); + + if (test__start_subtest("DRV_MODE")) + xdp_vlan(skel->progs.xdp_vlan_remove_outer2, skel->progs.tc_vlan_push, + XDP_FLAGS_DRV_MODE); + + if (test__start_subtest("SKB_MODE")) + xdp_vlan(skel->progs.xdp_vlan_remove_outer2, skel->progs.tc_vlan_push, + XDP_FLAGS_SKB_MODE); + + test_xdp_vlan__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/arena_atomics.c b/tools/testing/selftests/bpf/progs/arena_atomics.c index 40dd57fca5cc..a52feff98112 100644 --- a/tools/testing/selftests/bpf/progs/arena_atomics.c +++ b/tools/testing/selftests/bpf/progs/arena_atomics.c @@ -6,6 +6,8 @@ #include <stdbool.h> #include <stdatomic.h> #include "bpf_arena_common.h" +#include "../../../include/linux/filter.h" +#include "bpf_misc.h" struct { __uint(type, BPF_MAP_TYPE_ARENA); @@ -19,9 +21,17 @@ struct { } arena SEC(".maps"); #if defined(ENABLE_ATOMICS_TESTS) && defined(__BPF_FEATURE_ADDR_SPACE_CAST) -bool skip_tests __attribute((__section__(".data"))) = false; +bool skip_all_tests __attribute((__section__(".data"))) = false; #else -bool skip_tests = true; +bool skip_all_tests = true; +#endif + +#if defined(ENABLE_ATOMICS_TESTS) && \ + defined(__BPF_FEATURE_ADDR_SPACE_CAST) && \ + (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86)) +bool skip_lacq_srel_tests __attribute((__section__(".data"))) = false; +#else +bool skip_lacq_srel_tests = true; #endif __u32 pid = 0; @@ -274,4 +284,111 @@ int uaf(const void *ctx) return 0; } +#if __clang_major__ >= 18 +__u8 __arena_global load_acquire8_value = 0x12; +__u16 __arena_global load_acquire16_value = 0x1234; +__u32 __arena_global load_acquire32_value = 0x12345678; +__u64 __arena_global load_acquire64_value = 0x1234567890abcdef; + +__u8 __arena_global load_acquire8_result = 0; +__u16 __arena_global load_acquire16_result = 0; +__u32 __arena_global load_acquire32_result = 0; +__u64 __arena_global load_acquire64_result = 0; +#else +/* clang-17 crashes if the .addr_space.1 ELF section has holes. Work around + * this issue by defining the below variables as 64-bit. + */ +__u64 __arena_global load_acquire8_value; +__u64 __arena_global load_acquire16_value; +__u64 __arena_global load_acquire32_value; +__u64 __arena_global load_acquire64_value; + +__u64 __arena_global load_acquire8_result; +__u64 __arena_global load_acquire16_result; +__u64 __arena_global load_acquire32_result; +__u64 __arena_global load_acquire64_result; +#endif + +SEC("raw_tp/sys_enter") +int load_acquire(const void *ctx) +{ +#if defined(ENABLE_ATOMICS_TESTS) && \ + defined(__BPF_FEATURE_ADDR_SPACE_CAST) && \ + (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86)) + +#define LOAD_ACQUIRE_ARENA(SIZEOP, SIZE, SRC, DST) \ + { asm volatile ( \ + "r1 = %[" #SRC "] ll;" \ + "r1 = addr_space_cast(r1, 0x0, 0x1);" \ + ".8byte %[load_acquire_insn];" \ + "r3 = %[" #DST "] ll;" \ + "r3 = addr_space_cast(r3, 0x0, 0x1);" \ + "*(" #SIZE " *)(r3 + 0) = r2;" \ + : \ + : __imm_addr(SRC), \ + __imm_insn(load_acquire_insn, \ + BPF_ATOMIC_OP(BPF_##SIZEOP, BPF_LOAD_ACQ, \ + BPF_REG_2, BPF_REG_1, 0)), \ + __imm_addr(DST) \ + : __clobber_all); } \ + + LOAD_ACQUIRE_ARENA(B, u8, load_acquire8_value, load_acquire8_result) + LOAD_ACQUIRE_ARENA(H, u16, load_acquire16_value, + load_acquire16_result) + LOAD_ACQUIRE_ARENA(W, u32, load_acquire32_value, + load_acquire32_result) + LOAD_ACQUIRE_ARENA(DW, u64, load_acquire64_value, + load_acquire64_result) +#undef LOAD_ACQUIRE_ARENA + +#endif + return 0; +} + +#if __clang_major__ >= 18 +__u8 __arena_global store_release8_result = 0; +__u16 __arena_global store_release16_result = 0; +__u32 __arena_global store_release32_result = 0; +__u64 __arena_global store_release64_result = 0; +#else +/* clang-17 crashes if the .addr_space.1 ELF section has holes. Work around + * this issue by defining the below variables as 64-bit. + */ +__u64 __arena_global store_release8_result; +__u64 __arena_global store_release16_result; +__u64 __arena_global store_release32_result; +__u64 __arena_global store_release64_result; +#endif + +SEC("raw_tp/sys_enter") +int store_release(const void *ctx) +{ +#if defined(ENABLE_ATOMICS_TESTS) && \ + defined(__BPF_FEATURE_ADDR_SPACE_CAST) && \ + (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86)) + +#define STORE_RELEASE_ARENA(SIZEOP, DST, VAL) \ + { asm volatile ( \ + "r1 = " VAL ";" \ + "r2 = %[" #DST "] ll;" \ + "r2 = addr_space_cast(r2, 0x0, 0x1);" \ + ".8byte %[store_release_insn];" \ + : \ + : __imm_addr(DST), \ + __imm_insn(store_release_insn, \ + BPF_ATOMIC_OP(BPF_##SIZEOP, BPF_STORE_REL, \ + BPF_REG_2, BPF_REG_1, 0)) \ + : __clobber_all); } \ + + STORE_RELEASE_ARENA(B, store_release8_result, "0x12") + STORE_RELEASE_ARENA(H, store_release16_result, "0x1234") + STORE_RELEASE_ARENA(W, store_release32_result, "0x12345678") + STORE_RELEASE_ARENA(DW, store_release64_result, + "0x1234567890abcdef ll") +#undef STORE_RELEASE_ARENA + +#endif + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/arena_spin_lock.c b/tools/testing/selftests/bpf/progs/arena_spin_lock.c new file mode 100644 index 000000000000..c4500c37f85e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/arena_spin_lock.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" +#include "bpf_arena_spin_lock.h" + +struct { + __uint(type, BPF_MAP_TYPE_ARENA); + __uint(map_flags, BPF_F_MMAPABLE); + __uint(max_entries, 100); /* number of pages */ +#ifdef __TARGET_ARCH_arm64 + __ulong(map_extra, 0x1ull << 32); /* start of mmap() region */ +#else + __ulong(map_extra, 0x1ull << 44); /* start of mmap() region */ +#endif +} arena SEC(".maps"); + +int cs_count; + +#if defined(ENABLE_ATOMICS_TESTS) && defined(__BPF_FEATURE_ADDR_SPACE_CAST) +arena_spinlock_t __arena lock; +int test_skip = 1; +#else +int test_skip = 2; +#endif + +int counter; +int limit; + +SEC("tc") +int prog(void *ctx) +{ + int ret = -2; + +#if defined(ENABLE_ATOMICS_TESTS) && defined(__BPF_FEATURE_ADDR_SPACE_CAST) + unsigned long flags; + + if ((ret = arena_spin_lock_irqsave(&lock, flags))) + return ret; + if (counter != limit) + counter++; + bpf_repeat(cs_count); + ret = 0; + arena_spin_unlock_irqrestore(&lock, flags); +#endif + return ret; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c b/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c index bc10c4e4b4fa..966ee5a7b066 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c @@ -9,6 +9,13 @@ char _license[] SEC("license") = "GPL"; uint32_t tid = 0; int num_unknown_tid = 0; int num_known_tid = 0; +void *user_ptr = 0; +void *user_ptr_long = 0; +uint32_t pid = 0; + +static char big_str1[5000]; +static char big_str2[5005]; +static char big_str3[4996]; SEC("iter/task") int dump_task(struct bpf_iter__task *ctx) @@ -35,7 +42,9 @@ int dump_task(struct bpf_iter__task *ctx) } int num_expected_failure_copy_from_user_task = 0; +int num_expected_failure_copy_from_user_task_str = 0; int num_success_copy_from_user_task = 0; +int num_success_copy_from_user_task_str = 0; SEC("iter.s/task") int dump_task_sleepable(struct bpf_iter__task *ctx) @@ -44,6 +53,9 @@ int dump_task_sleepable(struct bpf_iter__task *ctx) struct task_struct *task = ctx->task; static const char info[] = " === END ==="; struct pt_regs *regs; + char task_str1[10] = "aaaaaaaaaa"; + char task_str2[10], task_str3[10]; + char task_str4[20] = "aaaaaaaaaaaaaaaaaaaa"; void *ptr; uint32_t user_data = 0; int ret; @@ -78,8 +90,106 @@ int dump_task_sleepable(struct bpf_iter__task *ctx) BPF_SEQ_PRINTF(seq, "%s\n", info); return 0; } + ++num_success_copy_from_user_task; + /* Read an invalid pointer and ensure we get an error */ + ptr = NULL; + ret = bpf_copy_from_user_task_str((char *)task_str1, sizeof(task_str1), ptr, task, 0); + if (ret >= 0 || task_str1[9] != 'a' || task_str1[0] != '\0') { + BPF_SEQ_PRINTF(seq, "%s\n", info); + return 0; + } + + /* Read an invalid pointer and ensure we get error with pad zeros flag */ + ptr = NULL; + ret = bpf_copy_from_user_task_str((char *)task_str1, sizeof(task_str1), + ptr, task, BPF_F_PAD_ZEROS); + if (ret >= 0 || task_str1[9] != '\0' || task_str1[0] != '\0') { + BPF_SEQ_PRINTF(seq, "%s\n", info); + return 0; + } + + ++num_expected_failure_copy_from_user_task_str; + + /* Same length as the string */ + ret = bpf_copy_from_user_task_str((char *)task_str2, 10, user_ptr, task, 0); + /* only need to do the task pid check once */ + if (bpf_strncmp(task_str2, 10, "test_data\0") != 0 || ret != 10 || task->tgid != pid) { + BPF_SEQ_PRINTF(seq, "%s\n", info); + return 0; + } + + /* Shorter length than the string */ + ret = bpf_copy_from_user_task_str((char *)task_str3, 2, user_ptr, task, 0); + if (bpf_strncmp(task_str3, 2, "t\0") != 0 || ret != 2) { + BPF_SEQ_PRINTF(seq, "%s\n", info); + return 0; + } + + /* Longer length than the string */ + ret = bpf_copy_from_user_task_str((char *)task_str4, 20, user_ptr, task, 0); + if (bpf_strncmp(task_str4, 10, "test_data\0") != 0 || ret != 10 + || task_str4[sizeof(task_str4) - 1] != 'a') { + BPF_SEQ_PRINTF(seq, "%s\n", info); + return 0; + } + + /* Longer length than the string with pad zeros flag */ + ret = bpf_copy_from_user_task_str((char *)task_str4, 20, user_ptr, task, BPF_F_PAD_ZEROS); + if (bpf_strncmp(task_str4, 10, "test_data\0") != 0 || ret != 10 + || task_str4[sizeof(task_str4) - 1] != '\0') { + BPF_SEQ_PRINTF(seq, "%s\n", info); + return 0; + } + + /* Longer length than the string past a page boundary */ + ret = bpf_copy_from_user_task_str(big_str1, 5000, user_ptr, task, 0); + if (bpf_strncmp(big_str1, 10, "test_data\0") != 0 || ret != 10) { + BPF_SEQ_PRINTF(seq, "%s\n", info); + return 0; + } + + /* String that crosses a page boundary */ + ret = bpf_copy_from_user_task_str(big_str1, 5000, user_ptr_long, task, BPF_F_PAD_ZEROS); + if (bpf_strncmp(big_str1, 4, "baba") != 0 || ret != 5000 + || bpf_strncmp(big_str1 + 4996, 4, "bab\0") != 0) { + BPF_SEQ_PRINTF(seq, "%s\n", info); + return 0; + } + + for (int i = 0; i < 4999; ++i) { + if (i % 2 == 0) { + if (big_str1[i] != 'b') { + BPF_SEQ_PRINTF(seq, "%s\n", info); + return 0; + } + } else { + if (big_str1[i] != 'a') { + BPF_SEQ_PRINTF(seq, "%s\n", info); + return 0; + } + } + } + + /* Longer length than the string that crosses a page boundary */ + ret = bpf_copy_from_user_task_str(big_str2, 5005, user_ptr_long, task, BPF_F_PAD_ZEROS); + if (bpf_strncmp(big_str2, 4, "baba") != 0 || ret != 5000 + || bpf_strncmp(big_str2 + 4996, 5, "bab\0\0") != 0) { + BPF_SEQ_PRINTF(seq, "%s\n", info); + return 0; + } + + /* Shorter length than the string that crosses a page boundary */ + ret = bpf_copy_from_user_task_str(big_str3, 4996, user_ptr_long, task, 0); + if (bpf_strncmp(big_str3, 4, "baba") != 0 || ret != 4996 + || bpf_strncmp(big_str3 + 4992, 4, "bab\0") != 0) { + BPF_SEQ_PRINTF(seq, "%s\n", info); + return 0; + } + + ++num_success_copy_from_user_task_str; + if (ctx->meta->seq_num == 0) BPF_SEQ_PRINTF(seq, " tgid gid data\n"); diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c index d22449c69363..164640db3a29 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c @@ -99,10 +99,10 @@ static int dump_tcp_sock(struct seq_file *seq, struct tcp_sock *tp, icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { timer_active = 1; - timer_expires = icsk->icsk_timeout; + timer_expires = icsk->icsk_retransmit_timer.expires; } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { timer_active = 4; - timer_expires = icsk->icsk_timeout; + timer_expires = icsk->icsk_retransmit_timer.expires; } else if (timer_pending(&sp->sk_timer)) { timer_active = 2; timer_expires = sp->sk_timer.expires; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c index 8b072666f9d9..591c703f5032 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c @@ -99,10 +99,10 @@ static int dump_tcp6_sock(struct seq_file *seq, struct tcp6_sock *tp, icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { timer_active = 1; - timer_expires = icsk->icsk_timeout; + timer_expires = icsk->icsk_retransmit_timer.expires; } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { timer_active = 4; - timer_expires = icsk->icsk_timeout; + timer_expires = icsk->icsk_retransmit_timer.expires; } else if (timer_pending(&sp->sk_timer)) { timer_active = 2; timer_expires = sp->sk_timer.expires; diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index f45f4352feeb..13a2e22f5465 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -135,6 +135,8 @@ #define __arch_arm64 __arch("ARM64") #define __arch_riscv64 __arch("RISCV64") #define __caps_unpriv(caps) __attribute__((btf_decl_tag("comment:test_caps_unpriv=" EXPAND_QUOTE(caps)))) +#define __load_if_JITed() __attribute__((btf_decl_tag("comment:load_mode=jited"))) +#define __load_if_no_JITed() __attribute__((btf_decl_tag("comment:load_mode=no_jited"))) /* Define common capabilities tested using __caps_unpriv */ #define CAP_NET_ADMIN 12 @@ -172,6 +174,9 @@ #elif defined(__TARGET_ARCH_riscv) #define SYSCALL_WRAPPER 1 #define SYS_PREFIX "__riscv_" +#elif defined(__TARGET_ARCH_powerpc) +#define SYSCALL_WRAPPER 1 +#define SYS_PREFIX "" #else #define SYSCALL_WRAPPER 0 #define SYS_PREFIX "__se_" @@ -208,4 +213,21 @@ #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #endif +#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ + defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \ + defined(__TARGET_ARCH_loongarch)) && \ + __clang_major__ >= 18 +#define CAN_USE_GOTOL +#endif + +#if _clang_major__ >= 18 +#define CAN_USE_BPF_ST +#endif + +#if __clang_major__ >= 18 && defined(ENABLE_ATOMICS_TESTS) && \ + (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86)) +#define CAN_USE_LOAD_ACQ_STORE_REL +#endif + #endif diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h index 59843b430f76..659694162739 100644 --- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h +++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h @@ -15,7 +15,11 @@ #define SO_KEEPALIVE 9 #define SO_PRIORITY 12 #define SO_REUSEPORT 15 +#if defined(__TARGET_ARCH_powerpc) +#define SO_RCVLOWAT 16 +#else #define SO_RCVLOWAT 18 +#endif #define SO_BINDTODEVICE 25 #define SO_MARK 36 #define SO_MAX_PACING_RATE 47 @@ -49,6 +53,7 @@ #define TCP_SAVED_SYN 28 #define TCP_CA_NAME_MAX 16 #define TCP_NAGLE_OFF 1 +#define TCP_RTO_MAX_MS 44 #define TCP_ECN_OK 1 #define TCP_ECN_QUEUE_CWR 2 diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_bad_signed_arr_elem_sz.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_bad_signed_arr_elem_sz.c new file mode 100644 index 000000000000..21a560427b10 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_bad_signed_arr_elem_sz.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_arrays___err_bad_signed_arr_elem_sz x) {} diff --git a/tools/testing/selftests/bpf/progs/cgroup_preorder.c b/tools/testing/selftests/bpf/progs/cgroup_preorder.c new file mode 100644 index 000000000000..4ef6202baa0a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgroup_preorder.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +unsigned int idx; +__u8 result[4]; + +SEC("cgroup/getsockopt") +int child(struct bpf_sockopt *ctx) +{ + if (idx < 4) + result[idx++] = 1; + return 1; +} + +SEC("cgroup/getsockopt") +int child_2(struct bpf_sockopt *ctx) +{ + if (idx < 4) + result[idx++] = 2; + return 1; +} + +SEC("cgroup/getsockopt") +int parent(struct bpf_sockopt *ctx) +{ + if (idx < 4) + result[idx++] = 3; + return 1; +} + +SEC("cgroup/getsockopt") +int parent_2(struct bpf_sockopt *ctx) +{ + if (idx < 4) + result[idx++] = 4; + return 1; +} diff --git a/tools/testing/selftests/bpf/progs/changes_pkt_data.c b/tools/testing/selftests/bpf/progs/changes_pkt_data.c deleted file mode 100644 index 43cada48b28a..000000000000 --- a/tools/testing/selftests/bpf/progs/changes_pkt_data.c +++ /dev/null @@ -1,39 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include <linux/bpf.h> -#include <bpf/bpf_helpers.h> - -__noinline -long changes_pkt_data(struct __sk_buff *sk) -{ - return bpf_skb_pull_data(sk, 0); -} - -__noinline __weak -long does_not_change_pkt_data(struct __sk_buff *sk) -{ - return 0; -} - -SEC("?tc") -int main_with_subprogs(struct __sk_buff *sk) -{ - changes_pkt_data(sk); - does_not_change_pkt_data(sk); - return 0; -} - -SEC("?tc") -int main_changes(struct __sk_buff *sk) -{ - bpf_skb_pull_data(sk, 0); - return 0; -} - -SEC("?tc") -int main_does_not_change(struct __sk_buff *sk) -{ - return 0; -} - -char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/compute_live_registers.c b/tools/testing/selftests/bpf/progs/compute_live_registers.c new file mode 100644 index 000000000000..f3d79aecbf93 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/compute_live_registers.c @@ -0,0 +1,424 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "../../../include/linux/filter.h" +#include "bpf_arena_common.h" +#include "bpf_misc.h" + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} test_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARENA); + __uint(map_flags, BPF_F_MMAPABLE); + __uint(max_entries, 1); +} arena SEC(".maps"); + +SEC("socket") +__log_level(2) +__msg(" 0: .......... (b7) r0 = 42") +__msg(" 1: 0......... (bf) r1 = r0") +__msg(" 2: .1........ (bf) r2 = r1") +__msg(" 3: ..2....... (bf) r3 = r2") +__msg(" 4: ...3...... (bf) r4 = r3") +__msg(" 5: ....4..... (bf) r5 = r4") +__msg(" 6: .....5.... (bf) r6 = r5") +__msg(" 7: ......6... (bf) r7 = r6") +__msg(" 8: .......7.. (bf) r8 = r7") +__msg(" 9: ........8. (bf) r9 = r8") +__msg("10: .........9 (bf) r0 = r9") +__msg("11: 0......... (95) exit") +__naked void assign_chain(void) +{ + asm volatile ( + "r0 = 42;" + "r1 = r0;" + "r2 = r1;" + "r3 = r2;" + "r4 = r3;" + "r5 = r4;" + "r6 = r5;" + "r7 = r6;" + "r8 = r7;" + "r9 = r8;" + "r0 = r9;" + "exit;" + ::: __clobber_all); +} + +SEC("socket") +__log_level(2) +__msg("0: .......... (b7) r1 = 7") +__msg("1: .1........ (07) r1 += 7") +__msg("2: .......... (b7) r2 = 7") +__msg("3: ..2....... (b7) r3 = 42") +__msg("4: ..23...... (0f) r2 += r3") +__msg("5: .......... (b7) r0 = 0") +__msg("6: 0......... (95) exit") +__naked void arithmetics(void) +{ + asm volatile ( + "r1 = 7;" + "r1 += 7;" + "r2 = 7;" + "r3 = 42;" + "r2 += r3;" + "r0 = 0;" + "exit;" + ::: __clobber_all); +} + +#ifdef CAN_USE_BPF_ST +SEC("socket") +__log_level(2) +__msg(" 1: .1........ (07) r1 += -8") +__msg(" 2: .1........ (7a) *(u64 *)(r1 +0) = 7") +__msg(" 3: .1........ (b7) r2 = 42") +__msg(" 4: .12....... (7b) *(u64 *)(r1 +0) = r2") +__msg(" 5: .12....... (7b) *(u64 *)(r1 +0) = r2") +__msg(" 6: .......... (b7) r0 = 0") +__naked void store(void) +{ + asm volatile ( + "r1 = r10;" + "r1 += -8;" + "*(u64 *)(r1 +0) = 7;" + "r2 = 42;" + "*(u64 *)(r1 +0) = r2;" + "*(u64 *)(r1 +0) = r2;" + "r0 = 0;" + "exit;" + ::: __clobber_all); +} +#endif + +SEC("socket") +__log_level(2) +__msg("1: ....4..... (07) r4 += -8") +__msg("2: ....4..... (79) r5 = *(u64 *)(r4 +0)") +__msg("3: ....45.... (07) r4 += -8") +__naked void load(void) +{ + asm volatile ( + "r4 = r10;" + "r4 += -8;" + "r5 = *(u64 *)(r4 +0);" + "r4 += -8;" + "r0 = r5;" + "exit;" + ::: __clobber_all); +} + +SEC("socket") +__log_level(2) +__msg("0: .1........ (61) r2 = *(u32 *)(r1 +0)") +__msg("1: ..2....... (d4) r2 = le64 r2") +__msg("2: ..2....... (bf) r0 = r2") +__naked void endian(void) +{ + asm volatile ( + "r2 = *(u32 *)(r1 +0);" + "r2 = le64 r2;" + "r0 = r2;" + "exit;" + ::: __clobber_all); +} + +SEC("socket") +__log_level(2) +__msg(" 8: 0......... (b7) r1 = 1") +__msg(" 9: 01........ (db) r1 = atomic64_fetch_add((u64 *)(r0 +0), r1)") +__msg("10: 01........ (c3) lock *(u32 *)(r0 +0) += r1") +__msg("11: 01........ (db) r1 = atomic64_xchg((u64 *)(r0 +0), r1)") +__msg("12: 01........ (bf) r2 = r0") +__msg("13: .12....... (bf) r0 = r1") +__msg("14: 012....... (db) r0 = atomic64_cmpxchg((u64 *)(r2 +0), r0, r1)") +__naked void atomic(void) +{ + asm volatile ( + "r2 = r10;" + "r2 += -8;" + "r1 = 0;" + "*(u64 *)(r2 +0) = r1;" + "r1 = %[test_map] ll;" + "call %[bpf_map_lookup_elem];" + "if r0 == 0 goto 1f;" + "r1 = 1;" + "r1 = atomic_fetch_add((u64 *)(r0 +0), r1);" + ".8byte %[add_nofetch];" /* same as "lock *(u32 *)(r0 +0) += r1;" */ + "r1 = xchg_64(r0 + 0, r1);" + "r2 = r0;" + "r0 = r1;" + "r0 = cmpxchg_64(r2 + 0, r0, r1);" + "1: exit;" + : + : __imm(bpf_map_lookup_elem), + __imm_addr(test_map), + __imm_insn(add_nofetch, BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_0, BPF_REG_1, 0)) + : __clobber_all); +} + +#ifdef CAN_USE_LOAD_ACQ_STORE_REL + +SEC("socket") +__log_level(2) +__msg("2: .12....... (db) store_release((u64 *)(r2 -8), r1)") +__msg("3: .......... (bf) r3 = r10") +__msg("4: ...3...... (db) r4 = load_acquire((u64 *)(r3 -8))") +__naked void atomic_load_acq_store_rel(void) +{ + asm volatile ( + "r1 = 42;" + "r2 = r10;" + ".8byte %[store_release_insn];" /* store_release((u64 *)(r2 - 8), r1); */ + "r3 = r10;" + ".8byte %[load_acquire_insn];" /* r4 = load_acquire((u64 *)(r3 + 0)); */ + "r0 = r4;" + "exit;" + : + : __imm_insn(store_release_insn, + BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_2, BPF_REG_1, -8)), + __imm_insn(load_acquire_insn, + BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_4, BPF_REG_3, -8)) + : __clobber_all); +} + +#endif /* CAN_USE_LOAD_ACQ_STORE_REL */ + +SEC("socket") +__log_level(2) +__msg("4: .12....7.. (85) call bpf_trace_printk#6") +__msg("5: 0......7.. (0f) r0 += r7") +__naked void regular_call(void) +{ + asm volatile ( + "r7 = 1;" + "r1 = r10;" + "r1 += -8;" + "r2 = 1;" + "call %[bpf_trace_printk];" + "r0 += r7;" + "exit;" + : + : __imm(bpf_trace_printk) + : __clobber_all); +} + +SEC("socket") +__log_level(2) +__msg("2: 012....... (25) if r1 > 0x7 goto pc+1") +__msg("3: ..2....... (bf) r0 = r2") +__naked void if1(void) +{ + asm volatile ( + "r0 = 1;" + "r2 = 2;" + "if r1 > 0x7 goto +1;" + "r0 = r2;" + "exit;" + ::: __clobber_all); +} + +SEC("socket") +__log_level(2) +__msg("3: 0123...... (2d) if r1 > r3 goto pc+1") +__msg("4: ..2....... (bf) r0 = r2") +__naked void if2(void) +{ + asm volatile ( + "r0 = 1;" + "r2 = 2;" + "r3 = 7;" + "if r1 > r3 goto +1;" + "r0 = r2;" + "exit;" + ::: __clobber_all); +} + +SEC("socket") +__log_level(2) +__msg("0: .......... (b7) r1 = 0") +__msg("1: .1........ (b7) r2 = 7") +__msg("2: .12....... (25) if r1 > 0x7 goto pc+4") +__msg("3: .12....... (07) r1 += 1") +__msg("4: .12....... (27) r2 *= 2") +__msg("5: .12....... (05) goto pc+0") +__msg("6: .12....... (05) goto pc-5") +__msg("7: .......... (b7) r0 = 0") +__msg("8: 0......... (95) exit") +__naked void loop(void) +{ + asm volatile ( + "r1 = 0;" + "r2 = 7;" + "if r1 > 0x7 goto +4;" + "r1 += 1;" + "r2 *= 2;" + "goto +0;" + "goto -5;" + "r0 = 0;" + "exit;" + : + : __imm(bpf_trace_printk) + : __clobber_all); +} + +#ifdef CAN_USE_GOTOL +SEC("socket") +__log_level(2) +__msg("2: .123...... (25) if r1 > 0x7 goto pc+2") +__msg("3: ..2....... (bf) r0 = r2") +__msg("4: 0......... (06) gotol pc+1") +__msg("5: ...3...... (bf) r0 = r3") +__msg("6: 0......... (95) exit") +__naked void gotol(void) +{ + asm volatile ( + "r2 = 42;" + "r3 = 24;" + "if r1 > 0x7 goto +2;" + "r0 = r2;" + "gotol +1;" + "r0 = r3;" + "exit;" + : + : __imm(bpf_trace_printk) + : __clobber_all); +} +#endif + +SEC("socket") +__log_level(2) +__msg("0: .......... (b7) r1 = 1") +__msg("1: .1........ (e5) may_goto pc+1") +__msg("2: .......... (05) goto pc-3") +__msg("3: .1........ (bf) r0 = r1") +__msg("4: 0......... (95) exit") +__naked void may_goto(void) +{ + asm volatile ( + "1: r1 = 1;" + ".8byte %[may_goto];" + "goto 1b;" + "r0 = r1;" + "exit;" + : + : __imm(bpf_get_smp_processor_id), + __imm_insn(may_goto, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, +1 /* offset */, 0)) + : __clobber_all); +} + +SEC("socket") +__log_level(2) +__msg("1: 0......... (18) r2 = 0x7") +__msg("3: 0.2....... (0f) r0 += r2") +__naked void ldimm64(void) +{ + asm volatile ( + "r0 = 0;" + "r2 = 0x7 ll;" + "r0 += r2;" + "exit;" + : + :: __clobber_all); +} + +/* No rules specific for LD_ABS/LD_IND, default behaviour kicks in */ +SEC("socket") +__log_level(2) +__msg("2: 0123456789 (30) r0 = *(u8 *)skb[42]") +__msg("3: 012.456789 (0f) r7 += r0") +__msg("4: 012.456789 (b7) r3 = 42") +__msg("5: 0123456789 (50) r0 = *(u8 *)skb[r3 + 0]") +__msg("6: 0......7.. (0f) r7 += r0") +__naked void ldabs(void) +{ + asm volatile ( + "r6 = r1;" + "r7 = 0;" + "r0 = *(u8 *)skb[42];" + "r7 += r0;" + "r3 = 42;" + ".8byte %[ld_ind];" /* same as "r0 = *(u8 *)skb[r3];" */ + "r7 += r0;" + "r0 = r7;" + "exit;" + : + : __imm_insn(ld_ind, BPF_LD_IND(BPF_B, BPF_REG_3, 0)) + : __clobber_all); +} + + +#ifdef __BPF_FEATURE_ADDR_SPACE_CAST +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +__log_level(2) +__msg(" 6: .12345.... (85) call bpf_arena_alloc_pages") +__msg(" 7: 0......... (bf) r1 = addr_space_cast(r0, 0, 1)") +__msg(" 8: .1........ (b7) r2 = 42") +__naked void addr_space_cast(void) +{ + asm volatile ( + "r1 = %[arena] ll;" + "r2 = 0;" + "r3 = 1;" + "r4 = 0;" + "r5 = 0;" + "call %[bpf_arena_alloc_pages];" + "r1 = addr_space_cast(r0, 0, 1);" + "r2 = 42;" + "*(u64 *)(r1 +0) = r2;" + "r0 = 0;" + "exit;" + : + : __imm(bpf_arena_alloc_pages), + __imm_addr(arena) + : __clobber_all); +} +#endif + +static __used __naked int aux1(void) +{ + asm volatile ( + "r0 = r1;" + "r0 += r2;" + "exit;" + ::: __clobber_all); +} + +SEC("socket") +__log_level(2) +__msg("0: ....45.... (b7) r1 = 1") +__msg("1: .1..45.... (b7) r2 = 2") +__msg("2: .12.45.... (b7) r3 = 3") +/* Conservative liveness for subprog parameters. */ +__msg("3: .12345.... (85) call pc+2") +__msg("4: .......... (b7) r0 = 0") +__msg("5: 0......... (95) exit") +__msg("6: .12....... (bf) r0 = r1") +__msg("7: 0.2....... (0f) r0 += r2") +/* Conservative liveness for subprog return value. */ +__msg("8: 0......... (95) exit") +__naked void subprog1(void) +{ + asm volatile ( + "r1 = 1;" + "r2 = 2;" + "r3 = 3;" + "call aux1;" + "r0 = 0;" + "exit;" + ::: __clobber_all); +} + +/* to retain debug info for BTF generation */ +void kfunc_root(void) +{ + bpf_arena_alloc_pages(0, 0, 0, 0, 0); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/connect4_dropper.c b/tools/testing/selftests/bpf/progs/connect4_dropper.c index d3f4c5e4fb69..a3819a5d09c8 100644 --- a/tools/testing/selftests/bpf/progs/connect4_dropper.c +++ b/tools/testing/selftests/bpf/progs/connect4_dropper.c @@ -13,12 +13,14 @@ #define VERDICT_REJECT 0 #define VERDICT_PROCEED 1 +int port; + SEC("cgroup/connect4") int connect_v4_dropper(struct bpf_sock_addr *ctx) { if (ctx->type != SOCK_STREAM) return VERDICT_PROCEED; - if (ctx->user_port == bpf_htons(60120)) + if (ctx->user_port == bpf_htons(port)) return VERDICT_REJECT; return VERDICT_PROCEED; } diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h index fd8e1b4c6762..5760ae015e09 100644 --- a/tools/testing/selftests/bpf/progs/core_reloc_types.h +++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h @@ -347,6 +347,7 @@ struct core_reloc_nesting___err_too_deep { */ struct core_reloc_arrays_output { int a2; + int a3; char b123; int c1c; int d00d; @@ -455,6 +456,15 @@ struct core_reloc_arrays___err_bad_zero_sz_arr { struct core_reloc_arrays_substruct d[1][2]; }; +struct core_reloc_arrays___err_bad_signed_arr_elem_sz { + /* int -> short (signed!): not supported case */ + short a[5]; + char b[2][3][4]; + struct core_reloc_arrays_substruct c[3]; + struct core_reloc_arrays_substruct d[1][2]; + struct core_reloc_arrays_substruct f[][2]; +}; + /* * PRIMITIVES */ diff --git a/tools/testing/selftests/bpf/progs/cpumask_common.h b/tools/testing/selftests/bpf/progs/cpumask_common.h index 4ece7873ba60..86085b79f5ca 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_common.h +++ b/tools/testing/selftests/bpf/progs/cpumask_common.h @@ -61,6 +61,7 @@ u32 bpf_cpumask_any_distribute(const struct cpumask *src) __ksym __weak; u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1, const struct cpumask *src2) __ksym __weak; u32 bpf_cpumask_weight(const struct cpumask *cpumask) __ksym __weak; +int bpf_cpumask_populate(struct cpumask *cpumask, void *src, size_t src__sz) __ksym __weak; void bpf_rcu_read_lock(void) __ksym __weak; void bpf_rcu_read_unlock(void) __ksym __weak; diff --git a/tools/testing/selftests/bpf/progs/cpumask_failure.c b/tools/testing/selftests/bpf/progs/cpumask_failure.c index b40b52548ffb..8a2fd596c8a3 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_failure.c +++ b/tools/testing/selftests/bpf/progs/cpumask_failure.c @@ -222,3 +222,41 @@ int BPF_PROG(test_invalid_nested_array, struct task_struct *task, u64 clone_flag return 0; } + +SEC("tp_btf/task_newtask") +__failure __msg("type=scalar expected=fp") +int BPF_PROG(test_populate_invalid_destination, struct task_struct *task, u64 clone_flags) +{ + struct bpf_cpumask *invalid = (struct bpf_cpumask *)0x123456; + u64 bits; + int ret; + + ret = bpf_cpumask_populate((struct cpumask *)invalid, &bits, sizeof(bits)); + if (!ret) + err = 2; + + return 0; +} + +SEC("tp_btf/task_newtask") +__failure __msg("leads to invalid memory access") +int BPF_PROG(test_populate_invalid_source, struct task_struct *task, u64 clone_flags) +{ + void *garbage = (void *)0x123456; + struct bpf_cpumask *local; + int ret; + + local = create_cpumask(); + if (!local) { + err = 1; + return 0; + } + + ret = bpf_cpumask_populate((struct cpumask *)local, garbage, 8); + if (!ret) + err = 2; + + bpf_cpumask_release(local); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/cpumask_success.c b/tools/testing/selftests/bpf/progs/cpumask_success.c index 80ee469b0b60..0e04c31b91c0 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_success.c +++ b/tools/testing/selftests/bpf/progs/cpumask_success.c @@ -749,7 +749,6 @@ out: } SEC("tp_btf/task_newtask") -__success int BPF_PROG(test_refcount_null_tracking, struct task_struct *task, u64 clone_flags) { struct bpf_cpumask *mask1, *mask2; @@ -770,3 +769,122 @@ free_masks_return: bpf_cpumask_release(mask2); return 0; } + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_populate_reject_small_mask, struct task_struct *task, u64 clone_flags) +{ + struct bpf_cpumask *local; + u8 toofewbits; + int ret; + + if (!is_test_task()) + return 0; + + local = create_cpumask(); + if (!local) + return 0; + + /* The kfunc should prevent this operation */ + ret = bpf_cpumask_populate((struct cpumask *)local, &toofewbits, sizeof(toofewbits)); + if (ret != -EACCES) + err = 2; + + bpf_cpumask_release(local); + + return 0; +} + +/* Mask is guaranteed to be large enough for bpf_cpumask_t. */ +#define CPUMASK_TEST_MASKLEN (sizeof(cpumask_t)) + +/* Add an extra word for the test_populate_reject_unaligned test. */ +u64 bits[CPUMASK_TEST_MASKLEN / 8 + 1]; +extern bool CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS __kconfig __weak; + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_populate_reject_unaligned, struct task_struct *task, u64 clone_flags) +{ + struct bpf_cpumask *mask; + char *src; + int ret; + + if (!is_test_task()) + return 0; + + /* Skip if unaligned accesses are fine for this arch. */ + if (CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) + return 0; + + mask = bpf_cpumask_create(); + if (!mask) { + err = 1; + return 0; + } + + /* Misalign the source array by a byte. */ + src = &((char *)bits)[1]; + + ret = bpf_cpumask_populate((struct cpumask *)mask, src, CPUMASK_TEST_MASKLEN); + if (ret != -EINVAL) + err = 2; + + bpf_cpumask_release(mask); + + return 0; +} + + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_populate, struct task_struct *task, u64 clone_flags) +{ + struct bpf_cpumask *mask; + bool bit; + int ret; + int i; + + if (!is_test_task()) + return 0; + + /* Set only odd bits. */ + __builtin_memset(bits, 0xaa, CPUMASK_TEST_MASKLEN); + + mask = bpf_cpumask_create(); + if (!mask) { + err = 1; + return 0; + } + + /* Pass the entire bits array, the kfunc will only copy the valid bits. */ + ret = bpf_cpumask_populate((struct cpumask *)mask, bits, CPUMASK_TEST_MASKLEN); + if (ret) { + err = 2; + goto out; + } + + /* + * Test is there to appease the verifier. We cannot directly + * access NR_CPUS, the upper bound for nr_cpus, so we infer + * it from the size of cpumask_t. + */ + if (nr_cpus < 0 || nr_cpus >= CPUMASK_TEST_MASKLEN * 8) { + err = 3; + goto out; + } + + bpf_for(i, 0, nr_cpus) { + /* Odd-numbered bits should be set, even ones unset. */ + bit = bpf_cpumask_test_cpu(i, (const struct cpumask *)mask); + if (bit == (i % 2 != 0)) + continue; + + err = 4; + break; + } + +out: + bpf_cpumask_release(mask); + + return 0; +} + +#undef CPUMASK_TEST_MASKLEN diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c index bfcc85686cf0..e1fba28e4a86 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_success.c +++ b/tools/testing/selftests/bpf/progs/dynptr_success.c @@ -1,20 +1,19 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2022 Facebook */ +#include <vmlinux.h> #include <string.h> #include <stdbool.h> -#include <linux/bpf.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> #include "bpf_misc.h" -#include "bpf_kfuncs.h" #include "errno.h" char _license[] SEC("license") = "GPL"; int pid, err, val; -struct sample { +struct ringbuf_sample { int pid; int seq; long value; @@ -121,7 +120,7 @@ int test_dynptr_data(void *ctx) static int ringbuf_callback(__u32 index, void *data) { - struct sample *sample; + struct ringbuf_sample *sample; struct bpf_dynptr *ptr = (struct bpf_dynptr *)data; @@ -138,7 +137,7 @@ SEC("?tp/syscalls/sys_enter_nanosleep") int test_ringbuf(void *ctx) { struct bpf_dynptr ptr; - struct sample *sample; + struct ringbuf_sample *sample; if (bpf_get_current_pid_tgid() >> 32 != pid) return 0; @@ -567,3 +566,117 @@ int BPF_PROG(test_dynptr_skb_tp_btf, void *skb, void *location) return 1; } + +static inline int bpf_memcmp(const char *a, const char *b, u32 size) +{ + int i; + + bpf_for(i, 0, size) { + if (a[i] != b[i]) + return a[i] < b[i] ? -1 : 1; + } + return 0; +} + +SEC("?tp/syscalls/sys_enter_nanosleep") +int test_dynptr_copy(void *ctx) +{ + char data[] = "hello there, world!!"; + char buf[32] = {'\0'}; + __u32 sz = sizeof(data); + struct bpf_dynptr src, dst; + + bpf_ringbuf_reserve_dynptr(&ringbuf, sz, 0, &src); + bpf_ringbuf_reserve_dynptr(&ringbuf, sz, 0, &dst); + + /* Test basic case of copying contiguous memory backed dynptrs */ + err = bpf_dynptr_write(&src, 0, data, sz, 0); + err = err ?: bpf_dynptr_copy(&dst, 0, &src, 0, sz); + err = err ?: bpf_dynptr_read(buf, sz, &dst, 0, 0); + err = err ?: bpf_memcmp(data, buf, sz); + + /* Test that offsets are handled correctly */ + err = err ?: bpf_dynptr_copy(&dst, 3, &src, 5, sz - 5); + err = err ?: bpf_dynptr_read(buf, sz - 5, &dst, 3, 0); + err = err ?: bpf_memcmp(data + 5, buf, sz - 5); + + bpf_ringbuf_discard_dynptr(&src, 0); + bpf_ringbuf_discard_dynptr(&dst, 0); + return 0; +} + +SEC("xdp") +int test_dynptr_copy_xdp(struct xdp_md *xdp) +{ + struct bpf_dynptr ptr_buf, ptr_xdp; + char data[] = "qwertyuiopasdfghjkl"; + char buf[32] = {'\0'}; + __u32 len = sizeof(data); + int i, chunks = 200; + + /* ptr_xdp is backed by non-contiguous memory */ + bpf_dynptr_from_xdp(xdp, 0, &ptr_xdp); + bpf_ringbuf_reserve_dynptr(&ringbuf, len * chunks, 0, &ptr_buf); + + /* Destination dynptr is backed by non-contiguous memory */ + bpf_for(i, 0, chunks) { + err = bpf_dynptr_write(&ptr_buf, i * len, data, len, 0); + if (err) + goto out; + } + + err = bpf_dynptr_copy(&ptr_xdp, 0, &ptr_buf, 0, len * chunks); + if (err) + goto out; + + bpf_for(i, 0, chunks) { + __builtin_memset(buf, 0, sizeof(buf)); + err = bpf_dynptr_read(&buf, len, &ptr_xdp, i * len, 0); + if (err) + goto out; + if (bpf_memcmp(data, buf, len) != 0) + goto out; + } + + /* Source dynptr is backed by non-contiguous memory */ + __builtin_memset(buf, 0, sizeof(buf)); + bpf_for(i, 0, chunks) { + err = bpf_dynptr_write(&ptr_buf, i * len, buf, len, 0); + if (err) + goto out; + } + + err = bpf_dynptr_copy(&ptr_buf, 0, &ptr_xdp, 0, len * chunks); + if (err) + goto out; + + bpf_for(i, 0, chunks) { + __builtin_memset(buf, 0, sizeof(buf)); + err = bpf_dynptr_read(&buf, len, &ptr_buf, i * len, 0); + if (err) + goto out; + if (bpf_memcmp(data, buf, len) != 0) + goto out; + } + + /* Both source and destination dynptrs are backed by non-contiguous memory */ + err = bpf_dynptr_copy(&ptr_xdp, 2, &ptr_xdp, len, len * (chunks - 1)); + if (err) + goto out; + + bpf_for(i, 0, chunks - 1) { + __builtin_memset(buf, 0, sizeof(buf)); + err = bpf_dynptr_read(&buf, len, &ptr_xdp, 2 + i * len, 0); + if (err) + goto out; + if (bpf_memcmp(data, buf, len) != 0) + goto out; + } + + if (bpf_dynptr_copy(&ptr_xdp, 2000, &ptr_xdp, 0, len * chunks) != -E2BIG) + err = 1; + +out: + bpf_ringbuf_discard_dynptr(&ptr_buf, 0); + return XDP_DROP; +} diff --git a/tools/testing/selftests/bpf/progs/fexit_noreturns.c b/tools/testing/selftests/bpf/progs/fexit_noreturns.c new file mode 100644 index 000000000000..54654539f550 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/fexit_noreturns.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +SEC("fexit/do_exit") +__failure __msg("Attaching fexit/fmod_ret to __noreturn functions is rejected.") +int BPF_PROG(noreturns) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/irq.c b/tools/testing/selftests/bpf/progs/irq.c index b0b53d980964..74d912b22de9 100644 --- a/tools/testing/selftests/bpf/progs/irq.c +++ b/tools/testing/selftests/bpf/progs/irq.c @@ -11,6 +11,9 @@ extern void bpf_local_irq_save(unsigned long *) __weak __ksym; extern void bpf_local_irq_restore(unsigned long *) __weak __ksym; extern int bpf_copy_from_user_str(void *dst, u32 dst__sz, const void *unsafe_ptr__ign, u64 flags) __weak __ksym; +struct bpf_res_spin_lock lockA __hidden SEC(".data.A"); +struct bpf_res_spin_lock lockB __hidden SEC(".data.B"); + SEC("?tc") __failure __msg("arg#0 doesn't point to an irq flag on stack") int irq_save_bad_arg(struct __sk_buff *ctx) @@ -222,7 +225,7 @@ int __noinline global_local_irq_balance(void) } SEC("?tc") -__failure __msg("global function calls are not allowed with IRQs disabled") +__success int irq_global_subprog(struct __sk_buff *ctx) { unsigned long flags; @@ -441,4 +444,123 @@ int irq_ooo_refs_array(struct __sk_buff *ctx) return 0; } +int __noinline +global_subprog(int i) +{ + if (i) + bpf_printk("%p", &i); + return i; +} + +int __noinline +global_sleepable_helper_subprog(int i) +{ + if (i) + bpf_copy_from_user(&i, sizeof(i), NULL); + return i; +} + +int __noinline +global_sleepable_kfunc_subprog(int i) +{ + if (i) + bpf_copy_from_user_str(&i, sizeof(i), NULL, 0); + global_subprog(i); + return i; +} + +int __noinline +global_subprog_calling_sleepable_global(int i) +{ + if (!i) + global_sleepable_kfunc_subprog(i); + return i; +} + +SEC("?syscall") +__success +int irq_non_sleepable_global_subprog(void *ctx) +{ + unsigned long flags; + + bpf_local_irq_save(&flags); + global_subprog(0); + bpf_local_irq_restore(&flags); + return 0; +} + +SEC("?syscall") +__failure __msg("global functions that may sleep are not allowed in non-sleepable context") +int irq_sleepable_helper_global_subprog(void *ctx) +{ + unsigned long flags; + + bpf_local_irq_save(&flags); + global_sleepable_helper_subprog(0); + bpf_local_irq_restore(&flags); + return 0; +} + +SEC("?syscall") +__failure __msg("global functions that may sleep are not allowed in non-sleepable context") +int irq_sleepable_global_subprog_indirect(void *ctx) +{ + unsigned long flags; + + bpf_local_irq_save(&flags); + global_subprog_calling_sleepable_global(0); + bpf_local_irq_restore(&flags); + return 0; +} + +SEC("?tc") +__failure __msg("cannot restore irq state out of order") +int irq_ooo_lock_cond_inv(struct __sk_buff *ctx) +{ + unsigned long flags1, flags2; + + if (bpf_res_spin_lock_irqsave(&lockA, &flags1)) + return 0; + if (bpf_res_spin_lock_irqsave(&lockB, &flags2)) { + bpf_res_spin_unlock_irqrestore(&lockA, &flags1); + return 0; + } + + bpf_res_spin_unlock_irqrestore(&lockB, &flags1); + bpf_res_spin_unlock_irqrestore(&lockA, &flags2); + return 0; +} + +SEC("?tc") +__failure __msg("function calls are not allowed") +int irq_wrong_kfunc_class_1(struct __sk_buff *ctx) +{ + unsigned long flags1; + + if (bpf_res_spin_lock_irqsave(&lockA, &flags1)) + return 0; + /* For now, bpf_local_irq_restore is not allowed in critical section, + * but this test ensures error will be caught with kfunc_class when it's + * opened up. Tested by temporarily permitting this kfunc in critical + * section. + */ + bpf_local_irq_restore(&flags1); + bpf_res_spin_unlock_irqrestore(&lockA, &flags1); + return 0; +} + +SEC("?tc") +__failure __msg("function calls are not allowed") +int irq_wrong_kfunc_class_2(struct __sk_buff *ctx) +{ + unsigned long flags1, flags2; + + bpf_local_irq_save(&flags1); + if (bpf_res_spin_lock_irqsave(&lockA, &flags2)) + return 0; + bpf_local_irq_restore(&flags2); + bpf_res_spin_unlock_irqrestore(&lockA, &flags1); + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index 190822b2f08b..427b72954b87 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -7,6 +7,8 @@ #include "bpf_misc.h" #include "bpf_compiler.h" +#define unlikely(x) __builtin_expect(!!(x), 0) + static volatile int zero = 0; int my_pid; @@ -1175,6 +1177,122 @@ __naked int loop_state_deps2(void) } SEC("?raw_tp") +__failure +__msg("math between fp pointer and register with unbounded") +__flag(BPF_F_TEST_STATE_FREQ) +__naked int loop_state_deps3(void) +{ + /* This is equivalent to a C program below. + * + * if (random() != 24) { // assume false branch is placed first + * i = iter_new(); // fp[-8] + * while (iter_next(i)); + * iter_destroy(i); + * return; + * } + * + * for (i = 10; i > 0; i--); // increase dfs_depth for child states + * + * i = iter_new(); // fp[-8] + * b = -24; // r8 + * for (;;) { // checkpoint (L) + * if (iter_next(i)) // checkpoint (N) + * break; + * if (random() == 77) { // assume false branch is placed first + * *(u64 *)(r10 + b) = 7; // this is not safe when b == -25 + * iter_destroy(i); + * return; + * } + * if (random() == 42) { // assume false branch is placed first + * b = -25; + * } + * } + * iter_destroy(i); + * + * In case of a buggy verifier first loop might poison + * env->cur_state->loop_entry with a state having 0 branches + * and small dfs_depth. This would trigger NOT_EXACT states + * comparison for some states within second loop. + * Specifically, checkpoint (L) might be problematic if: + * - branch with '*(u64 *)(r10 + b) = 7' is not explored yet; + * - checkpoint (L) is first reached in state {b=-24}; + * - traversal is pruned at checkpoint (N) setting checkpoint's (L) + * branch count to 0, thus making it eligible for use in pruning; + * - checkpoint (L) is next reached in state {b=-25}, + * this would cause NOT_EXACT comparison with a state {b=-24} + * while 'b' is not marked precise yet. + */ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "if r0 == 24 goto 2f;" + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 5;" + "call %[bpf_iter_num_new];" + "1:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_next];" + "if r0 != 0 goto 1b;" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + "2:" + /* loop to increase dfs_depth */ + "r0 = 10;" + "3:" + "r0 -= 1;" + "if r0 != 0 goto 3b;" + /* end of loop */ + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "r8 = -24;" + "main_loop_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto main_loop_end_%=;" + /* first if */ + "call %[bpf_get_prandom_u32];" + "if r0 == 77 goto unsafe_write_%=;" + /* second if */ + "call %[bpf_get_prandom_u32];" + "if r0 == 42 goto poison_r8_%=;" + /* iterate */ + "goto main_loop_%=;" + "main_loop_end_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + + "unsafe_write_%=:" + "r0 = r10;" + "r0 += r8;" + "r1 = 7;" + "*(u64 *)(r0 + 0) = r1;" + "goto main_loop_end_%=;" + + "poison_r8_%=:" + "r8 = -25;" + "goto main_loop_%=;" + : + : __imm(bpf_get_prandom_u32), + __imm(bpf_iter_num_new), + __imm(bpf_iter_num_next), + __imm(bpf_iter_num_destroy) + : __clobber_all + ); +} + +SEC("?raw_tp") __success __naked int triple_continue(void) { @@ -1512,4 +1630,25 @@ int iter_destroy_bad_arg(const void *ctx) return 0; } +SEC("raw_tp") +__success +int clean_live_states(const void *ctx) +{ + char buf[1]; + int i, j, k, l, m, n, o; + + bpf_for(i, 0, 10) + bpf_for(j, 0, 10) + bpf_for(k, 0, 10) + bpf_for(l, 0, 10) + bpf_for(m, 0, 10) + bpf_for(n, 0, 10) + bpf_for(o, 0, 10) { + if (unlikely(bpf_get_prandom_u32())) + buf[0] = 42; + bpf_printk("%s", buf); + } + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/net_timestamping.c b/tools/testing/selftests/bpf/progs/net_timestamping.c new file mode 100644 index 000000000000..b4c2f0f2be11 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/net_timestamping.c @@ -0,0 +1,248 @@ +#include "vmlinux.h" +#include "bpf_tracing_net.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" +#include "bpf_kfuncs.h" +#include <errno.h> + +__u32 monitored_pid = 0; + +int nr_active; +int nr_snd; +int nr_passive; +int nr_sched; +int nr_txsw; +int nr_ack; + +struct sk_stg { + __u64 sendmsg_ns; /* record ts when sendmsg is called */ +}; + +struct sk_tskey { + u64 cookie; + u32 tskey; +}; + +struct delay_info { + u64 sendmsg_ns; /* record ts when sendmsg is called */ + u32 sched_delay; /* SCHED_CB - sendmsg_ns */ + u32 snd_sw_delay; /* SND_SW_CB - SCHED_CB */ + u32 ack_delay; /* ACK_CB - SND_SW_CB */ +}; + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct sk_stg); +} sk_stg_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, struct sk_tskey); + __type(value, struct delay_info); + __uint(max_entries, 1024); +} time_map SEC(".maps"); + +static u64 delay_tolerance_nsec = 10000000000; /* 10 second as an example */ + +extern int bpf_sock_ops_enable_tx_tstamp(struct bpf_sock_ops_kern *skops, u64 flags) __ksym; + +static int bpf_test_sockopt(void *ctx, const struct sock *sk, int expected) +{ + int tmp, new = SK_BPF_CB_TX_TIMESTAMPING; + int opt = SK_BPF_CB_FLAGS; + int level = SOL_SOCKET; + + if (bpf_setsockopt(ctx, level, opt, &new, sizeof(new)) != expected) + return 1; + + if (bpf_getsockopt(ctx, level, opt, &tmp, sizeof(tmp)) != expected || + (!expected && tmp != new)) + return 1; + + return 0; +} + +static bool bpf_test_access_sockopt(void *ctx, const struct sock *sk) +{ + if (bpf_test_sockopt(ctx, sk, -EOPNOTSUPP)) + return true; + return false; +} + +static bool bpf_test_access_load_hdr_opt(struct bpf_sock_ops *skops) +{ + u8 opt[3] = {0}; + int load_flags = 0; + int ret; + + ret = bpf_load_hdr_opt(skops, opt, sizeof(opt), load_flags); + if (ret != -EOPNOTSUPP) + return true; + + return false; +} + +static bool bpf_test_access_cb_flags_set(struct bpf_sock_ops *skops) +{ + int ret; + + ret = bpf_sock_ops_cb_flags_set(skops, 0); + if (ret != -EOPNOTSUPP) + return true; + + return false; +} + +/* In the timestamping callbacks, we're not allowed to call the following + * BPF CALLs for the safety concern. Return false if expected. + */ +static bool bpf_test_access_bpf_calls(struct bpf_sock_ops *skops, + const struct sock *sk) +{ + if (bpf_test_access_sockopt(skops, sk)) + return true; + + if (bpf_test_access_load_hdr_opt(skops)) + return true; + + if (bpf_test_access_cb_flags_set(skops)) + return true; + + return false; +} + +static bool bpf_test_delay(struct bpf_sock_ops *skops, const struct sock *sk) +{ + struct bpf_sock_ops_kern *skops_kern; + u64 timestamp = bpf_ktime_get_ns(); + struct skb_shared_info *shinfo; + struct delay_info dinfo = {0}; + struct sk_tskey key = {0}; + struct delay_info *val; + struct sk_buff *skb; + struct sk_stg *stg; + u64 prior_ts, delay; + + if (bpf_test_access_bpf_calls(skops, sk)) + return false; + + skops_kern = bpf_cast_to_kern_ctx(skops); + skb = skops_kern->skb; + shinfo = bpf_core_cast(skb->head + skb->end, struct skb_shared_info); + + key.cookie = bpf_get_socket_cookie(skops); + if (!key.cookie) + return false; + + if (skops->op == BPF_SOCK_OPS_TSTAMP_SENDMSG_CB) { + stg = bpf_sk_storage_get(&sk_stg_map, (void *)sk, 0, 0); + if (!stg) + return false; + dinfo.sendmsg_ns = stg->sendmsg_ns; + bpf_sock_ops_enable_tx_tstamp(skops_kern, 0); + key.tskey = shinfo->tskey; + if (!key.tskey) + return false; + bpf_map_update_elem(&time_map, &key, &dinfo, BPF_ANY); + return true; + } + + key.tskey = shinfo->tskey; + if (!key.tskey) + return false; + + val = bpf_map_lookup_elem(&time_map, &key); + if (!val) + return false; + + switch (skops->op) { + case BPF_SOCK_OPS_TSTAMP_SCHED_CB: + val->sched_delay = timestamp - val->sendmsg_ns; + delay = val->sched_delay; + break; + case BPF_SOCK_OPS_TSTAMP_SND_SW_CB: + prior_ts = val->sched_delay + val->sendmsg_ns; + val->snd_sw_delay = timestamp - prior_ts; + delay = val->snd_sw_delay; + break; + case BPF_SOCK_OPS_TSTAMP_ACK_CB: + prior_ts = val->snd_sw_delay + val->sched_delay + val->sendmsg_ns; + val->ack_delay = timestamp - prior_ts; + delay = val->ack_delay; + break; + } + + if (delay >= delay_tolerance_nsec) + return false; + + /* Since it's the last one, remove from the map after latency check */ + if (skops->op == BPF_SOCK_OPS_TSTAMP_ACK_CB) + bpf_map_delete_elem(&time_map, &key); + + return true; +} + +SEC("fentry/tcp_sendmsg_locked") +int BPF_PROG(trace_tcp_sendmsg_locked, struct sock *sk, struct msghdr *msg, + size_t size) +{ + __u32 pid = bpf_get_current_pid_tgid() >> 32; + u64 timestamp = bpf_ktime_get_ns(); + u32 flag = sk->sk_bpf_cb_flags; + struct sk_stg *stg; + + if (pid != monitored_pid || !flag) + return 0; + + stg = bpf_sk_storage_get(&sk_stg_map, sk, 0, + BPF_SK_STORAGE_GET_F_CREATE); + if (!stg) + return 0; + + stg->sendmsg_ns = timestamp; + nr_snd += 1; + return 0; +} + +SEC("sockops") +int skops_sockopt(struct bpf_sock_ops *skops) +{ + struct bpf_sock *bpf_sk = skops->sk; + const struct sock *sk; + + if (!bpf_sk) + return 1; + + sk = (struct sock *)bpf_skc_to_tcp_sock(bpf_sk); + if (!sk) + return 1; + + switch (skops->op) { + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: + nr_active += !bpf_test_sockopt(skops, sk, 0); + break; + case BPF_SOCK_OPS_TSTAMP_SENDMSG_CB: + if (bpf_test_delay(skops, sk)) + nr_snd += 1; + break; + case BPF_SOCK_OPS_TSTAMP_SCHED_CB: + if (bpf_test_delay(skops, sk)) + nr_sched += 1; + break; + case BPF_SOCK_OPS_TSTAMP_SND_SW_CB: + if (bpf_test_delay(skops, sk)) + nr_txsw += 1; + break; + case BPF_SOCK_OPS_TSTAMP_ACK_CB: + if (bpf_test_delay(skops, sk)) + nr_ack += 1; + break; + } + + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/netns_cookie_prog.c b/tools/testing/selftests/bpf/progs/netns_cookie_prog.c index c6edf8dbefeb..94040714af18 100644 --- a/tools/testing/selftests/bpf/progs/netns_cookie_prog.c +++ b/tools/testing/selftests/bpf/progs/netns_cookie_prog.c @@ -28,6 +28,7 @@ struct { } sock_map SEC(".maps"); int tcx_init_netns_cookie, tcx_netns_cookie; +int cgroup_skb_init_netns_cookie, cgroup_skb_netns_cookie; SEC("sockops") int get_netns_cookie_sockops(struct bpf_sock_ops *ctx) @@ -91,4 +92,12 @@ int get_netns_cookie_tcx(struct __sk_buff *skb) return TCX_PASS; } +SEC("cgroup_skb/ingress") +int get_netns_cookie_cgroup_skb(struct __sk_buff *skb) +{ + cgroup_skb_init_netns_cookie = bpf_get_netns_cookie(NULL); + cgroup_skb_netns_cookie = bpf_get_netns_cookie(skb); + return SK_PASS; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/preempt_lock.c b/tools/testing/selftests/bpf/progs/preempt_lock.c index 6c5797bf0ead..7d04254e61f1 100644 --- a/tools/testing/selftests/bpf/progs/preempt_lock.c +++ b/tools/testing/selftests/bpf/progs/preempt_lock.c @@ -134,7 +134,7 @@ int __noinline preempt_global_subprog(void) } SEC("?tc") -__failure __msg("global function calls are not allowed with preemption disabled") +__success int preempt_global_subprog_test(struct __sk_buff *ctx) { preempt_disable(); @@ -143,4 +143,70 @@ int preempt_global_subprog_test(struct __sk_buff *ctx) return 0; } +int __noinline +global_subprog(int i) +{ + if (i) + bpf_printk("%p", &i); + return i; +} + +int __noinline +global_sleepable_helper_subprog(int i) +{ + if (i) + bpf_copy_from_user(&i, sizeof(i), NULL); + return i; +} + +int __noinline +global_sleepable_kfunc_subprog(int i) +{ + if (i) + bpf_copy_from_user_str(&i, sizeof(i), NULL, 0); + global_subprog(i); + return i; +} + +int __noinline +global_subprog_calling_sleepable_global(int i) +{ + if (!i) + global_sleepable_kfunc_subprog(i); + return i; +} + +SEC("?syscall") +__failure __msg("global functions that may sleep are not allowed in non-sleepable context") +int preempt_global_sleepable_helper_subprog(struct __sk_buff *ctx) +{ + preempt_disable(); + if (ctx->mark) + global_sleepable_helper_subprog(ctx->mark); + preempt_enable(); + return 0; +} + +SEC("?syscall") +__failure __msg("global functions that may sleep are not allowed in non-sleepable context") +int preempt_global_sleepable_kfunc_subprog(struct __sk_buff *ctx) +{ + preempt_disable(); + if (ctx->mark) + global_sleepable_kfunc_subprog(ctx->mark); + preempt_enable(); + return 0; +} + +SEC("?syscall") +__failure __msg("global functions that may sleep are not allowed in non-sleepable context") +int preempt_global_sleepable_subprog_indirect(struct __sk_buff *ctx) +{ + preempt_disable(); + if (ctx->mark) + global_subprog_calling_sleepable_global(ctx->mark); + preempt_enable(); + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/prepare.c b/tools/testing/selftests/bpf/progs/prepare.c new file mode 100644 index 000000000000..1f1dd547e4ee --- /dev/null +++ b/tools/testing/selftests/bpf/progs/prepare.c @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta */ +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +//#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +int err; + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 4096); +} ringbuf SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); +} array_map SEC(".maps"); + +SEC("cgroup_skb/egress") +int program(struct __sk_buff *skb) +{ + err = 0; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/priv_freplace_prog.c b/tools/testing/selftests/bpf/progs/priv_freplace_prog.c new file mode 100644 index 000000000000..ccf1b04010ba --- /dev/null +++ b/tools/testing/selftests/bpf/progs/priv_freplace_prog.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +SEC("freplace/xdp_prog1") +int new_xdp_prog2(struct xdp_md *xd) +{ + return XDP_DROP; +} diff --git a/tools/testing/selftests/bpf/progs/priv_prog.c b/tools/testing/selftests/bpf/progs/priv_prog.c index 3c7b2b618c8a..725e29595079 100644 --- a/tools/testing/selftests/bpf/progs/priv_prog.c +++ b/tools/testing/selftests/bpf/progs/priv_prog.c @@ -6,8 +6,8 @@ char _license[] SEC("license") = "GPL"; -SEC("kprobe") -int kprobe_prog(void *ctx) +SEC("xdp") +int xdp_prog1(struct xdp_md *xdp) { - return 1; + return XDP_DROP; } diff --git a/tools/testing/selftests/bpf/progs/pro_epilogue_with_kfunc.c b/tools/testing/selftests/bpf/progs/pro_epilogue_with_kfunc.c new file mode 100644 index 000000000000..a5a8f08ac8fb --- /dev/null +++ b/tools/testing/selftests/bpf/progs/pro_epilogue_with_kfunc.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" +#include "../test_kmods/bpf_testmod.h" +#include "../test_kmods/bpf_testmod_kfunc.h" + +char _license[] SEC("license") = "GPL"; + +void __kfunc_btf_root(void) +{ + bpf_kfunc_st_ops_inc10(NULL); +} + +static __noinline __used int subprog(struct st_ops_args *args) +{ + args->a += 1; + return args->a; +} + +__success +/* prologue */ +__xlated("0: r8 = r1") +__xlated("1: r1 = 0") +__xlated("2: call kernel-function") +__xlated("3: if r0 != 0x0 goto pc+5") +__xlated("4: r6 = *(u64 *)(r8 +0)") +__xlated("5: r7 = *(u64 *)(r6 +0)") +__xlated("6: r7 += 1000") +__xlated("7: *(u64 *)(r6 +0) = r7") +__xlated("8: goto pc+2") +__xlated("9: r1 = r0") +__xlated("10: call kernel-function") +__xlated("11: r1 = r8") +/* save __u64 *ctx to stack */ +__xlated("12: *(u64 *)(r10 -8) = r1") +/* main prog */ +__xlated("13: r1 = *(u64 *)(r1 +0)") +__xlated("14: r6 = r1") +__xlated("15: call kernel-function") +__xlated("16: r1 = r6") +__xlated("17: call pc+") +/* epilogue */ +__xlated("18: r1 = 0") +__xlated("19: r6 = 0") +__xlated("20: call kernel-function") +__xlated("21: if r0 != 0x0 goto pc+6") +__xlated("22: r1 = *(u64 *)(r10 -8)") +__xlated("23: r1 = *(u64 *)(r1 +0)") +__xlated("24: r6 = *(u64 *)(r1 +0)") +__xlated("25: r6 += 10000") +__xlated("26: *(u64 *)(r1 +0) = r6") +__xlated("27: goto pc+2") +__xlated("28: r1 = r0") +__xlated("29: call kernel-function") +__xlated("30: r0 = r6") +__xlated("31: r0 *= 2") +__xlated("32: exit") +SEC("struct_ops/test_pro_epilogue") +__naked int test_kfunc_pro_epilogue(void) +{ + asm volatile ( + "r1 = *(u64 *)(r1 +0);" + "r6 = r1;" + "call %[bpf_kfunc_st_ops_inc10];" + "r1 = r6;" + "call subprog;" + "exit;" + : + : __imm(bpf_kfunc_st_ops_inc10) + : __clobber_all); +} + +SEC("syscall") +__retval(22022) /* (PROLOGUE_A [1000] + KFUNC_INC10 + SUBPROG_A [1] + EPILOGUE_A [10000]) * 2 */ +int syscall_pro_epilogue(void *ctx) +{ + struct st_ops_args args = {}; + + return bpf_kfunc_st_ops_test_pro_epilogue(&args); +} + +SEC(".struct_ops.link") +struct bpf_testmod_st_ops pro_epilogue_with_kfunc = { + .test_pro_epilogue = (void *)test_kfunc_pro_epilogue, +}; diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c index ab3a532b7dd6..43637ee2cdcd 100644 --- a/tools/testing/selftests/bpf/progs/rcu_read_lock.c +++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c @@ -242,7 +242,8 @@ out: } SEC("?lsm.s/bpf") -int BPF_PROG(inproper_sleepable_kfunc, int cmd, union bpf_attr *attr, unsigned int size) +int BPF_PROG(inproper_sleepable_kfunc, int cmd, union bpf_attr *attr, unsigned int size, + bool kernel) { struct bpf_key *bkey; @@ -439,3 +440,61 @@ int rcu_read_lock_global_subprog_unlock(void *ctx) ret += global_subprog_unlock(ret); return 0; } + +int __noinline +global_sleepable_helper_subprog(int i) +{ + if (i) + bpf_copy_from_user(&i, sizeof(i), NULL); + return i; +} + +int __noinline +global_sleepable_kfunc_subprog(int i) +{ + if (i) + bpf_copy_from_user_str(&i, sizeof(i), NULL, 0); + global_subprog(i); + return i; +} + +int __noinline +global_subprog_calling_sleepable_global(int i) +{ + if (!i) + global_sleepable_kfunc_subprog(i); + return i; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int rcu_read_lock_sleepable_helper_global_subprog(void *ctx) +{ + volatile int ret = 0; + + bpf_rcu_read_lock(); + ret += global_sleepable_helper_subprog(ret); + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int rcu_read_lock_sleepable_kfunc_global_subprog(void *ctx) +{ + volatile int ret = 0; + + bpf_rcu_read_lock(); + ret += global_sleepable_kfunc_subprog(ret); + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int rcu_read_lock_sleepable_global_subprog_indirect(void *ctx) +{ + volatile int ret = 0; + + bpf_rcu_read_lock(); + ret += global_subprog_calling_sleepable_global(ret); + bpf_rcu_read_unlock(); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/read_vsyscall.c b/tools/testing/selftests/bpf/progs/read_vsyscall.c index 39ebef430059..395591374d4f 100644 --- a/tools/testing/selftests/bpf/progs/read_vsyscall.c +++ b/tools/testing/selftests/bpf/progs/read_vsyscall.c @@ -8,14 +8,16 @@ int target_pid = 0; void *user_ptr = 0; -int read_ret[9]; +int read_ret[10]; char _license[] SEC("license") = "GPL"; /* - * This is the only kfunc, the others are helpers + * These are the kfuncs, the others are helpers */ int bpf_copy_from_user_str(void *dst, u32, const void *, u64) __weak __ksym; +int bpf_copy_from_user_task_str(void *dst, u32, const void *, + struct task_struct *, u64) __weak __ksym; SEC("fentry/" SYS_PREFIX "sys_nanosleep") int do_probe_read(void *ctx) @@ -47,6 +49,11 @@ int do_copy_from_user(void *ctx) read_ret[7] = bpf_copy_from_user_task(buf, sizeof(buf), user_ptr, bpf_get_current_task_btf(), 0); read_ret[8] = bpf_copy_from_user_str((char *)buf, sizeof(buf), user_ptr, 0); + read_ret[9] = bpf_copy_from_user_task_str((char *)buf, + sizeof(buf), + user_ptr, + bpf_get_current_task_btf(), + 0); return 0; } diff --git a/tools/testing/selftests/bpf/progs/res_spin_lock.c b/tools/testing/selftests/bpf/progs/res_spin_lock.c new file mode 100644 index 000000000000..b33385dfbd35 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/res_spin_lock.c @@ -0,0 +1,143 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024-2025 Meta Platforms, Inc. and affiliates. */ +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +#define EDEADLK 35 +#define ETIMEDOUT 110 + +struct arr_elem { + struct bpf_res_spin_lock lock; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 64); + __type(key, int); + __type(value, struct arr_elem); +} arrmap SEC(".maps"); + +struct bpf_res_spin_lock lockA __hidden SEC(".data.A"); +struct bpf_res_spin_lock lockB __hidden SEC(".data.B"); + +SEC("tc") +int res_spin_lock_test(struct __sk_buff *ctx) +{ + struct arr_elem *elem1, *elem2; + int r; + + elem1 = bpf_map_lookup_elem(&arrmap, &(int){0}); + if (!elem1) + return -1; + elem2 = bpf_map_lookup_elem(&arrmap, &(int){0}); + if (!elem2) + return -1; + + r = bpf_res_spin_lock(&elem1->lock); + if (r) + return r; + if (!bpf_res_spin_lock(&elem2->lock)) { + bpf_res_spin_unlock(&elem2->lock); + bpf_res_spin_unlock(&elem1->lock); + return -1; + } + bpf_res_spin_unlock(&elem1->lock); + return 0; +} + +SEC("tc") +int res_spin_lock_test_AB(struct __sk_buff *ctx) +{ + int r; + + r = bpf_res_spin_lock(&lockA); + if (r) + return !r; + /* Only unlock if we took the lock. */ + if (!bpf_res_spin_lock(&lockB)) + bpf_res_spin_unlock(&lockB); + bpf_res_spin_unlock(&lockA); + return 0; +} + +int err; + +SEC("tc") +int res_spin_lock_test_BA(struct __sk_buff *ctx) +{ + int r; + + r = bpf_res_spin_lock(&lockB); + if (r) + return !r; + if (!bpf_res_spin_lock(&lockA)) + bpf_res_spin_unlock(&lockA); + else + err = -EDEADLK; + bpf_res_spin_unlock(&lockB); + return err ?: 0; +} + +SEC("tc") +int res_spin_lock_test_held_lock_max(struct __sk_buff *ctx) +{ + struct bpf_res_spin_lock *locks[48] = {}; + struct arr_elem *e; + u64 time_beg, time; + int ret = 0, i; + + _Static_assert(ARRAY_SIZE(((struct rqspinlock_held){}).locks) == 31, + "RES_NR_HELD assumed to be 31"); + + for (i = 0; i < 34; i++) { + int key = i; + + /* We cannot pass in i as it will get spilled/filled by the compiler and + * loses bounds in verifier state. + */ + e = bpf_map_lookup_elem(&arrmap, &key); + if (!e) + return 1; + locks[i] = &e->lock; + } + + for (; i < 48; i++) { + int key = i - 2; + + /* We cannot pass in i as it will get spilled/filled by the compiler and + * loses bounds in verifier state. + */ + e = bpf_map_lookup_elem(&arrmap, &key); + if (!e) + return 1; + locks[i] = &e->lock; + } + + time_beg = bpf_ktime_get_ns(); + for (i = 0; i < 34; i++) { + if (bpf_res_spin_lock(locks[i])) + goto end; + } + + /* Trigger AA, after exhausting entries in the held lock table. This + * time, only the timeout can save us, as AA detection won't succeed. + */ + if (!bpf_res_spin_lock(locks[34])) { + bpf_res_spin_unlock(locks[34]); + ret = 1; + goto end; + } + +end: + for (i = i - 1; i >= 0; i--) + bpf_res_spin_unlock(locks[i]); + time = bpf_ktime_get_ns() - time_beg; + /* Time spent should be easily above our limit (1/4 s), since AA + * detection won't be expedited due to lack of held lock entry. + */ + return ret ?: (time > 1000000000 / 4 ? 0 : 1); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/res_spin_lock_fail.c b/tools/testing/selftests/bpf/progs/res_spin_lock_fail.c new file mode 100644 index 000000000000..330682a88c16 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/res_spin_lock_fail.c @@ -0,0 +1,244 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024-2025 Meta Platforms, Inc. and affiliates. */ +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include "bpf_misc.h" +#include "bpf_experimental.h" + +struct arr_elem { + struct bpf_res_spin_lock lock; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct arr_elem); +} arrmap SEC(".maps"); + +long value; + +struct bpf_spin_lock lock __hidden SEC(".data.A"); +struct bpf_res_spin_lock res_lock __hidden SEC(".data.B"); + +SEC("?tc") +__failure __msg("point to map value or allocated object") +int res_spin_lock_arg(struct __sk_buff *ctx) +{ + struct arr_elem *elem; + + elem = bpf_map_lookup_elem(&arrmap, &(int){0}); + if (!elem) + return 0; + bpf_res_spin_lock((struct bpf_res_spin_lock *)bpf_core_cast(&elem->lock, struct __sk_buff)); + bpf_res_spin_lock(&elem->lock); + return 0; +} + +SEC("?tc") +__failure __msg("AA deadlock detected") +int res_spin_lock_AA(struct __sk_buff *ctx) +{ + struct arr_elem *elem; + + elem = bpf_map_lookup_elem(&arrmap, &(int){0}); + if (!elem) + return 0; + bpf_res_spin_lock(&elem->lock); + bpf_res_spin_lock(&elem->lock); + return 0; +} + +SEC("?tc") +__failure __msg("AA deadlock detected") +int res_spin_lock_cond_AA(struct __sk_buff *ctx) +{ + struct arr_elem *elem; + + elem = bpf_map_lookup_elem(&arrmap, &(int){0}); + if (!elem) + return 0; + if (bpf_res_spin_lock(&elem->lock)) + return 0; + bpf_res_spin_lock(&elem->lock); + return 0; +} + +SEC("?tc") +__failure __msg("unlock of different lock") +int res_spin_lock_mismatch_1(struct __sk_buff *ctx) +{ + struct arr_elem *elem; + + elem = bpf_map_lookup_elem(&arrmap, &(int){0}); + if (!elem) + return 0; + if (bpf_res_spin_lock(&elem->lock)) + return 0; + bpf_res_spin_unlock(&res_lock); + return 0; +} + +SEC("?tc") +__failure __msg("unlock of different lock") +int res_spin_lock_mismatch_2(struct __sk_buff *ctx) +{ + struct arr_elem *elem; + + elem = bpf_map_lookup_elem(&arrmap, &(int){0}); + if (!elem) + return 0; + if (bpf_res_spin_lock(&res_lock)) + return 0; + bpf_res_spin_unlock(&elem->lock); + return 0; +} + +SEC("?tc") +__failure __msg("unlock of different lock") +int res_spin_lock_irq_mismatch_1(struct __sk_buff *ctx) +{ + struct arr_elem *elem; + unsigned long f1; + + elem = bpf_map_lookup_elem(&arrmap, &(int){0}); + if (!elem) + return 0; + bpf_local_irq_save(&f1); + if (bpf_res_spin_lock(&res_lock)) + return 0; + bpf_res_spin_unlock_irqrestore(&res_lock, &f1); + return 0; +} + +SEC("?tc") +__failure __msg("unlock of different lock") +int res_spin_lock_irq_mismatch_2(struct __sk_buff *ctx) +{ + struct arr_elem *elem; + unsigned long f1; + + elem = bpf_map_lookup_elem(&arrmap, &(int){0}); + if (!elem) + return 0; + if (bpf_res_spin_lock_irqsave(&res_lock, &f1)) + return 0; + bpf_res_spin_unlock(&res_lock); + return 0; +} + +SEC("?tc") +__success +int res_spin_lock_ooo(struct __sk_buff *ctx) +{ + struct arr_elem *elem; + + elem = bpf_map_lookup_elem(&arrmap, &(int){0}); + if (!elem) + return 0; + if (bpf_res_spin_lock(&res_lock)) + return 0; + if (bpf_res_spin_lock(&elem->lock)) { + bpf_res_spin_unlock(&res_lock); + return 0; + } + bpf_res_spin_unlock(&elem->lock); + bpf_res_spin_unlock(&res_lock); + return 0; +} + +SEC("?tc") +__success +int res_spin_lock_ooo_irq(struct __sk_buff *ctx) +{ + struct arr_elem *elem; + unsigned long f1, f2; + + elem = bpf_map_lookup_elem(&arrmap, &(int){0}); + if (!elem) + return 0; + if (bpf_res_spin_lock_irqsave(&res_lock, &f1)) + return 0; + if (bpf_res_spin_lock_irqsave(&elem->lock, &f2)) { + bpf_res_spin_unlock_irqrestore(&res_lock, &f1); + /* We won't have a unreleased IRQ flag error here. */ + return 0; + } + bpf_res_spin_unlock_irqrestore(&elem->lock, &f2); + bpf_res_spin_unlock_irqrestore(&res_lock, &f1); + return 0; +} + +struct bpf_res_spin_lock lock1 __hidden SEC(".data.OO1"); +struct bpf_res_spin_lock lock2 __hidden SEC(".data.OO2"); + +SEC("?tc") +__failure __msg("bpf_res_spin_unlock cannot be out of order") +int res_spin_lock_ooo_unlock(struct __sk_buff *ctx) +{ + if (bpf_res_spin_lock(&lock1)) + return 0; + if (bpf_res_spin_lock(&lock2)) { + bpf_res_spin_unlock(&lock1); + return 0; + } + bpf_res_spin_unlock(&lock1); + bpf_res_spin_unlock(&lock2); + return 0; +} + +SEC("?tc") +__failure __msg("off 1 doesn't point to 'struct bpf_res_spin_lock' that is at 0") +int res_spin_lock_bad_off(struct __sk_buff *ctx) +{ + struct arr_elem *elem; + + elem = bpf_map_lookup_elem(&arrmap, &(int){0}); + if (!elem) + return 0; + bpf_res_spin_lock((void *)&elem->lock + 1); + return 0; +} + +SEC("?tc") +__failure __msg("R1 doesn't have constant offset. bpf_res_spin_lock has to be at the constant offset") +int res_spin_lock_var_off(struct __sk_buff *ctx) +{ + struct arr_elem *elem; + u64 val = value; + + elem = bpf_map_lookup_elem(&arrmap, &(int){0}); + if (!elem) { + // FIXME: Only inline assembly use in assert macro doesn't emit + // BTF definition. + bpf_throw(0); + return 0; + } + bpf_assert_range(val, 0, 40); + bpf_res_spin_lock((void *)&value + val); + return 0; +} + +SEC("?tc") +__failure __msg("map 'res_spin.bss' has no valid bpf_res_spin_lock") +int res_spin_lock_no_lock_map(struct __sk_buff *ctx) +{ + bpf_res_spin_lock((void *)&value + 1); + return 0; +} + +SEC("?tc") +__failure __msg("local 'kptr' has no valid bpf_res_spin_lock") +int res_spin_lock_no_lock_kptr(struct __sk_buff *ctx) +{ + struct { int i; } *p = bpf_obj_new(typeof(*p)); + + if (!p) + return 0; + bpf_res_spin_lock((void *)p); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/set_global_vars.c b/tools/testing/selftests/bpf/progs/set_global_vars.c new file mode 100644 index 000000000000..9adb5ba4cd4d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/set_global_vars.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include "bpf_experimental.h" +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" +#include <stdbool.h> + +char _license[] SEC("license") = "GPL"; + +enum Enum { EA1 = 0, EA2 = 11 }; +enum Enumu64 {EB1 = 0llu, EB2 = 12llu }; +enum Enums64 { EC1 = 0ll, EC2 = 13ll }; + +const volatile __s64 var_s64 = -1; +const volatile __u64 var_u64 = 0; +const volatile __s32 var_s32 = -1; +const volatile __u32 var_u32 = 0; +const volatile __s16 var_s16 = -1; +const volatile __u16 var_u16 = 0; +const volatile __s8 var_s8 = -1; +const volatile __u8 var_u8 = 0; +const volatile enum Enum var_ea = EA1; +const volatile enum Enumu64 var_eb = EB1; +const volatile enum Enums64 var_ec = EC1; +const volatile bool var_b = false; + +char arr[4] = {0}; + +SEC("socket") +int test_set_globals(void *ctx) +{ + volatile __s8 a; + + a = var_s64; + a = var_u64; + a = var_s32; + a = var_u32; + a = var_s16; + a = var_u16; + a = var_s8; + a = var_u8; + a = var_ea; + a = var_eb; + a = var_ec; + a = var_b; + return a; +} diff --git a/tools/testing/selftests/bpf/progs/setget_sockopt.c b/tools/testing/selftests/bpf/progs/setget_sockopt.c index 6dd4318debbf..0107a24b7522 100644 --- a/tools/testing/selftests/bpf/progs/setget_sockopt.c +++ b/tools/testing/selftests/bpf/progs/setget_sockopt.c @@ -61,6 +61,9 @@ static const struct sockopt_test sol_tcp_tests[] = { { .opt = TCP_NOTSENT_LOWAT, .new = 1314, .expected = 1314, }, { .opt = TCP_BPF_SOCK_OPS_CB_FLAGS, .new = BPF_SOCK_OPS_ALL_CB_FLAGS, .expected = BPF_SOCK_OPS_ALL_CB_FLAGS, }, + { .opt = TCP_BPF_DELACK_MAX, .new = 30000, .expected = 30000, }, + { .opt = TCP_BPF_RTO_MIN, .new = 30000, .expected = 30000, }, + { .opt = TCP_RTO_MAX_MS, .new = 2000, .expected = 2000, }, { .opt = 0, }, }; diff --git a/tools/testing/selftests/bpf/progs/strncmp_bench.c b/tools/testing/selftests/bpf/progs/strncmp_bench.c index 18373a7df76e..f47bf88f8d2a 100644 --- a/tools/testing/selftests/bpf/progs/strncmp_bench.c +++ b/tools/testing/selftests/bpf/progs/strncmp_bench.c @@ -35,7 +35,10 @@ static __always_inline int local_strncmp(const char *s1, unsigned int sz, SEC("tp/syscalls/sys_enter_getpgid") int strncmp_no_helper(void *ctx) { - if (local_strncmp(str, cmp_str_len + 1, target) < 0) + const char *target_str = target; + + barrier_var(target_str); + if (local_strncmp(str, cmp_str_len + 1, target_str) < 0) __sync_add_and_fetch(&hits, 1); return 0; } diff --git a/tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c new file mode 100644 index 000000000000..36386b3c23a1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c @@ -0,0 +1,30 @@ +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include "../test_kmods/bpf_testmod.h" +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +void bpf_task_release(struct task_struct *p) __ksym; + +/* This test struct_ops BPF programs returning referenced kptr. The verifier should + * allow a referenced kptr or a NULL pointer to be returned. A referenced kptr to task + * here is acquried automatically as the task argument is tagged with "__ref". + */ +SEC("struct_ops/test_return_ref_kptr") +struct task_struct *BPF_PROG(kptr_return, int dummy, + struct task_struct *task, struct cgroup *cgrp) +{ + if (dummy % 2) { + bpf_task_release(task); + return NULL; + } + return task; +} + +SEC(".struct_ops.link") +struct bpf_testmod_ops testmod_kptr_return = { + .test_return_ref_kptr = (void *)kptr_return, +}; + + diff --git a/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__invalid_scalar.c b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__invalid_scalar.c new file mode 100644 index 000000000000..caeea158ef69 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__invalid_scalar.c @@ -0,0 +1,26 @@ +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include "../test_kmods/bpf_testmod.h" +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym; +void bpf_task_release(struct task_struct *p) __ksym; + +/* This test struct_ops BPF programs returning referenced kptr. The verifier should + * reject programs returning a non-zero scalar value. + */ +SEC("struct_ops/test_return_ref_kptr") +__failure __msg("At program exit the register R0 has smin=1 smax=1 should have been in [0, 0]") +struct task_struct *BPF_PROG(kptr_return_fail__invalid_scalar, int dummy, + struct task_struct *task, struct cgroup *cgrp) +{ + bpf_task_release(task); + return (struct task_struct *)1; +} + +SEC(".struct_ops.link") +struct bpf_testmod_ops testmod_kptr_return = { + .test_return_ref_kptr = (void *)kptr_return_fail__invalid_scalar, +}; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__local_kptr.c b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__local_kptr.c new file mode 100644 index 000000000000..b8b4f05c3d7f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__local_kptr.c @@ -0,0 +1,34 @@ +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include "../test_kmods/bpf_testmod.h" +#include "bpf_experimental.h" +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym; +void bpf_task_release(struct task_struct *p) __ksym; + +/* This test struct_ops BPF programs returning referenced kptr. The verifier should + * reject programs returning a local kptr. + */ +SEC("struct_ops/test_return_ref_kptr") +__failure __msg("At program exit the register R0 is not a known value (ptr_or_null_)") +struct task_struct *BPF_PROG(kptr_return_fail__local_kptr, int dummy, + struct task_struct *task, struct cgroup *cgrp) +{ + struct task_struct *t; + + bpf_task_release(task); + + t = bpf_obj_new(typeof(*task)); + if (!t) + return NULL; + + return t; +} + +SEC(".struct_ops.link") +struct bpf_testmod_ops testmod_kptr_return = { + .test_return_ref_kptr = (void *)kptr_return_fail__local_kptr, +}; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__nonzero_offset.c b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__nonzero_offset.c new file mode 100644 index 000000000000..7ddeb28c2329 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__nonzero_offset.c @@ -0,0 +1,25 @@ +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include "../test_kmods/bpf_testmod.h" +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym; +void bpf_task_release(struct task_struct *p) __ksym; + +/* This test struct_ops BPF programs returning referenced kptr. The verifier should + * reject programs returning a modified referenced kptr. + */ +SEC("struct_ops/test_return_ref_kptr") +__failure __msg("dereference of modified trusted_ptr_ ptr R0 off={{[0-9]+}} disallowed") +struct task_struct *BPF_PROG(kptr_return_fail__nonzero_offset, int dummy, + struct task_struct *task, struct cgroup *cgrp) +{ + return (struct task_struct *)&task->jobctl; +} + +SEC(".struct_ops.link") +struct bpf_testmod_ops testmod_kptr_return = { + .test_return_ref_kptr = (void *)kptr_return_fail__nonzero_offset, +}; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__wrong_type.c b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__wrong_type.c new file mode 100644 index 000000000000..6a2dd5367802 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__wrong_type.c @@ -0,0 +1,30 @@ +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include "../test_kmods/bpf_testmod.h" +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym; +void bpf_task_release(struct task_struct *p) __ksym; + +/* This test struct_ops BPF programs returning referenced kptr. The verifier should + * reject programs returning a referenced kptr of the wrong type. + */ +SEC("struct_ops/test_return_ref_kptr") +__failure __msg("At program exit the register R0 is not a known value (ptr_or_null_)") +struct task_struct *BPF_PROG(kptr_return_fail__wrong_type, int dummy, + struct task_struct *task, struct cgroup *cgrp) +{ + struct task_struct *ret; + + ret = (struct task_struct *)bpf_cgroup_acquire(cgrp); + bpf_task_release(task); + + return ret; +} + +SEC(".struct_ops.link") +struct bpf_testmod_ops testmod_kptr_return = { + .test_return_ref_kptr = (void *)kptr_return_fail__wrong_type, +}; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_refcounted.c b/tools/testing/selftests/bpf/progs/struct_ops_refcounted.c new file mode 100644 index 000000000000..76dcb6089d7f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/struct_ops_refcounted.c @@ -0,0 +1,31 @@ +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include "../test_kmods/bpf_testmod.h" +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +__attribute__((nomerge)) extern void bpf_task_release(struct task_struct *p) __ksym; + +/* This is a test BPF program that uses struct_ops to access a referenced + * kptr argument. This is a test for the verifier to ensure that it + * 1) recongnizes the task as a referenced object (i.e., ref_obj_id > 0), and + * 2) the same reference can be acquired from multiple paths as long as it + * has not been released. + */ +SEC("struct_ops/test_refcounted") +int BPF_PROG(refcounted, int dummy, struct task_struct *task) +{ + if (dummy == 1) + bpf_task_release(task); + else + bpf_task_release(task); + return 0; +} + +SEC(".struct_ops.link") +struct bpf_testmod_ops testmod_refcounted = { + .test_refcounted = (void *)refcounted, +}; + + diff --git a/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__global_subprog.c b/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__global_subprog.c new file mode 100644 index 000000000000..ae074aa62852 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__global_subprog.c @@ -0,0 +1,39 @@ +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include "../test_kmods/bpf_testmod.h" +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +extern void bpf_task_release(struct task_struct *p) __ksym; + +__noinline int subprog_release(__u64 *ctx __arg_ctx) +{ + struct task_struct *task = (struct task_struct *)ctx[1]; + int dummy = (int)ctx[0]; + + bpf_task_release(task); + + return dummy + 1; +} + +/* Test that the verifier rejects a program that contains a global + * subprogram with referenced kptr arguments + */ +SEC("struct_ops/test_refcounted") +__failure __log_level(2) +__msg("Validating subprog_release() func#1...") +__msg("invalid bpf_context access off=8. Reference may already be released") +int refcounted_fail__global_subprog(unsigned long long *ctx) +{ + struct task_struct *task = (struct task_struct *)ctx[1]; + + bpf_task_release(task); + + return subprog_release(ctx); +} + +SEC(".struct_ops.link") +struct bpf_testmod_ops testmod_ref_acquire = { + .test_refcounted = (void *)refcounted_fail__global_subprog, +}; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__ref_leak.c b/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__ref_leak.c new file mode 100644 index 000000000000..e945b1a04294 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__ref_leak.c @@ -0,0 +1,22 @@ +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include "../test_kmods/bpf_testmod.h" +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +/* Test that the verifier rejects a program that acquires a referenced + * kptr through context without releasing the reference + */ +SEC("struct_ops/test_refcounted") +__failure __msg("Unreleased reference id=1 alloc_insn=0") +int BPF_PROG(refcounted_fail__ref_leak, int dummy, + struct task_struct *task) +{ + return 0; +} + +SEC(".struct_ops.link") +struct bpf_testmod_ops testmod_ref_acquire = { + .test_refcounted = (void *)refcounted_fail__ref_leak, +}; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__tail_call.c b/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__tail_call.c new file mode 100644 index 000000000000..3b125025a1f2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__tail_call.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include "../test_kmods/bpf_testmod.h" +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} prog_array SEC(".maps"); + +/* Test that the verifier rejects a program with referenced kptr arguments + * that tail call + */ +SEC("struct_ops/test_refcounted") +__failure __msg("program with __ref argument cannot tail call") +int refcounted_fail__tail_call(unsigned long long *ctx) +{ + struct task_struct *task = (struct task_struct *)ctx[1]; + + bpf_task_release(task); + bpf_tail_call(ctx, &prog_array, 0); + + return 0; +} + +SEC(".struct_ops.link") +struct bpf_testmod_ops testmod_ref_acquire = { + .test_refcounted = (void *)refcounted_fail__tail_call, +}; + diff --git a/tools/testing/selftests/bpf/progs/summarization.c b/tools/testing/selftests/bpf/progs/summarization.c new file mode 100644 index 000000000000..f89effe82c9e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/summarization.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +__noinline +long changes_pkt_data(struct __sk_buff *sk) +{ + return bpf_skb_pull_data(sk, 0); +} + +__noinline __weak +long does_not_change_pkt_data(struct __sk_buff *sk) +{ + return 0; +} + +SEC("?tc") +int main_changes_with_subprogs(struct __sk_buff *sk) +{ + changes_pkt_data(sk); + does_not_change_pkt_data(sk); + return 0; +} + +SEC("?tc") +int main_changes(struct __sk_buff *sk) +{ + bpf_skb_pull_data(sk, 0); + return 0; +} + +SEC("?tc") +int main_does_not_change(struct __sk_buff *sk) +{ + return 0; +} + +__noinline +long might_sleep(struct pt_regs *ctx __arg_ctx) +{ + int i; + + bpf_copy_from_user(&i, sizeof(i), NULL); + return i; +} + +__noinline __weak +long does_not_sleep(struct pt_regs *ctx __arg_ctx) +{ + return 0; +} + +SEC("?uprobe.s") +int main_might_sleep_with_subprogs(struct pt_regs *ctx) +{ + might_sleep(ctx); + does_not_sleep(ctx); + return 0; +} + +SEC("?uprobe.s") +int main_might_sleep(struct pt_regs *ctx) +{ + int i; + + bpf_copy_from_user(&i, sizeof(i), NULL); + return i; +} + +SEC("?uprobe.s") +int main_does_not_sleep(struct pt_regs *ctx) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c b/tools/testing/selftests/bpf/progs/summarization_freplace.c index f9a622705f1b..935f00e0e9ea 100644 --- a/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c +++ b/tools/testing/selftests/bpf/progs/summarization_freplace.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 -#include <linux/bpf.h> +#include <vmlinux.h> #include <bpf/bpf_helpers.h> SEC("?freplace") @@ -15,4 +15,19 @@ long does_not_change_pkt_data(struct __sk_buff *sk) return 0; } +SEC("?freplace") +long might_sleep(struct pt_regs *ctx) +{ + int i; + + bpf_copy_from_user(&i, sizeof(i), NULL); + return i; +} + +SEC("?freplace") +long does_not_sleep(struct pt_regs *ctx) +{ + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c b/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c index 44628865fe1d..4fee0fdc7607 100644 --- a/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c +++ b/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c @@ -51,13 +51,13 @@ out: } SEC("lsm/bpf") -int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size) +int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size, bool kernel) { return bpf_link_create_verify(cmd); } SEC("lsm.s/bpf") -int BPF_PROG(lsm_s_run, int cmd, union bpf_attr *attr, unsigned int size) +int BPF_PROG(lsm_s_run, int cmd, union bpf_attr *attr, unsigned int size, bool kernel) { return bpf_link_create_verify(cmd); } diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c b/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c index 51b3f79df523..448403634eea 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c @@ -15,6 +15,7 @@ struct { struct core_reloc_arrays_output { int a2; + int a3; char b123; int c1c; int d00d; @@ -41,6 +42,7 @@ int test_core_arrays(void *ctx) { struct core_reloc_arrays *in = (void *)&data.in; struct core_reloc_arrays_output *out = (void *)&data.out; + int *a; if (CORE_READ(&out->a2, &in->a[2])) return 1; @@ -53,6 +55,9 @@ int test_core_arrays(void *ctx) if (CORE_READ(&out->f01c, &in->f[0][1].c)) return 1; + a = __builtin_preserve_access_index(({ in->a; })); + out->a3 = a[0] + a[1] + a[2] + a[3]; + return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_get_xattr.c b/tools/testing/selftests/bpf/progs/test_get_xattr.c index 66e737720f7c..54305f4c9f2d 100644 --- a/tools/testing/selftests/bpf/progs/test_get_xattr.c +++ b/tools/testing/selftests/bpf/progs/test_get_xattr.c @@ -6,6 +6,7 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> #include "bpf_kfuncs.h" +#include "bpf_misc.h" char _license[] SEC("license") = "GPL"; @@ -17,12 +18,23 @@ static const char expected_value[] = "hello"; char value1[32]; char value2[32]; +/* Matches caller of test_get_xattr() in prog_tests/fs_kfuncs.c */ +static const char xattr_names[][64] = { + /* The following work. */ + "user.kfuncs", + "security.bpf.xxx", + + /* The following do not work. */ + "security.bpf", + "security.selinux" +}; + SEC("lsm.s/file_open") int BPF_PROG(test_file_open, struct file *f) { struct bpf_dynptr value_ptr; __u32 pid; - int ret; + int ret, i; pid = bpf_get_current_pid_tgid() >> 32; if (pid != monitored_pid) @@ -30,7 +42,11 @@ int BPF_PROG(test_file_open, struct file *f) bpf_dynptr_from_mem(value1, sizeof(value1), 0, &value_ptr); - ret = bpf_get_file_xattr(f, "user.kfuncs", &value_ptr); + for (i = 0; i < ARRAY_SIZE(xattr_names); i++) { + ret = bpf_get_file_xattr(f, xattr_names[i], &value_ptr); + if (ret == sizeof(expected_value)) + break; + } if (ret != sizeof(expected_value)) return 0; if (bpf_strncmp(value1, ret, expected_value)) @@ -44,7 +60,7 @@ int BPF_PROG(test_inode_getxattr, struct dentry *dentry, char *name) { struct bpf_dynptr value_ptr; __u32 pid; - int ret; + int ret, i; pid = bpf_get_current_pid_tgid() >> 32; if (pid != monitored_pid) @@ -52,7 +68,11 @@ int BPF_PROG(test_inode_getxattr, struct dentry *dentry, char *name) bpf_dynptr_from_mem(value2, sizeof(value2), 0, &value_ptr); - ret = bpf_get_dentry_xattr(dentry, "user.kfuncs", &value_ptr); + for (i = 0; i < ARRAY_SIZE(xattr_names); i++) { + ret = bpf_get_dentry_xattr(dentry, xattr_names[i], &value_ptr); + if (ret == sizeof(expected_value)) + break; + } if (ret != sizeof(expected_value)) return 0; if (bpf_strncmp(value2, ret, expected_value)) diff --git a/tools/testing/selftests/bpf/progs/test_kernel_flag.c b/tools/testing/selftests/bpf/progs/test_kernel_flag.c new file mode 100644 index 000000000000..b45fab3be352 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_kernel_flag.c @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright (C) 2025 Microsoft Corporation + * + * Author: Blaise Boscaccy <bboscaccy@linux.microsoft.com> + */ + +#include "vmlinux.h" +#include <errno.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +__u32 monitored_tid; + +SEC("lsm.s/bpf") +int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size, bool kernel) +{ + __u32 tid; + + tid = bpf_get_current_pid_tgid() & 0xFFFFFFFF; + if (!kernel || tid != monitored_tid) + return 0; + else + return -EINVAL; +} diff --git a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c index cd4d752bd089..061befb004c2 100644 --- a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c +++ b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c @@ -36,7 +36,7 @@ char _license[] SEC("license") = "GPL"; SEC("?lsm.s/bpf") __failure __msg("cannot pass in dynptr at an offset=-8") -int BPF_PROG(not_valid_dynptr, int cmd, union bpf_attr *attr, unsigned int size) +int BPF_PROG(not_valid_dynptr, int cmd, union bpf_attr *attr, unsigned int size, bool kernel) { unsigned long val; @@ -46,7 +46,7 @@ int BPF_PROG(not_valid_dynptr, int cmd, union bpf_attr *attr, unsigned int size) SEC("?lsm.s/bpf") __failure __msg("arg#0 expected pointer to stack or const struct bpf_dynptr") -int BPF_PROG(not_ptr_to_stack, int cmd, union bpf_attr *attr, unsigned int size) +int BPF_PROG(not_ptr_to_stack, int cmd, union bpf_attr *attr, unsigned int size, bool kernel) { unsigned long val = 0; @@ -55,7 +55,7 @@ int BPF_PROG(not_ptr_to_stack, int cmd, union bpf_attr *attr, unsigned int size) } SEC("lsm.s/bpf") -int BPF_PROG(dynptr_data_null, int cmd, union bpf_attr *attr, unsigned int size) +int BPF_PROG(dynptr_data_null, int cmd, union bpf_attr *attr, unsigned int size, bool kernel) { struct bpf_key *trusted_keyring; struct bpf_dynptr ptr; diff --git a/tools/testing/selftests/bpf/progs/test_lookup_key.c b/tools/testing/selftests/bpf/progs/test_lookup_key.c index c73776990ae3..cdbbb12f1491 100644 --- a/tools/testing/selftests/bpf/progs/test_lookup_key.c +++ b/tools/testing/selftests/bpf/progs/test_lookup_key.c @@ -23,7 +23,7 @@ extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym; extern void bpf_key_put(struct bpf_key *key) __ksym; SEC("lsm.s/bpf") -int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size) +int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size, bool kernel) { struct bpf_key *bkey; __u32 pid; diff --git a/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c b/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c index 2fdc44e76624..89b0cd5a3e06 100644 --- a/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c +++ b/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c @@ -7,7 +7,7 @@ char tp_name[128]; SEC("lsm.s/bpf") -int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size) +int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size, bool kernel) { switch (cmd) { case BPF_RAW_TRACEPOINT_OPEN: diff --git a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c index 5eb25c6ad75b..a5be3267dbb0 100644 --- a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c +++ b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2018 Facebook */ -#include <stdlib.h> #include <linux/in.h> #include <linux/ip.h> #include <linux/ipv6.h> diff --git a/tools/testing/selftests/bpf/progs/test_set_remove_xattr.c b/tools/testing/selftests/bpf/progs/test_set_remove_xattr.c new file mode 100644 index 000000000000..6a612cf168d3 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_set_remove_xattr.c @@ -0,0 +1,133 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include <errno.h> +#include <bpf/bpf_tracing.h> +#include "bpf_kfuncs.h" +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +__u32 monitored_pid; + +const char xattr_foo[] = "security.bpf.foo"; +const char xattr_bar[] = "security.bpf.bar"; +static const char xattr_selinux[] = "security.selinux"; +char value_bar[] = "world"; +char read_value[32]; + +bool set_security_bpf_bar_success; +bool remove_security_bpf_bar_success; +bool set_security_selinux_fail; +bool remove_security_selinux_fail; + +char name_buf[32]; + +static inline bool name_match_foo(const char *name) +{ + bpf_probe_read_kernel(name_buf, sizeof(name_buf), name); + + return !bpf_strncmp(name_buf, sizeof(xattr_foo), xattr_foo); +} + +/* Test bpf_set_dentry_xattr and bpf_remove_dentry_xattr */ +SEC("lsm.s/inode_getxattr") +int BPF_PROG(test_inode_getxattr, struct dentry *dentry, char *name) +{ + struct bpf_dynptr value_ptr; + __u32 pid; + int ret; + + pid = bpf_get_current_pid_tgid() >> 32; + if (pid != monitored_pid) + return 0; + + /* Only do the following for security.bpf.foo */ + if (!name_match_foo(name)) + return 0; + + bpf_dynptr_from_mem(read_value, sizeof(read_value), 0, &value_ptr); + + /* read security.bpf.bar */ + ret = bpf_get_dentry_xattr(dentry, xattr_bar, &value_ptr); + + if (ret < 0) { + /* If security.bpf.bar doesn't exist, set it */ + bpf_dynptr_from_mem(value_bar, sizeof(value_bar), 0, &value_ptr); + + ret = bpf_set_dentry_xattr(dentry, xattr_bar, &value_ptr, 0); + if (!ret) + set_security_bpf_bar_success = true; + ret = bpf_set_dentry_xattr(dentry, xattr_selinux, &value_ptr, 0); + if (ret) + set_security_selinux_fail = true; + } else { + /* If security.bpf.bar exists, remove it */ + ret = bpf_remove_dentry_xattr(dentry, xattr_bar); + if (!ret) + remove_security_bpf_bar_success = true; + + ret = bpf_remove_dentry_xattr(dentry, xattr_selinux); + if (ret) + remove_security_selinux_fail = true; + } + + return 0; +} + +bool locked_set_security_bpf_bar_success; +bool locked_remove_security_bpf_bar_success; +bool locked_set_security_selinux_fail; +bool locked_remove_security_selinux_fail; + +/* Test bpf_set_dentry_xattr_locked and bpf_remove_dentry_xattr_locked. + * It not necessary to differentiate the _locked version and the + * not-_locked version in the BPF program. The verifier will fix them up + * properly. + */ +SEC("lsm.s/inode_setxattr") +int BPF_PROG(test_inode_setxattr, struct mnt_idmap *idmap, + struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) +{ + struct bpf_dynptr value_ptr; + __u32 pid; + int ret; + + pid = bpf_get_current_pid_tgid() >> 32; + if (pid != monitored_pid) + return 0; + + /* Only do the following for security.bpf.foo */ + if (!name_match_foo(name)) + return 0; + + bpf_dynptr_from_mem(read_value, sizeof(read_value), 0, &value_ptr); + + /* read security.bpf.bar */ + ret = bpf_get_dentry_xattr(dentry, xattr_bar, &value_ptr); + + if (ret < 0) { + /* If security.bpf.bar doesn't exist, set it */ + bpf_dynptr_from_mem(value_bar, sizeof(value_bar), 0, &value_ptr); + + ret = bpf_set_dentry_xattr(dentry, xattr_bar, &value_ptr, 0); + if (!ret) + locked_set_security_bpf_bar_success = true; + ret = bpf_set_dentry_xattr(dentry, xattr_selinux, &value_ptr, 0); + if (ret) + locked_set_security_selinux_fail = true; + } else { + /* If security.bpf.bar exists, remove it */ + ret = bpf_remove_dentry_xattr(dentry, xattr_bar); + if (!ret) + locked_remove_security_bpf_bar_success = true; + + ret = bpf_remove_dentry_xattr(dentry, xattr_selinux); + if (ret) + locked_remove_security_selinux_fail = true; + } + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c b/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c index 1c8b678e2e9a..f678ee6bd7ea 100644 --- a/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c +++ b/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c @@ -245,4 +245,73 @@ int lock_global_subprog_call2(struct __sk_buff *ctx) return ret; } +int __noinline +global_subprog_int(int i) +{ + if (i) + bpf_printk("%p", &i); + return i; +} + +int __noinline +global_sleepable_helper_subprog(int i) +{ + if (i) + bpf_copy_from_user(&i, sizeof(i), NULL); + return i; +} + +int __noinline +global_sleepable_kfunc_subprog(int i) +{ + if (i) + bpf_copy_from_user_str(&i, sizeof(i), NULL, 0); + global_subprog_int(i); + return i; +} + +int __noinline +global_subprog_calling_sleepable_global(int i) +{ + if (!i) + global_sleepable_kfunc_subprog(i); + return i; +} + +SEC("?syscall") +int lock_global_sleepable_helper_subprog(struct __sk_buff *ctx) +{ + int ret = 0; + + bpf_spin_lock(&lockA); + if (ctx->mark == 42) + ret = global_sleepable_helper_subprog(ctx->mark); + bpf_spin_unlock(&lockA); + return ret; +} + +SEC("?syscall") +int lock_global_sleepable_kfunc_subprog(struct __sk_buff *ctx) +{ + int ret = 0; + + bpf_spin_lock(&lockA); + if (ctx->mark == 42) + ret = global_sleepable_kfunc_subprog(ctx->mark); + bpf_spin_unlock(&lockA); + return ret; +} + +SEC("?syscall") +int lock_global_sleepable_subprog_indirect(struct __sk_buff *ctx) +{ + int ret = 0; + + bpf_spin_lock(&lockA); + if (ctx->mark == 42) + ret = global_subprog_calling_sleepable_global(ctx->mark); + bpf_spin_unlock(&lockA); + return ret; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c b/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c index 7e750309ce27..0b74b8bd22e8 100644 --- a/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c +++ b/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c @@ -49,7 +49,7 @@ out: } SEC("lsm.s/bpf") -int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size) +int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size, bool kernel) { struct cgroup *cgrp = NULL; struct task_struct *task; diff --git a/tools/testing/selftests/bpf/progs/test_usdt.c b/tools/testing/selftests/bpf/progs/test_usdt.c index 505aab9a5234..096488f47fbc 100644 --- a/tools/testing/selftests/bpf/progs/test_usdt.c +++ b/tools/testing/selftests/bpf/progs/test_usdt.c @@ -11,6 +11,7 @@ int usdt0_called; u64 usdt0_cookie; int usdt0_arg_cnt; int usdt0_arg_ret; +int usdt0_arg_size; SEC("usdt") int usdt0(struct pt_regs *ctx) @@ -26,6 +27,7 @@ int usdt0(struct pt_regs *ctx) usdt0_arg_cnt = bpf_usdt_arg_cnt(ctx); /* should return -ENOENT for any arg_num */ usdt0_arg_ret = bpf_usdt_arg(ctx, bpf_get_prandom_u32(), &tmp); + usdt0_arg_size = bpf_usdt_arg_size(ctx, bpf_get_prandom_u32()); return 0; } @@ -34,6 +36,7 @@ u64 usdt3_cookie; int usdt3_arg_cnt; int usdt3_arg_rets[3]; u64 usdt3_args[3]; +int usdt3_arg_sizes[3]; SEC("usdt//proc/self/exe:test:usdt3") int usdt3(struct pt_regs *ctx) @@ -50,12 +53,15 @@ int usdt3(struct pt_regs *ctx) usdt3_arg_rets[0] = bpf_usdt_arg(ctx, 0, &tmp); usdt3_args[0] = (int)tmp; + usdt3_arg_sizes[0] = bpf_usdt_arg_size(ctx, 0); usdt3_arg_rets[1] = bpf_usdt_arg(ctx, 1, &tmp); usdt3_args[1] = (long)tmp; + usdt3_arg_sizes[1] = bpf_usdt_arg_size(ctx, 1); usdt3_arg_rets[2] = bpf_usdt_arg(ctx, 2, &tmp); usdt3_args[2] = (uintptr_t)tmp; + usdt3_arg_sizes[2] = bpf_usdt_arg_size(ctx, 2); return 0; } @@ -64,12 +70,15 @@ int usdt12_called; u64 usdt12_cookie; int usdt12_arg_cnt; u64 usdt12_args[12]; +int usdt12_arg_sizes[12]; SEC("usdt//proc/self/exe:test:usdt12") int BPF_USDT(usdt12, int a1, int a2, long a3, long a4, unsigned a5, long a6, __u64 a7, uintptr_t a8, int a9, short a10, short a11, signed char a12) { + int i; + if (my_pid != (bpf_get_current_pid_tgid() >> 32)) return 0; @@ -90,6 +99,11 @@ int BPF_USDT(usdt12, int a1, int a2, long a3, long a4, unsigned a5, usdt12_args[9] = a10; usdt12_args[10] = a11; usdt12_args[11] = a12; + + bpf_for(i, 0, 12) { + usdt12_arg_sizes[i] = bpf_usdt_arg_size(ctx, i); + } + return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c index 12034a73ee2d..e96d09e11115 100644 --- a/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c +++ b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c @@ -37,7 +37,7 @@ struct { char _license[] SEC("license") = "GPL"; SEC("lsm.s/bpf") -int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size) +int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size, bool kernel) { struct bpf_dynptr data_ptr, sig_ptr; struct data *data_val; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c index fe2d71ae0e71..fcf6ca14f2ea 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c @@ -4,37 +4,50 @@ #include <bpf/bpf_helpers.h> -#define __round_mask(x, y) ((__typeof__(x))((y) - 1)) -#define round_up(x, y) ((((x) - 1) | __round_mask(x, y)) + 1) +#define META_SIZE 32 + #define ctx_ptr(ctx, mem) (void *)(unsigned long)ctx->mem +/* Demonstrates how metadata can be passed from an XDP program to a TC program + * using bpf_xdp_adjust_meta. + * For the sake of testing the metadata support in drivers, the XDP program uses + * a fixed-size payload after the Ethernet header as metadata. The TC program + * copies the metadata it receives into a map so it can be checked from + * userspace. + */ + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __uint(value_size, META_SIZE); +} test_result SEC(".maps"); + SEC("tc") int ing_cls(struct __sk_buff *ctx) { - __u8 *data, *data_meta, *data_end; - __u32 diff = 0; + __u8 *data, *data_meta; + __u32 key = 0; data_meta = ctx_ptr(ctx, data_meta); - data_end = ctx_ptr(ctx, data_end); data = ctx_ptr(ctx, data); - if (data + ETH_ALEN > data_end || - data_meta + round_up(ETH_ALEN, 4) > data) + if (data_meta + META_SIZE > data) return TC_ACT_SHOT; - diff |= ((__u32 *)data_meta)[0] ^ ((__u32 *)data)[0]; - diff |= ((__u16 *)data_meta)[2] ^ ((__u16 *)data)[2]; + bpf_map_update_elem(&test_result, &key, data_meta, BPF_ANY); - return diff ? TC_ACT_SHOT : TC_ACT_OK; + return TC_ACT_SHOT; } SEC("xdp") int ing_xdp(struct xdp_md *ctx) { - __u8 *data, *data_meta, *data_end; + __u8 *data, *data_meta, *data_end, *payload; + struct ethhdr *eth; int ret; - ret = bpf_xdp_adjust_meta(ctx, -round_up(ETH_ALEN, 4)); + ret = bpf_xdp_adjust_meta(ctx, -META_SIZE); if (ret < 0) return XDP_DROP; @@ -42,11 +55,21 @@ int ing_xdp(struct xdp_md *ctx) data_end = ctx_ptr(ctx, data_end); data = ctx_ptr(ctx, data); - if (data + ETH_ALEN > data_end || - data_meta + round_up(ETH_ALEN, 4) > data) + eth = (struct ethhdr *)data; + payload = data + sizeof(struct ethhdr); + + if (payload + META_SIZE > data_end || + data_meta + META_SIZE > data) + return XDP_DROP; + + /* The Linux networking stack may send other packets on the test + * interface that interfere with the test. Just drop them. + * The test packets can be recognized by their ethertype of zero. + */ + if (eth->h_proto != 0) return XDP_DROP; - __builtin_memcpy(data_meta, data, ETH_ALEN); + __builtin_memcpy(data_meta, payload, META_SIZE); return XDP_PASS; } diff --git a/tools/testing/selftests/bpf/progs/test_xdp_vlan.c b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c index a7588302268d..a80cc5f2f4f2 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_vlan.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c @@ -102,8 +102,8 @@ bool parse_eth_frame(struct ethhdr *eth, void *data_end, struct parse_pkt *pkt) #define TESTVLAN 4011 /* 0xFAB */ // #define TO_VLAN 4000 /* 0xFA0 (hint 0xOA0 = 160) */ -SEC("xdp_drop_vlan_4011") -int xdp_prognum0(struct xdp_md *ctx) +SEC("xdp") +int xdp_drop_vlan_4011(struct xdp_md *ctx) { void *data_end = (void *)(long)ctx->data_end; void *data = (void *)(long)ctx->data; @@ -144,8 +144,8 @@ Load prog with ip tool: /* Changing VLAN to zero, have same practical effect as removing the VLAN. */ #define TO_VLAN 0 -SEC("xdp_vlan_change") -int xdp_prognum1(struct xdp_md *ctx) +SEC("xdp") +int xdp_vlan_change(struct xdp_md *ctx) { void *data_end = (void *)(long)ctx->data_end; void *data = (void *)(long)ctx->data; @@ -178,8 +178,8 @@ int xdp_prognum1(struct xdp_md *ctx) #endif #define VLAN_HDR_SZ 4 /* bytes */ -SEC("xdp_vlan_remove_outer") -int xdp_prognum2(struct xdp_md *ctx) +SEC("xdp") +int xdp_vlan_remove_outer(struct xdp_md *ctx) { void *data_end = (void *)(long)ctx->data_end; void *data = (void *)(long)ctx->data; @@ -224,8 +224,8 @@ void shift_mac_4bytes_32bit(void *data) p[1] = p[0]; } -SEC("xdp_vlan_remove_outer2") -int xdp_prognum3(struct xdp_md *ctx) +SEC("xdp") +int xdp_vlan_remove_outer2(struct xdp_md *ctx) { void *data_end = (void *)(long)ctx->data_end; void *data = (void *)(long)ctx->data; @@ -254,8 +254,8 @@ int xdp_prognum3(struct xdp_md *ctx) * The TC-clsact eBPF programs (currently) need to be attach via TC commands */ -SEC("tc_vlan_push") -int _tc_progA(struct __sk_buff *ctx) +SEC("tc") +int tc_vlan_push(struct __sk_buff *ctx) { bpf_skb_vlan_push(ctx, bpf_htons(ETH_P_8021Q), TESTVLAN); diff --git a/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c b/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c index 5094c288cfd7..a9be6ae49454 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c +++ b/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c @@ -620,23 +620,61 @@ __naked void helper_call_does_not_prevent_bpf_fastcall(void) SEC("raw_tp") __arch_x86_64 +__log_level(4) __msg("stack depth 24") +/* may_goto counter at -24 */ +__xlated("0: *(u64 *)(r10 -24) =") +/* may_goto timestamp at -16 */ +__xlated("1: *(u64 *)(r10 -16) =") +__xlated("2: r1 = 1") +__xlated("...") +__xlated("4: r0 = &(void __percpu *)(r0)") +__xlated("...") +/* may_goto expansion starts */ +__xlated("6: r11 = *(u64 *)(r10 -24)") +__xlated("7: if r11 == 0x0 goto pc+6") +__xlated("8: r11 -= 1") +__xlated("9: if r11 != 0x0 goto pc+2") +__xlated("10: r11 = -24") +__xlated("11: call unknown") +__xlated("12: *(u64 *)(r10 -24) = r11") +/* may_goto expansion ends */ +__xlated("13: *(u64 *)(r10 -8) = r1") +__xlated("14: exit") +__success +__naked void may_goto_interaction_x86_64(void) +{ + asm volatile ( + "r1 = 1;" + "*(u64 *)(r10 - 16) = r1;" + "call %[bpf_get_smp_processor_id];" + "r1 = *(u64 *)(r10 - 16);" + ".8byte %[may_goto];" + /* just touch some stack at -8 */ + "*(u64 *)(r10 - 8) = r1;" + "exit;" + : + : __imm(bpf_get_smp_processor_id), + __imm_insn(may_goto, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, +1 /* offset */, 0)) + : __clobber_all); +} + +SEC("raw_tp") +__arch_arm64 __log_level(4) __msg("stack depth 16") /* may_goto counter at -16 */ __xlated("0: *(u64 *)(r10 -16) =") __xlated("1: r1 = 1") -__xlated("...") -__xlated("3: r0 = &(void __percpu *)(r0)") -__xlated("...") +__xlated("2: call bpf_get_smp_processor_id") /* may_goto expansion starts */ -__xlated("5: r11 = *(u64 *)(r10 -16)") -__xlated("6: if r11 == 0x0 goto pc+3") -__xlated("7: r11 -= 1") -__xlated("8: *(u64 *)(r10 -16) = r11") +__xlated("3: r11 = *(u64 *)(r10 -16)") +__xlated("4: if r11 == 0x0 goto pc+3") +__xlated("5: r11 -= 1") +__xlated("6: *(u64 *)(r10 -16) = r11") /* may_goto expansion ends */ -__xlated("9: *(u64 *)(r10 -8) = r1") -__xlated("10: exit") +__xlated("7: *(u64 *)(r10 -8) = r1") +__xlated("8: exit") __success -__naked void may_goto_interaction(void) +__naked void may_goto_interaction_arm64(void) { asm volatile ( "r1 = 1;" diff --git a/tools/testing/selftests/bpf/progs/verifier_gotol.c b/tools/testing/selftests/bpf/progs/verifier_gotol.c index 05a329ee45ee..d5d8f24df394 100644 --- a/tools/testing/selftests/bpf/progs/verifier_gotol.c +++ b/tools/testing/selftests/bpf/progs/verifier_gotol.c @@ -4,11 +4,7 @@ #include <bpf/bpf_helpers.h> #include "bpf_misc.h" -#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ - (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ - defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \ - defined(__TARGET_ARCH_loongarch)) && \ - __clang_major__ >= 18 +#ifdef CAN_USE_GOTOL SEC("socket") __description("gotol, small_imm") diff --git a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c index e54bb5385bc1..75dd922e4e9f 100644 --- a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c +++ b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c @@ -407,11 +407,7 @@ l0_%=: call %[bpf_jiffies64]; \ : __clobber_all); } -#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ - (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ - defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \ - defined(__TARGET_ARCH_loongarch)) && \ - __clang_major__ >= 18 +#ifdef CAN_USE_GOTOL SEC("socket") __success __retval(0) __naked void gotol_and_may_goto(void) diff --git a/tools/testing/selftests/bpf/progs/verifier_load_acquire.c b/tools/testing/selftests/bpf/progs/verifier_load_acquire.c new file mode 100644 index 000000000000..77698d5a19e4 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_load_acquire.c @@ -0,0 +1,218 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Google LLC. */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "../../../include/linux/filter.h" +#include "bpf_misc.h" + +#ifdef CAN_USE_LOAD_ACQ_STORE_REL + +SEC("socket") +__description("load-acquire, 8-bit") +__success __success_unpriv __retval(0x12) +__naked void load_acquire_8(void) +{ + asm volatile ( + "w1 = 0x12;" + "*(u8 *)(r10 - 1) = w1;" + ".8byte %[load_acquire_insn];" // w0 = load_acquire((u8 *)(r10 - 1)); + "exit;" + : + : __imm_insn(load_acquire_insn, + BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -1)) + : __clobber_all); +} + +SEC("socket") +__description("load-acquire, 16-bit") +__success __success_unpriv __retval(0x1234) +__naked void load_acquire_16(void) +{ + asm volatile ( + "w1 = 0x1234;" + "*(u16 *)(r10 - 2) = w1;" + ".8byte %[load_acquire_insn];" // w0 = load_acquire((u16 *)(r10 - 2)); + "exit;" + : + : __imm_insn(load_acquire_insn, + BPF_ATOMIC_OP(BPF_H, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -2)) + : __clobber_all); +} + +SEC("socket") +__description("load-acquire, 32-bit") +__success __success_unpriv __retval(0x12345678) +__naked void load_acquire_32(void) +{ + asm volatile ( + "w1 = 0x12345678;" + "*(u32 *)(r10 - 4) = w1;" + ".8byte %[load_acquire_insn];" // w0 = load_acquire((u32 *)(r10 - 4)); + "exit;" + : + : __imm_insn(load_acquire_insn, + BPF_ATOMIC_OP(BPF_W, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -4)) + : __clobber_all); +} + +SEC("socket") +__description("load-acquire, 64-bit") +__success __success_unpriv __retval(0x1234567890abcdef) +__naked void load_acquire_64(void) +{ + asm volatile ( + "r1 = 0x1234567890abcdef ll;" + "*(u64 *)(r10 - 8) = r1;" + ".8byte %[load_acquire_insn];" // r0 = load_acquire((u64 *)(r10 - 8)); + "exit;" + : + : __imm_insn(load_acquire_insn, + BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -8)) + : __clobber_all); +} + +SEC("socket") +__description("load-acquire with uninitialized src_reg") +__failure __failure_unpriv __msg("R2 !read_ok") +__naked void load_acquire_with_uninitialized_src_reg(void) +{ + asm volatile ( + ".8byte %[load_acquire_insn];" // r0 = load_acquire((u64 *)(r2 + 0)); + "exit;" + : + : __imm_insn(load_acquire_insn, + BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_2, 0)) + : __clobber_all); +} + +SEC("socket") +__description("load-acquire with non-pointer src_reg") +__failure __failure_unpriv __msg("R1 invalid mem access 'scalar'") +__naked void load_acquire_with_non_pointer_src_reg(void) +{ + asm volatile ( + "r1 = 0;" + ".8byte %[load_acquire_insn];" // r0 = load_acquire((u64 *)(r1 + 0)); + "exit;" + : + : __imm_insn(load_acquire_insn, + BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_1, 0)) + : __clobber_all); +} + +SEC("socket") +__description("misaligned load-acquire") +__failure __failure_unpriv __msg("misaligned stack access off") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void load_acquire_misaligned(void) +{ + asm volatile ( + "r1 = 0;" + "*(u64 *)(r10 - 8) = r1;" + ".8byte %[load_acquire_insn];" // w0 = load_acquire((u32 *)(r10 - 5)); + "exit;" + : + : __imm_insn(load_acquire_insn, + BPF_ATOMIC_OP(BPF_W, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -5)) + : __clobber_all); +} + +SEC("socket") +__description("load-acquire from ctx pointer") +__failure __failure_unpriv __msg("BPF_ATOMIC loads from R1 ctx is not allowed") +__naked void load_acquire_from_ctx_pointer(void) +{ + asm volatile ( + ".8byte %[load_acquire_insn];" // w0 = load_acquire((u8 *)(r1 + 0)); + "exit;" + : + : __imm_insn(load_acquire_insn, + BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_1, 0)) + : __clobber_all); +} + +SEC("xdp") +__description("load-acquire from pkt pointer") +__failure __msg("BPF_ATOMIC loads from R2 pkt is not allowed") +__naked void load_acquire_from_pkt_pointer(void) +{ + asm volatile ( + "r2 = *(u32 *)(r1 + %[xdp_md_data]);" + "r3 = *(u32 *)(r1 + %[xdp_md_data_end]);" + "r1 = r2;" + "r1 += 8;" + "if r1 >= r3 goto l0_%=;" + ".8byte %[load_acquire_insn];" // w0 = load_acquire((u8 *)(r2 + 0)); +"l0_%=: r0 = 0;" + "exit;" + : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)), + __imm_insn(load_acquire_insn, + BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_2, 0)) + : __clobber_all); +} + +SEC("flow_dissector") +__description("load-acquire from flow_keys pointer") +__failure __msg("BPF_ATOMIC loads from R2 flow_keys is not allowed") +__naked void load_acquire_from_flow_keys_pointer(void) +{ + asm volatile ( + "r2 = *(u64 *)(r1 + %[__sk_buff_flow_keys]);" + ".8byte %[load_acquire_insn];" // w0 = load_acquire((u8 *)(r2 + 0)); + "exit;" + : + : __imm_const(__sk_buff_flow_keys, + offsetof(struct __sk_buff, flow_keys)), + __imm_insn(load_acquire_insn, + BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_2, 0)) + : __clobber_all); +} + +SEC("sk_reuseport") +__description("load-acquire from sock pointer") +__failure __msg("BPF_ATOMIC loads from R2 sock is not allowed") +__naked void load_acquire_from_sock_pointer(void) +{ + asm volatile ( + "r2 = *(u64 *)(r1 + %[sk_reuseport_md_sk]);" + // w0 = load_acquire((u8 *)(r2 + offsetof(struct bpf_sock, family))); + ".8byte %[load_acquire_insn];" + "exit;" + : + : __imm_const(sk_reuseport_md_sk, offsetof(struct sk_reuseport_md, sk)), + __imm_insn(load_acquire_insn, + BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_2, + offsetof(struct bpf_sock, family))) + : __clobber_all); +} + +SEC("socket") +__description("load-acquire with invalid register R15") +__failure __failure_unpriv __msg("R15 is invalid") +__naked void load_acquire_with_invalid_reg(void) +{ + asm volatile ( + ".8byte %[load_acquire_insn];" // r0 = load_acquire((u64 *)(r15 + 0)); + "exit;" + : + : __imm_insn(load_acquire_insn, + BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_0, 15 /* invalid reg */, 0)) + : __clobber_all); +} + +#else /* CAN_USE_LOAD_ACQ_STORE_REL */ + +SEC("socket") +__description("Clang version < 18, ENABLE_ATOMICS_TESTS not defined, and/or JIT doesn't support load-acquire, use a dummy test") +__success +int dummy_test(void) +{ + return 0; +} + +#endif /* CAN_USE_LOAD_ACQ_STORE_REL */ + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c b/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c index e81097c96fe2..3966d827f288 100644 --- a/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c +++ b/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c @@ -69,8 +69,38 @@ __naked void may_goto_batch_1(void) } SEC("raw_tp") -__description("may_goto batch with offsets 2/0") +__description("may_goto batch with offsets 2/0 - x86_64") __arch_x86_64 +__xlated("0: *(u64 *)(r10 -16) = 65535") +__xlated("1: *(u64 *)(r10 -8) = 0") +__xlated("2: r11 = *(u64 *)(r10 -16)") +__xlated("3: if r11 == 0x0 goto pc+6") +__xlated("4: r11 -= 1") +__xlated("5: if r11 != 0x0 goto pc+2") +__xlated("6: r11 = -16") +__xlated("7: call unknown") +__xlated("8: *(u64 *)(r10 -16) = r11") +__xlated("9: r0 = 1") +__xlated("10: r0 = 2") +__xlated("11: exit") +__success +__naked void may_goto_batch_2_x86_64(void) +{ + asm volatile ( + ".8byte %[may_goto1];" + ".8byte %[may_goto3];" + "r0 = 1;" + "r0 = 2;" + "exit;" + : + : __imm_insn(may_goto1, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 2 /* offset */, 0)), + __imm_insn(may_goto3, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 0 /* offset */, 0)) + : __clobber_all); +} + +SEC("raw_tp") +__description("may_goto batch with offsets 2/0 - arm64") +__arch_arm64 __xlated("0: *(u64 *)(r10 -8) = 8388608") __xlated("1: r11 = *(u64 *)(r10 -8)") __xlated("2: if r11 == 0x0 goto pc+3") @@ -80,7 +110,7 @@ __xlated("5: r0 = 1") __xlated("6: r0 = 2") __xlated("7: exit") __success -__naked void may_goto_batch_2(void) +__naked void may_goto_batch_2_arm64(void) { asm volatile ( ".8byte %[may_goto1];" diff --git a/tools/testing/selftests/bpf/progs/verifier_precision.c b/tools/testing/selftests/bpf/progs/verifier_precision.c index 6b564d4c0986..6662d4b39969 100644 --- a/tools/testing/selftests/bpf/progs/verifier_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_precision.c @@ -2,6 +2,7 @@ /* Copyright (C) 2023 SUSE LLC */ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> +#include "../../../include/linux/filter.h" #include "bpf_misc.h" SEC("?raw_tp") @@ -90,6 +91,54 @@ __naked int bpf_end_bswap(void) ::: __clobber_all); } +#if defined(ENABLE_ATOMICS_TESTS) && \ + (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86)) + +SEC("?raw_tp") +__success __log_level(2) +__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r3 = r10") +__msg("mark_precise: frame0: regs=r2 stack= before 2: (db) r2 = load_acquire((u64 *)(r10 -8))") +__msg("mark_precise: frame0: regs= stack=-8 before 1: (7b) *(u64 *)(r10 -8) = r1") +__msg("mark_precise: frame0: regs=r1 stack= before 0: (b7) r1 = 8") +__naked int bpf_load_acquire(void) +{ + asm volatile ( + "r1 = 8;" + "*(u64 *)(r10 - 8) = r1;" + ".8byte %[load_acquire_insn];" /* r2 = load_acquire((u64 *)(r10 - 8)); */ + "r3 = r10;" + "r3 += r2;" /* mark_precise */ + "r0 = 0;" + "exit;" + : + : __imm_insn(load_acquire_insn, + BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -8)) + : __clobber_all); +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("mark_precise: frame0: regs=r1 stack= before 3: (bf) r2 = r10") +__msg("mark_precise: frame0: regs=r1 stack= before 2: (79) r1 = *(u64 *)(r10 -8)") +__msg("mark_precise: frame0: regs= stack=-8 before 1: (db) store_release((u64 *)(r10 -8), r1)") +__msg("mark_precise: frame0: regs=r1 stack= before 0: (b7) r1 = 8") +__naked int bpf_store_release(void) +{ + asm volatile ( + "r1 = 8;" + ".8byte %[store_release_insn];" /* store_release((u64 *)(r10 - 8), r1); */ + "r1 = *(u64 *)(r10 - 8);" + "r2 = r10;" + "r2 += r1;" /* mark_precise */ + "r0 = 0;" + "exit;" + : + : __imm_insn(store_release_insn, + BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -8)) + : __clobber_all); +} + +#endif /* load-acquire, store-release */ #endif /* v4 instruction */ SEC("?raw_tp") diff --git a/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c b/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c index 417c61cd4b19..24aabc6083fd 100644 --- a/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c +++ b/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c @@ -481,4 +481,56 @@ l1_%=: r0 = 42; \ : __clobber_all); } +SEC("socket") +__description("PTR_TO_STACK stack size > 512") +__failure __msg("invalid write to stack R1 off=-520 size=8") +__naked void stack_check_size_gt_512(void) +{ + asm volatile (" \ + r1 = r10; \ + r1 += -520; \ + r0 = 42; \ + *(u64*)(r1 + 0) = r0; \ + exit; \ +" ::: __clobber_all); +} + +#ifdef __BPF_FEATURE_MAY_GOTO +SEC("socket") +__description("PTR_TO_STACK stack size 512 with may_goto with jit") +__load_if_JITed() +__success __retval(42) +__naked void stack_check_size_512_with_may_goto_jit(void) +{ + asm volatile (" \ + r1 = r10; \ + r1 += -512; \ + r0 = 42; \ + *(u32*)(r1 + 0) = r0; \ + may_goto l0_%=; \ + r2 = 100; \ + l0_%=: \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("PTR_TO_STACK stack size 512 with may_goto without jit") +__load_if_no_JITed() +__failure __msg("stack size 520(extra 8) is too large") +__naked void stack_check_size_512_with_may_goto(void) +{ + asm volatile (" \ + r1 = r10; \ + r1 += -512; \ + r0 = 42; \ + *(u32*)(r1 + 0) = r0; \ + may_goto l0_%=; \ + r2 = 100; \ + l0_%=: \ + exit; \ +" ::: __clobber_all); +} +#endif + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_store_release.c b/tools/testing/selftests/bpf/progs/verifier_store_release.c new file mode 100644 index 000000000000..c0442d5bb049 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_store_release.c @@ -0,0 +1,286 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Google LLC. */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "../../../include/linux/filter.h" +#include "bpf_misc.h" + +#if __clang_major__ >= 18 && defined(ENABLE_ATOMICS_TESTS) && \ + (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86)) + +SEC("socket") +__description("store-release, 8-bit") +__success __success_unpriv __retval(0x12) +__naked void store_release_8(void) +{ + asm volatile ( + "w1 = 0x12;" + ".8byte %[store_release_insn];" // store_release((u8 *)(r10 - 1), w1); + "w0 = *(u8 *)(r10 - 1);" + "exit;" + : + : __imm_insn(store_release_insn, + BPF_ATOMIC_OP(BPF_B, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -1)) + : __clobber_all); +} + +SEC("socket") +__description("store-release, 16-bit") +__success __success_unpriv __retval(0x1234) +__naked void store_release_16(void) +{ + asm volatile ( + "w1 = 0x1234;" + ".8byte %[store_release_insn];" // store_release((u16 *)(r10 - 2), w1); + "w0 = *(u16 *)(r10 - 2);" + "exit;" + : + : __imm_insn(store_release_insn, + BPF_ATOMIC_OP(BPF_H, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -2)) + : __clobber_all); +} + +SEC("socket") +__description("store-release, 32-bit") +__success __success_unpriv __retval(0x12345678) +__naked void store_release_32(void) +{ + asm volatile ( + "w1 = 0x12345678;" + ".8byte %[store_release_insn];" // store_release((u32 *)(r10 - 4), w1); + "w0 = *(u32 *)(r10 - 4);" + "exit;" + : + : __imm_insn(store_release_insn, + BPF_ATOMIC_OP(BPF_W, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -4)) + : __clobber_all); +} + +SEC("socket") +__description("store-release, 64-bit") +__success __success_unpriv __retval(0x1234567890abcdef) +__naked void store_release_64(void) +{ + asm volatile ( + "r1 = 0x1234567890abcdef ll;" + ".8byte %[store_release_insn];" // store_release((u64 *)(r10 - 8), r1); + "r0 = *(u64 *)(r10 - 8);" + "exit;" + : + : __imm_insn(store_release_insn, + BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -8)) + : __clobber_all); +} + +SEC("socket") +__description("store-release with uninitialized src_reg") +__failure __failure_unpriv __msg("R2 !read_ok") +__naked void store_release_with_uninitialized_src_reg(void) +{ + asm volatile ( + ".8byte %[store_release_insn];" // store_release((u64 *)(r10 - 8), r2); + "exit;" + : + : __imm_insn(store_release_insn, + BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_10, BPF_REG_2, -8)) + : __clobber_all); +} + +SEC("socket") +__description("store-release with uninitialized dst_reg") +__failure __failure_unpriv __msg("R2 !read_ok") +__naked void store_release_with_uninitialized_dst_reg(void) +{ + asm volatile ( + "r1 = 0;" + ".8byte %[store_release_insn];" // store_release((u64 *)(r2 - 8), r1); + "exit;" + : + : __imm_insn(store_release_insn, + BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_2, BPF_REG_1, -8)) + : __clobber_all); +} + +SEC("socket") +__description("store-release with non-pointer dst_reg") +__failure __failure_unpriv __msg("R1 invalid mem access 'scalar'") +__naked void store_release_with_non_pointer_dst_reg(void) +{ + asm volatile ( + "r1 = 0;" + ".8byte %[store_release_insn];" // store_release((u64 *)(r1 + 0), r1); + "exit;" + : + : __imm_insn(store_release_insn, + BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_1, BPF_REG_1, 0)) + : __clobber_all); +} + +SEC("socket") +__description("misaligned store-release") +__failure __failure_unpriv __msg("misaligned stack access off") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void store_release_misaligned(void) +{ + asm volatile ( + "w0 = 0;" + ".8byte %[store_release_insn];" // store_release((u32 *)(r10 - 5), w0); + "exit;" + : + : __imm_insn(store_release_insn, + BPF_ATOMIC_OP(BPF_W, BPF_STORE_REL, BPF_REG_10, BPF_REG_0, -5)) + : __clobber_all); +} + +SEC("socket") +__description("store-release to ctx pointer") +__failure __failure_unpriv __msg("BPF_ATOMIC stores into R1 ctx is not allowed") +__naked void store_release_to_ctx_pointer(void) +{ + asm volatile ( + "w0 = 0;" + // store_release((u8 *)(r1 + offsetof(struct __sk_buff, cb[0])), w0); + ".8byte %[store_release_insn];" + "exit;" + : + : __imm_insn(store_release_insn, + BPF_ATOMIC_OP(BPF_B, BPF_STORE_REL, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0]))) + : __clobber_all); +} + +SEC("xdp") +__description("store-release to pkt pointer") +__failure __msg("BPF_ATOMIC stores into R2 pkt is not allowed") +__naked void store_release_to_pkt_pointer(void) +{ + asm volatile ( + "w0 = 0;" + "r2 = *(u32 *)(r1 + %[xdp_md_data]);" + "r3 = *(u32 *)(r1 + %[xdp_md_data_end]);" + "r1 = r2;" + "r1 += 8;" + "if r1 >= r3 goto l0_%=;" + ".8byte %[store_release_insn];" // store_release((u8 *)(r2 + 0), w0); +"l0_%=: r0 = 0;" + "exit;" + : + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)), + __imm_insn(store_release_insn, + BPF_ATOMIC_OP(BPF_B, BPF_STORE_REL, BPF_REG_2, BPF_REG_0, 0)) + : __clobber_all); +} + +SEC("flow_dissector") +__description("store-release to flow_keys pointer") +__failure __msg("BPF_ATOMIC stores into R2 flow_keys is not allowed") +__naked void store_release_to_flow_keys_pointer(void) +{ + asm volatile ( + "w0 = 0;" + "r2 = *(u64 *)(r1 + %[__sk_buff_flow_keys]);" + ".8byte %[store_release_insn];" // store_release((u8 *)(r2 + 0), w0); + "exit;" + : + : __imm_const(__sk_buff_flow_keys, + offsetof(struct __sk_buff, flow_keys)), + __imm_insn(store_release_insn, + BPF_ATOMIC_OP(BPF_B, BPF_STORE_REL, BPF_REG_2, BPF_REG_0, 0)) + : __clobber_all); +} + +SEC("sk_reuseport") +__description("store-release to sock pointer") +__failure __msg("R2 cannot write into sock") +__naked void store_release_to_sock_pointer(void) +{ + asm volatile ( + "w0 = 0;" + "r2 = *(u64 *)(r1 + %[sk_reuseport_md_sk]);" + ".8byte %[store_release_insn];" // store_release((u8 *)(r2 + 0), w0); + "exit;" + : + : __imm_const(sk_reuseport_md_sk, offsetof(struct sk_reuseport_md, sk)), + __imm_insn(store_release_insn, + BPF_ATOMIC_OP(BPF_B, BPF_STORE_REL, BPF_REG_2, BPF_REG_0, 0)) + : __clobber_all); +} + +SEC("socket") +__description("store-release, leak pointer to stack") +__success __success_unpriv __retval(0) +__naked void store_release_leak_pointer_to_stack(void) +{ + asm volatile ( + ".8byte %[store_release_insn];" // store_release((u64 *)(r10 - 8), r1); + "r0 = 0;" + "exit;" + : + : __imm_insn(store_release_insn, + BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -8)) + : __clobber_all); +} + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, long long); + __type(value, long long); +} map_hash_8b SEC(".maps"); + +SEC("socket") +__description("store-release, leak pointer to map") +__success __retval(0) +__failure_unpriv __msg_unpriv("R6 leaks addr into map") +__naked void store_release_leak_pointer_to_map(void) +{ + asm volatile ( + "r6 = r1;" + "r1 = %[map_hash_8b] ll;" + "r2 = 0;" + "*(u64 *)(r10 - 8) = r2;" + "r2 = r10;" + "r2 += -8;" + "call %[bpf_map_lookup_elem];" + "if r0 == 0 goto l0_%=;" + ".8byte %[store_release_insn];" // store_release((u64 *)(r0 + 0), r6); +"l0_%=:" + "r0 = 0;" + "exit;" + : + : __imm_addr(map_hash_8b), + __imm(bpf_map_lookup_elem), + __imm_insn(store_release_insn, + BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_0, BPF_REG_6, 0)) + : __clobber_all); +} + +SEC("socket") +__description("store-release with invalid register R15") +__failure __failure_unpriv __msg("R15 is invalid") +__naked void store_release_with_invalid_reg(void) +{ + asm volatile ( + ".8byte %[store_release_insn];" // store_release((u64 *)(r15 + 0), r1); + "exit;" + : + : __imm_insn(store_release_insn, + BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, 15 /* invalid reg */, BPF_REG_1, 0)) + : __clobber_all); +} + +#else + +SEC("socket") +__description("Clang version < 18, ENABLE_ATOMICS_TESTS not defined, and/or JIT doesn't support store-release, use a dummy test") +__success +int dummy_test(void) +{ + return 0; +} + +#endif + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/xdp_redirect_map.c b/tools/testing/selftests/bpf/progs/xdp_redirect_map.c index 682dda8dabbc..50c8958f94e5 100644 --- a/tools/testing/selftests/bpf/progs/xdp_redirect_map.c +++ b/tools/testing/selftests/bpf/progs/xdp_redirect_map.c @@ -1,7 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 +#include <linux/if_ether.h> + #include <linux/bpf.h> #include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> struct { __uint(type, BPF_MAP_TYPE_DEVMAP); @@ -28,4 +31,89 @@ int xdp_redirect_map_2(struct xdp_md *xdp) return bpf_redirect_map(&tx_port, 2, 0); } +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 3); + __type(key, __u32); + __type(value, __u64); +} rxcnt SEC(".maps"); + +static int xdp_count(struct xdp_md *xdp, __u32 key) +{ + void *data_end = (void *)(long)xdp->data_end; + void *data = (void *)(long)xdp->data; + struct ethhdr *eth = data; + __u64 *count; + + if (data + sizeof(*eth) > data_end) + return XDP_DROP; + + if (bpf_htons(eth->h_proto) == ETH_P_IP) { + /* We only count IPv4 packets */ + count = bpf_map_lookup_elem(&rxcnt, &key); + if (count) + *count += 1; + } + + return XDP_PASS; +} + +SEC("xdp") +int xdp_count_0(struct xdp_md *xdp) +{ + return xdp_count(xdp, 0); +} + +SEC("xdp") +int xdp_count_1(struct xdp_md *xdp) +{ + return xdp_count(xdp, 1); +} + +SEC("xdp") +int xdp_count_2(struct xdp_md *xdp) +{ + return xdp_count(xdp, 2); +} + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 2); + __type(key, __u32); + __type(value, __be64); +} rx_mac SEC(".maps"); + +static int store_mac(struct xdp_md *xdp, __u32 id) +{ + void *data_end = (void *)(long)xdp->data_end; + void *data = (void *)(long)xdp->data; + struct ethhdr *eth = data; + __u32 key = id; + __be64 mac = 0; + + if (data + sizeof(*eth) > data_end) + return XDP_DROP; + + /* Only store IPv4 MAC to avoid being polluted by IPv6 packets */ + if (eth->h_proto == bpf_htons(ETH_P_IP)) { + __builtin_memcpy(&mac, eth->h_source, ETH_ALEN); + bpf_map_update_elem(&rx_mac, &key, &mac, 0); + bpf_printk("%s - %x", __func__, mac); + } + + return XDP_PASS; +} + +SEC("xdp") +int store_mac_1(struct xdp_md *xdp) +{ + return store_mac(xdp, 0); +} + +SEC("xdp") +int store_mac_2(struct xdp_md *xdp) +{ + return store_mac(xdp, 1); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c index 97b26a30b59a..bc2945ed8a80 100644 --- a/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c +++ b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c @@ -34,6 +34,14 @@ struct { __uint(max_entries, 128); } mac_map SEC(".maps"); +/* map to store redirect flags for each protocol*/ +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, __u16); + __type(value, __u64); + __uint(max_entries, 16); +} redirect_flags SEC(".maps"); + SEC("xdp") int xdp_redirect_map_multi_prog(struct xdp_md *ctx) { @@ -41,25 +49,34 @@ int xdp_redirect_map_multi_prog(struct xdp_md *ctx) void *data = (void *)(long)ctx->data; int if_index = ctx->ingress_ifindex; struct ethhdr *eth = data; + __u64 *flags_from_map; __u16 h_proto; __u64 nh_off; + __u64 flags; nh_off = sizeof(*eth); if (data + nh_off > data_end) return XDP_DROP; - h_proto = eth->h_proto; - - /* Using IPv4 for (BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS) testing */ - if (h_proto == bpf_htons(ETH_P_IP)) - return bpf_redirect_map(&map_all, 0, - BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS); - /* Using IPv6 for none flag testing */ - else if (h_proto == bpf_htons(ETH_P_IPV6)) - return bpf_redirect_map(&map_all, if_index, 0); - /* All others for BPF_F_BROADCAST testing */ - else - return bpf_redirect_map(&map_all, 0, BPF_F_BROADCAST); + h_proto = bpf_htons(eth->h_proto); + + flags_from_map = bpf_map_lookup_elem(&redirect_flags, &h_proto); + + /* Default flags for IPv4 : (BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS) */ + if (h_proto == ETH_P_IP) { + flags = flags_from_map ? *flags_from_map : BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS; + return bpf_redirect_map(&map_all, 0, flags); + } + /* Default flags for IPv6 : 0 */ + if (h_proto == ETH_P_IPV6) { + flags = flags_from_map ? *flags_from_map : 0; + return bpf_redirect_map(&map_all, if_index, flags); + } + /* Default flags for others BPF_F_BROADCAST : 0 */ + else { + flags = flags_from_map ? *flags_from_map : BPF_F_BROADCAST; + return bpf_redirect_map(&map_all, 0, flags); + } } /* The following 2 progs are for 2nd devmap prog testing */ diff --git a/tools/testing/selftests/bpf/test_btf.h b/tools/testing/selftests/bpf/test_btf.h index fb4f4714eeb4..e65889ab4adf 100644 --- a/tools/testing/selftests/bpf/test_btf.h +++ b/tools/testing/selftests/bpf/test_btf.h @@ -72,9 +72,15 @@ #define BTF_TYPE_FLOAT_ENC(name, sz) \ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 0), sz) +#define BTF_DECL_ATTR_ENC(value, type, component_idx) \ + BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 1, 0), type), (component_idx) + #define BTF_DECL_TAG_ENC(value, type, component_idx) \ BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 0), type), (component_idx) +#define BTF_TYPE_ATTR_ENC(value, type) \ + BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_TYPE_TAG, 1, 0), type) + #define BTF_TYPE_TAG_ENC(value, type) \ BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_TYPE_TAG, 0, 0), type) diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c index cc9dde507aba..3220f1d28697 100644 --- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c +++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c @@ -1130,6 +1130,7 @@ static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = { }; static const struct bpf_verifier_ops bpf_testmod_verifier_ops = { + .get_func_proto = bpf_base_func_proto, .is_valid_access = bpf_testmod_ops_is_valid_access, }; @@ -1176,10 +1177,25 @@ static int bpf_testmod_ops__test_maybe_null(int dummy, return 0; } +static int bpf_testmod_ops__test_refcounted(int dummy, + struct task_struct *task__ref) +{ + return 0; +} + +static struct task_struct * +bpf_testmod_ops__test_return_ref_kptr(int dummy, struct task_struct *task__ref, + struct cgroup *cgrp) +{ + return NULL; +} + static struct bpf_testmod_ops __bpf_testmod_ops = { .test_1 = bpf_testmod_test_1, .test_2 = bpf_testmod_test_2, .test_maybe_null = bpf_testmod_ops__test_maybe_null, + .test_refcounted = bpf_testmod_ops__test_refcounted, + .test_return_ref_kptr = bpf_testmod_ops__test_return_ref_kptr, }; struct bpf_struct_ops bpf_bpf_testmod_ops = { @@ -1293,6 +1309,85 @@ static int bpf_test_mod_st_ops__test_pro_epilogue(struct st_ops_args *args) return 0; } +static int bpf_cgroup_from_id_id; +static int bpf_cgroup_release_id; + +static int st_ops_gen_prologue_with_kfunc(struct bpf_insn *insn_buf, bool direct_write, + const struct bpf_prog *prog) +{ + struct bpf_insn *insn = insn_buf; + + /* r8 = r1; // r8 will be "u64 *ctx". + * r1 = 0; + * r0 = bpf_cgroup_from_id(r1); + * if r0 != 0 goto pc+5; + * r6 = r8[0]; // r6 will be "struct st_ops *args". + * r7 = r6->a; + * r7 += 1000; + * r6->a = r7; + * goto pc+2; + * r1 = r0; + * bpf_cgroup_release(r1); + * r1 = r8; + */ + *insn++ = BPF_MOV64_REG(BPF_REG_8, BPF_REG_1); + *insn++ = BPF_MOV64_IMM(BPF_REG_1, 0); + *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_from_id_id); + *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 5); + *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_8, 0); + *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_6, offsetof(struct st_ops_args, a)); + *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 1000); + *insn++ = BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_7, offsetof(struct st_ops_args, a)); + *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 2); + *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_0); + *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_release_id), + *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_8); + *insn++ = prog->insnsi[0]; + + return insn - insn_buf; +} + +static int st_ops_gen_epilogue_with_kfunc(struct bpf_insn *insn_buf, const struct bpf_prog *prog, + s16 ctx_stack_off) +{ + struct bpf_insn *insn = insn_buf; + + /* r1 = 0; + * r6 = 0; + * r0 = bpf_cgroup_from_id(r1); + * if r0 != 0 goto pc+6; + * r1 = stack[ctx_stack_off]; // r1 will be "u64 *ctx" + * r1 = r1[0]; // r1 will be "struct st_ops *args" + * r6 = r1->a; + * r6 += 10000; + * r1->a = r6; + * goto pc+2 + * r1 = r0; + * bpf_cgroup_release(r1); + * r0 = r6; + * r0 *= 2; + * BPF_EXIT; + */ + *insn++ = BPF_MOV64_IMM(BPF_REG_1, 0); + *insn++ = BPF_MOV64_IMM(BPF_REG_6, 0); + *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_from_id_id); + *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 6); + *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_FP, ctx_stack_off); + *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0); + *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, offsetof(struct st_ops_args, a)); + *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 10000); + *insn++ = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(struct st_ops_args, a)); + *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 2); + *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_0); + *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_release_id), + *insn++ = BPF_MOV64_REG(BPF_REG_0, BPF_REG_6); + *insn++ = BPF_ALU64_IMM(BPF_MUL, BPF_REG_0, 2); + *insn++ = BPF_EXIT_INSN(); + + return insn - insn_buf; +} + +#define KFUNC_PRO_EPI_PREFIX "test_kfunc_" static int st_ops_gen_prologue(struct bpf_insn *insn_buf, bool direct_write, const struct bpf_prog *prog) { @@ -1302,6 +1397,9 @@ static int st_ops_gen_prologue(struct bpf_insn *insn_buf, bool direct_write, strcmp(prog->aux->attach_func_name, "test_pro_epilogue")) return 0; + if (!strncmp(prog->aux->name, KFUNC_PRO_EPI_PREFIX, strlen(KFUNC_PRO_EPI_PREFIX))) + return st_ops_gen_prologue_with_kfunc(insn_buf, direct_write, prog); + /* r6 = r1[0]; // r6 will be "struct st_ops *args". r1 is "u64 *ctx". * r7 = r6->a; * r7 += 1000; @@ -1325,6 +1423,9 @@ static int st_ops_gen_epilogue(struct bpf_insn *insn_buf, const struct bpf_prog strcmp(prog->aux->attach_func_name, "test_pro_epilogue")) return 0; + if (!strncmp(prog->aux->name, KFUNC_PRO_EPI_PREFIX, strlen(KFUNC_PRO_EPI_PREFIX))) + return st_ops_gen_epilogue_with_kfunc(insn_buf, prog, ctx_stack_off); + /* r1 = stack[ctx_stack_off]; // r1 will be "u64 *ctx" * r1 = r1[0]; // r1 will be "struct st_ops *args" * r6 = r1->a; @@ -1395,6 +1496,13 @@ static void st_ops_unreg(void *kdata, struct bpf_link *link) static int st_ops_init(struct btf *btf) { + struct btf *kfunc_btf; + + bpf_cgroup_from_id_id = bpf_find_btf_id("bpf_cgroup_from_id", BTF_KIND_FUNC, &kfunc_btf); + bpf_cgroup_release_id = bpf_find_btf_id("bpf_cgroup_release", BTF_KIND_FUNC, &kfunc_btf); + if (bpf_cgroup_from_id_id < 0 || bpf_cgroup_release_id < 0) + return -EINVAL; + return 0; } diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h index 356803d1c10e..c9fab51f16e2 100644 --- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h +++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h @@ -6,6 +6,7 @@ #include <linux/types.h> struct task_struct; +struct cgroup; struct bpf_testmod_test_read_ctx { char *buf; @@ -36,6 +37,11 @@ struct bpf_testmod_ops { /* Used to test nullable arguments. */ int (*test_maybe_null)(int dummy, struct task_struct *task); int (*unsupported_ops)(void); + /* Used to test ref_acquired arguments. */ + int (*test_refcounted)(int dummy, struct task_struct *task); + /* Used to test returning referenced kptr. */ + struct task_struct *(*test_return_ref_kptr)(int dummy, struct task_struct *task, + struct cgroup *cgrp); /* The following fields are used to test shadow copies. */ char onebyte; diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index 53b06647cf57..49f2fc61061f 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -37,6 +37,7 @@ #define TEST_TAG_JITED_PFX "comment:test_jited=" #define TEST_TAG_JITED_PFX_UNPRIV "comment:test_jited_unpriv=" #define TEST_TAG_CAPS_UNPRIV "comment:test_caps_unpriv=" +#define TEST_TAG_LOAD_MODE_PFX "comment:load_mode=" /* Warning: duplicated in bpf_misc.h */ #define POINTER_VALUE 0xcafe4all @@ -55,6 +56,11 @@ enum mode { UNPRIV = 2 }; +enum load_mode { + JITED = 1 << 0, + NO_JITED = 1 << 1, +}; + struct expect_msg { const char *substr; /* substring match */ regex_t regex; @@ -87,6 +93,7 @@ struct test_spec { int prog_flags; int mode_mask; int arch_mask; + int load_mask; bool auxiliary; bool valid; }; @@ -406,6 +413,7 @@ static int parse_test_spec(struct test_loader *tester, bool collect_jit = false; int func_id, i, err = 0; u32 arch_mask = 0; + u32 load_mask = 0; struct btf *btf; enum arch arch; @@ -580,10 +588,22 @@ static int parse_test_spec(struct test_loader *tester, if (err) goto cleanup; spec->mode_mask |= UNPRIV; + } else if (str_has_pfx(s, TEST_TAG_LOAD_MODE_PFX)) { + val = s + sizeof(TEST_TAG_LOAD_MODE_PFX) - 1; + if (strcmp(val, "jited") == 0) { + load_mask = JITED; + } else if (strcmp(val, "no_jited") == 0) { + load_mask = NO_JITED; + } else { + PRINT_FAIL("bad load spec: '%s'", val); + err = -EINVAL; + goto cleanup; + } } } spec->arch_mask = arch_mask ?: -1; + spec->load_mask = load_mask ?: (JITED | NO_JITED); if (spec->mode_mask == 0) spec->mode_mask = PRIV; @@ -773,7 +793,7 @@ static int drop_capabilities(struct cap_state *caps) err = cap_disable_effective(caps_to_drop, &caps->old_caps); if (err) { - PRINT_FAIL("failed to drop capabilities: %i, %s\n", err, strerror(err)); + PRINT_FAIL("failed to drop capabilities: %i, %s\n", err, strerror(-err)); return err; } @@ -790,7 +810,7 @@ static int restore_capabilities(struct cap_state *caps) err = cap_enable_effective(caps->old_caps, NULL); if (err) - PRINT_FAIL("failed to restore capabilities: %i, %s\n", err, strerror(err)); + PRINT_FAIL("failed to restore capabilities: %i, %s\n", err, strerror(-err)); caps->initialized = false; return err; } @@ -928,6 +948,7 @@ void run_subtest(struct test_loader *tester, bool unpriv) { struct test_subspec *subspec = unpriv ? &spec->unpriv : &spec->priv; + int current_runtime = is_jit_enabled() ? JITED : NO_JITED; struct bpf_program *tprog = NULL, *tprog_iter; struct bpf_link *link, *links[32] = {}; struct test_spec *spec_iter; @@ -946,6 +967,11 @@ void run_subtest(struct test_loader *tester, return; } + if ((current_runtime & spec->load_mask) == 0) { + test__skip(); + return; + } + if (unpriv) { if (!can_execute_unpriv(tester, spec)) { test__skip(); @@ -959,7 +985,7 @@ void run_subtest(struct test_loader *tester, if (subspec->caps) { err = cap_enable_effective(subspec->caps, NULL); if (err) { - PRINT_FAIL("failed to set capabilities: %i, %s\n", err, strerror(err)); + PRINT_FAIL("failed to set capabilities: %i, %s\n", err, strerror(-err)); goto subtest_cleanup; } } diff --git a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh deleted file mode 100755 index 1e565f47aca9..000000000000 --- a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh +++ /dev/null @@ -1,476 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# -# Setup/topology: -# -# NS1 NS2 NS3 -# veth1 <---> veth2 veth3 <---> veth4 (the top route) -# veth5 <---> veth6 veth7 <---> veth8 (the bottom route) -# -# each vethN gets IPv[4|6]_N address -# -# IPv*_SRC = IPv*_1 -# IPv*_DST = IPv*_4 -# -# all tests test pings from IPv*_SRC to IPv*_DST -# -# by default, routes are configured to allow packets to go -# IP*_1 <=> IP*_2 <=> IP*_3 <=> IP*_4 (the top route) -# -# a GRE device is installed in NS3 with IPv*_GRE, and -# NS1/NS2 are configured to route packets to IPv*_GRE via IP*_8 -# (the bottom route) -# -# Tests: -# -# 1. routes NS2->IPv*_DST are brought down, so the only way a ping -# from IP*_SRC to IP*_DST can work is via IPv*_GRE -# -# 2a. in an egress test, a bpf LWT_XMIT program is installed on veth1 -# that encaps the packets with an IP/GRE header to route to IPv*_GRE -# -# ping: SRC->[encap at veth1:egress]->GRE:decap->DST -# ping replies go DST->SRC directly -# -# 2b. in an ingress test, a bpf LWT_IN program is installed on veth2 -# that encaps the packets with an IP/GRE header to route to IPv*_GRE -# -# ping: SRC->[encap at veth2:ingress]->GRE:decap->DST -# ping replies go DST->SRC directly - -BPF_FILE="test_lwt_ip_encap.bpf.o" -if [[ $EUID -ne 0 ]]; then - echo "This script must be run as root" - echo "FAIL" - exit 1 -fi - -readonly NS1="ns1-$(mktemp -u XXXXXX)" -readonly NS2="ns2-$(mktemp -u XXXXXX)" -readonly NS3="ns3-$(mktemp -u XXXXXX)" - -readonly IPv4_1="172.16.1.100" -readonly IPv4_2="172.16.2.100" -readonly IPv4_3="172.16.3.100" -readonly IPv4_4="172.16.4.100" -readonly IPv4_5="172.16.5.100" -readonly IPv4_6="172.16.6.100" -readonly IPv4_7="172.16.7.100" -readonly IPv4_8="172.16.8.100" -readonly IPv4_GRE="172.16.16.100" - -readonly IPv4_SRC=$IPv4_1 -readonly IPv4_DST=$IPv4_4 - -readonly IPv6_1="fb01::1" -readonly IPv6_2="fb02::1" -readonly IPv6_3="fb03::1" -readonly IPv6_4="fb04::1" -readonly IPv6_5="fb05::1" -readonly IPv6_6="fb06::1" -readonly IPv6_7="fb07::1" -readonly IPv6_8="fb08::1" -readonly IPv6_GRE="fb10::1" - -readonly IPv6_SRC=$IPv6_1 -readonly IPv6_DST=$IPv6_4 - -TEST_STATUS=0 -TESTS_SUCCEEDED=0 -TESTS_FAILED=0 - -TMPFILE="" - -process_test_results() -{ - if [[ "${TEST_STATUS}" -eq 0 ]] ; then - echo "PASS" - TESTS_SUCCEEDED=$((TESTS_SUCCEEDED+1)) - else - echo "FAIL" - TESTS_FAILED=$((TESTS_FAILED+1)) - fi -} - -print_test_summary_and_exit() -{ - echo "passed tests: ${TESTS_SUCCEEDED}" - echo "failed tests: ${TESTS_FAILED}" - if [ "${TESTS_FAILED}" -eq "0" ] ; then - exit 0 - else - exit 1 - fi -} - -setup() -{ - set -e # exit on error - TEST_STATUS=0 - - # create devices and namespaces - ip netns add "${NS1}" - ip netns add "${NS2}" - ip netns add "${NS3}" - - # rp_filter gets confused by what these tests are doing, so disable it - ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec ${NS2} sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec ${NS3} sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec ${NS1} sysctl -wq net.ipv4.conf.default.rp_filter=0 - ip netns exec ${NS2} sysctl -wq net.ipv4.conf.default.rp_filter=0 - ip netns exec ${NS3} sysctl -wq net.ipv4.conf.default.rp_filter=0 - - # disable IPv6 DAD because it sometimes takes too long and fails tests - ip netns exec ${NS1} sysctl -wq net.ipv6.conf.all.accept_dad=0 - ip netns exec ${NS2} sysctl -wq net.ipv6.conf.all.accept_dad=0 - ip netns exec ${NS3} sysctl -wq net.ipv6.conf.all.accept_dad=0 - ip netns exec ${NS1} sysctl -wq net.ipv6.conf.default.accept_dad=0 - ip netns exec ${NS2} sysctl -wq net.ipv6.conf.default.accept_dad=0 - ip netns exec ${NS3} sysctl -wq net.ipv6.conf.default.accept_dad=0 - - ip link add veth1 type veth peer name veth2 - ip link add veth3 type veth peer name veth4 - ip link add veth5 type veth peer name veth6 - ip link add veth7 type veth peer name veth8 - - ip netns exec ${NS2} sysctl -wq net.ipv4.ip_forward=1 - ip netns exec ${NS2} sysctl -wq net.ipv6.conf.all.forwarding=1 - - ip link set veth1 netns ${NS1} - ip link set veth2 netns ${NS2} - ip link set veth3 netns ${NS2} - ip link set veth4 netns ${NS3} - ip link set veth5 netns ${NS1} - ip link set veth6 netns ${NS2} - ip link set veth7 netns ${NS2} - ip link set veth8 netns ${NS3} - - if [ ! -z "${VRF}" ] ; then - ip -netns ${NS1} link add red type vrf table 1001 - ip -netns ${NS1} link set red up - ip -netns ${NS1} route add table 1001 unreachable default metric 8192 - ip -netns ${NS1} -6 route add table 1001 unreachable default metric 8192 - ip -netns ${NS1} link set veth1 vrf red - ip -netns ${NS1} link set veth5 vrf red - - ip -netns ${NS2} link add red type vrf table 1001 - ip -netns ${NS2} link set red up - ip -netns ${NS2} route add table 1001 unreachable default metric 8192 - ip -netns ${NS2} -6 route add table 1001 unreachable default metric 8192 - ip -netns ${NS2} link set veth2 vrf red - ip -netns ${NS2} link set veth3 vrf red - ip -netns ${NS2} link set veth6 vrf red - ip -netns ${NS2} link set veth7 vrf red - fi - - # configure addesses: the top route (1-2-3-4) - ip -netns ${NS1} addr add ${IPv4_1}/24 dev veth1 - ip -netns ${NS2} addr add ${IPv4_2}/24 dev veth2 - ip -netns ${NS2} addr add ${IPv4_3}/24 dev veth3 - ip -netns ${NS3} addr add ${IPv4_4}/24 dev veth4 - ip -netns ${NS1} -6 addr add ${IPv6_1}/128 nodad dev veth1 - ip -netns ${NS2} -6 addr add ${IPv6_2}/128 nodad dev veth2 - ip -netns ${NS2} -6 addr add ${IPv6_3}/128 nodad dev veth3 - ip -netns ${NS3} -6 addr add ${IPv6_4}/128 nodad dev veth4 - - # configure addresses: the bottom route (5-6-7-8) - ip -netns ${NS1} addr add ${IPv4_5}/24 dev veth5 - ip -netns ${NS2} addr add ${IPv4_6}/24 dev veth6 - ip -netns ${NS2} addr add ${IPv4_7}/24 dev veth7 - ip -netns ${NS3} addr add ${IPv4_8}/24 dev veth8 - ip -netns ${NS1} -6 addr add ${IPv6_5}/128 nodad dev veth5 - ip -netns ${NS2} -6 addr add ${IPv6_6}/128 nodad dev veth6 - ip -netns ${NS2} -6 addr add ${IPv6_7}/128 nodad dev veth7 - ip -netns ${NS3} -6 addr add ${IPv6_8}/128 nodad dev veth8 - - ip -netns ${NS1} link set dev veth1 up - ip -netns ${NS2} link set dev veth2 up - ip -netns ${NS2} link set dev veth3 up - ip -netns ${NS3} link set dev veth4 up - ip -netns ${NS1} link set dev veth5 up - ip -netns ${NS2} link set dev veth6 up - ip -netns ${NS2} link set dev veth7 up - ip -netns ${NS3} link set dev veth8 up - - # configure routes: IP*_SRC -> veth1/IP*_2 (= top route) default; - # the bottom route to specific bottom addresses - - # NS1 - # top route - ip -netns ${NS1} route add ${IPv4_2}/32 dev veth1 ${VRF} - ip -netns ${NS1} route add default dev veth1 via ${IPv4_2} ${VRF} # go top by default - ip -netns ${NS1} -6 route add ${IPv6_2}/128 dev veth1 ${VRF} - ip -netns ${NS1} -6 route add default dev veth1 via ${IPv6_2} ${VRF} # go top by default - # bottom route - ip -netns ${NS1} route add ${IPv4_6}/32 dev veth5 ${VRF} - ip -netns ${NS1} route add ${IPv4_7}/32 dev veth5 via ${IPv4_6} ${VRF} - ip -netns ${NS1} route add ${IPv4_8}/32 dev veth5 via ${IPv4_6} ${VRF} - ip -netns ${NS1} -6 route add ${IPv6_6}/128 dev veth5 ${VRF} - ip -netns ${NS1} -6 route add ${IPv6_7}/128 dev veth5 via ${IPv6_6} ${VRF} - ip -netns ${NS1} -6 route add ${IPv6_8}/128 dev veth5 via ${IPv6_6} ${VRF} - - # NS2 - # top route - ip -netns ${NS2} route add ${IPv4_1}/32 dev veth2 ${VRF} - ip -netns ${NS2} route add ${IPv4_4}/32 dev veth3 ${VRF} - ip -netns ${NS2} -6 route add ${IPv6_1}/128 dev veth2 ${VRF} - ip -netns ${NS2} -6 route add ${IPv6_4}/128 dev veth3 ${VRF} - # bottom route - ip -netns ${NS2} route add ${IPv4_5}/32 dev veth6 ${VRF} - ip -netns ${NS2} route add ${IPv4_8}/32 dev veth7 ${VRF} - ip -netns ${NS2} -6 route add ${IPv6_5}/128 dev veth6 ${VRF} - ip -netns ${NS2} -6 route add ${IPv6_8}/128 dev veth7 ${VRF} - - # NS3 - # top route - ip -netns ${NS3} route add ${IPv4_3}/32 dev veth4 - ip -netns ${NS3} route add ${IPv4_1}/32 dev veth4 via ${IPv4_3} - ip -netns ${NS3} route add ${IPv4_2}/32 dev veth4 via ${IPv4_3} - ip -netns ${NS3} -6 route add ${IPv6_3}/128 dev veth4 - ip -netns ${NS3} -6 route add ${IPv6_1}/128 dev veth4 via ${IPv6_3} - ip -netns ${NS3} -6 route add ${IPv6_2}/128 dev veth4 via ${IPv6_3} - # bottom route - ip -netns ${NS3} route add ${IPv4_7}/32 dev veth8 - ip -netns ${NS3} route add ${IPv4_5}/32 dev veth8 via ${IPv4_7} - ip -netns ${NS3} route add ${IPv4_6}/32 dev veth8 via ${IPv4_7} - ip -netns ${NS3} -6 route add ${IPv6_7}/128 dev veth8 - ip -netns ${NS3} -6 route add ${IPv6_5}/128 dev veth8 via ${IPv6_7} - ip -netns ${NS3} -6 route add ${IPv6_6}/128 dev veth8 via ${IPv6_7} - - # configure IPv4 GRE device in NS3, and a route to it via the "bottom" route - ip -netns ${NS3} tunnel add gre_dev mode gre remote ${IPv4_1} local ${IPv4_GRE} ttl 255 - ip -netns ${NS3} link set gre_dev up - ip -netns ${NS3} addr add ${IPv4_GRE} dev gre_dev - ip -netns ${NS1} route add ${IPv4_GRE}/32 dev veth5 via ${IPv4_6} ${VRF} - ip -netns ${NS2} route add ${IPv4_GRE}/32 dev veth7 via ${IPv4_8} ${VRF} - - - # configure IPv6 GRE device in NS3, and a route to it via the "bottom" route - ip -netns ${NS3} -6 tunnel add name gre6_dev mode ip6gre remote ${IPv6_1} local ${IPv6_GRE} ttl 255 - ip -netns ${NS3} link set gre6_dev up - ip -netns ${NS3} -6 addr add ${IPv6_GRE} nodad dev gre6_dev - ip -netns ${NS1} -6 route add ${IPv6_GRE}/128 dev veth5 via ${IPv6_6} ${VRF} - ip -netns ${NS2} -6 route add ${IPv6_GRE}/128 dev veth7 via ${IPv6_8} ${VRF} - - TMPFILE=$(mktemp /tmp/test_lwt_ip_encap.XXXXXX) - - sleep 1 # reduce flakiness - set +e -} - -cleanup() -{ - if [ -f ${TMPFILE} ] ; then - rm ${TMPFILE} - fi - - ip netns del ${NS1} 2> /dev/null - ip netns del ${NS2} 2> /dev/null - ip netns del ${NS3} 2> /dev/null -} - -trap cleanup EXIT - -remove_routes_to_gredev() -{ - ip -netns ${NS1} route del ${IPv4_GRE} dev veth5 ${VRF} - ip -netns ${NS2} route del ${IPv4_GRE} dev veth7 ${VRF} - ip -netns ${NS1} -6 route del ${IPv6_GRE}/128 dev veth5 ${VRF} - ip -netns ${NS2} -6 route del ${IPv6_GRE}/128 dev veth7 ${VRF} -} - -add_unreachable_routes_to_gredev() -{ - ip -netns ${NS1} route add unreachable ${IPv4_GRE}/32 ${VRF} - ip -netns ${NS2} route add unreachable ${IPv4_GRE}/32 ${VRF} - ip -netns ${NS1} -6 route add unreachable ${IPv6_GRE}/128 ${VRF} - ip -netns ${NS2} -6 route add unreachable ${IPv6_GRE}/128 ${VRF} -} - -test_ping() -{ - local readonly PROTO=$1 - local readonly EXPECTED=$2 - local RET=0 - - if [ "${PROTO}" == "IPv4" ] ; then - ip netns exec ${NS1} ping -c 1 -W 1 -I veth1 ${IPv4_DST} 2>&1 > /dev/null - RET=$? - elif [ "${PROTO}" == "IPv6" ] ; then - ip netns exec ${NS1} ping6 -c 1 -W 1 -I veth1 ${IPv6_DST} 2>&1 > /dev/null - RET=$? - else - echo " test_ping: unknown PROTO: ${PROTO}" - TEST_STATUS=1 - fi - - if [ "0" != "${RET}" ]; then - RET=1 - fi - - if [ "${EXPECTED}" != "${RET}" ] ; then - echo " test_ping failed: expected: ${EXPECTED}; got ${RET}" - TEST_STATUS=1 - fi -} - -test_gso() -{ - local readonly PROTO=$1 - local readonly PKT_SZ=5000 - local IP_DST="" - : > ${TMPFILE} # trim the capture file - - # check that nc is present - command -v nc >/dev/null 2>&1 || \ - { echo >&2 "nc is not available: skipping TSO tests"; return; } - - # listen on port 9000, capture TCP into $TMPFILE - if [ "${PROTO}" == "IPv4" ] ; then - IP_DST=${IPv4_DST} - ip netns exec ${NS3} bash -c \ - "nc -4 -l -p 9000 > ${TMPFILE} &" - elif [ "${PROTO}" == "IPv6" ] ; then - IP_DST=${IPv6_DST} - ip netns exec ${NS3} bash -c \ - "nc -6 -l -p 9000 > ${TMPFILE} &" - RET=$? - else - echo " test_gso: unknown PROTO: ${PROTO}" - TEST_STATUS=1 - fi - sleep 1 # let nc start listening - - # send a packet larger than MTU - ip netns exec ${NS1} bash -c \ - "dd if=/dev/zero bs=$PKT_SZ count=1 > /dev/tcp/${IP_DST}/9000 2>/dev/null" - sleep 2 # let the packet get delivered - - # verify we received all expected bytes - SZ=$(stat -c %s ${TMPFILE}) - if [ "$SZ" != "$PKT_SZ" ] ; then - echo " test_gso failed: ${PROTO}" - TEST_STATUS=1 - fi -} - -test_egress() -{ - local readonly ENCAP=$1 - echo "starting egress ${ENCAP} encap test ${VRF}" - setup - - # by default, pings work - test_ping IPv4 0 - test_ping IPv6 0 - - # remove NS2->DST routes, ping fails - ip -netns ${NS2} route del ${IPv4_DST}/32 dev veth3 ${VRF} - ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3 ${VRF} - test_ping IPv4 1 - test_ping IPv6 1 - - # install replacement routes (LWT/eBPF), pings succeed - if [ "${ENCAP}" == "IPv4" ] ; then - ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj \ - ${BPF_FILE} sec encap_gre dev veth1 ${VRF} - ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj \ - ${BPF_FILE} sec encap_gre dev veth1 ${VRF} - elif [ "${ENCAP}" == "IPv6" ] ; then - ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj \ - ${BPF_FILE} sec encap_gre6 dev veth1 ${VRF} - ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj \ - ${BPF_FILE} sec encap_gre6 dev veth1 ${VRF} - else - echo " unknown encap ${ENCAP}" - TEST_STATUS=1 - fi - test_ping IPv4 0 - test_ping IPv6 0 - - # skip GSO tests with VRF: VRF routing needs properly assigned - # source IP/device, which is easy to do with ping and hard with dd/nc. - if [ -z "${VRF}" ] ; then - test_gso IPv4 - test_gso IPv6 - fi - - # a negative test: remove routes to GRE devices: ping fails - remove_routes_to_gredev - test_ping IPv4 1 - test_ping IPv6 1 - - # another negative test - add_unreachable_routes_to_gredev - test_ping IPv4 1 - test_ping IPv6 1 - - cleanup - process_test_results -} - -test_ingress() -{ - local readonly ENCAP=$1 - echo "starting ingress ${ENCAP} encap test ${VRF}" - setup - - # need to wait a bit for IPv6 to autoconf, otherwise - # ping6 sometimes fails with "unable to bind to address" - - # by default, pings work - test_ping IPv4 0 - test_ping IPv6 0 - - # remove NS2->DST routes, pings fail - ip -netns ${NS2} route del ${IPv4_DST}/32 dev veth3 ${VRF} - ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3 ${VRF} - test_ping IPv4 1 - test_ping IPv6 1 - - # install replacement routes (LWT/eBPF), pings succeed - if [ "${ENCAP}" == "IPv4" ] ; then - ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj \ - ${BPF_FILE} sec encap_gre dev veth2 ${VRF} - ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj \ - ${BPF_FILE} sec encap_gre dev veth2 ${VRF} - elif [ "${ENCAP}" == "IPv6" ] ; then - ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj \ - ${BPF_FILE} sec encap_gre6 dev veth2 ${VRF} - ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj \ - ${BPF_FILE} sec encap_gre6 dev veth2 ${VRF} - else - echo "FAIL: unknown encap ${ENCAP}" - TEST_STATUS=1 - fi - test_ping IPv4 0 - test_ping IPv6 0 - - # a negative test: remove routes to GRE devices: ping fails - remove_routes_to_gredev - test_ping IPv4 1 - test_ping IPv6 1 - - # another negative test - add_unreachable_routes_to_gredev - test_ping IPv4 1 - test_ping IPv6 1 - - cleanup - process_test_results -} - -VRF="" -test_egress IPv4 -test_egress IPv6 -test_ingress IPv4 -test_ingress IPv6 - -VRF="vrf red" -test_egress IPv4 -test_egress IPv6 -test_ingress IPv4 -test_ingress IPv6 - -print_test_summary_and_exit diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.sh b/tools/testing/selftests/bpf/test_lwt_seg6local.sh deleted file mode 100755 index 0efea2292d6a..000000000000 --- a/tools/testing/selftests/bpf/test_lwt_seg6local.sh +++ /dev/null @@ -1,156 +0,0 @@ -#!/bin/bash -# Connects 6 network namespaces through veths. -# Each NS may have different IPv6 global scope addresses : -# NS1 ---- NS2 ---- NS3 ---- NS4 ---- NS5 ---- NS6 -# fb00::1 fd00::1 fd00::2 fd00::3 fb00::6 -# fc42::1 fd00::4 -# -# All IPv6 packets going to fb00::/16 through NS2 will be encapsulated in a -# IPv6 header with a Segment Routing Header, with segments : -# fd00::1 -> fd00::2 -> fd00::3 -> fd00::4 -# -# 3 fd00::/16 IPv6 addresses are binded to seg6local End.BPF actions : -# - fd00::1 : add a TLV, change the flags and apply a End.X action to fc42::1 -# - fd00::2 : remove the TLV, change the flags, add a tag -# - fd00::3 : apply an End.T action to fd00::4, through routing table 117 -# -# fd00::4 is a simple Segment Routing node decapsulating the inner IPv6 packet. -# Each End.BPF action will validate the operations applied on the SRH by the -# previous BPF program in the chain, otherwise the packet is dropped. -# -# An UDP datagram is sent from fb00::1 to fb00::6. The test succeeds if this -# datagram can be read on NS6 when binding to fb00::6. - -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 -BPF_FILE="test_lwt_seg6local.bpf.o" -readonly NS1="ns1-$(mktemp -u XXXXXX)" -readonly NS2="ns2-$(mktemp -u XXXXXX)" -readonly NS3="ns3-$(mktemp -u XXXXXX)" -readonly NS4="ns4-$(mktemp -u XXXXXX)" -readonly NS5="ns5-$(mktemp -u XXXXXX)" -readonly NS6="ns6-$(mktemp -u XXXXXX)" - -msg="skip all tests:" -if [ $UID != 0 ]; then - echo $msg please run this as root >&2 - exit $ksft_skip -fi - -TMP_FILE="/tmp/selftest_lwt_seg6local.txt" - -cleanup() -{ - if [ "$?" = "0" ]; then - echo "selftests: test_lwt_seg6local [PASS]"; - else - echo "selftests: test_lwt_seg6local [FAILED]"; - fi - - set +e - ip netns del ${NS1} 2> /dev/null - ip netns del ${NS2} 2> /dev/null - ip netns del ${NS3} 2> /dev/null - ip netns del ${NS4} 2> /dev/null - ip netns del ${NS5} 2> /dev/null - ip netns del ${NS6} 2> /dev/null - rm -f $TMP_FILE -} - -set -e - -ip netns add ${NS1} -ip netns add ${NS2} -ip netns add ${NS3} -ip netns add ${NS4} -ip netns add ${NS5} -ip netns add ${NS6} - -trap cleanup 0 2 3 6 9 - -ip link add veth1 type veth peer name veth2 -ip link add veth3 type veth peer name veth4 -ip link add veth5 type veth peer name veth6 -ip link add veth7 type veth peer name veth8 -ip link add veth9 type veth peer name veth10 - -ip link set veth1 netns ${NS1} -ip link set veth2 netns ${NS2} -ip link set veth3 netns ${NS2} -ip link set veth4 netns ${NS3} -ip link set veth5 netns ${NS3} -ip link set veth6 netns ${NS4} -ip link set veth7 netns ${NS4} -ip link set veth8 netns ${NS5} -ip link set veth9 netns ${NS5} -ip link set veth10 netns ${NS6} - -ip netns exec ${NS1} ip link set dev veth1 up -ip netns exec ${NS2} ip link set dev veth2 up -ip netns exec ${NS2} ip link set dev veth3 up -ip netns exec ${NS3} ip link set dev veth4 up -ip netns exec ${NS3} ip link set dev veth5 up -ip netns exec ${NS4} ip link set dev veth6 up -ip netns exec ${NS4} ip link set dev veth7 up -ip netns exec ${NS5} ip link set dev veth8 up -ip netns exec ${NS5} ip link set dev veth9 up -ip netns exec ${NS6} ip link set dev veth10 up -ip netns exec ${NS6} ip link set dev lo up - -# All link scope addresses and routes required between veths -ip netns exec ${NS1} ip -6 addr add fb00::12/16 dev veth1 scope link -ip netns exec ${NS1} ip -6 route add fb00::21 dev veth1 scope link -ip netns exec ${NS2} ip -6 addr add fb00::21/16 dev veth2 scope link -ip netns exec ${NS2} ip -6 addr add fb00::34/16 dev veth3 scope link -ip netns exec ${NS2} ip -6 route add fb00::43 dev veth3 scope link -ip netns exec ${NS3} ip -6 route add fb00::65 dev veth5 scope link -ip netns exec ${NS3} ip -6 addr add fb00::43/16 dev veth4 scope link -ip netns exec ${NS3} ip -6 addr add fb00::56/16 dev veth5 scope link -ip netns exec ${NS4} ip -6 addr add fb00::65/16 dev veth6 scope link -ip netns exec ${NS4} ip -6 addr add fb00::78/16 dev veth7 scope link -ip netns exec ${NS4} ip -6 route add fb00::87 dev veth7 scope link -ip netns exec ${NS5} ip -6 addr add fb00::87/16 dev veth8 scope link -ip netns exec ${NS5} ip -6 addr add fb00::910/16 dev veth9 scope link -ip netns exec ${NS5} ip -6 route add fb00::109 dev veth9 scope link -ip netns exec ${NS5} ip -6 route add fb00::109 table 117 dev veth9 scope link -ip netns exec ${NS6} ip -6 addr add fb00::109/16 dev veth10 scope link - -ip netns exec ${NS1} ip -6 addr add fb00::1/16 dev lo -ip netns exec ${NS1} ip -6 route add fb00::6 dev veth1 via fb00::21 - -ip netns exec ${NS2} ip -6 route add fb00::6 encap bpf in obj ${BPF_FILE} sec encap_srh dev veth2 -ip netns exec ${NS2} ip -6 route add fd00::1 dev veth3 via fb00::43 scope link - -ip netns exec ${NS3} ip -6 route add fc42::1 dev veth5 via fb00::65 -ip netns exec ${NS3} ip -6 route add fd00::1 encap seg6local action End.BPF endpoint obj ${BPF_FILE} sec add_egr_x dev veth4 - -ip netns exec ${NS4} ip -6 route add fd00::2 encap seg6local action End.BPF endpoint obj ${BPF_FILE} sec pop_egr dev veth6 -ip netns exec ${NS4} ip -6 addr add fc42::1 dev lo -ip netns exec ${NS4} ip -6 route add fd00::3 dev veth7 via fb00::87 - -ip netns exec ${NS5} ip -6 route add fd00::4 table 117 dev veth9 via fb00::109 -ip netns exec ${NS5} ip -6 route add fd00::3 encap seg6local action End.BPF endpoint obj ${BPF_FILE} sec inspect_t dev veth8 - -ip netns exec ${NS6} ip -6 addr add fb00::6/16 dev lo -ip netns exec ${NS6} ip -6 addr add fd00::4/16 dev lo - -ip netns exec ${NS1} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null -ip netns exec ${NS2} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null -ip netns exec ${NS3} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null -ip netns exec ${NS4} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null -ip netns exec ${NS5} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null - -ip netns exec ${NS6} sysctl net.ipv6.conf.all.seg6_enabled=1 > /dev/null -ip netns exec ${NS6} sysctl net.ipv6.conf.lo.seg6_enabled=1 > /dev/null -ip netns exec ${NS6} sysctl net.ipv6.conf.veth10.seg6_enabled=1 > /dev/null - -ip netns exec ${NS6} nc -l -6 -u -d 7330 > $TMP_FILE & -ip netns exec ${NS1} bash -c "echo 'foobar' | nc -w0 -6 -u -p 2121 -s fb00::1 fb00::6 7330" -sleep 5 # wait enough time to ensure the UDP datagram arrived to the last segment -kill -TERM $! - -if [[ $(< $TMP_FILE) != "foobar" ]]; then - exit 1 -fi - -exit 0 diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 8b40e9496af1..986ce32b113a 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -1396,9 +1396,10 @@ static void test_map_stress(void) #define MAX_DELAY_US 50000 #define MIN_DELAY_RANGE_US 5000 -static bool retry_for_again_or_busy(int err) +static bool can_retry(int err) { - return (err == EAGAIN || err == EBUSY); + return (err == EAGAIN || err == EBUSY || + (err == ENOMEM && map_opts.map_flags == BPF_F_NO_PREALLOC)); } int map_update_retriable(int map_fd, const void *key, const void *value, int flags, int attempts, @@ -1451,12 +1452,12 @@ static void test_update_delete(unsigned int fn, void *data) if (do_update) { err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES, - retry_for_again_or_busy); + can_retry); if (err) printf("error %d %d\n", err, errno); assert(err == 0); err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES, - retry_for_again_or_busy); + can_retry); if (err) printf("error %d %d\n", err, errno); assert(err == 0); diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index c9e745d49493..309d9d4a8ace 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -88,7 +88,9 @@ static void stdio_hijack(char **log_buf, size_t *log_cnt) #endif } -static void stdio_restore_cleanup(void) +static pthread_mutex_t stdout_lock = PTHREAD_MUTEX_INITIALIZER; + +static void stdio_restore(void) { #ifdef __GLIBC__ if (verbose() && env.worker_id == -1) { @@ -98,34 +100,33 @@ static void stdio_restore_cleanup(void) fflush(stdout); + pthread_mutex_lock(&stdout_lock); + if (env.subtest_state) { - fclose(env.subtest_state->stdout_saved); + if (env.subtest_state->stdout_saved) + fclose(env.subtest_state->stdout_saved); env.subtest_state->stdout_saved = NULL; stdout = env.test_state->stdout_saved; stderr = env.test_state->stdout_saved; } else { - fclose(env.test_state->stdout_saved); + if (env.test_state->stdout_saved) + fclose(env.test_state->stdout_saved); env.test_state->stdout_saved = NULL; + stdout = env.stdout_saved; + stderr = env.stderr_saved; } + + pthread_mutex_unlock(&stdout_lock); #endif } -static void stdio_restore(void) +static int traffic_monitor_print_fn(const char *format, va_list args) { -#ifdef __GLIBC__ - if (verbose() && env.worker_id == -1) { - /* nothing to do, output to stdout by default */ - return; - } - - if (stdout == env.stdout_saved) - return; - - stdio_restore_cleanup(); + pthread_mutex_lock(&stdout_lock); + vfprintf(stdout, format, args); + pthread_mutex_unlock(&stdout_lock); - stdout = env.stdout_saved; - stderr = env.stderr_saved; -#endif + return 0; } /* Adapted from perf/util/string.c */ @@ -474,8 +475,6 @@ static void dump_test_log(const struct prog_test_def *test, print_test_result(test, test_state); } -static void stdio_restore(void); - /* A bunch of tests set custom affinity per-thread and/or per-process. Reset * it after each test/sub-test. */ @@ -490,13 +489,11 @@ static void reset_affinity(void) err = sched_setaffinity(0, sizeof(cpuset), &cpuset); if (err < 0) { - stdio_restore(); fprintf(stderr, "Failed to reset process affinity: %d!\n", err); exit(EXIT_ERR_SETUP_INFRA); } err = pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset); if (err < 0) { - stdio_restore(); fprintf(stderr, "Failed to reset thread affinity: %d!\n", err); exit(EXIT_ERR_SETUP_INFRA); } @@ -514,7 +511,6 @@ static void save_netns(void) static void restore_netns(void) { if (setns(env.saved_netns_fd, CLONE_NEWNET) == -1) { - stdio_restore(); perror("setns(CLONE_NEWNS)"); exit(EXIT_ERR_SETUP_INFRA); } @@ -541,7 +537,8 @@ void test__end_subtest(void) test_result(subtest_state->error_cnt, subtest_state->skipped)); - stdio_restore_cleanup(); + stdio_restore(); + env.subtest_state = NULL; } @@ -1270,8 +1267,10 @@ void crash_handler(int signum) sz = backtrace(bt, ARRAY_SIZE(bt)); - if (env.stdout_saved) - stdio_restore(); + fflush(stdout); + stdout = env.stdout_saved; + stderr = env.stderr_saved; + if (env.test) { env.test_state->error_cnt++; dump_test_log(env.test, env.test_state, true, false, NULL); @@ -1365,10 +1364,19 @@ static int recv_message(int sock, struct msg *msg) return ret; } +static bool ns_is_needed(const char *test_name) +{ + if (strlen(test_name) < 3) + return false; + + return !strncmp(test_name, "ns_", 3); +} + static void run_one_test(int test_num) { struct prog_test_def *test = &prog_test_defs[test_num]; struct test_state *state = &test_states[test_num]; + struct netns_obj *ns = NULL; env.test = test; env.test_state = state; @@ -1376,10 +1384,13 @@ static void run_one_test(int test_num) stdio_hijack(&state->log_buf, &state->log_cnt); watchdog_start(); + if (ns_is_needed(test->test_name)) + ns = netns_new(test->test_name, true); if (test->run_test) test->run_test(); else if (test->run_serial_test) test->run_serial_test(); + netns_free(ns); watchdog_stop(); /* ensure last sub-test is finalized properly */ @@ -1388,6 +1399,8 @@ static void run_one_test(int test_num) state->tested = true; + stdio_restore(); + if (verbose() && env.worker_id == -1) print_test_result(test, state); @@ -1396,7 +1409,6 @@ static void run_one_test(int test_num) if (test->need_cgroup_cleanup) cleanup_cgroup_environment(); - stdio_restore(); free(stop_libbpf_log_capture()); dump_test_log(test, state, false, false, NULL); @@ -1931,6 +1943,9 @@ int main(int argc, char **argv) sigaction(SIGSEGV, &sigact, NULL); + env.stdout_saved = stdout; + env.stderr_saved = stderr; + env.secs_till_notify = 10; env.secs_till_kill = 120; err = argp_parse(&argp, argc, argv, 0, NULL, &env); @@ -1947,6 +1962,8 @@ int main(int argc, char **argv) libbpf_set_strict_mode(LIBBPF_STRICT_ALL); libbpf_set_print(libbpf_print_fn); + traffic_monitor_set_print(traffic_monitor_print_fn); + srand(time(NULL)); env.jit_enabled = is_jit_enabled(); @@ -1957,9 +1974,6 @@ int main(int argc, char **argv) return -1; } - env.stdout_saved = stdout; - env.stderr_saved = stderr; - env.has_testmod = true; if (!env.list_test_names) { /* ensure previous instance of the module is unloaded */ diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 404d0d4915d5..870694f2a359 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -427,6 +427,14 @@ void hexdump(const char *prefix, const void *buf, size_t len); goto goto_label; \ }) +#define SYS_FAIL(goto_label, fmt, ...) \ + ({ \ + char cmd[1024]; \ + snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ + if (!ASSERT_NEQ(0, system(cmd), cmd)) \ + goto goto_label; \ + }) + #define ALL_TO_DEV_NULL " >/dev/null 2>&1" #define SYS_NOFAIL(fmt, ...) \ diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh deleted file mode 100755 index d9661b9988ba..000000000000 --- a/tools/testing/selftests/bpf/test_tunnel.sh +++ /dev/null @@ -1,645 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -# End-to-end eBPF tunnel test suite -# The script tests BPF network tunnel implementation. -# -# Topology: -# --------- -# root namespace | at_ns0 namespace -# | -# ----------- | ----------- -# | tnl dev | | | tnl dev | (overlay network) -# ----------- | ----------- -# metadata-mode | native-mode -# with bpf | -# | -# ---------- | ---------- -# | veth1 | --------- | veth0 | (underlay network) -# ---------- peer ---------- -# -# -# Device Configuration -# -------------------- -# Root namespace with metadata-mode tunnel + BPF -# Device names and addresses: -# veth1 IP: 172.16.1.200, IPv6: 00::22 (underlay) -# tunnel dev <type>11, ex: gre11, IPv4: 10.1.1.200, IPv6: 1::22 (overlay) -# -# Namespace at_ns0 with native tunnel -# Device names and addresses: -# veth0 IPv4: 172.16.1.100, IPv6: 00::11 (underlay) -# tunnel dev <type>00, ex: gre00, IPv4: 10.1.1.100, IPv6: 1::11 (overlay) -# -# -# End-to-end ping packet flow -# --------------------------- -# Most of the tests start by namespace creation, device configuration, -# then ping the underlay and overlay network. When doing 'ping 10.1.1.100' -# from root namespace, the following operations happen: -# 1) Route lookup shows 10.1.1.100/24 belongs to tnl dev, fwd to tnl dev. -# 2) Tnl device's egress BPF program is triggered and set the tunnel metadata, -# with remote_ip=172.16.1.100 and others. -# 3) Outer tunnel header is prepended and route the packet to veth1's egress -# 4) veth0's ingress queue receive the tunneled packet at namespace at_ns0 -# 5) Tunnel protocol handler, ex: vxlan_rcv, decap the packet -# 6) Forward the packet to the overlay tnl dev - -BPF_FILE="test_tunnel_kern.bpf.o" -BPF_PIN_TUNNEL_DIR="/sys/fs/bpf/tc/tunnel" -PING_ARG="-c 3 -w 10 -q" -ret=0 -GREEN='\033[0;92m' -RED='\033[0;31m' -NC='\033[0m' # No Color - -config_device() -{ - ip netns add at_ns0 - ip link add veth0 type veth peer name veth1 - ip link set veth0 netns at_ns0 - ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0 - ip netns exec at_ns0 ip link set dev veth0 up - ip link set dev veth1 up mtu 1500 - ip addr add dev veth1 172.16.1.200/24 -} - -add_gre_tunnel() -{ - tun_key= - if [ -n "$1" ]; then - tun_key="key $1" - fi - - # at_ns0 namespace - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE seq $tun_key \ - local 172.16.1.100 remote 172.16.1.200 - ip netns exec at_ns0 ip link set dev $DEV_NS up - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 - - # root namespace - ip link add dev $DEV type $TYPE $tun_key external - ip link set dev $DEV up - ip addr add dev $DEV 10.1.1.200/24 -} - -add_ip6gretap_tunnel() -{ - - # assign ipv6 address - ip netns exec at_ns0 ip addr add ::11/96 dev veth0 - ip netns exec at_ns0 ip link set dev veth0 up - ip addr add dev veth1 ::22/96 - ip link set dev veth1 up - - # at_ns0 namespace - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE seq flowlabel 0xbcdef key 2 \ - local ::11 remote ::22 - - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 - ip netns exec at_ns0 ip addr add dev $DEV_NS fc80::100/96 - ip netns exec at_ns0 ip link set dev $DEV_NS up - - # root namespace - ip link add dev $DEV type $TYPE external - ip addr add dev $DEV 10.1.1.200/24 - ip addr add dev $DEV fc80::200/24 - ip link set dev $DEV up -} - -add_erspan_tunnel() -{ - # at_ns0 namespace - if [ "$1" == "v1" ]; then - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE seq key 2 \ - local 172.16.1.100 remote 172.16.1.200 \ - erspan_ver 1 erspan 123 - else - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE seq key 2 \ - local 172.16.1.100 remote 172.16.1.200 \ - erspan_ver 2 erspan_dir egress erspan_hwid 3 - fi - ip netns exec at_ns0 ip link set dev $DEV_NS up - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 - - # root namespace - ip link add dev $DEV type $TYPE external - ip link set dev $DEV up - ip addr add dev $DEV 10.1.1.200/24 -} - -add_ip6erspan_tunnel() -{ - - # assign ipv6 address - ip netns exec at_ns0 ip addr add ::11/96 dev veth0 - ip netns exec at_ns0 ip link set dev veth0 up - ip addr add dev veth1 ::22/96 - ip link set dev veth1 up - - # at_ns0 namespace - if [ "$1" == "v1" ]; then - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE seq key 2 \ - local ::11 remote ::22 \ - erspan_ver 1 erspan 123 - else - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE seq key 2 \ - local ::11 remote ::22 \ - erspan_ver 2 erspan_dir egress erspan_hwid 7 - fi - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 - ip netns exec at_ns0 ip link set dev $DEV_NS up - - # root namespace - ip link add dev $DEV type $TYPE external - ip addr add dev $DEV 10.1.1.200/24 - ip link set dev $DEV up -} - -add_geneve_tunnel() -{ - # at_ns0 namespace - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE \ - id 2 dstport 6081 remote 172.16.1.200 - ip netns exec at_ns0 ip link set dev $DEV_NS up - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 - - # root namespace - ip link add dev $DEV type $TYPE dstport 6081 external - ip link set dev $DEV up - ip addr add dev $DEV 10.1.1.200/24 -} - -add_ip6geneve_tunnel() -{ - ip netns exec at_ns0 ip addr add ::11/96 dev veth0 - ip netns exec at_ns0 ip link set dev veth0 up - ip addr add dev veth1 ::22/96 - ip link set dev veth1 up - - # at_ns0 namespace - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE id 22 \ - remote ::22 # geneve has no local option - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 - ip netns exec at_ns0 ip link set dev $DEV_NS up - - # root namespace - ip link add dev $DEV type $TYPE external - ip addr add dev $DEV 10.1.1.200/24 - ip link set dev $DEV up -} - -add_ipip_tunnel() -{ - # at_ns0 namespace - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE \ - local 172.16.1.100 remote 172.16.1.200 - ip netns exec at_ns0 ip link set dev $DEV_NS up - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 - - # root namespace - ip link add dev $DEV type $TYPE external - ip link set dev $DEV up - ip addr add dev $DEV 10.1.1.200/24 -} - -add_ip6tnl_tunnel() -{ - ip netns exec at_ns0 ip addr add ::11/96 dev veth0 - ip netns exec at_ns0 ip link set dev veth0 up - ip addr add dev veth1 ::22/96 - ip link set dev veth1 up - - # at_ns0 namespace - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE \ - local ::11 remote ::22 - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 - ip netns exec at_ns0 ip addr add dev $DEV_NS 1::11/96 - ip netns exec at_ns0 ip link set dev $DEV_NS up - - # root namespace - ip link add dev $DEV type $TYPE external - ip addr add dev $DEV 10.1.1.200/24 - ip addr add dev $DEV 1::22/96 - ip link set dev $DEV up -} - -test_gre() -{ - TYPE=gretap - DEV_NS=gretap00 - DEV=gretap11 - ret=0 - - check $TYPE - config_device - add_gre_tunnel 2 - attach_bpf $DEV gre_set_tunnel gre_get_tunnel - ping $PING_ARG 10.1.1.100 - check_err $? - ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 - check_err $? - cleanup - - if [ $ret -ne 0 ]; then - echo -e ${RED}"FAIL: $TYPE"${NC} - return 1 - fi - echo -e ${GREEN}"PASS: $TYPE"${NC} -} - -test_gre_no_tunnel_key() -{ - TYPE=gre - DEV_NS=gre00 - DEV=gre11 - ret=0 - - check $TYPE - config_device - add_gre_tunnel - attach_bpf $DEV gre_set_tunnel_no_key gre_get_tunnel - ping $PING_ARG 10.1.1.100 - check_err $? - ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 - check_err $? - cleanup - - if [ $ret -ne 0 ]; then - echo -e ${RED}"FAIL: $TYPE"${NC} - return 1 - fi - echo -e ${GREEN}"PASS: $TYPE"${NC} -} - -test_ip6gre() -{ - TYPE=ip6gre - DEV_NS=ip6gre00 - DEV=ip6gre11 - ret=0 - - check $TYPE - config_device - # reuse the ip6gretap function - add_ip6gretap_tunnel - attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel - # underlay - ping6 $PING_ARG ::11 - # overlay: ipv4 over ipv6 - ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 - ping $PING_ARG 10.1.1.100 - check_err $? - # overlay: ipv6 over ipv6 - ip netns exec at_ns0 ping6 $PING_ARG fc80::200 - check_err $? - cleanup - - if [ $ret -ne 0 ]; then - echo -e ${RED}"FAIL: $TYPE"${NC} - return 1 - fi - echo -e ${GREEN}"PASS: $TYPE"${NC} -} - -test_ip6gretap() -{ - TYPE=ip6gretap - DEV_NS=ip6gretap00 - DEV=ip6gretap11 - ret=0 - - check $TYPE - config_device - add_ip6gretap_tunnel - attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel - # underlay - ping6 $PING_ARG ::11 - # overlay: ipv4 over ipv6 - ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 - ping $PING_ARG 10.1.1.100 - check_err $? - # overlay: ipv6 over ipv6 - ip netns exec at_ns0 ping6 $PING_ARG fc80::200 - check_err $? - cleanup - - if [ $ret -ne 0 ]; then - echo -e ${RED}"FAIL: $TYPE"${NC} - return 1 - fi - echo -e ${GREEN}"PASS: $TYPE"${NC} -} - -test_erspan() -{ - TYPE=erspan - DEV_NS=erspan00 - DEV=erspan11 - ret=0 - - check $TYPE - config_device - add_erspan_tunnel $1 - attach_bpf $DEV erspan_set_tunnel erspan_get_tunnel - ping $PING_ARG 10.1.1.100 - check_err $? - ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 - check_err $? - cleanup - - if [ $ret -ne 0 ]; then - echo -e ${RED}"FAIL: $TYPE"${NC} - return 1 - fi - echo -e ${GREEN}"PASS: $TYPE"${NC} -} - -test_ip6erspan() -{ - TYPE=ip6erspan - DEV_NS=ip6erspan00 - DEV=ip6erspan11 - ret=0 - - check $TYPE - config_device - add_ip6erspan_tunnel $1 - attach_bpf $DEV ip4ip6erspan_set_tunnel ip4ip6erspan_get_tunnel - ping6 $PING_ARG ::11 - ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 - check_err $? - cleanup - - if [ $ret -ne 0 ]; then - echo -e ${RED}"FAIL: $TYPE"${NC} - return 1 - fi - echo -e ${GREEN}"PASS: $TYPE"${NC} -} - -test_geneve() -{ - TYPE=geneve - DEV_NS=geneve00 - DEV=geneve11 - ret=0 - - check $TYPE - config_device - add_geneve_tunnel - attach_bpf $DEV geneve_set_tunnel geneve_get_tunnel - ping $PING_ARG 10.1.1.100 - check_err $? - ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 - check_err $? - cleanup - - if [ $ret -ne 0 ]; then - echo -e ${RED}"FAIL: $TYPE"${NC} - return 1 - fi - echo -e ${GREEN}"PASS: $TYPE"${NC} -} - -test_ip6geneve() -{ - TYPE=geneve - DEV_NS=ip6geneve00 - DEV=ip6geneve11 - ret=0 - - check $TYPE - config_device - add_ip6geneve_tunnel - attach_bpf $DEV ip6geneve_set_tunnel ip6geneve_get_tunnel - ping $PING_ARG 10.1.1.100 - check_err $? - ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 - check_err $? - cleanup - - if [ $ret -ne 0 ]; then - echo -e ${RED}"FAIL: ip6$TYPE"${NC} - return 1 - fi - echo -e ${GREEN}"PASS: ip6$TYPE"${NC} -} - -test_ipip() -{ - TYPE=ipip - DEV_NS=ipip00 - DEV=ipip11 - ret=0 - - check $TYPE - config_device - add_ipip_tunnel - ip link set dev veth1 mtu 1500 - attach_bpf $DEV ipip_set_tunnel ipip_get_tunnel - ping $PING_ARG 10.1.1.100 - check_err $? - ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 - check_err $? - cleanup - - if [ $ret -ne 0 ]; then - echo -e ${RED}"FAIL: $TYPE"${NC} - return 1 - fi - echo -e ${GREEN}"PASS: $TYPE"${NC} -} - -test_ipip6() -{ - TYPE=ip6tnl - DEV_NS=ipip6tnl00 - DEV=ipip6tnl11 - ret=0 - - check $TYPE - config_device - add_ip6tnl_tunnel - ip link set dev veth1 mtu 1500 - attach_bpf $DEV ipip6_set_tunnel ipip6_get_tunnel - # underlay - ping6 $PING_ARG ::11 - # ip4 over ip6 - ping $PING_ARG 10.1.1.100 - check_err $? - ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 - check_err $? - cleanup - - if [ $ret -ne 0 ]; then - echo -e ${RED}"FAIL: $TYPE"${NC} - return 1 - fi - echo -e ${GREEN}"PASS: $TYPE"${NC} -} - -test_ip6ip6() -{ - TYPE=ip6tnl - DEV_NS=ip6ip6tnl00 - DEV=ip6ip6tnl11 - ret=0 - - check $TYPE - config_device - add_ip6tnl_tunnel - ip link set dev veth1 mtu 1500 - attach_bpf $DEV ip6ip6_set_tunnel ip6ip6_get_tunnel - # underlay - ping6 $PING_ARG ::11 - # ip6 over ip6 - ping6 $PING_ARG 1::11 - check_err $? - ip netns exec at_ns0 ping6 $PING_ARG 1::22 - check_err $? - cleanup - - if [ $ret -ne 0 ]; then - echo -e ${RED}"FAIL: ip6$TYPE"${NC} - return 1 - fi - echo -e ${GREEN}"PASS: ip6$TYPE"${NC} -} - -attach_bpf() -{ - DEV=$1 - SET=$2 - GET=$3 - mkdir -p ${BPF_PIN_TUNNEL_DIR} - bpftool prog loadall ${BPF_FILE} ${BPF_PIN_TUNNEL_DIR}/ - tc qdisc add dev $DEV clsact - tc filter add dev $DEV egress bpf da object-pinned ${BPF_PIN_TUNNEL_DIR}/$SET - tc filter add dev $DEV ingress bpf da object-pinned ${BPF_PIN_TUNNEL_DIR}/$GET -} - -cleanup() -{ - rm -rf ${BPF_PIN_TUNNEL_DIR} - - ip netns delete at_ns0 2> /dev/null - ip link del veth1 2> /dev/null - ip link del ipip11 2> /dev/null - ip link del ipip6tnl11 2> /dev/null - ip link del ip6ip6tnl11 2> /dev/null - ip link del gretap11 2> /dev/null - ip link del gre11 2> /dev/null - ip link del ip6gre11 2> /dev/null - ip link del ip6gretap11 2> /dev/null - ip link del geneve11 2> /dev/null - ip link del ip6geneve11 2> /dev/null - ip link del erspan11 2> /dev/null - ip link del ip6erspan11 2> /dev/null -} - -cleanup_exit() -{ - echo "CATCH SIGKILL or SIGINT, cleanup and exit" - cleanup - exit 0 -} - -check() -{ - ip link help 2>&1 | grep -q "\s$1\s" - if [ $? -ne 0 ];then - echo "SKIP $1: iproute2 not support" - cleanup - return 1 - fi -} - -enable_debug() -{ - echo 'file ip_gre.c +p' > /sys/kernel/debug/dynamic_debug/control - echo 'file ip6_gre.c +p' > /sys/kernel/debug/dynamic_debug/control - echo 'file geneve.c +p' > /sys/kernel/debug/dynamic_debug/control - echo 'file ipip.c +p' > /sys/kernel/debug/dynamic_debug/control -} - -check_err() -{ - if [ $ret -eq 0 ]; then - ret=$1 - fi -} - -bpf_tunnel_test() -{ - local errors=0 - - echo "Testing GRE tunnel..." - test_gre - errors=$(( $errors + $? )) - - echo "Testing GRE tunnel (without tunnel keys)..." - test_gre_no_tunnel_key - errors=$(( $errors + $? )) - - echo "Testing IP6GRE tunnel..." - test_ip6gre - errors=$(( $errors + $? )) - - echo "Testing IP6GRETAP tunnel..." - test_ip6gretap - errors=$(( $errors + $? )) - - echo "Testing ERSPAN tunnel..." - test_erspan v2 - errors=$(( $errors + $? )) - - echo "Testing IP6ERSPAN tunnel..." - test_ip6erspan v2 - errors=$(( $errors + $? )) - - echo "Testing GENEVE tunnel..." - test_geneve - errors=$(( $errors + $? )) - - echo "Testing IP6GENEVE tunnel..." - test_ip6geneve - errors=$(( $errors + $? )) - - echo "Testing IPIP tunnel..." - test_ipip - errors=$(( $errors + $? )) - - echo "Testing IPIP6 tunnel..." - test_ipip6 - errors=$(( $errors + $? )) - - echo "Testing IP6IP6 tunnel..." - test_ip6ip6 - errors=$(( $errors + $? )) - - return $errors -} - -trap cleanup 0 3 6 -trap cleanup_exit 2 9 - -cleanup -bpf_tunnel_test - -if [ $? -ne 0 ]; then - echo -e "$(basename $0): ${RED}FAIL${NC}" - exit 1 -fi -echo -e "$(basename $0): ${GREEN}PASS${NC}" -exit 0 diff --git a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh deleted file mode 100755 index 4c3c3fdd2d73..000000000000 --- a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh +++ /dev/null @@ -1,214 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# -# Test topology: -# - - - - - - - - - - - - - - - - - - - -# | veth1 veth2 veth3 | ns0 -# - -| - - - - - - | - - - - - - | - - -# --------- --------- --------- -# | veth0 | | veth0 | | veth0 | -# --------- --------- --------- -# ns1 ns2 ns3 -# -# Test modules: -# XDP modes: generic, native, native + egress_prog -# -# Test cases: -# ARP: Testing BPF_F_BROADCAST, the ingress interface also should receive -# the redirects. -# ns1 -> gw: ns1, ns2, ns3, should receive the arp request -# IPv4: Testing BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS, the ingress -# interface should not receive the redirects. -# ns1 -> gw: ns1 should not receive, ns2, ns3 should receive redirects. -# IPv6: Testing none flag, all the pkts should be redirected back -# ping test: ns1 -> ns2 (block), echo requests will be redirect back -# egress_prog: -# all src mac should be egress interface's mac - -# netns numbers -NUM=3 -IFACES="" -DRV_MODE="xdpgeneric xdpdrv xdpegress" -PASS=0 -FAIL=0 -LOG_DIR=$(mktemp -d) -declare -a NS -NS[0]="ns0-$(mktemp -u XXXXXX)" -NS[1]="ns1-$(mktemp -u XXXXXX)" -NS[2]="ns2-$(mktemp -u XXXXXX)" -NS[3]="ns3-$(mktemp -u XXXXXX)" - -test_pass() -{ - echo "Pass: $@" - PASS=$((PASS + 1)) -} - -test_fail() -{ - echo "fail: $@" - FAIL=$((FAIL + 1)) -} - -clean_up() -{ - for i in $(seq 0 $NUM); do - ip netns del ${NS[$i]} 2> /dev/null - done -} - -# Kselftest framework requirement - SKIP code is 4. -check_env() -{ - ip link set dev lo xdpgeneric off &>/dev/null - if [ $? -ne 0 ];then - echo "selftests: [SKIP] Could not run test without the ip xdpgeneric support" - exit 4 - fi - - which tcpdump &>/dev/null - if [ $? -ne 0 ];then - echo "selftests: [SKIP] Could not run test without tcpdump" - exit 4 - fi -} - -setup_ns() -{ - local mode=$1 - IFACES="" - - if [ "$mode" = "xdpegress" ]; then - mode="xdpdrv" - fi - - ip netns add ${NS[0]} - for i in $(seq $NUM); do - ip netns add ${NS[$i]} - ip -n ${NS[$i]} link add veth0 type veth peer name veth$i netns ${NS[0]} - ip -n ${NS[$i]} link set veth0 up - ip -n ${NS[0]} link set veth$i up - - ip -n ${NS[$i]} addr add 192.0.2.$i/24 dev veth0 - ip -n ${NS[$i]} addr add 2001:db8::$i/64 dev veth0 - # Add a neigh entry for IPv4 ping test - ip -n ${NS[$i]} neigh add 192.0.2.253 lladdr 00:00:00:00:00:01 dev veth0 - ip -n ${NS[$i]} link set veth0 $mode obj \ - xdp_dummy.bpf.o sec xdp &> /dev/null || \ - { test_fail "Unable to load dummy xdp" && exit 1; } - IFACES="$IFACES veth$i" - veth_mac[$i]=$(ip -n ${NS[0]} link show veth$i | awk '/link\/ether/ {print $2}') - done -} - -do_egress_tests() -{ - local mode=$1 - - # mac test - ip netns exec ${NS[2]} tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-2_${mode}.log & - ip netns exec ${NS[3]} tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-3_${mode}.log & - sleep 0.5 - ip netns exec ${NS[1]} ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null - sleep 0.5 - pkill tcpdump - - # mac check - grep -q "${veth_mac[2]} > ff:ff:ff:ff:ff:ff" ${LOG_DIR}/mac_ns1-2_${mode}.log && \ - test_pass "$mode mac ns1-2" || test_fail "$mode mac ns1-2" - grep -q "${veth_mac[3]} > ff:ff:ff:ff:ff:ff" ${LOG_DIR}/mac_ns1-3_${mode}.log && \ - test_pass "$mode mac ns1-3" || test_fail "$mode mac ns1-3" -} - -do_ping_tests() -{ - local mode=$1 - - # ping6 test: echo request should be redirect back to itself, not others - ip netns exec ${NS[1]} ip neigh add 2001:db8::2 dev veth0 lladdr 00:00:00:00:00:02 - - ip netns exec ${NS[1]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-1_${mode}.log & - ip netns exec ${NS[2]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-2_${mode}.log & - ip netns exec ${NS[3]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-3_${mode}.log & - sleep 0.5 - # ARP test - ip netns exec ${NS[1]} arping -q -c 2 -I veth0 192.0.2.254 - # IPv4 test - ip netns exec ${NS[1]} ping 192.0.2.253 -i 0.1 -c 4 &> /dev/null - # IPv6 test - ip netns exec ${NS[1]} ping6 2001:db8::2 -i 0.1 -c 2 &> /dev/null - sleep 0.5 - pkill tcpdump - - # All netns should receive the redirect arp requests - [ $(grep -cF "who-has 192.0.2.254" ${LOG_DIR}/ns1-1_${mode}.log) -eq 4 ] && \ - test_pass "$mode arp(F_BROADCAST) ns1-1" || \ - test_fail "$mode arp(F_BROADCAST) ns1-1" - [ $(grep -cF "who-has 192.0.2.254" ${LOG_DIR}/ns1-2_${mode}.log) -eq 2 ] && \ - test_pass "$mode arp(F_BROADCAST) ns1-2" || \ - test_fail "$mode arp(F_BROADCAST) ns1-2" - [ $(grep -cF "who-has 192.0.2.254" ${LOG_DIR}/ns1-3_${mode}.log) -eq 2 ] && \ - test_pass "$mode arp(F_BROADCAST) ns1-3" || \ - test_fail "$mode arp(F_BROADCAST) ns1-3" - - # ns1 should not receive the redirect echo request, others should - [ $(grep -c "ICMP echo request" ${LOG_DIR}/ns1-1_${mode}.log) -eq 4 ] && \ - test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-1" || \ - test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-1" - [ $(grep -c "ICMP echo request" ${LOG_DIR}/ns1-2_${mode}.log) -eq 4 ] && \ - test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-2" || \ - test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-2" - [ $(grep -c "ICMP echo request" ${LOG_DIR}/ns1-3_${mode}.log) -eq 4 ] && \ - test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-3" || \ - test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-3" - - # ns1 should receive the echo request, ns2 should not - [ $(grep -c "ICMP6, echo request" ${LOG_DIR}/ns1-1_${mode}.log) -eq 4 ] && \ - test_pass "$mode IPv6 (no flags) ns1-1" || \ - test_fail "$mode IPv6 (no flags) ns1-1" - [ $(grep -c "ICMP6, echo request" ${LOG_DIR}/ns1-2_${mode}.log) -eq 0 ] && \ - test_pass "$mode IPv6 (no flags) ns1-2" || \ - test_fail "$mode IPv6 (no flags) ns1-2" -} - -do_tests() -{ - local mode=$1 - local drv_p - - case ${mode} in - xdpdrv) drv_p="-N";; - xdpegress) drv_p="-X";; - xdpgeneric) drv_p="-S";; - esac - - ip netns exec ${NS[0]} ./xdp_redirect_multi $drv_p $IFACES &> ${LOG_DIR}/xdp_redirect_${mode}.log & - xdp_pid=$! - sleep 1 - if ! ps -p $xdp_pid > /dev/null; then - test_fail "$mode xdp_redirect_multi start failed" - return 1 - fi - - if [ "$mode" = "xdpegress" ]; then - do_egress_tests $mode - else - do_ping_tests $mode - fi - - kill $xdp_pid -} - -check_env - -trap clean_up EXIT - -for mode in ${DRV_MODE}; do - setup_ns $mode - do_tests $mode - clean_up -done -rm -rf ${LOG_DIR} - -echo "Summary: PASS $PASS, FAIL $FAIL" -[ $FAIL -eq 0 ] && exit 0 || exit 1 diff --git a/tools/testing/selftests/bpf/test_xdp_vlan.sh b/tools/testing/selftests/bpf/test_xdp_vlan.sh deleted file mode 100755 index fbcaa9f0120b..000000000000 --- a/tools/testing/selftests/bpf/test_xdp_vlan.sh +++ /dev/null @@ -1,233 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# Author: Jesper Dangaard Brouer <hawk@kernel.org> - -# Kselftest framework requirement - SKIP code is 4. -readonly KSFT_SKIP=4 -readonly NS1="ns1-$(mktemp -u XXXXXX)" -readonly NS2="ns2-$(mktemp -u XXXXXX)" - -# Allow wrapper scripts to name test -if [ -z "$TESTNAME" ]; then - TESTNAME=xdp_vlan -fi - -# Default XDP mode -XDP_MODE=xdpgeneric - -usage() { - echo "Testing XDP + TC eBPF VLAN manipulations: $TESTNAME" - echo "" - echo "Usage: $0 [-vfh]" - echo " -v | --verbose : Verbose" - echo " --flush : Flush before starting (e.g. after --interactive)" - echo " --interactive : Keep netns setup running after test-run" - echo " --mode=XXX : Choose XDP mode (xdp | xdpgeneric | xdpdrv)" - echo "" -} - -valid_xdp_mode() -{ - local mode=$1 - - case "$mode" in - xdpgeneric | xdpdrv | xdp) - return 0 - ;; - *) - return 1 - esac -} - -cleanup() -{ - local status=$? - - if [ "$status" = "0" ]; then - echo "selftests: $TESTNAME [PASS]"; - else - echo "selftests: $TESTNAME [FAILED]"; - fi - - if [ -n "$INTERACTIVE" ]; then - echo "Namespace setup still active explore with:" - echo " ip netns exec ${NS1} bash" - echo " ip netns exec ${NS2} bash" - exit $status - fi - - set +e - ip link del veth1 2> /dev/null - ip netns del ${NS1} 2> /dev/null - ip netns del ${NS2} 2> /dev/null -} - -# Using external program "getopt" to get --long-options -OPTIONS=$(getopt -o hvfi: \ - --long verbose,flush,help,interactive,debug,mode: -- "$@") -if (( $? != 0 )); then - usage - echo "selftests: $TESTNAME [FAILED] Error calling getopt, unknown option?" - exit 2 -fi -eval set -- "$OPTIONS" - -## --- Parse command line arguments / parameters --- -while true; do - case "$1" in - -v | --verbose) - export VERBOSE=yes - shift - ;; - -i | --interactive | --debug ) - INTERACTIVE=yes - shift - ;; - -f | --flush ) - cleanup - shift - ;; - --mode ) - shift - XDP_MODE=$1 - shift - ;; - -- ) - shift - break - ;; - -h | --help ) - usage; - echo "selftests: $TESTNAME [SKIP] usage help info requested" - exit $KSFT_SKIP - ;; - * ) - shift - break - ;; - esac -done - -if [ "$EUID" -ne 0 ]; then - echo "selftests: $TESTNAME [FAILED] need root privileges" - exit 1 -fi - -valid_xdp_mode $XDP_MODE -if [ $? -ne 0 ]; then - echo "selftests: $TESTNAME [FAILED] unknown XDP mode ($XDP_MODE)" - exit 1 -fi - -ip link set dev lo xdpgeneric off 2>/dev/null > /dev/null -if [ $? -ne 0 ]; then - echo "selftests: $TESTNAME [SKIP] need ip xdp support" - exit $KSFT_SKIP -fi - -# Interactive mode likely require us to cleanup netns -if [ -n "$INTERACTIVE" ]; then - ip link del veth1 2> /dev/null - ip netns del ${NS1} 2> /dev/null - ip netns del ${NS2} 2> /dev/null -fi - -# Exit on failure -set -e - -# Some shell-tools dependencies -which ip > /dev/null -which tc > /dev/null -which ethtool > /dev/null - -# Make rest of shell verbose, showing comments as doc/info -if [ -n "$VERBOSE" ]; then - set -v -fi - -# Create two namespaces -ip netns add ${NS1} -ip netns add ${NS2} - -# Run cleanup if failing or on kill -trap cleanup 0 2 3 6 9 - -# Create veth pair -ip link add veth1 type veth peer name veth2 - -# Move veth1 and veth2 into the respective namespaces -ip link set veth1 netns ${NS1} -ip link set veth2 netns ${NS2} - -# NOTICE: XDP require VLAN header inside packet payload -# - Thus, disable VLAN offloading driver features -# - For veth REMEMBER TX side VLAN-offload -# -# Disable rx-vlan-offload (mostly needed on ns1) -ip netns exec ${NS1} ethtool -K veth1 rxvlan off -ip netns exec ${NS2} ethtool -K veth2 rxvlan off -# -# Disable tx-vlan-offload (mostly needed on ns2) -ip netns exec ${NS2} ethtool -K veth2 txvlan off -ip netns exec ${NS1} ethtool -K veth1 txvlan off - -export IPADDR1=100.64.41.1 -export IPADDR2=100.64.41.2 - -# In ns1/veth1 add IP-addr on plain net_device -ip netns exec ${NS1} ip addr add ${IPADDR1}/24 dev veth1 -ip netns exec ${NS1} ip link set veth1 up - -# In ns2/veth2 create VLAN device -export VLAN=4011 -export DEVNS2=veth2 -ip netns exec ${NS2} ip link add link $DEVNS2 name $DEVNS2.$VLAN type vlan id $VLAN -ip netns exec ${NS2} ip addr add ${IPADDR2}/24 dev $DEVNS2.$VLAN -ip netns exec ${NS2} ip link set $DEVNS2 up -ip netns exec ${NS2} ip link set $DEVNS2.$VLAN up - -# Bringup lo in netns (to avoids confusing people using --interactive) -ip netns exec ${NS1} ip link set lo up -ip netns exec ${NS2} ip link set lo up - -# At this point, the hosts cannot reach each-other, -# because ns2 are using VLAN tags on the packets. - -ip netns exec ${NS2} sh -c 'ping -W 1 -c 1 100.64.41.1 || echo "Success: First ping must fail"' - - -# Now we can use the test_xdp_vlan.c program to pop/push these VLAN tags -# ---------------------------------------------------------------------- -# In ns1: ingress use XDP to remove VLAN tags -export DEVNS1=veth1 -export BPF_FILE=test_xdp_vlan.bpf.o - -# First test: Remove VLAN by setting VLAN ID 0, using "xdp_vlan_change" -export XDP_PROG=xdp_vlan_change -ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE object $BPF_FILE section $XDP_PROG - -# In ns1: egress use TC to add back VLAN tag 4011 -# (del cmd) -# tc qdisc del dev $DEVNS1 clsact 2> /dev/null -# -ip netns exec ${NS1} tc qdisc add dev $DEVNS1 clsact -ip netns exec ${NS1} tc filter add dev $DEVNS1 egress \ - prio 1 handle 1 bpf da obj $BPF_FILE sec tc_vlan_push - -# Now the namespaces can reach each-other, test with ping: -ip netns exec ${NS2} ping -i 0.2 -W 2 -c 2 $IPADDR1 -ip netns exec ${NS1} ping -i 0.2 -W 2 -c 2 $IPADDR2 - -# Second test: Replace xdp prog, that fully remove vlan header -# -# Catch kernel bug for generic-XDP, that does didn't allow us to -# remove a VLAN header, because skb->protocol still contain VLAN -# ETH_P_8021Q indication, and this cause overwriting of our changes. -# -export XDP_PROG=xdp_vlan_remove_outer2 -ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE off -ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE object $BPF_FILE section $XDP_PROG - -# Now the namespaces should still be able reach each-other, test with ping: -ip netns exec ${NS2} ping -i 0.2 -W 2 -c 2 $IPADDR1 -ip netns exec ${NS1} ping -i 0.2 -W 2 -c 2 $IPADDR2 diff --git a/tools/testing/selftests/bpf/test_xdp_vlan_mode_generic.sh b/tools/testing/selftests/bpf/test_xdp_vlan_mode_generic.sh deleted file mode 100755 index c515326d6d59..000000000000 --- a/tools/testing/selftests/bpf/test_xdp_vlan_mode_generic.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -# Exit on failure -set -e - -# Wrapper script to test generic-XDP -export TESTNAME=xdp_vlan_mode_generic -./test_xdp_vlan.sh --mode=xdpgeneric diff --git a/tools/testing/selftests/bpf/test_xdp_vlan_mode_native.sh b/tools/testing/selftests/bpf/test_xdp_vlan_mode_native.sh deleted file mode 100755 index 5cf7ce1f16c1..000000000000 --- a/tools/testing/selftests/bpf/test_xdp_vlan_mode_native.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -# Exit on failure -set -e - -# Wrapper script to test native-XDP -export TESTNAME=xdp_vlan_mode_native -./test_xdp_vlan.sh --mode=xdpdrv diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 06af5029885b..a18972ffdeb6 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -3,6 +3,7 @@ #define _GNU_SOURCE #include <argp.h> #include <libgen.h> +#include <ctype.h> #include <string.h> #include <stdlib.h> #include <sched.h> @@ -154,6 +155,16 @@ struct filter { bool abs; }; +struct var_preset { + char *name; + enum { INTEGRAL, ENUMERATOR } type; + union { + long long ivalue; + char *svalue; + }; + bool applied; +}; + static struct env { char **filenames; int filename_cnt; @@ -195,6 +206,8 @@ static struct env { int progs_processed; int progs_skipped; int top_src_lines; + struct var_preset *presets; + int npresets; } env; static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) @@ -246,16 +259,20 @@ static const struct argp_option opts[] = { { "test-reg-invariants", 'r', NULL, 0, "Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" }, { "top-src-lines", 'S', "N", 0, "Emit N most frequent source code lines" }, + { "set-global-vars", 'G', "GLOBAL", 0, "Set global variables provided in the expression, for example \"var1 = 1\"" }, {}, }; static int parse_stats(const char *stats_str, struct stat_specs *specs); static int append_filter(struct filter **filters, int *cnt, const char *str); static int append_filter_file(const char *path); +static int append_var_preset(struct var_preset **presets, int *cnt, const char *expr); +static int append_var_preset_file(const char *filename); +static int append_file(const char *path); +static int append_file_from_file(const char *path); static error_t parse_arg(int key, char *arg, struct argp_state *state) { - void *tmp; int err; switch (key) { @@ -353,15 +370,26 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) argp_usage(state); } break; + case 'G': { + if (arg[0] == '@') + err = append_var_preset_file(arg + 1); + else + err = append_var_preset(&env.presets, &env.npresets, arg); + if (err) { + fprintf(stderr, "Failed to parse global variable presets: %s\n", arg); + return err; + } + break; + } case ARGP_KEY_ARG: - tmp = realloc(env.filenames, (env.filename_cnt + 1) * sizeof(*env.filenames)); - if (!tmp) - return -ENOMEM; - env.filenames = tmp; - env.filenames[env.filename_cnt] = strdup(arg); - if (!env.filenames[env.filename_cnt]) - return -ENOMEM; - env.filename_cnt++; + if (arg[0] == '@') + err = append_file_from_file(arg + 1); + else + err = append_file(arg); + if (err) { + fprintf(stderr, "Failed to collect BPF object files: %d\n", err); + return err; + } break; default: return ARGP_ERR_UNKNOWN; @@ -632,7 +660,7 @@ static int append_filter_file(const char *path) f = fopen(path, "r"); if (!f) { err = -errno; - fprintf(stderr, "Failed to open filters in '%s': %d\n", path, err); + fprintf(stderr, "Failed to open filters in '%s': %s\n", path, strerror(-err)); return err; } @@ -662,6 +690,49 @@ static const struct stat_specs default_output_spec = { }, }; +static int append_file(const char *path) +{ + void *tmp; + + tmp = realloc(env.filenames, (env.filename_cnt + 1) * sizeof(*env.filenames)); + if (!tmp) + return -ENOMEM; + env.filenames = tmp; + env.filenames[env.filename_cnt] = strdup(path); + if (!env.filenames[env.filename_cnt]) + return -ENOMEM; + env.filename_cnt++; + return 0; +} + +static int append_file_from_file(const char *path) +{ + char buf[1024]; + int err = 0; + FILE *f; + + f = fopen(path, "r"); + if (!f) { + err = -errno; + fprintf(stderr, "Failed to open object files list in '%s': %s\n", + path, strerror(errno)); + return err; + } + + while (fscanf(f, " %1023[^\n]\n", buf) == 1) { + /* lines starting with # are comments, skip them */ + if (buf[0] == '\0' || buf[0] == '#') + continue; + err = append_file(buf); + if (err) + goto cleanup; + } + +cleanup: + fclose(f); + return err; +} + static const struct stat_specs default_csv_output_spec = { .spec_cnt = 14, .ids = { @@ -1163,13 +1234,13 @@ static void fixup_obj(struct bpf_object *obj, struct bpf_program *prog, const ch bpf_program__set_expected_attach_type(prog, attach_type); if (!env.quiet) { - printf("Using guessed program type '%s' for %s/%s...\n", + fprintf(stderr, "Using guessed program type '%s' for %s/%s...\n", libbpf_bpf_prog_type_str(prog_type), filename, prog_name); } } else { if (!env.quiet) { - printf("Failed to guess program type for freplace program with context type name '%s' for %s/%s. Consider using canonical type names to help veristat...\n", + fprintf(stderr, "Failed to guess program type for freplace program with context type name '%s' for %s/%s. Consider using canonical type names to help veristat...\n", ctx_name, filename, prog_name); } } @@ -1292,6 +1363,261 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf return 0; }; +static int append_var_preset(struct var_preset **presets, int *cnt, const char *expr) +{ + void *tmp; + struct var_preset *cur; + char var[256], val[256], *val_end; + long long value; + int n; + + tmp = realloc(*presets, (*cnt + 1) * sizeof(**presets)); + if (!tmp) + return -ENOMEM; + *presets = tmp; + cur = &(*presets)[*cnt]; + memset(cur, 0, sizeof(*cur)); + (*cnt)++; + + if (sscanf(expr, "%s = %s %n", var, val, &n) != 2 || n != strlen(expr)) { + fprintf(stderr, "Failed to parse expression '%s'\n", expr); + return -EINVAL; + } + + if (val[0] == '-' || isdigit(val[0])) { + /* must be a number */ + errno = 0; + value = strtoll(val, &val_end, 0); + if (errno == ERANGE) { + errno = 0; + value = strtoull(val, &val_end, 0); + } + if (errno || *val_end != '\0') { + fprintf(stderr, "Failed to parse value '%s'\n", val); + return -EINVAL; + } + cur->ivalue = value; + cur->type = INTEGRAL; + } else { + /* if not a number, consider it enum value */ + cur->svalue = strdup(val); + if (!cur->svalue) + return -ENOMEM; + cur->type = ENUMERATOR; + } + + cur->name = strdup(var); + if (!cur->name) + return -ENOMEM; + + return 0; +} + +static int append_var_preset_file(const char *filename) +{ + char buf[1024]; + FILE *f; + int err = 0; + + f = fopen(filename, "rt"); + if (!f) { + err = -errno; + fprintf(stderr, "Failed to open presets in '%s': %s\n", filename, strerror(-err)); + return -EINVAL; + } + + while (fscanf(f, " %1023[^\n]\n", buf) == 1) { + if (buf[0] == '\0' || buf[0] == '#') + continue; + + err = append_var_preset(&env.presets, &env.npresets, buf); + if (err) + goto cleanup; + } + +cleanup: + fclose(f); + return err; +} + +static bool is_signed_type(const struct btf_type *t) +{ + if (btf_is_int(t)) + return btf_int_encoding(t) & BTF_INT_SIGNED; + if (btf_is_any_enum(t)) + return btf_kflag(t); + return true; +} + +static int enum_value_from_name(const struct btf *btf, const struct btf_type *t, + const char *evalue, long long *retval) +{ + if (btf_is_enum(t)) { + struct btf_enum *e = btf_enum(t); + int i, n = btf_vlen(t); + + for (i = 0; i < n; ++i, ++e) { + const char *cur_name = btf__name_by_offset(btf, e->name_off); + + if (strcmp(cur_name, evalue) == 0) { + *retval = e->val; + return 0; + } + } + } else if (btf_is_enum64(t)) { + struct btf_enum64 *e = btf_enum64(t); + int i, n = btf_vlen(t); + + for (i = 0; i < n; ++i, ++e) { + const char *cur_name = btf__name_by_offset(btf, e->name_off); + __u64 value = btf_enum64_value(e); + + if (strcmp(cur_name, evalue) == 0) { + *retval = value; + return 0; + } + } + } + return -EINVAL; +} + +static bool is_preset_supported(const struct btf_type *t) +{ + return btf_is_int(t) || btf_is_enum(t) || btf_is_enum64(t); +} + +static int set_global_var(struct bpf_object *obj, struct btf *btf, const struct btf_type *t, + struct bpf_map *map, struct btf_var_secinfo *sinfo, + struct var_preset *preset) +{ + const struct btf_type *base_type; + void *ptr; + long long value = preset->ivalue; + size_t size; + + base_type = btf__type_by_id(btf, btf__resolve_type(btf, t->type)); + if (!base_type) { + fprintf(stderr, "Failed to resolve type %d\n", t->type); + return -EINVAL; + } + if (!is_preset_supported(base_type)) { + fprintf(stderr, "Setting value for type %s is not supported\n", + btf__name_by_offset(btf, base_type->name_off)); + return -EINVAL; + } + + if (preset->type == ENUMERATOR) { + if (btf_is_any_enum(base_type)) { + if (enum_value_from_name(btf, base_type, preset->svalue, &value)) { + fprintf(stderr, + "Failed to find integer value for enum element %s\n", + preset->svalue); + return -EINVAL; + } + } else { + fprintf(stderr, "Value %s is not supported for type %s\n", + preset->svalue, btf__name_by_offset(btf, base_type->name_off)); + return -EINVAL; + } + } + + /* Check if value fits into the target variable size */ + if (sinfo->size < sizeof(value)) { + bool is_signed = is_signed_type(base_type); + __u32 unsigned_bits = sinfo->size * 8 - (is_signed ? 1 : 0); + long long max_val = 1ll << unsigned_bits; + + if (value >= max_val || value < -max_val) { + fprintf(stderr, + "Variable %s value %lld is out of range [%lld; %lld]\n", + btf__name_by_offset(btf, t->name_off), value, + is_signed ? -max_val : 0, max_val - 1); + return -EINVAL; + } + } + + ptr = bpf_map__initial_value(map, &size); + if (!ptr || sinfo->offset + sinfo->size > size) + return -EINVAL; + + if (__BYTE_ORDER == __LITTLE_ENDIAN) { + memcpy(ptr + sinfo->offset, &value, sinfo->size); + } else { /* __BYTE_ORDER == __BIG_ENDIAN */ + __u8 src_offset = sizeof(value) - sinfo->size; + + memcpy(ptr + sinfo->offset, (void *)&value + src_offset, sinfo->size); + } + return 0; +} + +static int set_global_vars(struct bpf_object *obj, struct var_preset *presets, int npresets) +{ + struct btf_var_secinfo *sinfo; + const char *sec_name; + const struct btf_type *t; + struct bpf_map *map; + struct btf *btf; + int i, j, k, n, cnt, err = 0; + + if (npresets == 0) + return 0; + + btf = bpf_object__btf(obj); + if (!btf) + return -EINVAL; + + cnt = btf__type_cnt(btf); + for (i = 1; i != cnt; ++i) { + t = btf__type_by_id(btf, i); + + if (!btf_is_datasec(t)) + continue; + + sinfo = btf_var_secinfos(t); + sec_name = btf__name_by_offset(btf, t->name_off); + map = bpf_object__find_map_by_name(obj, sec_name); + if (!map) + continue; + + n = btf_vlen(t); + for (j = 0; j < n; ++j, ++sinfo) { + const struct btf_type *var_type = btf__type_by_id(btf, sinfo->type); + const char *var_name; + + if (!btf_is_var(var_type)) + continue; + + var_name = btf__name_by_offset(btf, var_type->name_off); + + for (k = 0; k < npresets; ++k) { + if (strcmp(var_name, presets[k].name) != 0) + continue; + + if (presets[k].applied) { + fprintf(stderr, "Variable %s is set more than once", + var_name); + return -EINVAL; + } + + err = set_global_var(obj, btf, var_type, map, sinfo, presets + k); + if (err) + return err; + + presets[k].applied = true; + break; + } + } + } + for (i = 0; i < npresets; ++i) { + if (!presets[i].applied) { + fprintf(stderr, "Global variable preset %s has not been applied\n", + presets[i].name); + } + presets[i].applied = false; + } + return err; +} + static int process_obj(const char *filename) { const char *base_filename = basename(strdupa(filename)); @@ -1341,6 +1667,11 @@ static int process_obj(const char *filename) if (prog_cnt == 1) { prog = bpf_object__next_program(obj, NULL); bpf_program__set_autoload(prog, true); + err = set_global_vars(obj, env.presets, env.npresets); + if (err) { + fprintf(stderr, "Failed to set global variables %d\n", err); + goto cleanup; + } process_prog(filename, obj, prog); goto cleanup; } @@ -1355,6 +1686,12 @@ static int process_obj(const char *filename) goto cleanup; } + err = set_global_vars(tobj, env.presets, env.npresets); + if (err) { + fprintf(stderr, "Failed to set global variables %d\n", err); + goto cleanup; + } + lprog = NULL; bpf_object__for_each_program(tprog, tobj) { const char *tprog_name = bpf_program__name(tprog); @@ -2460,5 +2797,11 @@ int main(int argc, char **argv) free(env.deny_filters[i].prog_glob); } free(env.deny_filters); + for (i = 0; i < env.npresets; ++i) { + free(env.presets[i].name); + if (env.presets[i].type == ENUMERATOR) + free(env.presets[i].svalue); + } + free(env.presets); return -err; } diff --git a/tools/testing/selftests/bpf/with_addr.sh b/tools/testing/selftests/bpf/with_addr.sh deleted file mode 100755 index ffcd3953f94c..000000000000 --- a/tools/testing/selftests/bpf/with_addr.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# -# add private ipv4 and ipv6 addresses to loopback - -readonly V6_INNER='100::a/128' -readonly V4_INNER='192.168.0.1/32' - -if getopts ":s" opt; then - readonly SIT_DEV_NAME='sixtofourtest0' - readonly V6_SIT='2::/64' - readonly V4_SIT='172.17.0.1/32' - shift -fi - -fail() { - echo "error: $*" 1>&2 - exit 1 -} - -setup() { - ip -6 addr add "${V6_INNER}" dev lo || fail 'failed to setup v6 address' - ip -4 addr add "${V4_INNER}" dev lo || fail 'failed to setup v4 address' - - if [[ -n "${V6_SIT}" ]]; then - ip link add "${SIT_DEV_NAME}" type sit remote any local any \ - || fail 'failed to add sit' - ip link set dev "${SIT_DEV_NAME}" up \ - || fail 'failed to bring sit device up' - ip -6 addr add "${V6_SIT}" dev "${SIT_DEV_NAME}" \ - || fail 'failed to setup v6 SIT address' - ip -4 addr add "${V4_SIT}" dev "${SIT_DEV_NAME}" \ - || fail 'failed to setup v4 SIT address' - fi - - sleep 2 # avoid race causing bind to fail -} - -cleanup() { - if [[ -n "${V6_SIT}" ]]; then - ip -4 addr del "${V4_SIT}" dev "${SIT_DEV_NAME}" - ip -6 addr del "${V6_SIT}" dev "${SIT_DEV_NAME}" - ip link del "${SIT_DEV_NAME}" - fi - - ip -4 addr del "${V4_INNER}" dev lo - ip -6 addr del "${V6_INNER}" dev lo -} - -trap cleanup EXIT - -setup -"$@" -exit "$?" diff --git a/tools/testing/selftests/bpf/with_tunnels.sh b/tools/testing/selftests/bpf/with_tunnels.sh deleted file mode 100755 index e24949ed3a20..000000000000 --- a/tools/testing/selftests/bpf/with_tunnels.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# -# setup tunnels for flow dissection test - -readonly SUFFIX="test_$(mktemp -u XXXX)" -CONFIG="remote 127.0.0.2 local 127.0.0.1 dev lo" - -setup() { - ip link add "ipip_${SUFFIX}" type ipip ${CONFIG} - ip link add "gre_${SUFFIX}" type gre ${CONFIG} - ip link add "sit_${SUFFIX}" type sit ${CONFIG} - - echo "tunnels before test:" - ip tunnel show - - ip link set "ipip_${SUFFIX}" up - ip link set "gre_${SUFFIX}" up - ip link set "sit_${SUFFIX}" up -} - - -cleanup() { - ip tunnel del "ipip_${SUFFIX}" - ip tunnel del "gre_${SUFFIX}" - ip tunnel del "sit_${SUFFIX}" - - echo "tunnels after test:" - ip tunnel show -} - -trap cleanup EXIT - -setup -"$@" -exit "$?" diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c index 6f7b15d6c6ed..3d8de0d4c96a 100644 --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c @@ -13,6 +13,7 @@ * - UDP 9091 packets trigger TX reply * - TX HW timestamp is requested and reported back upon completion * - TX checksum is requested + * - TX launch time HW offload is requested for transmission */ #include <test_progs.h> @@ -37,6 +38,15 @@ #include <time.h> #include <unistd.h> #include <libgen.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <linux/pkt_sched.h> +#include <linux/pkt_cls.h> +#include <linux/ethtool.h> +#include <sys/socket.h> +#include <arpa/inet.h> #include "xdp_metadata.h" @@ -64,6 +74,18 @@ int rxq; bool skip_tx; __u64 last_hw_rx_timestamp; __u64 last_xdp_rx_timestamp; +__u64 last_launch_time; +__u64 launch_time_delta_to_hw_rx_timestamp; +int launch_time_queue; + +#define run_command(cmd, ...) \ +({ \ + char command[1024]; \ + memset(command, 0, sizeof(command)); \ + snprintf(command, sizeof(command), cmd, ##__VA_ARGS__); \ + fprintf(stderr, "Running: %s\n", command); \ + system(command); \ +}) void test__fail(void) { /* for network_helpers.c */ } @@ -298,6 +320,12 @@ static bool complete_tx(struct xsk *xsk, clockid_t clock_id) if (meta->completion.tx_timestamp) { __u64 ref_tstamp = gettime(clock_id); + if (launch_time_delta_to_hw_rx_timestamp) { + print_tstamp_delta("HW Launch-time", + "HW TX-complete-time", + last_launch_time, + meta->completion.tx_timestamp); + } print_tstamp_delta("HW TX-complete-time", "User TX-complete-time", meta->completion.tx_timestamp, ref_tstamp); print_tstamp_delta("XDP RX-time", "User TX-complete-time", @@ -395,6 +423,17 @@ static void ping_pong(struct xsk *xsk, void *rx_packet, clockid_t clock_id) xsk, ntohs(udph->check), ntohs(want_csum), meta->request.csum_start, meta->request.csum_offset); + /* Set the value of launch time */ + if (launch_time_delta_to_hw_rx_timestamp) { + meta->flags |= XDP_TXMD_FLAGS_LAUNCH_TIME; + meta->request.launch_time = last_hw_rx_timestamp + + launch_time_delta_to_hw_rx_timestamp; + last_launch_time = meta->request.launch_time; + print_tstamp_delta("HW RX-time", "HW Launch-time", + last_hw_rx_timestamp, + meta->request.launch_time); + } + memcpy(data, rx_packet, len); /* don't share umem chunk for simplicity */ tx_desc->options |= XDP_TX_METADATA; tx_desc->len = len; @@ -407,6 +446,7 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t const struct xdp_desc *rx_desc; struct pollfd fds[rxq + 1]; __u64 comp_addr; + __u64 deadline; __u64 addr; __u32 idx = 0; int ret; @@ -477,9 +517,15 @@ peek: if (ret) printf("kick_tx ret=%d\n", ret); - for (int j = 0; j < 500; j++) { + /* wait 1 second + cover launch time */ + deadline = gettime(clock_id) + + NANOSEC_PER_SEC + + launch_time_delta_to_hw_rx_timestamp; + while (true) { if (complete_tx(xsk, clock_id)) break; + if (gettime(clock_id) >= deadline) + break; usleep(10); } } @@ -608,6 +654,10 @@ static void print_usage(void) " -h Display this help and exit\n\n" " -m Enable multi-buffer XDP for larger MTU\n" " -r Don't generate AF_XDP reply (rx metadata only)\n" + " -l Delta of launch time relative to HW RX-time in ns\n" + " default: 0 ns (launch time request is disabled)\n" + " -L Tx Queue to be enabled with launch time offload\n" + " default: 0 (Tx Queue 0)\n" "Generate test packets on the other machine with:\n" " echo -n xdp | nc -u -q1 <dst_ip> 9091\n"; @@ -618,7 +668,7 @@ static void read_args(int argc, char *argv[]) { int opt; - while ((opt = getopt(argc, argv, "chmr")) != -1) { + while ((opt = getopt(argc, argv, "chmrl:L:")) != -1) { switch (opt) { case 'c': bind_flags &= ~XDP_USE_NEED_WAKEUP; @@ -634,6 +684,12 @@ static void read_args(int argc, char *argv[]) case 'r': skip_tx = true; break; + case 'l': + launch_time_delta_to_hw_rx_timestamp = atoll(optarg); + break; + case 'L': + launch_time_queue = atoll(optarg); + break; case '?': if (isprint(optopt)) fprintf(stderr, "Unknown option: -%c\n", optopt); @@ -657,23 +713,118 @@ static void read_args(int argc, char *argv[]) error(-1, errno, "Invalid interface name"); } +void clean_existing_configurations(void) +{ + /* Check and delete root qdisc if exists */ + if (run_command("sudo tc qdisc show dev %s | grep -q 'qdisc mqprio 8001:'", ifname) == 0) + run_command("sudo tc qdisc del dev %s root", ifname); + + /* Check and delete ingress qdisc if exists */ + if (run_command("sudo tc qdisc show dev %s | grep -q 'qdisc ingress ffff:'", ifname) == 0) + run_command("sudo tc qdisc del dev %s ingress", ifname); + + /* Check and delete ethtool filters if any exist */ + if (run_command("sudo ethtool -n %s | grep -q 'Filter:'", ifname) == 0) { + run_command("sudo ethtool -n %s | grep 'Filter:' | awk '{print $2}' | xargs -n1 sudo ethtool -N %s delete >&2", + ifname, ifname); + } +} + +#define MAX_TC 16 + int main(int argc, char *argv[]) { clockid_t clock_id = CLOCK_TAI; + struct bpf_program *prog; int server_fd = -1; + size_t map_len = 0; + size_t que_len = 0; + char *buf = NULL; + char *map = NULL; + char *que = NULL; + char *tmp = NULL; + int tc = 0; int ret; int i; - struct bpf_program *prog; - read_args(argc, argv); rxq = rxq_num(ifname); - printf("rxq: %d\n", rxq); + if (launch_time_queue >= rxq || launch_time_queue < 0) + error(1, 0, "Invalid launch_time_queue."); + + clean_existing_configurations(); + sleep(1); + + /* Enable tx and rx hardware timestamping */ hwtstamp_enable(ifname); + /* Prepare priority to traffic class map for tc-mqprio */ + for (i = 0; i < MAX_TC; i++) { + if (i < rxq) + tc = i; + + if (asprintf(&buf, "%d ", tc) == -1) { + printf("Failed to malloc buf for tc map.\n"); + goto free_mem; + } + + map_len += strlen(buf); + tmp = realloc(map, map_len + 1); + if (!tmp) { + printf("Failed to realloc tc map.\n"); + goto free_mem; + } + map = tmp; + strcat(map, buf); + free(buf); + buf = NULL; + } + + /* Prepare traffic class to hardware queue map for tc-mqprio */ + for (i = 0; i <= tc; i++) { + if (asprintf(&buf, "1@%d ", i) == -1) { + printf("Failed to malloc buf for tc queues.\n"); + goto free_mem; + } + + que_len += strlen(buf); + tmp = realloc(que, que_len + 1); + if (!tmp) { + printf("Failed to realloc tc queues.\n"); + goto free_mem; + } + que = tmp; + strcat(que, buf); + free(buf); + buf = NULL; + } + + /* Add mqprio qdisc */ + run_command("sudo tc qdisc add dev %s handle 8001: parent root mqprio num_tc %d map %squeues %shw 0", + ifname, tc + 1, map, que); + + /* To test launch time, send UDP packet with VLAN priority 1 to port 9091 */ + if (launch_time_delta_to_hw_rx_timestamp) { + /* Enable launch time hardware offload on launch_time_queue */ + run_command("sudo tc qdisc replace dev %s parent 8001:%d etf offload clockid CLOCK_TAI delta 500000", + ifname, launch_time_queue + 1); + sleep(1); + + /* Route incoming packet with VLAN priority 1 into launch_time_queue */ + if (run_command("sudo ethtool -N %s flow-type ether vlan 0x2000 vlan-mask 0x1FFF action %d", + ifname, launch_time_queue)) { + run_command("sudo tc qdisc add dev %s ingress", ifname); + run_command("sudo tc filter add dev %s parent ffff: protocol 802.1Q flower vlan_prio 1 hw_tc %d", + ifname, launch_time_queue); + } + + /* Enable VLAN tag stripping offload */ + run_command("sudo ethtool -K %s rxvlan on", ifname); + } + rx_xsk = malloc(sizeof(struct xsk) * rxq); if (!rx_xsk) error(1, ENOMEM, "malloc"); @@ -733,4 +884,11 @@ int main(int argc, char *argv[]) cleanup(); if (ret) error(1, -ret, "verify_metadata"); + + clean_existing_configurations(); + +free_mem: + free(buf); + free(map); + free(que); } diff --git a/tools/testing/selftests/bpf/xdp_redirect_multi.c b/tools/testing/selftests/bpf/xdp_redirect_multi.c deleted file mode 100644 index c1fc44c87c30..000000000000 --- a/tools/testing/selftests/bpf/xdp_redirect_multi.c +++ /dev/null @@ -1,226 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <linux/bpf.h> -#include <linux/if_link.h> -#include <assert.h> -#include <errno.h> -#include <signal.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <net/if.h> -#include <unistd.h> -#include <libgen.h> -#include <sys/ioctl.h> -#include <sys/types.h> -#include <sys/socket.h> -#include <netinet/in.h> - -#include "bpf_util.h" -#include <bpf/bpf.h> -#include <bpf/libbpf.h> - -#define MAX_IFACE_NUM 32 -#define MAX_INDEX_NUM 1024 - -static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; -static int ifaces[MAX_IFACE_NUM] = {}; - -static void int_exit(int sig) -{ - __u32 prog_id = 0; - int i; - - for (i = 0; ifaces[i] > 0; i++) { - if (bpf_xdp_query_id(ifaces[i], xdp_flags, &prog_id)) { - printf("bpf_xdp_query_id failed\n"); - exit(1); - } - if (prog_id) - bpf_xdp_detach(ifaces[i], xdp_flags, NULL); - } - - exit(0); -} - -static int get_mac_addr(unsigned int ifindex, void *mac_addr) -{ - char ifname[IF_NAMESIZE]; - struct ifreq ifr; - int fd, ret = -1; - - fd = socket(AF_INET, SOCK_DGRAM, 0); - if (fd < 0) - return ret; - - if (!if_indextoname(ifindex, ifname)) - goto err_out; - - strcpy(ifr.ifr_name, ifname); - - if (ioctl(fd, SIOCGIFHWADDR, &ifr) != 0) - goto err_out; - - memcpy(mac_addr, ifr.ifr_hwaddr.sa_data, 6 * sizeof(char)); - ret = 0; - -err_out: - close(fd); - return ret; -} - -static void usage(const char *prog) -{ - fprintf(stderr, - "usage: %s [OPTS] <IFNAME|IFINDEX> <IFNAME|IFINDEX> ...\n" - "OPTS:\n" - " -S use skb-mode\n" - " -N enforce native mode\n" - " -F force loading prog\n" - " -X load xdp program on egress\n", - prog); -} - -int main(int argc, char **argv) -{ - int prog_fd, group_all, mac_map; - struct bpf_program *ingress_prog, *egress_prog; - int i, err, ret, opt, egress_prog_fd = 0; - struct bpf_devmap_val devmap_val; - bool attach_egress_prog = false; - unsigned char mac_addr[6]; - char ifname[IF_NAMESIZE]; - struct bpf_object *obj; - unsigned int ifindex; - char filename[256]; - - while ((opt = getopt(argc, argv, "SNFX")) != -1) { - switch (opt) { - case 'S': - xdp_flags |= XDP_FLAGS_SKB_MODE; - break; - case 'N': - /* default, set below */ - break; - case 'F': - xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; - break; - case 'X': - attach_egress_prog = true; - break; - default: - usage(basename(argv[0])); - return 1; - } - } - - if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) { - xdp_flags |= XDP_FLAGS_DRV_MODE; - } else if (attach_egress_prog) { - printf("Load xdp program on egress with SKB mode not supported yet\n"); - goto err_out; - } - - if (optind == argc) { - printf("usage: %s <IFNAME|IFINDEX> <IFNAME|IFINDEX> ...\n", argv[0]); - goto err_out; - } - - printf("Get interfaces:"); - for (i = 0; i < MAX_IFACE_NUM && argv[optind + i]; i++) { - ifaces[i] = if_nametoindex(argv[optind + i]); - if (!ifaces[i]) - ifaces[i] = strtoul(argv[optind + i], NULL, 0); - if (!if_indextoname(ifaces[i], ifname)) { - perror("Invalid interface name or i"); - goto err_out; - } - if (ifaces[i] > MAX_INDEX_NUM) { - printf(" interface index too large\n"); - goto err_out; - } - printf(" %d", ifaces[i]); - } - printf("\n"); - - snprintf(filename, sizeof(filename), "%s_kern.bpf.o", argv[0]); - obj = bpf_object__open_file(filename, NULL); - err = libbpf_get_error(obj); - if (err) - goto err_out; - err = bpf_object__load(obj); - if (err) - goto err_out; - prog_fd = bpf_program__fd(bpf_object__next_program(obj, NULL)); - - if (attach_egress_prog) - group_all = bpf_object__find_map_fd_by_name(obj, "map_egress"); - else - group_all = bpf_object__find_map_fd_by_name(obj, "map_all"); - mac_map = bpf_object__find_map_fd_by_name(obj, "mac_map"); - - if (group_all < 0 || mac_map < 0) { - printf("bpf_object__find_map_fd_by_name failed\n"); - goto err_out; - } - - if (attach_egress_prog) { - /* Find ingress/egress prog for 2nd xdp prog */ - ingress_prog = bpf_object__find_program_by_name(obj, "xdp_redirect_map_all_prog"); - egress_prog = bpf_object__find_program_by_name(obj, "xdp_devmap_prog"); - if (!ingress_prog || !egress_prog) { - printf("finding ingress/egress_prog in obj file failed\n"); - goto err_out; - } - prog_fd = bpf_program__fd(ingress_prog); - egress_prog_fd = bpf_program__fd(egress_prog); - if (prog_fd < 0 || egress_prog_fd < 0) { - printf("find egress_prog fd failed\n"); - goto err_out; - } - } - - signal(SIGINT, int_exit); - signal(SIGTERM, int_exit); - - /* Init forward multicast groups and exclude group */ - for (i = 0; ifaces[i] > 0; i++) { - ifindex = ifaces[i]; - - if (attach_egress_prog) { - ret = get_mac_addr(ifindex, mac_addr); - if (ret < 0) { - printf("get interface %d mac failed\n", ifindex); - goto err_out; - } - ret = bpf_map_update_elem(mac_map, &ifindex, mac_addr, 0); - if (ret) { - perror("bpf_update_elem mac_map failed\n"); - goto err_out; - } - } - - /* Add all the interfaces to group all */ - devmap_val.ifindex = ifindex; - devmap_val.bpf_prog.fd = egress_prog_fd; - ret = bpf_map_update_elem(group_all, &ifindex, &devmap_val, 0); - if (ret) { - perror("bpf_map_update_elem"); - goto err_out; - } - - /* bind prog_fd to each interface */ - ret = bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL); - if (ret) { - printf("Set xdp fd failed on %d\n", ifindex); - goto err_out; - } - } - - /* sleep some time for testing */ - sleep(999); - - return 0; - -err_out: - return 1; -} diff --git a/tools/testing/selftests/drivers/net/.gitignore b/tools/testing/selftests/drivers/net/.gitignore new file mode 100644 index 000000000000..ec746f374e85 --- /dev/null +++ b/tools/testing/selftests/drivers/net/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +xdp_helper diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile index 137470bdee0c..0c95bd944d56 100644 --- a/tools/testing/selftests/drivers/net/Makefile +++ b/tools/testing/selftests/drivers/net/Makefile @@ -1,13 +1,18 @@ # SPDX-License-Identifier: GPL-2.0 +CFLAGS += $(KHDR_INCLUDES) TEST_INCLUDES := $(wildcard lib/py/*.py) \ $(wildcard lib/sh/*.sh) \ ../../net/net_helper.sh \ ../../net/lib.sh \ +TEST_GEN_FILES := xdp_helper + TEST_PROGS := \ netcons_basic.sh \ + netcons_fragmented_msg.sh \ netcons_overflow.sh \ + netcons_sysdata.sh \ ping.py \ queues.py \ stats.py \ diff --git a/tools/testing/selftests/drivers/net/README.rst b/tools/testing/selftests/drivers/net/README.rst index 3b6a29e6564b..eb838ae94844 100644 --- a/tools/testing/selftests/drivers/net/README.rst +++ b/tools/testing/selftests/drivers/net/README.rst @@ -107,7 +107,7 @@ On the target machine, running the tests will use netdevsim by default:: 1..1 # timeout set to 45 # selftests: drivers/net: ping.py - # KTAP version 1 + # TAP version 13 # 1..3 # ok 1 ping.test_v4 # ok 2 ping.test_v6 @@ -128,7 +128,7 @@ Create a config with remote info:: Run the test:: [/root] # ./ksft-net-drv/drivers/net/ping.py - KTAP version 1 + TAP version 13 1..3 ok 1 ping.test_v4 ok 2 ping.test_v6 # SKIP Test requires IPv6 connectivity diff --git a/tools/testing/selftests/drivers/net/config b/tools/testing/selftests/drivers/net/config index a2d8af60876d..f27172ddee0a 100644 --- a/tools/testing/selftests/drivers/net/config +++ b/tools/testing/selftests/drivers/net/config @@ -4,3 +4,4 @@ CONFIG_CONFIGFS_FS=y CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y CONFIG_NETCONSOLE_EXTENDED_LOG=y +CONFIG_XDP_SOCKETS=y diff --git a/tools/testing/selftests/drivers/net/hds.py b/tools/testing/selftests/drivers/net/hds.py index 873f5219e41d..7cc74faed743 100755 --- a/tools/testing/selftests/drivers/net/hds.py +++ b/tools/testing/selftests/drivers/net/hds.py @@ -20,8 +20,7 @@ def _get_hds_mode(cfg, netnl) -> str: def _xdp_onoff(cfg): - test_dir = os.path.dirname(os.path.realpath(__file__)) - prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" + prog = cfg.rpath("../../net/lib/xdp_dummy.bpf.o") ip("link set dev %s xdp obj %s sec xdp" % (cfg.ifname, prog)) ip("link set dev %s xdp off" % cfg.ifname) diff --git a/tools/testing/selftests/drivers/net/hw/.gitignore b/tools/testing/selftests/drivers/net/hw/.gitignore index e9fe6ede681a..6942bf575497 100644 --- a/tools/testing/selftests/drivers/net/hw/.gitignore +++ b/tools/testing/selftests/drivers/net/hw/.gitignore @@ -1 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +iou-zcrx ncdevmem diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile index 21ba64ce1e34..07cddb19ba35 100644 --- a/tools/testing/selftests/drivers/net/hw/Makefile +++ b/tools/testing/selftests/drivers/net/hw/Makefile @@ -1,5 +1,7 @@ # SPDX-License-Identifier: GPL-2.0+ OR MIT +TEST_GEN_FILES = iou-zcrx + TEST_PROGS = \ csum.py \ devlink_port_split.py \ @@ -10,11 +12,15 @@ TEST_PROGS = \ ethtool_rmon.sh \ hw_stats_l3.sh \ hw_stats_l3_gre.sh \ + iou-zcrx.py \ + irq.py \ loopback.sh \ nic_link_layer.py \ nic_performance.py \ pp_alloc_fail.py \ rss_ctx.py \ + rss_input_xfrm.py \ + tso.py \ # TEST_FILES := \ @@ -32,9 +38,14 @@ TEST_INCLUDES := \ # YNL files, must be before "include ..lib.mk" YNL_GEN_FILES := ncdevmem TEST_GEN_FILES += $(YNL_GEN_FILES) +TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c)) include ../../../lib.mk # YNL build YNL_GENS := ethtool netdev include ../../../net/ynl.mk + +include ../../../net/bpf.mk + +$(OUTPUT)/iou-zcrx: LDLIBS += -luring diff --git a/tools/testing/selftests/drivers/net/hw/csum.py b/tools/testing/selftests/drivers/net/hw/csum.py index cb40497faee4..701aca1361e0 100755 --- a/tools/testing/selftests/drivers/net/hw/csum.py +++ b/tools/testing/selftests/drivers/net/hw/csum.py @@ -9,15 +9,12 @@ from lib.py import ksft_run, ksft_exit, KsftSkipEx from lib.py import EthtoolFamily, NetDrvEpEnv from lib.py import bkg, cmd, wait_port_listen -def test_receive(cfg, ipv4=False, extra_args=None): +def test_receive(cfg, ipver="6", extra_args=None): """Test local nic checksum receive. Remote host sends crafted packets.""" if not cfg.have_rx_csum: raise KsftSkipEx(f"Test requires rx checksum offload on {cfg.ifname}") - if ipv4: - ip_args = f"-4 -S {cfg.remote_v4} -D {cfg.v4}" - else: - ip_args = f"-6 -S {cfg.remote_v6} -D {cfg.v6}" + ip_args = f"-{ipver} -S {cfg.remote_addr_v[ipver]} -D {cfg.addr_v[ipver]}" rx_cmd = f"{cfg.bin_local} -i {cfg.ifname} -n 100 {ip_args} -r 1 -R {extra_args}" tx_cmd = f"{cfg.bin_remote} -i {cfg.ifname} -n 100 {ip_args} -r 1 -T {extra_args}" @@ -27,17 +24,14 @@ def test_receive(cfg, ipv4=False, extra_args=None): cmd(tx_cmd, host=cfg.remote) -def test_transmit(cfg, ipv4=False, extra_args=None): +def test_transmit(cfg, ipver="6", extra_args=None): """Test local nic checksum transmit. Remote host verifies packets.""" if (not cfg.have_tx_csum_generic and - not (cfg.have_tx_csum_ipv4 and ipv4) and - not (cfg.have_tx_csum_ipv6 and not ipv4)): + not (cfg.have_tx_csum_ipv4 and ipver == "4") and + not (cfg.have_tx_csum_ipv6 and ipver == "6")): raise KsftSkipEx(f"Test requires tx checksum offload on {cfg.ifname}") - if ipv4: - ip_args = f"-4 -S {cfg.v4} -D {cfg.remote_v4}" - else: - ip_args = f"-6 -S {cfg.v6} -D {cfg.remote_v6}" + ip_args = f"-{ipver} -S {cfg.addr_v[ipver]} -D {cfg.remote_addr_v[ipver]}" # Cannot randomize input when calculating zero checksum if extra_args != "-U -Z": @@ -51,26 +45,20 @@ def test_transmit(cfg, ipv4=False, extra_args=None): cmd(tx_cmd) -def test_builder(name, cfg, ipv4=False, tx=False, extra_args=""): +def test_builder(name, cfg, ipver="6", tx=False, extra_args=""): """Construct specific tests from the common template. Most tests follow the same basic pattern, differing only in Direction of the test and optional flags passed to csum.""" def f(cfg): - if ipv4: - cfg.require_v4() - else: - cfg.require_v6() + cfg.require_ipver(ipver) if tx: - test_transmit(cfg, ipv4, extra_args) + test_transmit(cfg, ipver, extra_args) else: - test_receive(cfg, ipv4, extra_args) + test_receive(cfg, ipver, extra_args) - if ipv4: - f.__name__ = "ipv4_" + name - else: - f.__name__ = "ipv6_" + name + f.__name__ = f"ipv{ipver}_" + name return f @@ -100,19 +88,19 @@ def main() -> None: with NetDrvEpEnv(__file__, nsim_test=False) as cfg: check_nic_features(cfg) - cfg.bin_local = path.abspath(path.dirname(__file__) + "/../../../net/lib/csum") + cfg.bin_local = cfg.rpath("../../../net/lib/csum") cfg.bin_remote = cfg.remote.deploy(cfg.bin_local) cases = [] - for ipv4 in [True, False]: - cases.append(test_builder("rx_tcp", cfg, ipv4, False, "-t")) - cases.append(test_builder("rx_tcp_invalid", cfg, ipv4, False, "-t -E")) + for ipver in ["4", "6"]: + cases.append(test_builder("rx_tcp", cfg, ipver, False, "-t")) + cases.append(test_builder("rx_tcp_invalid", cfg, ipver, False, "-t -E")) - cases.append(test_builder("rx_udp", cfg, ipv4, False, "")) - cases.append(test_builder("rx_udp_invalid", cfg, ipv4, False, "-E")) + cases.append(test_builder("rx_udp", cfg, ipver, False, "")) + cases.append(test_builder("rx_udp_invalid", cfg, ipver, False, "-E")) - cases.append(test_builder("tx_udp_csum_offload", cfg, ipv4, True, "-U")) - cases.append(test_builder("tx_udp_zero_checksum", cfg, ipv4, True, "-U -Z")) + cases.append(test_builder("tx_udp_csum_offload", cfg, ipver, True, "-U")) + cases.append(test_builder("tx_udp_zero_checksum", cfg, ipver, True, "-U -Z")) ksft_run(cases=cases, args=(cfg, )) ksft_exit() diff --git a/tools/testing/selftests/drivers/net/hw/devmem.py b/tools/testing/selftests/drivers/net/hw/devmem.py index 1223f0f5c10c..3947e9157115 100755 --- a/tools/testing/selftests/drivers/net/hw/devmem.py +++ b/tools/testing/selftests/drivers/net/hw/devmem.py @@ -21,15 +21,15 @@ def require_devmem(cfg): @ksft_disruptive def check_rx(cfg) -> None: - cfg.require_v6() + cfg.require_ipver("6") require_devmem(cfg) port = rand_port() - listen_cmd = f"./ncdevmem -l -f {cfg.ifname} -s {cfg.v6} -p {port}" + listen_cmd = f"./ncdevmem -l -f {cfg.ifname} -s {cfg.addr_v['6']} -p {port}" with bkg(listen_cmd) as socat: wait_port_listen(port) - cmd(f"echo -e \"hello\\nworld\"| socat -u - TCP6:[{cfg.v6}]:{port}", host=cfg.remote, shell=True) + cmd(f"echo -e \"hello\\nworld\"| socat -u - TCP6:[{cfg.addr_v['6']}]:{port}", host=cfg.remote, shell=True) ksft_eq(socat.stdout.strip(), "hello\nworld") diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.c b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c new file mode 100644 index 000000000000..c26b4180eddd --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c @@ -0,0 +1,457 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <assert.h> +#include <errno.h> +#include <error.h> +#include <fcntl.h> +#include <limits.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <arpa/inet.h> +#include <linux/errqueue.h> +#include <linux/if_packet.h> +#include <linux/ipv6.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <net/ethernet.h> +#include <net/if.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/tcp.h> +#include <netinet/udp.h> +#include <sys/epoll.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/resource.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/un.h> +#include <sys/wait.h> + +#include <liburing.h> + +#define PAGE_SIZE (4096) +#define AREA_SIZE (8192 * PAGE_SIZE) +#define SEND_SIZE (512 * 4096) +#define min(a, b) \ + ({ \ + typeof(a) _a = (a); \ + typeof(b) _b = (b); \ + _a < _b ? _a : _b; \ + }) +#define min_t(t, a, b) \ + ({ \ + t _ta = (a); \ + t _tb = (b); \ + min(_ta, _tb); \ + }) + +#define ALIGN_UP(v, align) (((v) + (align) - 1) & ~((align) - 1)) + +static int cfg_server; +static int cfg_client; +static int cfg_port = 8000; +static int cfg_payload_len; +static const char *cfg_ifname; +static int cfg_queue_id = -1; +static bool cfg_oneshot; +static int cfg_oneshot_recvs; +static int cfg_send_size = SEND_SIZE; +static struct sockaddr_in6 cfg_addr; + +static char payload[SEND_SIZE] __attribute__((aligned(PAGE_SIZE))); +static void *area_ptr; +static void *ring_ptr; +static size_t ring_size; +static struct io_uring_zcrx_rq rq_ring; +static unsigned long area_token; +static int connfd; +static bool stop; +static size_t received; + +static unsigned long gettimeofday_ms(void) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000) + (tv.tv_usec / 1000); +} + +static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6) +{ + int ret; + + sin6->sin6_family = AF_INET6; + sin6->sin6_port = htons(port); + + ret = inet_pton(sin6->sin6_family, str, &sin6->sin6_addr); + if (ret != 1) { + /* fallback to plain IPv4 */ + ret = inet_pton(AF_INET, str, &sin6->sin6_addr.s6_addr32[3]); + if (ret != 1) + return -1; + + /* add ::ffff prefix */ + sin6->sin6_addr.s6_addr32[0] = 0; + sin6->sin6_addr.s6_addr32[1] = 0; + sin6->sin6_addr.s6_addr16[4] = 0; + sin6->sin6_addr.s6_addr16[5] = 0xffff; + } + + return 0; +} + +static inline size_t get_refill_ring_size(unsigned int rq_entries) +{ + size_t size; + + ring_size = rq_entries * sizeof(struct io_uring_zcrx_rqe); + /* add space for the header (head/tail/etc.) */ + ring_size += PAGE_SIZE; + return ALIGN_UP(ring_size, 4096); +} + +static void setup_zcrx(struct io_uring *ring) +{ + unsigned int ifindex; + unsigned int rq_entries = 4096; + int ret; + + ifindex = if_nametoindex(cfg_ifname); + if (!ifindex) + error(1, 0, "bad interface name: %s", cfg_ifname); + + area_ptr = mmap(NULL, + AREA_SIZE, + PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, + 0, + 0); + if (area_ptr == MAP_FAILED) + error(1, 0, "mmap(): zero copy area"); + + ring_size = get_refill_ring_size(rq_entries); + ring_ptr = mmap(NULL, + ring_size, + PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, + 0, + 0); + + struct io_uring_region_desc region_reg = { + .size = ring_size, + .user_addr = (__u64)(unsigned long)ring_ptr, + .flags = IORING_MEM_REGION_TYPE_USER, + }; + + struct io_uring_zcrx_area_reg area_reg = { + .addr = (__u64)(unsigned long)area_ptr, + .len = AREA_SIZE, + .flags = 0, + }; + + struct io_uring_zcrx_ifq_reg reg = { + .if_idx = ifindex, + .if_rxq = cfg_queue_id, + .rq_entries = rq_entries, + .area_ptr = (__u64)(unsigned long)&area_reg, + .region_ptr = (__u64)(unsigned long)®ion_reg, + }; + + ret = io_uring_register_ifq(ring, ®); + if (ret) + error(1, 0, "io_uring_register_ifq(): %d", ret); + + rq_ring.khead = (unsigned int *)((char *)ring_ptr + reg.offsets.head); + rq_ring.ktail = (unsigned int *)((char *)ring_ptr + reg.offsets.tail); + rq_ring.rqes = (struct io_uring_zcrx_rqe *)((char *)ring_ptr + reg.offsets.rqes); + rq_ring.rq_tail = 0; + rq_ring.ring_entries = reg.rq_entries; + + area_token = area_reg.rq_area_token; +} + +static void add_accept(struct io_uring *ring, int sockfd) +{ + struct io_uring_sqe *sqe; + + sqe = io_uring_get_sqe(ring); + + io_uring_prep_accept(sqe, sockfd, NULL, NULL, 0); + sqe->user_data = 1; +} + +static void add_recvzc(struct io_uring *ring, int sockfd) +{ + struct io_uring_sqe *sqe; + + sqe = io_uring_get_sqe(ring); + + io_uring_prep_rw(IORING_OP_RECV_ZC, sqe, sockfd, NULL, 0, 0); + sqe->ioprio |= IORING_RECV_MULTISHOT; + sqe->user_data = 2; +} + +static void add_recvzc_oneshot(struct io_uring *ring, int sockfd, size_t len) +{ + struct io_uring_sqe *sqe; + + sqe = io_uring_get_sqe(ring); + + io_uring_prep_rw(IORING_OP_RECV_ZC, sqe, sockfd, NULL, len, 0); + sqe->ioprio |= IORING_RECV_MULTISHOT; + sqe->user_data = 2; +} + +static void process_accept(struct io_uring *ring, struct io_uring_cqe *cqe) +{ + if (cqe->res < 0) + error(1, 0, "accept()"); + if (connfd) + error(1, 0, "Unexpected second connection"); + + connfd = cqe->res; + if (cfg_oneshot) + add_recvzc_oneshot(ring, connfd, PAGE_SIZE); + else + add_recvzc(ring, connfd); +} + +static void process_recvzc(struct io_uring *ring, struct io_uring_cqe *cqe) +{ + unsigned rq_mask = rq_ring.ring_entries - 1; + struct io_uring_zcrx_cqe *rcqe; + struct io_uring_zcrx_rqe *rqe; + struct io_uring_sqe *sqe; + uint64_t mask; + char *data; + ssize_t n; + int i; + + if (cqe->res == 0 && cqe->flags == 0 && cfg_oneshot_recvs == 0) { + stop = true; + return; + } + + if (cqe->res < 0) + error(1, 0, "recvzc(): %d", cqe->res); + + if (cfg_oneshot) { + if (cqe->res == 0 && cqe->flags == 0 && cfg_oneshot_recvs) { + add_recvzc_oneshot(ring, connfd, PAGE_SIZE); + cfg_oneshot_recvs--; + } + } else if (!(cqe->flags & IORING_CQE_F_MORE)) { + add_recvzc(ring, connfd); + } + + rcqe = (struct io_uring_zcrx_cqe *)(cqe + 1); + + n = cqe->res; + mask = (1ULL << IORING_ZCRX_AREA_SHIFT) - 1; + data = (char *)area_ptr + (rcqe->off & mask); + + for (i = 0; i < n; i++) { + if (*(data + i) != payload[(received + i)]) + error(1, 0, "payload mismatch at ", i); + } + received += n; + + rqe = &rq_ring.rqes[(rq_ring.rq_tail & rq_mask)]; + rqe->off = (rcqe->off & ~IORING_ZCRX_AREA_MASK) | area_token; + rqe->len = cqe->res; + io_uring_smp_store_release(rq_ring.ktail, ++rq_ring.rq_tail); +} + +static void server_loop(struct io_uring *ring) +{ + struct io_uring_cqe *cqe; + unsigned int count = 0; + unsigned int head; + int i, ret; + + io_uring_submit_and_wait(ring, 1); + + io_uring_for_each_cqe(ring, head, cqe) { + if (cqe->user_data == 1) + process_accept(ring, cqe); + else if (cqe->user_data == 2) + process_recvzc(ring, cqe); + else + error(1, 0, "unknown cqe"); + count++; + } + io_uring_cq_advance(ring, count); +} + +static void run_server(void) +{ + unsigned int flags = 0; + struct io_uring ring; + int fd, enable, ret; + uint64_t tstop; + + fd = socket(AF_INET6, SOCK_STREAM, 0); + if (fd == -1) + error(1, 0, "socket()"); + + enable = 1; + ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(int)); + if (ret < 0) + error(1, 0, "setsockopt(SO_REUSEADDR)"); + + ret = bind(fd, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr)); + if (ret < 0) + error(1, 0, "bind()"); + + if (listen(fd, 1024) < 0) + error(1, 0, "listen()"); + + flags |= IORING_SETUP_COOP_TASKRUN; + flags |= IORING_SETUP_SINGLE_ISSUER; + flags |= IORING_SETUP_DEFER_TASKRUN; + flags |= IORING_SETUP_SUBMIT_ALL; + flags |= IORING_SETUP_CQE32; + + io_uring_queue_init(512, &ring, flags); + + setup_zcrx(&ring); + + add_accept(&ring, fd); + + tstop = gettimeofday_ms() + 5000; + while (!stop && gettimeofday_ms() < tstop) + server_loop(&ring); + + if (!stop) + error(1, 0, "test failed\n"); +} + +static void run_client(void) +{ + ssize_t to_send = cfg_send_size; + ssize_t sent = 0; + ssize_t chunk, res; + int fd; + + fd = socket(AF_INET6, SOCK_STREAM, 0); + if (fd == -1) + error(1, 0, "socket()"); + + if (connect(fd, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr))) + error(1, 0, "connect()"); + + while (to_send) { + void *src = &payload[sent]; + + chunk = min_t(ssize_t, cfg_payload_len, to_send); + res = send(fd, src, chunk, 0); + if (res < 0) + error(1, 0, "send(): %d", sent); + sent += res; + to_send -= res; + } + + close(fd); +} + +static void usage(const char *filepath) +{ + error(1, 0, "Usage: %s (-4|-6) (-s|-c) -h<server_ip> -p<port> " + "-l<payload_size> -i<ifname> -q<rxq_id>", filepath); +} + +static void parse_opts(int argc, char **argv) +{ + const int max_payload_len = sizeof(payload) - + sizeof(struct ipv6hdr) - + sizeof(struct tcphdr) - + 40 /* max tcp options */; + struct sockaddr_in6 *addr6 = (void *) &cfg_addr; + char *addr = NULL; + int ret; + int c; + + if (argc <= 1) + usage(argv[0]); + cfg_payload_len = max_payload_len; + + while ((c = getopt(argc, argv, "sch:p:l:i:q:o:z:")) != -1) { + switch (c) { + case 's': + if (cfg_client) + error(1, 0, "Pass one of -s or -c"); + cfg_server = 1; + break; + case 'c': + if (cfg_server) + error(1, 0, "Pass one of -s or -c"); + cfg_client = 1; + break; + case 'h': + addr = optarg; + break; + case 'p': + cfg_port = strtoul(optarg, NULL, 0); + break; + case 'l': + cfg_payload_len = strtoul(optarg, NULL, 0); + break; + case 'i': + cfg_ifname = optarg; + break; + case 'q': + cfg_queue_id = strtoul(optarg, NULL, 0); + break; + case 'o': { + cfg_oneshot = true; + cfg_oneshot_recvs = strtoul(optarg, NULL, 0); + break; + } + case 'z': + cfg_send_size = strtoul(optarg, NULL, 0); + break; + } + } + + if (cfg_server && addr) + error(1, 0, "Receiver cannot have -h specified"); + + memset(addr6, 0, sizeof(*addr6)); + addr6->sin6_family = AF_INET6; + addr6->sin6_port = htons(cfg_port); + addr6->sin6_addr = in6addr_any; + if (addr) { + ret = parse_address(addr, cfg_port, addr6); + if (ret) + error(1, 0, "receiver address parse error: %s", addr); + } + + if (cfg_payload_len > max_payload_len) + error(1, 0, "-l: payload exceeds max (%d)", max_payload_len); +} + +int main(int argc, char **argv) +{ + const char *cfg_test = argv[argc - 1]; + int i; + + parse_opts(argc, argv); + + for (i = 0; i < SEND_SIZE; i++) + payload[i] = 'a' + (i % 26); + + if (cfg_server) + run_server(); + else if (cfg_client) + run_client(); + + return 0; +} diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py new file mode 100755 index 000000000000..d301d9b356f7 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import re +from os import path +from lib.py import ksft_run, ksft_exit +from lib.py import NetDrvEpEnv +from lib.py import bkg, cmd, ethtool, wait_port_listen + + +def _get_rx_ring_entries(cfg): + output = ethtool(f"-g {cfg.ifname}", host=cfg.remote).stdout + values = re.findall(r'RX:\s+(\d+)', output) + return int(values[1]) + + +def _get_combined_channels(cfg): + output = ethtool(f"-l {cfg.ifname}", host=cfg.remote).stdout + values = re.findall(r'Combined:\s+(\d+)', output) + return int(values[1]) + + +def _set_flow_rule(cfg, chan): + output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port 9999 action {chan}", host=cfg.remote).stdout + values = re.search(r'ID (\d+)', output).group(1) + return int(values) + + +def test_zcrx(cfg) -> None: + cfg.require_v6() + + combined_chans = _get_combined_channels(cfg) + if combined_chans < 2: + raise KsftSkipEx('at least 2 combined channels required') + rx_ring = _get_rx_ring_entries(cfg) + + try: + ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote) + ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote) + flow_rule_id = _set_flow_rule(cfg, combined_chans - 1) + + rx_cmd = f"{cfg.bin_remote} -s -p 9999 -i {cfg.ifname} -q {combined_chans - 1}" + tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_v6} -p 9999 -l 12840" + with bkg(rx_cmd, host=cfg.remote, exit_wait=True): + wait_port_listen(9999, proto="tcp", host=cfg.remote) + cmd(tx_cmd) + finally: + ethtool(f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote) + ethtool(f"-X {cfg.ifname} default", host=cfg.remote) + ethtool(f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote) + + +def test_zcrx_oneshot(cfg) -> None: + cfg.require_v6() + + combined_chans = _get_combined_channels(cfg) + if combined_chans < 2: + raise KsftSkipEx('at least 2 combined channels required') + rx_ring = _get_rx_ring_entries(cfg) + + try: + ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote) + ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote) + flow_rule_id = _set_flow_rule(cfg, combined_chans - 1) + + rx_cmd = f"{cfg.bin_remote} -s -p 9999 -i {cfg.ifname} -q {combined_chans - 1} -o 4" + tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_v6} -p 9999 -l 4096 -z 16384" + with bkg(rx_cmd, host=cfg.remote, exit_wait=True): + wait_port_listen(9999, proto="tcp", host=cfg.remote) + cmd(tx_cmd) + finally: + ethtool(f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote) + ethtool(f"-X {cfg.ifname} default", host=cfg.remote) + ethtool(f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote) + + +def main() -> None: + with NetDrvEpEnv(__file__) as cfg: + cfg.bin_local = path.abspath(path.dirname(__file__) + "/../../../drivers/net/hw/iou-zcrx") + cfg.bin_remote = cfg.remote.deploy(cfg.bin_local) + + ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/irq.py b/tools/testing/selftests/drivers/net/hw/irq.py new file mode 100755 index 000000000000..42ab98370245 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/irq.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +from lib.py import ksft_run, ksft_exit +from lib.py import ksft_ge, ksft_eq +from lib.py import KsftSkipEx +from lib.py import ksft_disruptive +from lib.py import EthtoolFamily, NetdevFamily +from lib.py import NetDrvEnv +from lib.py import cmd, ip, defer + + +def read_affinity(irq) -> str: + with open(f'/proc/irq/{irq}/smp_affinity', 'r') as fp: + return fp.read().lstrip("0,").strip() + + +def write_affinity(irq, what) -> str: + if what != read_affinity(irq): + with open(f'/proc/irq/{irq}/smp_affinity', 'w') as fp: + fp.write(what) + + +def check_irqs_reported(cfg) -> None: + """ Check that device reports IRQs for NAPI instances """ + napis = cfg.netnl.napi_get({"ifindex": cfg.ifindex}, dump=True) + irqs = sum(['irq' in x for x in napis]) + + ksft_ge(irqs, 1) + ksft_eq(irqs, len(napis)) + + +def _check_reconfig(cfg, reconfig_cb) -> None: + napis = cfg.netnl.napi_get({"ifindex": cfg.ifindex}, dump=True) + for n in reversed(napis): + if 'irq' in n: + break + else: + raise KsftSkipEx(f"Device has no NAPI with IRQ attribute (#napis: {len(napis)}") + + old = read_affinity(n['irq']) + # pick an affinity that's not the current one + new = "3" if old != "3" else "5" + write_affinity(n['irq'], new) + defer(write_affinity, n['irq'], old) + + reconfig_cb(cfg) + + ksft_eq(read_affinity(n['irq']), new, comment="IRQ affinity changed after reconfig") + + +def check_reconfig_queues(cfg) -> None: + def reconfig(cfg) -> None: + channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}}) + if channels['combined-count'] == 0: + rx_type = 'rx' + else: + rx_type = 'combined' + cur_queue_cnt = channels[f'{rx_type}-count'] + max_queue_cnt = channels[f'{rx_type}-max'] + + cmd(f"ethtool -L {cfg.ifname} {rx_type} 1") + cmd(f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}") + cmd(f"ethtool -L {cfg.ifname} {rx_type} {cur_queue_cnt}") + + _check_reconfig(cfg, reconfig) + + +def check_reconfig_xdp(cfg) -> None: + def reconfig(cfg) -> None: + ip(f"link set dev %s xdp obj %s sec xdp" % + (cfg.ifname, cfg.rpath("xdp_dummy.bpf.o"))) + ip(f"link set dev %s xdp off" % cfg.ifname) + + _check_reconfig(cfg, reconfig) + + +@ksft_disruptive +def check_down(cfg) -> None: + def reconfig(cfg) -> None: + ip("link set dev %s down" % cfg.ifname) + ip("link set dev %s up" % cfg.ifname) + + _check_reconfig(cfg, reconfig) + + +def main() -> None: + with NetDrvEnv(__file__, nsim_test=False) as cfg: + cfg.ethnl = EthtoolFamily() + cfg.netnl = NetdevFamily() + + ksft_run([check_irqs_reported, check_reconfig_queues, + check_reconfig_xdp, check_down], + args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c index 19a6969643f4..2bf14ac2b8c6 100644 --- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c +++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c @@ -50,7 +50,6 @@ #include <linux/memfd.h> #include <linux/dma-buf.h> #include <linux/udmabuf.h> -#include <libmnl/libmnl.h> #include <linux/types.h> #include <linux/netlink.h> #include <linux/genetlink.h> diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py index 319aaa004c40..ca60ae325c22 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py +++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py @@ -4,7 +4,8 @@ import datetime import random import re -from lib.py import ksft_run, ksft_pr, ksft_exit, ksft_eq, ksft_ne, ksft_ge, ksft_lt, ksft_true +from lib.py import ksft_run, ksft_pr, ksft_exit +from lib.py import ksft_eq, ksft_ne, ksft_ge, ksft_in, ksft_lt, ksft_true, ksft_raises from lib.py import NetDrvEpEnv from lib.py import EthtoolFamily, NetdevFamily from lib.py import KsftSkipEx, KsftFailEx @@ -58,6 +59,14 @@ def require_ntuple(cfg): raise KsftSkipEx("Ntuple filters not enabled on the device: " + str(features["ntuple-filters"])) +def require_context_cnt(cfg, need_cnt): + # There's no good API to get the context count, so the tests + # which try to add a lot opportunisitically set the count they + # discovered. Careful with test ordering! + if need_cnt and cfg.context_cnt and cfg.context_cnt < need_cnt: + raise KsftSkipEx(f"Test requires at least {need_cnt} contexts, but device only has {cfg.context_cnt}") + + # Get Rx packet counts for all queues, as a simple list of integers # if @prev is specified the prev counts will be subtracted def _get_rx_cnts(cfg, prev=None): @@ -383,7 +392,7 @@ def test_rss_context_dump(cfg): # Sanity-check the results for data in ctxs: - ksft_ne(set(data['indir']), {0}, "indir table is all zero") + ksft_ne(set(data.get('indir', [1])), {0}, "indir table is all zero") ksft_ne(set(data.get('hkey', [1])), {0}, "key is all zero") # More specific checks @@ -456,6 +465,8 @@ def test_rss_context(cfg, ctx_cnt=1, create_with_cfg=None): raise ksft_pr(f"Failed to create context {i + 1}, trying to test what we got") ctx_cnt = i + if cfg.context_cnt is None: + cfg.context_cnt = ctx_cnt break _rss_key_check(cfg, context=ctx_id) @@ -511,8 +522,7 @@ def test_rss_context_out_of_order(cfg, ctx_cnt=4): """ require_ntuple(cfg) - - requested_ctx_cnt = ctx_cnt + require_context_cnt(cfg, 4) # Try to allocate more queues when necessary qcnt = len(_get_rx_cnts(cfg)) @@ -577,9 +587,6 @@ def test_rss_context_out_of_order(cfg, ctx_cnt=4): remove_ctx(-1) check_traffic() - if requested_ctx_cnt != ctx_cnt: - raise KsftSkipEx(f"Tested only {ctx_cnt} contexts, wanted {requested_ctx_cnt}") - def test_rss_context_overlap(cfg, other_ctx=0): """ @@ -588,6 +595,8 @@ def test_rss_context_overlap(cfg, other_ctx=0): """ require_ntuple(cfg) + if other_ctx: + require_context_cnt(cfg, 2) queue_cnt = len(_get_rx_cnts(cfg)) if queue_cnt < 4: @@ -649,6 +658,29 @@ def test_rss_context_overlap2(cfg): test_rss_context_overlap(cfg, True) +def test_flow_add_context_missing(cfg): + """ + Test that we are not allowed to add a rule pointing to an RSS context + which was never created. + """ + + require_ntuple(cfg) + + # Find a context which doesn't exist + for ctx_id in range(1, 100): + try: + get_rss(cfg, context=ctx_id) + except CmdExitFailure: + break + + with ksft_raises(CmdExitFailure) as cm: + flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port 1234 context {ctx_id}" + ntuple_id = ethtool_create(cfg, "-N", flow) + ethtool(f"-N {cfg.ifname} delete {ntuple_id}") + if cm.exception: + ksft_in('Invalid argument', cm.exception.cmd.stderr) + + def test_delete_rss_context_busy(cfg): """ Test that deletion returns -EBUSY when an rss context is being used @@ -717,6 +749,7 @@ def test_rss_ntuple_addition(cfg): def main() -> None: with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + cfg.context_cnt = None cfg.ethnl = EthtoolFamily() cfg.netdevnl = NetdevFamily() @@ -726,6 +759,7 @@ def main() -> None: test_rss_context_dump, test_rss_context_queue_reconfigure, test_rss_context_overlap, test_rss_context_overlap2, test_rss_context_out_of_order, test_rss_context4_create_with_cfg, + test_flow_add_context_missing, test_delete_rss_context_busy, test_rss_ntuple_addition], args=(cfg, )) ksft_exit() diff --git a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py new file mode 100755 index 000000000000..53bb08cc29ec --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import multiprocessing +import socket +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, cmd, fd_read_timeout +from lib.py import NetDrvEpEnv +from lib.py import EthtoolFamily, NetdevFamily +from lib.py import KsftSkipEx, KsftFailEx +from lib.py import rand_port + + +def traffic(cfg, local_port, remote_port, ipver): + af_inet = socket.AF_INET if ipver == "4" else socket.AF_INET6 + sock = socket.socket(af_inet, socket.SOCK_DGRAM) + sock.bind(("", local_port)) + sock.connect((cfg.remote_addr_v[ipver], remote_port)) + tgt = f"{ipver}:[{cfg.addr_v[ipver]}]:{local_port},sourceport={remote_port}" + cmd("echo a | socat - UDP" + tgt, host=cfg.remote) + fd_read_timeout(sock.fileno(), 5) + return sock.getsockopt(socket.SOL_SOCKET, socket.SO_INCOMING_CPU) + + +def test_rss_input_xfrm(cfg, ipver): + """ + Test symmetric input_xfrm. + If symmetric RSS hash is configured, send traffic twice, swapping the + src/dst UDP ports, and verify that the same queue is receiving the traffic + in both cases (IPs are constant). + """ + + if multiprocessing.cpu_count() < 2: + raise KsftSkipEx("Need at least two CPUs to test symmetric RSS hash") + + input_xfrm = cfg.ethnl.rss_get( + {'header': {'dev-name': cfg.ifname}}).get('input_xfrm') + + # Check for symmetric xor/or-xor + if not input_xfrm or (input_xfrm != 1 and input_xfrm != 2): + raise KsftSkipEx("Symmetric RSS hash not requested") + + cpus = set() + successful = 0 + for _ in range(100): + try: + port1 = rand_port(socket.SOCK_DGRAM) + port2 = rand_port(socket.SOCK_DGRAM) + cpu1 = traffic(cfg, port1, port2, ipver) + cpu2 = traffic(cfg, port2, port1, ipver) + cpus.update([cpu1, cpu2]) + ksft_eq( + cpu1, cpu2, comment=f"Received traffic on different cpus with ports ({port1 = }, {port2 = }) while symmetric hash is configured") + + successful += 1 + if successful == 10: + break + except: + continue + else: + raise KsftFailEx("Failed to run traffic") + + ksft_ge(len(cpus), 2, + comment=f"Received traffic on less than two cpus {cpus = }") + + +def test_rss_input_xfrm_ipv4(cfg): + cfg.require_ipver("4") + test_rss_input_xfrm(cfg, "4") + + +def test_rss_input_xfrm_ipv6(cfg): + cfg.require_ipver("6") + test_rss_input_xfrm(cfg, "6") + + +def main() -> None: + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + cfg.ethnl = EthtoolFamily() + cfg.netdevnl = NetdevFamily() + + ksft_run([test_rss_input_xfrm_ipv4, test_rss_input_xfrm_ipv6], + args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/tso.py b/tools/testing/selftests/drivers/net/hw/tso.py new file mode 100755 index 000000000000..e1ecb92f79d9 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/tso.py @@ -0,0 +1,241 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +"""Run the tools/testing/selftests/net/csum testsuite.""" + +import fcntl +import socket +import struct +import termios +import time + +from lib.py import ksft_pr, ksft_run, ksft_exit, KsftSkipEx, KsftXfailEx +from lib.py import ksft_eq, ksft_ge, ksft_lt +from lib.py import EthtoolFamily, NetdevFamily, NetDrvEpEnv +from lib.py import bkg, cmd, defer, ethtool, ip, rand_port, wait_port_listen + + +def sock_wait_drain(sock, max_wait=1000): + """Wait for all pending write data on the socket to get ACKed.""" + for _ in range(max_wait): + one = b'\0' * 4 + outq = fcntl.ioctl(sock.fileno(), termios.TIOCOUTQ, one) + outq = struct.unpack("I", outq)[0] + if outq == 0: + break + time.sleep(0.01) + ksft_eq(outq, 0) + + +def tcp_sock_get_retrans(sock): + """Get the number of retransmissions for the TCP socket.""" + info = sock.getsockopt(socket.SOL_TCP, socket.TCP_INFO, 512) + return struct.unpack("I", info[100:104])[0] + + +def run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso): + cfg.require_cmd("socat", remote=True) + + port = rand_port() + listen_cmd = f"socat -{ipver} -t 2 -u TCP-LISTEN:{port},reuseport /dev/null,ignoreeof" + + with bkg(listen_cmd, host=cfg.remote) as nc: + wait_port_listen(port, host=cfg.remote) + + if ipver == "4": + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.connect((remote_v4, port)) + else: + sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM) + sock.connect((remote_v6, port)) + + # Small send to make sure the connection is working. + sock.send("ping".encode()) + sock_wait_drain(sock) + + # Send 4MB of data, record the LSO packet count. + qstat_old = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] + buf = b"0" * 1024 * 1024 * 4 + sock.send(buf) + sock_wait_drain(sock) + qstat_new = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] + + # No math behind the 10 here, but try to catch cases where + # TCP falls back to non-LSO. + ksft_lt(tcp_sock_get_retrans(sock), 10) + sock.close() + + # Check that at least 90% of the data was sent as LSO packets. + # System noise may cause false negatives. Also header overheads + # will add up to 5% of extra packes... The check is best effort. + total_lso_wire = len(buf) * 0.90 // cfg.dev["mtu"] + total_lso_super = len(buf) * 0.90 // cfg.dev["tso_max_size"] + if should_lso: + if cfg.have_stat_super_count: + ksft_ge(qstat_new['tx-hw-gso-packets'] - + qstat_old['tx-hw-gso-packets'], + total_lso_super, + comment="Number of LSO super-packets with LSO enabled") + if cfg.have_stat_wire_count: + ksft_ge(qstat_new['tx-hw-gso-wire-packets'] - + qstat_old['tx-hw-gso-wire-packets'], + total_lso_wire, + comment="Number of LSO wire-packets with LSO enabled") + else: + if cfg.have_stat_super_count: + ksft_lt(qstat_new['tx-hw-gso-packets'] - + qstat_old['tx-hw-gso-packets'], + 15, comment="Number of LSO super-packets with LSO disabled") + if cfg.have_stat_wire_count: + ksft_lt(qstat_new['tx-hw-gso-wire-packets'] - + qstat_old['tx-hw-gso-wire-packets'], + 500, comment="Number of LSO wire-packets with LSO disabled") + + +def build_tunnel(cfg, outer_ipver, tun_info): + local_v4 = NetDrvEpEnv.nsim_v4_pfx + "1" + local_v6 = NetDrvEpEnv.nsim_v6_pfx + "1" + remote_v4 = NetDrvEpEnv.nsim_v4_pfx + "2" + remote_v6 = NetDrvEpEnv.nsim_v6_pfx + "2" + + local_addr = cfg.addr_v[outer_ipver] + remote_addr = cfg.remote_addr_v[outer_ipver] + + tun_type = tun_info[0] + tun_arg = tun_info[2] + ip(f"link add {tun_type}-ksft type {tun_type} {tun_arg} local {local_addr} remote {remote_addr} dev {cfg.ifname}") + defer(ip, f"link del {tun_type}-ksft") + ip(f"link set dev {tun_type}-ksft up") + ip(f"addr add {local_v4}/24 dev {tun_type}-ksft") + ip(f"addr add {local_v6}/64 dev {tun_type}-ksft") + + ip(f"link add {tun_type}-ksft type {tun_type} {tun_arg} local {remote_addr} remote {local_addr} dev {cfg.remote_ifname}", + host=cfg.remote) + defer(ip, f"link del {tun_type}-ksft", host=cfg.remote) + ip(f"link set dev {tun_type}-ksft up", host=cfg.remote) + ip(f"addr add {remote_v4}/24 dev {tun_type}-ksft", host=cfg.remote) + ip(f"addr add {remote_v6}/64 dev {tun_type}-ksft", host=cfg.remote) + + return remote_v4, remote_v6 + + +def test_builder(name, cfg, outer_ipver, feature, tun=None, inner_ipver=None): + """Construct specific tests from the common template.""" + def f(cfg): + cfg.require_ipver(outer_ipver) + + if not cfg.have_stat_super_count and \ + not cfg.have_stat_wire_count: + raise KsftSkipEx(f"Device does not support LSO queue stats") + + ipver = outer_ipver + if tun: + remote_v4, remote_v6 = build_tunnel(cfg, ipver, tun) + ipver = inner_ipver + else: + remote_v4 = cfg.remote_addr_v["4"] + remote_v6 = cfg.remote_addr_v["6"] + + tun_partial = tun and tun[1] + # Tunnel which can silently fall back to gso-partial + has_gso_partial = tun and 'tx-gso-partial' in cfg.features + + # For TSO4 via partial we need mangleid + if ipver == "4" and feature in cfg.partial_features: + ksft_pr("Testing with mangleid enabled") + if 'tx-tcp-mangleid-segmentation' not in cfg.features: + ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation on") + defer(ethtool, f"-K {cfg.ifname} tx-tcp-mangleid-segmentation off") + + # First test without the feature enabled. + ethtool(f"-K {cfg.ifname} {feature} off") + if has_gso_partial: + ethtool(f"-K {cfg.ifname} tx-gso-partial off") + run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=False) + + # Now test with the feature enabled. + # For compatible tunnels only - just GSO partial, not specific feature. + if has_gso_partial: + ethtool(f"-K {cfg.ifname} tx-gso-partial on") + run_one_stream(cfg, ipver, remote_v4, remote_v6, + should_lso=tun_partial) + + # Full feature enabled. + if feature in cfg.features: + ethtool(f"-K {cfg.ifname} {feature} on") + run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=True) + else: + raise KsftXfailEx(f"Device does not support {feature}") + + f.__name__ = name + ((outer_ipver + "_") if tun else "") + "ipv" + inner_ipver + return f + + +def query_nic_features(cfg) -> None: + """Query and cache the NIC features.""" + cfg.have_stat_super_count = False + cfg.have_stat_wire_count = False + + cfg.features = set() + features = cfg.ethnl.features_get({"header": {"dev-index": cfg.ifindex}}) + for f in features["active"]["bits"]["bit"]: + cfg.features.add(f["name"]) + + # Check which features are supported via GSO partial + cfg.partial_features = set() + if 'tx-gso-partial' in cfg.features: + ethtool(f"-K {cfg.ifname} tx-gso-partial off") + + no_partial = set() + features = cfg.ethnl.features_get({"header": {"dev-index": cfg.ifindex}}) + for f in features["active"]["bits"]["bit"]: + no_partial.add(f["name"]) + cfg.partial_features = cfg.features - no_partial + ethtool(f"-K {cfg.ifname} tx-gso-partial on") + + stats = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True) + if stats: + if 'tx-hw-gso-packets' in stats[0]: + ksft_pr("Detected qstat for LSO super-packets") + cfg.have_stat_super_count = True + if 'tx-hw-gso-wire-packets' in stats[0]: + ksft_pr("Detected qstat for LSO wire-packets") + cfg.have_stat_wire_count = True + + +def main() -> None: + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + cfg.ethnl = EthtoolFamily() + cfg.netnl = NetdevFamily() + + query_nic_features(cfg) + + test_info = ( + # name, v4/v6 ethtool_feature tun:(type, partial, args) + ("", "4", "tx-tcp-segmentation", None), + ("", "6", "tx-tcp6-segmentation", None), + ("vxlan", "", "tx-udp_tnl-segmentation", ("vxlan", True, "id 100 dstport 4789 noudpcsum")), + ("vxlan_csum", "", "tx-udp_tnl-csum-segmentation", ("vxlan", False, "id 100 dstport 4789 udpcsum")), + ("gre", "4", "tx-gre-segmentation", ("ipgre", False, "")), + ("gre", "6", "tx-gre-segmentation", ("ip6gre", False, "")), + ) + + cases = [] + for outer_ipver in ["4", "6"]: + for info in test_info: + # Skip if test which only works for a specific IP version + if info[1] and outer_ipver != info[1]: + continue + + cases.append(test_builder(info[0], cfg, outer_ipver, info[2], + tun=info[3], inner_ipver="4")) + if info[3]: + cases.append(test_builder(info[0], cfg, outer_ipver, info[2], + tun=info[3], inner_ipver="6")) + + ksft_run(cases=cases, args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/xdp_dummy.bpf.c b/tools/testing/selftests/drivers/net/hw/xdp_dummy.bpf.c new file mode 100644 index 000000000000..d988b2e0cee8 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/xdp_dummy.bpf.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define KBUILD_MODNAME "xdp_dummy" +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +SEC("xdp") +int xdp_dummy_prog(struct xdp_md *ctx) +{ + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py index 987e452d3a45..fd4d674e6c72 100644 --- a/tools/testing/selftests/drivers/net/lib/py/env.py +++ b/tools/testing/selftests/drivers/net/lib/py/env.py @@ -10,42 +10,68 @@ from lib.py import NetNS, NetdevSimDev from .remote import Remote -def _load_env_file(src_path): - env = os.environ.copy() +class NetDrvEnvBase: + """ + Base class for a NIC / host envirnoments + """ + def __init__(self, src_path): + self.src_path = src_path + self.env = self._load_env_file() - src_dir = Path(src_path).parent.resolve() - if not (src_dir / "net.config").exists(): + def rpath(self, path): + """ + Get an absolute path to a file based on a path relative to the directory + containing the test which constructed env. + + For example, if the test.py is in the same directory as + a binary (built from helper.c), the test can use env.rpath("helper") + to get the absolute path to the binary + """ + src_dir = Path(self.src_path).parent.resolve() + return (src_dir / path).as_posix() + + def _load_env_file(self): + env = os.environ.copy() + + src_dir = Path(self.src_path).parent.resolve() + if not (src_dir / "net.config").exists(): + return ksft_setup(env) + + with open((src_dir / "net.config").as_posix(), 'r') as fp: + for line in fp.readlines(): + full_file = line + # Strip comments + pos = line.find("#") + if pos >= 0: + line = line[:pos] + line = line.strip() + if not line: + continue + pair = line.split('=', maxsplit=1) + if len(pair) != 2: + raise Exception("Can't parse configuration line:", full_file) + env[pair[0]] = pair[1] return ksft_setup(env) - with open((src_dir / "net.config").as_posix(), 'r') as fp: - for line in fp.readlines(): - full_file = line - # Strip comments - pos = line.find("#") - if pos >= 0: - line = line[:pos] - line = line.strip() - if not line: - continue - pair = line.split('=', maxsplit=1) - if len(pair) != 2: - raise Exception("Can't parse configuration line:", full_file) - env[pair[0]] = pair[1] - return ksft_setup(env) - - -class NetDrvEnv: + +class NetDrvEnv(NetDrvEnvBase): """ Class for a single NIC / host env, with no remote end """ - def __init__(self, src_path, **kwargs): - self._ns = None + def __init__(self, src_path, nsim_test=None, **kwargs): + super().__init__(src_path) - self.env = _load_env_file(src_path) + self._ns = None if 'NETIF' in self.env: - self.dev = ip("link show dev " + self.env['NETIF'], json=True)[0] + if nsim_test is True: + raise KsftXfailEx("Test only works on netdevsim") + + self.dev = ip("-d link show dev " + self.env['NETIF'], json=True)[0] else: + if nsim_test is False: + raise KsftXfailEx("Test does not work on netdevsim") + self._ns = NetdevSimDev(**kwargs) self.dev = self._ns.nsims[0].dev self.ifname = self.dev['ifname'] @@ -68,7 +94,7 @@ class NetDrvEnv: self._ns = None -class NetDrvEpEnv: +class NetDrvEpEnv(NetDrvEnvBase): """ Class for an environment with a local device and "remote endpoint" which can be used to send traffic in. @@ -82,8 +108,7 @@ class NetDrvEpEnv: nsim_v6_pfx = "2001:db8::" def __init__(self, src_path, nsim_test=None): - - self.env = _load_env_file(src_path) + super().__init__(src_path) self._stats_settle_time = None @@ -94,17 +119,20 @@ class NetDrvEpEnv: self._ns = None self._ns_peer = None + self.addr_v = { "4": None, "6": None } + self.remote_addr_v = { "4": None, "6": None } + if "NETIF" in self.env: if nsim_test is True: raise KsftXfailEx("Test only works on netdevsim") self._check_env() - self.dev = ip("link show dev " + self.env['NETIF'], json=True)[0] + self.dev = ip("-d link show dev " + self.env['NETIF'], json=True)[0] - self.v4 = self.env.get("LOCAL_V4") - self.v6 = self.env.get("LOCAL_V6") - self.remote_v4 = self.env.get("REMOTE_V4") - self.remote_v6 = self.env.get("REMOTE_V6") + self.addr_v["4"] = self.env.get("LOCAL_V4") + self.addr_v["6"] = self.env.get("LOCAL_V6") + self.remote_addr_v["4"] = self.env.get("REMOTE_V4") + self.remote_addr_v["6"] = self.env.get("REMOTE_V6") kind = self.env["REMOTE_TYPE"] args = self.env["REMOTE_ARGS"] else: @@ -115,26 +143,29 @@ class NetDrvEpEnv: self.dev = self._ns.nsims[0].dev - self.v4 = self.nsim_v4_pfx + "1" - self.v6 = self.nsim_v6_pfx + "1" - self.remote_v4 = self.nsim_v4_pfx + "2" - self.remote_v6 = self.nsim_v6_pfx + "2" + self.addr_v["4"] = self.nsim_v4_pfx + "1" + self.addr_v["6"] = self.nsim_v6_pfx + "1" + self.remote_addr_v["4"] = self.nsim_v4_pfx + "2" + self.remote_addr_v["6"] = self.nsim_v6_pfx + "2" kind = "netns" args = self._netns.name self.remote = Remote(kind, args, src_path) - self.addr = self.v6 if self.v6 else self.v4 - self.remote_addr = self.remote_v6 if self.remote_v6 else self.remote_v4 + self.addr_ipver = "6" if self.addr_v["6"] else "4" + self.addr = self.addr_v[self.addr_ipver] + self.remote_addr = self.remote_addr_v[self.addr_ipver] - self.addr_ipver = "6" if self.v6 else "4" # Bracketed addresses, some commands need IPv6 to be inside [] - self.baddr = f"[{self.v6}]" if self.v6 else self.v4 - self.remote_baddr = f"[{self.remote_v6}]" if self.remote_v6 else self.remote_v4 + self.baddr = f"[{self.addr_v['6']}]" if self.addr_v["6"] else self.addr_v["4"] + self.remote_baddr = f"[{self.remote_addr_v['6']}]" if self.remote_addr_v["6"] else self.remote_addr_v["4"] self.ifname = self.dev['ifname'] self.ifindex = self.dev['ifindex'] + # resolve remote interface name + self.remote_ifname = self.resolve_remote_ifc() + self._required_cmd = {} def create_local(self): @@ -181,6 +212,18 @@ class NetDrvEpEnv: raise Exception("Invalid environment, missing configuration:", missing, "Please see tools/testing/selftests/drivers/net/README.rst") + def resolve_remote_ifc(self): + v4 = v6 = None + if self.remote_addr_v["4"]: + v4 = ip("addr show to " + self.remote_addr_v["4"], json=True, host=self.remote) + if self.remote_addr_v["6"]: + v6 = ip("addr show to " + self.remote_addr_v["6"], json=True, host=self.remote) + if v4 and v6 and v4[0]["ifname"] != v6[0]["ifname"]: + raise Exception("Can't resolve remote interface name, v4 and v6 don't match") + if (v4 and len(v4) > 1) or (v6 and len(v6) > 1): + raise Exception("Can't resolve remote interface name, multiple interfaces match") + return v6[0]["ifname"] if v6 else v4[0]["ifname"] + def __enter__(self): return self @@ -204,13 +247,9 @@ class NetDrvEpEnv: del self.remote self.remote = None - def require_v4(self): - if not self.v4 or not self.remote_v4: - raise KsftSkipEx("Test requires IPv4 connectivity") - - def require_v6(self): - if not self.v6 or not self.remote_v6: - raise KsftSkipEx("Test requires IPv6 connectivity") + def require_ipver(self, ipver): + if not self.addr_v[ipver] or not self.remote_addr_v[ipver]: + raise KsftSkipEx(f"Test requires IPv{ipver} connectivity") def _require_cmd(self, comm, key, host=None): cached = self._required_cmd.get(comm, {}) diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh index 3acaba41ac7b..3c96b022954d 100644 --- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh +++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh @@ -110,6 +110,13 @@ function create_dynamic_target() { echo 1 > "${NETCONS_PATH}"/enabled } +# Do not append the release to the header of the message +function disable_release_append() { + echo 0 > "${NETCONS_PATH}"/enabled + echo 0 > "${NETCONS_PATH}"/release + echo 1 > "${NETCONS_PATH}"/enabled +} + function cleanup() { local NSIM_DEV_SYS_DEL="/sys/bus/netdevsim/del_device" @@ -223,3 +230,20 @@ function check_for_dependencies() { exit "${ksft_skip}" fi } + +function check_for_taskset() { + if ! which taskset > /dev/null ; then + echo "SKIP: taskset(1) is not available" >&2 + exit "${ksft_skip}" + fi +} + +# This is necessary if running multiple tests in a row +function pkill_socat() { + PROCESS_NAME="socat UDP-LISTEN:6666,fork ${OUTPUT_FILE}" + # socat runs under timeout(1), kill it if it is still alive + # do not fail if socat doesn't exist anymore + set +e + pkill -f "${PROCESS_NAME}" + set -e +} diff --git a/tools/testing/selftests/drivers/net/netcons_fragmented_msg.sh b/tools/testing/selftests/drivers/net/netcons_fragmented_msg.sh new file mode 100755 index 000000000000..4a71e01a230c --- /dev/null +++ b/tools/testing/selftests/drivers/net/netcons_fragmented_msg.sh @@ -0,0 +1,122 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-2.0 + +# Test netconsole's message fragmentation functionality. +# +# When a message exceeds the maximum packet size, netconsole splits it into +# multiple fragments for transmission. This test verifies: +# - Correct fragmentation of large messages +# - Proper reassembly of fragments at the receiver +# - Preservation of userdata across fragments +# - Behavior with and without kernel release version appending +# +# Author: Breno Leitao <leitao@debian.org> + +set -euo pipefail + +SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh + +modprobe netdevsim 2> /dev/null || true +modprobe netconsole 2> /dev/null || true + +# The content of kmsg will be save to the following file +OUTPUT_FILE="/tmp/${TARGET}" + +# set userdata to a long value. In this case, it is "1-2-3-4...50-" +USERDATA_VALUE=$(printf -- '%.2s-' {1..60}) + +# Convert the header string in a regexp, so, we can remove +# the second header as well. +# A header looks like "13,468,514729715,-,ncfrag=0/1135;". If +# release is appended, you might find something like:L +# "6.13.0-04048-g4f561a87745a,13,468,514729715,-,ncfrag=0/1135;" +function header_to_regex() { + # header is everything before ; + local HEADER="${1}" + REGEX=$(echo "${HEADER}" | cut -d'=' -f1) + echo "${REGEX}=[0-9]*\/[0-9]*;" +} + +# We have two headers in the message. Remove both to get the full message, +# and extract the full message. +function extract_msg() { + local MSGFILE="${1}" + # Extract the header, which is the very first thing that arrives in the + # first list. + HEADER=$(sed -n '1p' "${MSGFILE}" | cut -d';' -f1) + HEADER_REGEX=$(header_to_regex "${HEADER}") + + # Remove the two headers from the received message + # This will return the message without any header, similarly to what + # was sent. + sed "s/""${HEADER_REGEX}""//g" "${MSGFILE}" +} + +# Validate the message, which has two messages glued together. +# unwrap them to make sure all the characters were transmitted. +# File will look like the following: +# 13,468,514729715,-,ncfrag=0/1135;<message> +# key=<part of key>-13,468,514729715,-,ncfrag=967/1135;<rest of the key> +function validate_fragmented_result() { + # Discard the netconsole headers, and assemble the full message + RCVMSG=$(extract_msg "${1}") + + # check for the main message + if ! echo "${RCVMSG}" | grep -q "${MSG}"; then + echo "Message body doesn't match." >&2 + echo "msg received=" "${RCVMSG}" >&2 + exit "${ksft_fail}" + fi + + # check userdata + if ! echo "${RCVMSG}" | grep -q "${USERDATA_VALUE}"; then + echo "message userdata doesn't match" >&2 + echo "msg received=" "${RCVMSG}" >&2 + exit "${ksft_fail}" + fi + # test passed. hooray +} + +# Check for basic system dependency and exit if not found +check_for_dependencies +# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5) +echo "6 5" > /proc/sys/kernel/printk +# Remove the namespace, interfaces and netconsole target on exit +trap cleanup EXIT +# Create one namespace and two interfaces +set_network +# Create a dynamic target for netconsole +create_dynamic_target +# Set userdata "key" with the "value" value +set_user_data + + +# TEST 1: Send message and userdata. They will fragment +# ======= +MSG=$(printf -- 'MSG%.3s=' {1..150}) + +# Listen for netconsole port inside the namespace and destination interface +listen_port_and_save_to "${OUTPUT_FILE}" & +# Wait for socat to start and listen to the port. +wait_local_port_listen "${NAMESPACE}" "${PORT}" udp +# Send the message +echo "${MSG}: ${TARGET}" > /dev/kmsg +# Wait until socat saves the file to disk +busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" +# Check if the message was not corrupted +validate_fragmented_result "${OUTPUT_FILE}" + +# TEST 2: Test with smaller message, and without release appended +# ======= +MSG=$(printf -- 'FOOBAR%.3s=' {1..100}) +# Let's disable release and test again. +disable_release_append + +listen_port_and_save_to "${OUTPUT_FILE}" & +wait_local_port_listen "${NAMESPACE}" "${PORT}" udp +echo "${MSG}: ${TARGET}" > /dev/kmsg +busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" +validate_fragmented_result "${OUTPUT_FILE}" +exit "${ksft_pass}" diff --git a/tools/testing/selftests/drivers/net/netcons_sysdata.sh b/tools/testing/selftests/drivers/net/netcons_sysdata.sh new file mode 100755 index 000000000000..a737e377bf08 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netcons_sysdata.sh @@ -0,0 +1,242 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-2.0 + +# A test that makes sure that sysdata runtime CPU data is properly set +# when a message is sent. +# +# There are 3 different tests, every time sent using a random CPU. +# - Test #1 +# * Only enable cpu_nr sysdata feature. +# - Test #2 +# * Keep cpu_nr sysdata feature enable and enable userdata. +# - Test #3 +# * keep userdata enabled, and disable sysdata cpu_nr feature. +# +# Author: Breno Leitao <leitao@debian.org> + +set -euo pipefail + +SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh + +# Enable the sysdata cpu_nr feature +function set_cpu_nr() { + if [[ ! -f "${NETCONS_PATH}/userdata/cpu_nr_enabled" ]] + then + echo "Populate CPU configfs path not available in ${NETCONS_PATH}/userdata/cpu_nr_enabled" >&2 + exit "${ksft_skip}" + fi + + echo 1 > "${NETCONS_PATH}/userdata/cpu_nr_enabled" +} + +# Enable the taskname to be appended to sysdata +function set_taskname() { + if [[ ! -f "${NETCONS_PATH}/userdata/taskname_enabled" ]] + then + echo "Not able to enable taskname sysdata append. Configfs not available in ${NETCONS_PATH}/userdata/taskname_enabled" >&2 + exit "${ksft_skip}" + fi + + echo 1 > "${NETCONS_PATH}/userdata/taskname_enabled" +} + +# Enable the release to be appended to sysdata +function set_release() { + if [[ ! -f "${NETCONS_PATH}/userdata/release_enabled" ]] + then + echo "Not able to enable release sysdata append. Configfs not available in ${NETCONS_PATH}/userdata/release_enabled" >&2 + exit "${ksft_skip}" + fi + + echo 1 > "${NETCONS_PATH}/userdata/release_enabled" +} + +# Disable the sysdata cpu_nr feature +function unset_cpu_nr() { + echo 0 > "${NETCONS_PATH}/userdata/cpu_nr_enabled" +} + +# Once called, taskname=<..> will not be appended anymore +function unset_taskname() { + echo 0 > "${NETCONS_PATH}/userdata/taskname_enabled" +} + +function unset_release() { + echo 0 > "${NETCONS_PATH}/userdata/release_enabled" +} + +# Test if MSG contains sysdata +function validate_sysdata() { + # OUTPUT_FILE will contain something like: + # 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM + # userdatakey=userdatavalue + # cpu=X + # taskname=<taskname> + + # Echo is what this test uses to create the message. See runtest() + # function + SENDER="echo" + + if [ ! -f "$OUTPUT_FILE" ]; then + echo "FAIL: File was not generated." >&2 + exit "${ksft_fail}" + fi + + if ! grep -q "${MSG}" "${OUTPUT_FILE}"; then + echo "FAIL: ${MSG} not found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi + + # Check if cpu=XX exists in the file and matches the one used + # in taskset(1) + if ! grep -q "cpu=${CPU}\+" "${OUTPUT_FILE}"; then + echo "FAIL: 'cpu=${CPU}' not found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi + + if ! grep -q "taskname=${SENDER}" "${OUTPUT_FILE}"; then + echo "FAIL: 'taskname=echo' not found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi + + rm "${OUTPUT_FILE}" + pkill_socat +} + +function validate_release() { + RELEASE=$(uname -r) + + if [ ! -f "$OUTPUT_FILE" ]; then + echo "FAIL: File was not generated." >&2 + exit "${ksft_fail}" + fi + + if ! grep -q "release=${RELEASE}" "${OUTPUT_FILE}"; then + echo "FAIL: 'release=${RELEASE}' not found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi +} + +# Test if MSG content exists in OUTPUT_FILE but no `cpu=` and `taskname=` +# strings +function validate_no_sysdata() { + if [ ! -f "$OUTPUT_FILE" ]; then + echo "FAIL: File was not generated." >&2 + exit "${ksft_fail}" + fi + + if ! grep -q "${MSG}" "${OUTPUT_FILE}"; then + echo "FAIL: ${MSG} not found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi + + if grep -q "cpu=" "${OUTPUT_FILE}"; then + echo "FAIL: 'cpu= found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi + + if grep -q "taskname=" "${OUTPUT_FILE}"; then + echo "FAIL: 'taskname= found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi + + if grep -q "release=" "${OUTPUT_FILE}"; then + echo "FAIL: 'release= found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi + + rm "${OUTPUT_FILE}" +} + +# Start socat, send the message and wait for the file to show up in the file +# system +function runtest { + # Listen for netconsole port inside the namespace and destination + # interface + listen_port_and_save_to "${OUTPUT_FILE}" & + # Wait for socat to start and listen to the port. + wait_local_port_listen "${NAMESPACE}" "${PORT}" udp + # Send the message + taskset -c "${CPU}" echo "${MSG}: ${TARGET}" > /dev/kmsg + # Wait until socat saves the file to disk + busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" +} + +# ========== # +# Start here # +# ========== # + +modprobe netdevsim 2> /dev/null || true +modprobe netconsole 2> /dev/null || true + +# Check for basic system dependency and exit if not found +check_for_dependencies +# This test also depends on taskset(1). Check for it before starting the test +check_for_taskset + +# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5) +echo "6 5" > /proc/sys/kernel/printk +# Remove the namespace, interfaces and netconsole target on exit +trap cleanup EXIT +# Create one namespace and two interfaces +set_network +# Create a dynamic target for netconsole +create_dynamic_target + +#==================================================== +# TEST #1 +# Send message from a random CPU +#==================================================== +# Random CPU in the system +CPU=$((RANDOM % $(nproc))) +OUTPUT_FILE="/tmp/${TARGET}_1" +MSG="Test #1 from CPU${CPU}" +# Enable the auto population of cpu_nr +set_cpu_nr +# Enable taskname to be appended to sysdata +set_taskname +set_release +runtest +# Make sure the message was received in the dst part +# and exit +validate_release +validate_sysdata + +#==================================================== +# TEST #2 +# This test now adds userdata together with sysdata +# =================================================== +# Get a new random CPU +CPU=$((RANDOM % $(nproc))) +OUTPUT_FILE="/tmp/${TARGET}_2" +MSG="Test #2 from CPU${CPU}" +set_user_data +runtest +validate_release +validate_sysdata + +# =================================================== +# TEST #3 +# Unset all sysdata, fail if any userdata is set +# =================================================== +CPU=$((RANDOM % $(nproc))) +OUTPUT_FILE="/tmp/${TARGET}_3" +MSG="Test #3 from CPU${CPU}" +unset_cpu_nr +unset_taskname +unset_release +runtest +# At this time, cpu= shouldn't be present in the msg +validate_no_sysdata + +exit "${ksft_pass}" diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py index fc69bfcc37c4..93120e86e102 100755 --- a/tools/testing/selftests/drivers/net/ping.py +++ b/tools/testing/selftests/drivers/net/ping.py @@ -13,20 +13,20 @@ remote_ifname="" no_sleep=False def _test_v4(cfg) -> None: - cfg.require_v4() + cfg.require_ipver("4") - cmd(f"ping -c 1 -W0.5 {cfg.remote_v4}") - cmd(f"ping -c 1 -W0.5 {cfg.v4}", host=cfg.remote) - cmd(f"ping -s 65000 -c 1 -W0.5 {cfg.remote_v4}") - cmd(f"ping -s 65000 -c 1 -W0.5 {cfg.v4}", host=cfg.remote) + cmd("ping -c 1 -W0.5 " + cfg.remote_addr_v["4"]) + cmd("ping -c 1 -W0.5 " + cfg.addr_v["4"], host=cfg.remote) + cmd("ping -s 65000 -c 1 -W0.5 " + cfg.remote_addr_v["4"]) + cmd("ping -s 65000 -c 1 -W0.5 " + cfg.addr_v["4"], host=cfg.remote) def _test_v6(cfg) -> None: - cfg.require_v6() + cfg.require_ipver("6") - cmd(f"ping -c 1 -W5 {cfg.remote_v6}") - cmd(f"ping -c 1 -W5 {cfg.v6}", host=cfg.remote) - cmd(f"ping -s 65000 -c 1 -W0.5 {cfg.remote_v6}") - cmd(f"ping -s 65000 -c 1 -W0.5 {cfg.v6}", host=cfg.remote) + cmd("ping -c 1 -W5 " + cfg.remote_addr_v["6"]) + cmd("ping -c 1 -W5 " + cfg.addr_v["6"], host=cfg.remote) + cmd("ping -s 65000 -c 1 -W0.5 " + cfg.remote_addr_v["6"]) + cmd("ping -s 65000 -c 1 -W0.5 " + cfg.addr_v["6"], host=cfg.remote) def _test_tcp(cfg) -> None: cfg.require_cmd("socat", remote=True) @@ -126,7 +126,7 @@ def get_interface_info(cfg) -> None: global remote_ifname global no_sleep - remote_info = cmd(f"ip -4 -o addr show to {cfg.remote_v4} | awk '{{print $2}}'", shell=True, host=cfg.remote).stdout + remote_info = cmd(f"ip -4 -o addr show to {cfg.remote_addr_v['4']} | awk '{{print $2}}'", shell=True, host=cfg.remote).stdout remote_ifname = remote_info.rstrip('\n') if remote_ifname == "": raise KsftFailEx('Can not get remote interface') diff --git a/tools/testing/selftests/drivers/net/queues.py b/tools/testing/selftests/drivers/net/queues.py index 8a518905a9f9..cae923f84f69 100755 --- a/tools/testing/selftests/drivers/net/queues.py +++ b/tools/testing/selftests/drivers/net/queues.py @@ -2,13 +2,15 @@ # SPDX-License-Identifier: GPL-2.0 from lib.py import ksft_disruptive, ksft_exit, ksft_run -from lib.py import ksft_eq, ksft_raises, KsftSkipEx +from lib.py import ksft_eq, ksft_not_in, ksft_raises, KsftSkipEx, KsftFailEx from lib.py import EthtoolFamily, NetdevFamily, NlError from lib.py import NetDrvEnv -from lib.py import cmd, defer, ip +from lib.py import bkg, cmd, defer, ip import errno import glob - +import os +import socket +import struct def sys_get_queues(ifname, qtype='rx') -> int: folders = glob.glob(f'/sys/class/net/{ifname}/queues/{qtype}-*') @@ -22,6 +24,40 @@ def nl_get_queues(cfg, nl, qtype='rx'): return None +def check_xsk(cfg, nl, xdp_queue_id=0) -> None: + # Probe for support + xdp = cmd(cfg.rpath("xdp_helper") + ' - -', fail=False) + if xdp.ret == 255: + raise KsftSkipEx('AF_XDP unsupported') + elif xdp.ret > 0: + raise KsftFailEx('unable to create AF_XDP socket') + + with bkg(f'{cfg.rpath("xdp_helper")} {cfg.ifindex} {xdp_queue_id}', + ksft_wait=3): + + rx = tx = False + + queues = nl.queue_get({'ifindex': cfg.ifindex}, dump=True) + if not queues: + raise KsftSkipEx("Netlink reports no queues") + + for q in queues: + if q['id'] == 0: + if q['type'] == 'rx': + rx = True + if q['type'] == 'tx': + tx = True + + ksft_eq(q.get('xsk', None), {}, + comment="xsk attr on queue we configured") + else: + ksft_not_in('xsk', q, + comment="xsk attr on queue we didn't configure") + + ksft_eq(rx, True) + ksft_eq(tx, True) + + def get_queues(cfg, nl) -> None: snl = NetdevFamily(recv_size=4096) @@ -80,7 +116,8 @@ def check_down(cfg, nl) -> None: def main() -> None: with NetDrvEnv(__file__, queue_count=100) as cfg: - ksft_run([get_queues, addremove_queues, check_down], args=(cfg, NetdevFamily())) + ksft_run([get_queues, addremove_queues, check_down, check_xsk], + args=(cfg, NetdevFamily())) ksft_exit() diff --git a/tools/testing/selftests/drivers/net/xdp_helper.c b/tools/testing/selftests/drivers/net/xdp_helper.c new file mode 100644 index 000000000000..aeed25914104 --- /dev/null +++ b/tools/testing/selftests/drivers/net/xdp_helper.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <linux/if_xdp.h> +#include <linux/if_link.h> +#include <net/if.h> +#include <inttypes.h> + +#define UMEM_SZ (1U << 16) +#define NUM_DESC (UMEM_SZ / 2048) + +/* Move this to a common header when reused! */ +static void ksft_ready(void) +{ + const char msg[7] = "ready\n"; + char *env_str; + int fd; + + env_str = getenv("KSFT_READY_FD"); + if (env_str) { + fd = atoi(env_str); + if (!fd) { + fprintf(stderr, "invalid KSFT_READY_FD = '%s'\n", + env_str); + return; + } + } else { + fd = STDOUT_FILENO; + } + + write(fd, msg, sizeof(msg)); + if (fd != STDOUT_FILENO) + close(fd); +} + +static void ksft_wait(void) +{ + char *env_str; + char byte; + int fd; + + env_str = getenv("KSFT_WAIT_FD"); + if (env_str) { + fd = atoi(env_str); + if (!fd) { + fprintf(stderr, "invalid KSFT_WAIT_FD = '%s'\n", + env_str); + return; + } + } else { + /* Not running in KSFT env, wait for input from STDIN instead */ + fd = STDIN_FILENO; + } + + read(fd, &byte, sizeof(byte)); + if (fd != STDIN_FILENO) + close(fd); +} + +/* this is a simple helper program that creates an XDP socket and does the + * minimum necessary to get bind() to succeed. + * + * this test program is not intended to actually process packets, but could be + * extended in the future if that is actually needed. + * + * it is used by queues.py to ensure the xsk netlinux attribute is set + * correctly. + */ +int main(int argc, char **argv) +{ + struct xdp_umem_reg umem_reg = { 0 }; + struct sockaddr_xdp sxdp = { 0 }; + int num_desc = NUM_DESC; + void *umem_area; + int ifindex; + int sock_fd; + int queue; + + if (argc != 3) { + fprintf(stderr, "Usage: %s ifindex queue_id\n", argv[0]); + return 1; + } + + sock_fd = socket(AF_XDP, SOCK_RAW, 0); + if (sock_fd < 0) { + perror("socket creation failed"); + /* if the kernel doesn't support AF_XDP, let the test program + * know with -1. All other error paths return 1. + */ + if (errno == EAFNOSUPPORT) + return -1; + return 1; + } + + /* "Probing mode", just checking if AF_XDP sockets are supported */ + if (!strcmp(argv[1], "-") && !strcmp(argv[2], "-")) { + printf("AF_XDP support detected\n"); + close(sock_fd); + return 0; + } + + ifindex = atoi(argv[1]); + queue = atoi(argv[2]); + + umem_area = mmap(NULL, UMEM_SZ, PROT_READ | PROT_WRITE, MAP_PRIVATE | + MAP_ANONYMOUS, -1, 0); + if (umem_area == MAP_FAILED) { + perror("mmap failed"); + return 1; + } + + umem_reg.addr = (uintptr_t)umem_area; + umem_reg.len = UMEM_SZ; + umem_reg.chunk_size = 2048; + umem_reg.headroom = 0; + + setsockopt(sock_fd, SOL_XDP, XDP_UMEM_REG, &umem_reg, + sizeof(umem_reg)); + setsockopt(sock_fd, SOL_XDP, XDP_UMEM_FILL_RING, &num_desc, + sizeof(num_desc)); + setsockopt(sock_fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &num_desc, + sizeof(num_desc)); + setsockopt(sock_fd, SOL_XDP, XDP_RX_RING, &num_desc, sizeof(num_desc)); + + sxdp.sxdp_family = AF_XDP; + sxdp.sxdp_ifindex = ifindex; + sxdp.sxdp_queue_id = queue; + sxdp.sxdp_flags = 0; + + if (bind(sock_fd, (struct sockaddr *)&sxdp, sizeof(sxdp)) != 0) { + munmap(umem_area, UMEM_SZ); + perror("bind failed"); + close(sock_fd); + return 1; + } + + ksft_ready(); + ksft_wait(); + + /* parent program will write a byte to stdin when its ready for this + * helper to exit + */ + + close(sock_fd); + return 0; +} diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test.c b/tools/testing/selftests/filesystems/statmount/statmount_test.c index 46d289611ce8..f048042e53e9 100644 --- a/tools/testing/selftests/filesystems/statmount/statmount_test.c +++ b/tools/testing/selftests/filesystems/statmount/statmount_test.c @@ -26,13 +26,12 @@ static const char *const known_fs[] = { "hfsplus", "hostfs", "hpfs", "hugetlbfs", "ibmasmfs", "iomem", "ipathfs", "iso9660", "jffs2", "jfs", "minix", "mqueue", "msdos", "nfs", "nfs4", "nfsd", "nilfs2", "nsfs", "ntfs", "ntfs3", "ocfs2", - "ocfs2_dlmfs", "ocxlflash", "omfs", "openpromfs", "overlay", "pipefs", - "proc", "pstore", "pvfs2", "qnx4", "qnx6", "ramfs", - "resctrl", "romfs", "rootfs", "rpc_pipefs", "s390_hypfs", "secretmem", - "securityfs", "selinuxfs", "smackfs", "smb3", "sockfs", "spufs", - "squashfs", "sysfs", "sysv", "tmpfs", "tracefs", "ubifs", "udf", - "ufs", "v7", "vboxsf", "vfat", "virtiofs", "vxfs", "xenfs", "xfs", - "zonefs", NULL }; + "ocfs2_dlmfs", "omfs", "openpromfs", "overlay", "pipefs", "proc", + "pstore", "pvfs2", "qnx4", "qnx6", "ramfs", "resctrl", "romfs", + "rootfs", "rpc_pipefs", "s390_hypfs", "secretmem", "securityfs", + "selinuxfs", "smackfs", "smb3", "sockfs", "spufs", "squashfs", "sysfs", + "sysv", "tmpfs", "tracefs", "ubifs", "udf", "ufs", "v7", "vboxsf", + "vfat", "virtiofs", "vxfs", "xenfs", "xfs", "zonefs", NULL }; static struct statmount *statmount_alloc(uint64_t mnt_id, uint64_t mask, unsigned int flags) { diff --git a/tools/testing/selftests/ftrace/.gitignore b/tools/testing/selftests/ftrace/.gitignore index 2659417cb2c7..4d7fcb828850 100644 --- a/tools/testing/selftests/ftrace/.gitignore +++ b/tools/testing/selftests/ftrace/.gitignore @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only logs +poll diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe.tc index 155792eaeee5..f271c4238b72 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe.tc @@ -6,6 +6,7 @@ echo 0 > events/enable echo > dynamic_events +SUBSYSTEM=kmem TRACEPOINT1=kmem_cache_alloc TRACEPOINT2=kmem_cache_free @@ -24,4 +25,17 @@ grep -q myevent1 dynamic_events echo > dynamic_events +# auto naming check +echo "t $TRACEPOINT1" >> dynamic_events + +test -d events/tracepoints/$TRACEPOINT1 + +echo > dynamic_events + +# SUBSYSTEM is not supported +echo "t $SUBSYSTEM/$TRACEPOINT1" >> dynamic_events && exit_fail ||: +echo "t $SUBSYSTEM:$TRACEPOINT1" >> dynamic_events && exit_fail ||: +echo "t:myevent3 $SUBSYSTEM/$TRACEPOINT1" >> dynamic_events && exit_fail ||: +echo "t:myevent3 $SUBSYSTEM:$TRACEPOINT1" >> dynamic_events && exit_fail ||: + clear_trace diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_uprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_uprobe.tc index 86c76679c56e..f2048c244526 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_uprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_uprobe.tc @@ -3,14 +3,18 @@ # description: Generic dynamic event - add/remove/test uprobe events # requires: uprobe_events +if ! which readelf > /dev/null 2>&1 ; then + echo "No readelf found. skipped." + exit_unresolved +fi + echo 0 > events/enable echo > dynamic_events REALBIN=`readlink -f /bin/sh` +ENTRYPOINT=`readelf -h ${REALBIN} | grep Entry | sed -e 's/[^0]*//'` -echo 'cat /proc/$$/maps' | /bin/sh | \ - grep "r-xp .*${REALBIN}$" | \ - awk '{printf "p:myevent %s:0x%s\n", $6,$3 }' >> uprobe_events +echo "p:myevent ${REALBIN}:${ENTRYPOINT}" >> uprobe_events grep -q myevent uprobe_events test -d events/uprobes/myevent diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc b/tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc new file mode 100644 index 000000000000..6b94b678741a --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc @@ -0,0 +1,42 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Checking dynamic events limitations +# requires: dynamic_events "imm-value":README + +# Max arguments limitation +MAX_ARGS=128 +EXCEED_ARGS=$((MAX_ARGS + 1)) + +check_max_args() { # event_header + TEST_STRING=$1 + # Acceptable + for i in `seq 1 $MAX_ARGS`; do + TEST_STRING="$TEST_STRING \\$i" + done + echo "$TEST_STRING" >> dynamic_events + echo > dynamic_events + # Error + TEST_STRING="$TEST_STRING \\$EXCEED_ARGS" + ! echo "$TEST_STRING" >> dynamic_events + return 0 +} + +# Kprobe max args limitation +if grep -q "kprobe_events" README; then + check_max_args "p vfs_read" +fi + +# Fprobe max args limitation +if grep -q "f[:[<group>/][<event>]] <func-name>[%return] [<args>]" README; then + check_max_args "f vfs_read" +fi + +# Tprobe max args limitation +if grep -q "t[:[<group>/][<event>]] <tracepoint> [<args>]" README; then + check_max_args "t kfree" +fi + +# Uprobe max args limitation +if grep -q "uprobe_events" README; then + check_max_args "p /bin/sh:10" +fi diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc index c9425a34fae3..fee479295e2f 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc @@ -27,6 +27,7 @@ check_error 'f:^foo.1/bar vfs_read' # BAD_GROUP_NAME check_error 'f:^ vfs_read' # NO_EVENT_NAME check_error 'f:foo/^12345678901234567890123456789012345678901234567890123456789012345 vfs_read' # EVENT_TOO_LONG check_error 'f:foo/^bar.1 vfs_read' # BAD_EVENT_NAME +check_error 't kmem^/kfree' # BAD_TP_NAME check_error 'f vfs_read ^$stack10000' # BAD_STACK_NUM diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions index 84d6a9c7ad67..a1052bf460fc 100644 --- a/tools/testing/selftests/ftrace/test.d/functions +++ b/tools/testing/selftests/ftrace/test.d/functions @@ -156,7 +156,13 @@ check_requires() { # Check required files and tracers exit_unsupported fi elif [ "$r" != "$i" ]; then - if ! grep -Fq "$r" README ; then + # If this is an instance, check the top directory + if echo $TRACING_DIR | grep -q "/instances/"; then + test="$TRACING_DIR/../.." + else + test=$TRACING_DIR + fi + if ! grep -Fq "$r" $test/README ; then echo "Required feature pattern \"$r\" is not in README." exit_unsupported fi diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc index 1590d6bfb857..20a35fea13f8 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc @@ -2,6 +2,7 @@ # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test inter-event histogram trigger expected fail actions # requires: set_event snapshot "snapshot()":README +# flags: instance fail() { #msg echo $1 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc index 91339c130832..55ab0270e5f7 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc @@ -2,6 +2,7 @@ # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test inter-event histogram trigger onchange action # requires: set_event "onchange(var)":README ping:program +# flags: instance fail() { #msg echo $1 @@ -19,4 +20,6 @@ if ! grep -q "changed:" events/sched/sched_waking/hist; then fail "Failed to create onchange action inter-event histogram" fi +echo '!hist:keys=comm:newprio=prio:onchange($newprio).save(comm,prio) if comm=="ping"' >> events/sched/sched_waking/trigger + exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc index 147967e86584..9eb37c2fa417 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc @@ -2,6 +2,7 @@ # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test inter-event histogram trigger snapshot action # requires: set_event snapshot events/sched/sched_process_fork/hist "onchange(var)":README "snapshot()":README ping:program +# flags: instance fail() { #msg echo $1 @@ -27,4 +28,6 @@ if ! grep -q "comm=ping" snapshot; then fail "Failed to create snapshot action inter-event histogram" fi +echo '!hist:keys=comm:newprio=prio:onchange($newprio).save(comm,prio):onchange($newprio).snapshot() if comm=="ping"' >> events/sched/sched_waking/trigger + exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-expressions.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-expressions.tc index 05ffba299dbf..0ebda2068a00 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-expressions.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-expressions.tc @@ -2,6 +2,7 @@ # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test histogram expression parsing # requires: set_event events/sched/sched_process_fork/trigger events/sched/sched_process_fork/hist error_log "<var1>=<field|var_ref|numeric_literal>":README +# flags: instance fail() { #msg diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h index cdf91b0ca40f..c3b6d2604b1e 100644 --- a/tools/testing/selftests/kselftest.h +++ b/tools/testing/selftests/kselftest.h @@ -444,10 +444,6 @@ static inline __noreturn __printf(1, 2) void ksft_exit_skip(const char *msg, ... static inline int ksft_min_kernel_version(unsigned int min_major, unsigned int min_minor) { -#ifdef NOLIBC - ksft_print_msg("NOLIBC: Can't check kernel version: Function not implemented\n"); - return 0; -#else unsigned int major, minor; struct utsname info; @@ -455,7 +451,6 @@ static inline int ksft_min_kernel_version(unsigned int min_major, ksft_exit_fail_msg("Can't parse kernel version\n"); return major > min_major || (major == min_major && minor >= min_minor); -#endif } #endif /* __KSELFTEST_H */ diff --git a/tools/testing/selftests/kselftest/module.sh b/tools/testing/selftests/kselftest/module.sh index fb4733faff12..51fb65159932 100755 --- a/tools/testing/selftests/kselftest/module.sh +++ b/tools/testing/selftests/kselftest/module.sh @@ -11,7 +11,7 @@ # SPDX-License-Identifier: GPL-2.0+ # $(dirname $0)/../kselftest/module.sh "description" module_name # -# Example: tools/testing/selftests/lib/printf.sh +# Example: tools/testing/selftests/lib/bitmap.sh desc="" # Output prefix. module="" # Filename (without the .ko). diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index 4277b983cace..f773f8f99249 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -69,6 +69,7 @@ TEST_GEN_PROGS_x86 += x86/hyperv_tlb_flush TEST_GEN_PROGS_x86 += x86/kvm_clock_test TEST_GEN_PROGS_x86 += x86/kvm_pv_test TEST_GEN_PROGS_x86 += x86/monitor_mwait_test +TEST_GEN_PROGS_x86 += x86/nested_emulation_test TEST_GEN_PROGS_x86 += x86/nested_exceptions_test TEST_GEN_PROGS_x86 += x86/platform_info_test TEST_GEN_PROGS_x86 += x86/pmu_counters_test diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c index 3c7defd34f56..447e619cf856 100644 --- a/tools/testing/selftests/kvm/access_tracking_perf_test.c +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -239,7 +239,7 @@ static void vcpu_thread_main(struct memstress_vcpu_args *vcpu_args) case ITERATION_MARK_IDLE: mark_vcpu_memory_idle(vm, vcpu_args); break; - }; + } vcpu_last_completed_iteration[vcpu_idx] = current_iteration; } diff --git a/tools/testing/selftests/kvm/arm64/get-reg-list.c b/tools/testing/selftests/kvm/arm64/get-reg-list.c index d43fb3f49050..d01798b6b3b4 100644 --- a/tools/testing/selftests/kvm/arm64/get-reg-list.c +++ b/tools/testing/selftests/kvm/arm64/get-reg-list.c @@ -332,6 +332,7 @@ static __u64 base_regs[] = { KVM_REG_ARM_FW_FEAT_BMAP_REG(0), /* KVM_REG_ARM_STD_BMAP */ KVM_REG_ARM_FW_FEAT_BMAP_REG(1), /* KVM_REG_ARM_STD_HYP_BMAP */ KVM_REG_ARM_FW_FEAT_BMAP_REG(2), /* KVM_REG_ARM_VENDOR_HYP_BMAP */ + KVM_REG_ARM_FW_FEAT_BMAP_REG(3), /* KVM_REG_ARM_VENDOR_HYP_BMAP_2 */ ARM64_SYS_REG(3, 3, 14, 3, 1), /* CNTV_CTL_EL0 */ ARM64_SYS_REG(3, 3, 14, 3, 2), /* CNTV_CVAL_EL0 */ ARM64_SYS_REG(3, 3, 14, 0, 2), diff --git a/tools/testing/selftests/kvm/arm64/hypercalls.c b/tools/testing/selftests/kvm/arm64/hypercalls.c index ec54ec7726e9..44cfcf8a7f46 100644 --- a/tools/testing/selftests/kvm/arm64/hypercalls.c +++ b/tools/testing/selftests/kvm/arm64/hypercalls.c @@ -21,22 +21,31 @@ #define KVM_REG_ARM_STD_BMAP_BIT_MAX 0 #define KVM_REG_ARM_STD_HYP_BMAP_BIT_MAX 0 #define KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_MAX 1 +#define KVM_REG_ARM_VENDOR_HYP_BMAP_2_BIT_MAX 1 + +#define KVM_REG_ARM_STD_BMAP_RESET_VAL FW_REG_ULIMIT_VAL(KVM_REG_ARM_STD_BMAP_BIT_MAX) +#define KVM_REG_ARM_STD_HYP_BMAP_RESET_VAL FW_REG_ULIMIT_VAL(KVM_REG_ARM_STD_HYP_BMAP_BIT_MAX) +#define KVM_REG_ARM_VENDOR_HYP_BMAP_RESET_VAL FW_REG_ULIMIT_VAL(KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_MAX) +#define KVM_REG_ARM_VENDOR_HYP_BMAP_2_RESET_VAL 0 struct kvm_fw_reg_info { uint64_t reg; /* Register definition */ uint64_t max_feat_bit; /* Bit that represents the upper limit of the feature-map */ + uint64_t reset_val; /* Reset value for the register */ }; #define FW_REG_INFO(r) \ { \ .reg = r, \ .max_feat_bit = r##_BIT_MAX, \ + .reset_val = r##_RESET_VAL \ } static const struct kvm_fw_reg_info fw_reg_info[] = { FW_REG_INFO(KVM_REG_ARM_STD_BMAP), FW_REG_INFO(KVM_REG_ARM_STD_HYP_BMAP), FW_REG_INFO(KVM_REG_ARM_VENDOR_HYP_BMAP), + FW_REG_INFO(KVM_REG_ARM_VENDOR_HYP_BMAP_2), }; enum test_stage { @@ -171,22 +180,39 @@ static void test_fw_regs_before_vm_start(struct kvm_vcpu *vcpu) for (i = 0; i < ARRAY_SIZE(fw_reg_info); i++) { const struct kvm_fw_reg_info *reg_info = &fw_reg_info[i]; + uint64_t set_val; - /* First 'read' should be an upper limit of the features supported */ + /* First 'read' should be the reset value for the reg */ val = vcpu_get_reg(vcpu, reg_info->reg); - TEST_ASSERT(val == FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), - "Expected all the features to be set for reg: 0x%lx; expected: 0x%lx; read: 0x%lx", - reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), val); + TEST_ASSERT(val == reg_info->reset_val, + "Unexpected reset value for reg: 0x%lx; expected: 0x%lx; read: 0x%lx", + reg_info->reg, reg_info->reset_val, val); + + if (reg_info->reset_val) + set_val = 0; + else + set_val = FW_REG_ULIMIT_VAL(reg_info->max_feat_bit); - /* Test a 'write' by disabling all the features of the register map */ - ret = __vcpu_set_reg(vcpu, reg_info->reg, 0); + ret = __vcpu_set_reg(vcpu, reg_info->reg, set_val); TEST_ASSERT(ret == 0, - "Failed to clear all the features of reg: 0x%lx; ret: %d", - reg_info->reg, errno); + "Failed to %s all the features of reg: 0x%lx; ret: %d", + (set_val ? "set" : "clear"), reg_info->reg, errno); val = vcpu_get_reg(vcpu, reg_info->reg); - TEST_ASSERT(val == 0, - "Expected all the features to be cleared for reg: 0x%lx", reg_info->reg); + TEST_ASSERT(val == set_val, + "Expected all the features to be %s for reg: 0x%lx", + (set_val ? "set" : "cleared"), reg_info->reg); + + /* + * If the reg has been set, clear it as test_fw_regs_after_vm_start() + * expects it to be cleared. + */ + if (set_val) { + ret = __vcpu_set_reg(vcpu, reg_info->reg, 0); + TEST_ASSERT(ret == 0, + "Failed to clear all the features of reg: 0x%lx; ret: %d", + reg_info->reg, errno); + } /* * Test enabling a feature that's not supported. diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c index 217541fe6536..322b9d3b0125 100644 --- a/tools/testing/selftests/kvm/arm64/set_id_regs.c +++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c @@ -146,6 +146,9 @@ static const struct reg_ftr_bits ftr_id_aa64pfr1_el1[] = { static const struct reg_ftr_bits ftr_id_aa64mmfr0_el1[] = { REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, ECV, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, EXS, 0), + REG_FTR_BITS(FTR_EXACT, ID_AA64MMFR0_EL1, TGRAN4_2, 1), + REG_FTR_BITS(FTR_EXACT, ID_AA64MMFR0_EL1, TGRAN64_2, 1), + REG_FTR_BITS(FTR_EXACT, ID_AA64MMFR0_EL1, TGRAN16_2, 1), S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN4, 0), S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN64, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN16, 0), @@ -230,6 +233,9 @@ static void guest_code(void) GUEST_REG_SYNC(SYS_ID_AA64MMFR2_EL1); GUEST_REG_SYNC(SYS_ID_AA64ZFR0_EL1); GUEST_REG_SYNC(SYS_CTR_EL0); + GUEST_REG_SYNC(SYS_MIDR_EL1); + GUEST_REG_SYNC(SYS_REVIDR_EL1); + GUEST_REG_SYNC(SYS_AIDR_EL1); GUEST_DONE(); } @@ -609,18 +615,31 @@ static void test_ctr(struct kvm_vcpu *vcpu) test_reg_vals[encoding_to_range_idx(SYS_CTR_EL0)] = ctr; } -static void test_vcpu_ftr_id_regs(struct kvm_vcpu *vcpu) +static void test_id_reg(struct kvm_vcpu *vcpu, u32 id) { u64 val; + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(id)); + val++; + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(id), val); + test_reg_vals[encoding_to_range_idx(id)] = val; +} + +static void test_vcpu_ftr_id_regs(struct kvm_vcpu *vcpu) +{ test_clidr(vcpu); test_ctr(vcpu); - val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1)); - val++; - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), val); + test_id_reg(vcpu, SYS_MPIDR_EL1); + ksft_test_result_pass("%s\n", __func__); +} + +static void test_vcpu_non_ftr_id_regs(struct kvm_vcpu *vcpu) +{ + test_id_reg(vcpu, SYS_MIDR_EL1); + test_id_reg(vcpu, SYS_REVIDR_EL1); + test_id_reg(vcpu, SYS_AIDR_EL1); - test_reg_vals[encoding_to_range_idx(SYS_MPIDR_EL1)] = val; ksft_test_result_pass("%s\n", __func__); } @@ -647,6 +666,9 @@ static void test_reset_preserves_id_regs(struct kvm_vcpu *vcpu) test_assert_id_reg_unchanged(vcpu, SYS_MPIDR_EL1); test_assert_id_reg_unchanged(vcpu, SYS_CLIDR_EL1); test_assert_id_reg_unchanged(vcpu, SYS_CTR_EL0); + test_assert_id_reg_unchanged(vcpu, SYS_MIDR_EL1); + test_assert_id_reg_unchanged(vcpu, SYS_REVIDR_EL1); + test_assert_id_reg_unchanged(vcpu, SYS_AIDR_EL1); ksft_test_result_pass("%s\n", __func__); } @@ -660,8 +682,11 @@ int main(void) int test_cnt; TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES)); + TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_WRITABLE_IMP_ID_REGS)); - vm = vm_create_with_one_vcpu(&vcpu, guest_code); + vm = vm_create(1); + vm_enable_cap(vm, KVM_CAP_ARM_WRITABLE_IMP_ID_REGS, 0); + vcpu = vm_vcpu_add(vm, 0, guest_code); /* Check for AARCH64 only system */ val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); @@ -675,13 +700,14 @@ int main(void) ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) + ARRAY_SIZE(ftr_id_aa64pfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr0_el1) + ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr2_el1) + - ARRAY_SIZE(ftr_id_aa64zfr0_el1) - ARRAY_SIZE(test_regs) + 2 + + ARRAY_SIZE(ftr_id_aa64zfr0_el1) - ARRAY_SIZE(test_regs) + 3 + MPAM_IDREG_TEST; ksft_set_plan(test_cnt); test_vm_ftr_id_regs(vcpu, aarch64_only); test_vcpu_ftr_id_regs(vcpu); + test_vcpu_non_ftr_id_regs(vcpu); test_user_set_mpam_reg(vcpu); test_guest_reg_read(vcpu); diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index aacf80f57439..23593d9eeba9 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -31,15 +31,18 @@ /* Default guest test virtual memory offset */ #define DEFAULT_GUEST_TEST_MEM 0xc0000000 -/* How many pages to dirty for each guest loop */ -#define TEST_PAGES_PER_LOOP 1024 - /* How many host loops to run (one KVM_GET_DIRTY_LOG for each loop) */ #define TEST_HOST_LOOP_N 32UL /* Interval for each host loop (ms) */ #define TEST_HOST_LOOP_INTERVAL 10UL +/* + * Ensure the vCPU is able to perform a reasonable number of writes in each + * iteration to provide a lower bound on coverage. + */ +#define TEST_MIN_WRITES_PER_ITERATION 0x100 + /* Dirty bitmaps are always little endian, so we need to swap on big endian */ #if defined(__s390x__) # define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7) @@ -75,6 +78,8 @@ static uint64_t host_page_size; static uint64_t guest_page_size; static uint64_t guest_num_pages; static uint64_t iteration; +static uint64_t nr_writes; +static bool vcpu_stop; /* * Guest physical memory offset of the testing memory slot. @@ -96,7 +101,9 @@ static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; static void guest_code(void) { uint64_t addr; - int i; + +#ifdef __s390x__ + uint64_t i; /* * On s390x, all pages of a 1M segment are initially marked as dirty @@ -107,16 +114,19 @@ static void guest_code(void) for (i = 0; i < guest_num_pages; i++) { addr = guest_test_virt_mem + i * guest_page_size; vcpu_arch_put_guest(*(uint64_t *)addr, READ_ONCE(iteration)); + nr_writes++; } +#endif while (true) { - for (i = 0; i < TEST_PAGES_PER_LOOP; i++) { + while (!READ_ONCE(vcpu_stop)) { addr = guest_test_virt_mem; addr += (guest_random_u64(&guest_rng) % guest_num_pages) * guest_page_size; addr = align_down(addr, host_page_size); vcpu_arch_put_guest(*(uint64_t *)addr, READ_ONCE(iteration)); + nr_writes++; } GUEST_SYNC(1); @@ -133,25 +143,18 @@ static uint64_t host_num_pages; /* For statistics only */ static uint64_t host_dirty_count; static uint64_t host_clear_count; -static uint64_t host_track_next_count; /* Whether dirty ring reset is requested, or finished */ static sem_t sem_vcpu_stop; static sem_t sem_vcpu_cont; -/* - * This is only set by main thread, and only cleared by vcpu thread. It is - * used to request vcpu thread to stop at the next GUEST_SYNC, since GUEST_SYNC - * is the only place that we'll guarantee both "dirty bit" and "dirty data" - * will match. E.g., SIG_IPI won't guarantee that if the vcpu is interrupted - * after setting dirty bit but before the data is written. - */ -static atomic_t vcpu_sync_stop_requested; + /* * This is updated by the vcpu thread to tell the host whether it's a * ring-full event. It should only be read until a sem_wait() of * sem_vcpu_stop and before vcpu continues to run. */ static bool dirty_ring_vcpu_ring_full; + /* * This is only used for verifying the dirty pages. Dirty ring has a very * tricky case when the ring just got full, kvm will do userspace exit due to @@ -166,7 +169,51 @@ static bool dirty_ring_vcpu_ring_full; * dirty gfn we've collected, so that if a mismatch of data found later in the * verifying process, we let it pass. */ -static uint64_t dirty_ring_last_page; +static uint64_t dirty_ring_last_page = -1ULL; + +/* + * In addition to the above, it is possible (especially if this + * test is run nested) for the above scenario to repeat multiple times: + * + * The following can happen: + * + * - L1 vCPU: Memory write is logged to PML but not committed. + * + * - L1 test thread: Ignores the write because its last dirty ring entry + * Resets the dirty ring which: + * - Resets the A/D bits in EPT + * - Issues tlb flush (invept), which is intercepted by L0 + * + * - L0: frees the whole nested ept mmu root as the response to invept, + * and thus ensures that when memory write is retried, it will fault again + * + * - L1 vCPU: Same memory write is logged to the PML but not committed again. + * + * - L1 test thread: Ignores the write because its last dirty ring entry (again) + * Resets the dirty ring which: + * - Resets the A/D bits in EPT (again) + * - Issues tlb flush (again) which is intercepted by L0 + * + * ... + * + * N times + * + * - L1 vCPU: Memory write is logged in the PML and then committed. + * Lots of other memory writes are logged and committed. + * ... + * + * - L1 test thread: Sees the memory write along with other memory writes + * in the dirty ring, and since the write is usually not + * the last entry in the dirty-ring and has a very outdated + * iteration, the test fails. + * + * + * Note that this is only possible when the write was the last log entry + * write during iteration N-1, thus remember last iteration last log entry + * and also don't fail when it is reported in the next iteration, together with + * an outdated iteration count. + */ +static uint64_t dirty_ring_prev_iteration_last_page; enum log_mode_t { /* Only use KVM_GET_DIRTY_LOG for logging */ @@ -191,24 +238,6 @@ static enum log_mode_t host_log_mode; static pthread_t vcpu_thread; static uint32_t test_dirty_ring_count = TEST_DIRTY_RING_COUNT; -static void vcpu_kick(void) -{ - pthread_kill(vcpu_thread, SIG_IPI); -} - -/* - * In our test we do signal tricks, let's use a better version of - * sem_wait to avoid signal interrupts - */ -static void sem_wait_until(sem_t *sem) -{ - int ret; - - do - ret = sem_wait(sem); - while (ret == -1 && errno == EINTR); -} - static bool clear_log_supported(void) { return kvm_has_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); @@ -243,21 +272,16 @@ static void clear_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, /* Should only be called after a GUEST_SYNC */ static void vcpu_handle_sync_stop(void) { - if (atomic_read(&vcpu_sync_stop_requested)) { - /* It means main thread is sleeping waiting */ - atomic_set(&vcpu_sync_stop_requested, false); + if (READ_ONCE(vcpu_stop)) { sem_post(&sem_vcpu_stop); - sem_wait_until(&sem_vcpu_cont); + sem_wait(&sem_vcpu_cont); } } -static void default_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) +static void default_after_vcpu_run(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; - TEST_ASSERT(ret == 0 || (ret == -1 && err == EINTR), - "vcpu run failed: errno=%d", err); - TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC, "Invalid guest sync status: exit_reason=%s", exit_reason_str(run->exit_reason)); @@ -324,7 +348,6 @@ static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns, "%u != %u", cur->slot, slot); TEST_ASSERT(cur->offset < num_pages, "Offset overflow: " "0x%llx >= 0x%x", cur->offset, num_pages); - //pr_info("fetch 0x%x page %llu\n", *fetch_index, cur->offset); __set_bit_le(cur->offset, bitmap); dirty_ring_last_page = cur->offset; dirty_gfn_set_collected(cur); @@ -335,36 +358,11 @@ static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns, return count; } -static void dirty_ring_wait_vcpu(void) -{ - /* This makes sure that hardware PML cache flushed */ - vcpu_kick(); - sem_wait_until(&sem_vcpu_stop); -} - -static void dirty_ring_continue_vcpu(void) -{ - pr_info("Notifying vcpu to continue\n"); - sem_post(&sem_vcpu_cont); -} - static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, void *bitmap, uint32_t num_pages, uint32_t *ring_buf_idx) { - uint32_t count = 0, cleared; - bool continued_vcpu = false; - - dirty_ring_wait_vcpu(); - - if (!dirty_ring_vcpu_ring_full) { - /* - * This is not a ring-full event, it's safe to allow - * vcpu to continue - */ - dirty_ring_continue_vcpu(); - continued_vcpu = true; - } + uint32_t count, cleared; /* Only have one vcpu */ count = dirty_ring_collect_one(vcpu_map_dirty_ring(vcpu), @@ -379,35 +377,18 @@ static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, */ TEST_ASSERT(cleared == count, "Reset dirty pages (%u) mismatch " "with collected (%u)", cleared, count); - - if (!continued_vcpu) { - TEST_ASSERT(dirty_ring_vcpu_ring_full, - "Didn't continue vcpu even without ring full"); - dirty_ring_continue_vcpu(); - } - - pr_info("Iteration %ld collected %u pages\n", iteration, count); } -static void dirty_ring_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) +static void dirty_ring_after_vcpu_run(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; /* A ucall-sync or ring-full event is allowed */ if (get_ucall(vcpu, NULL) == UCALL_SYNC) { - /* We should allow this to continue */ - ; - } else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL || - (ret == -1 && err == EINTR)) { - /* Update the flag first before pause */ - WRITE_ONCE(dirty_ring_vcpu_ring_full, - run->exit_reason == KVM_EXIT_DIRTY_RING_FULL); - sem_post(&sem_vcpu_stop); - pr_info("vcpu stops because %s...\n", - dirty_ring_vcpu_ring_full ? - "dirty ring is full" : "vcpu is kicked out"); - sem_wait_until(&sem_vcpu_cont); - pr_info("vcpu continues now.\n"); + vcpu_handle_sync_stop(); + } else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL) { + WRITE_ONCE(dirty_ring_vcpu_ring_full, true); + vcpu_handle_sync_stop(); } else { TEST_ASSERT(false, "Invalid guest sync status: " "exit_reason=%s", @@ -426,7 +407,7 @@ struct log_mode { void *bitmap, uint32_t num_pages, uint32_t *ring_buf_idx); /* Hook to call when after each vcpu run */ - void (*after_vcpu_run)(struct kvm_vcpu *vcpu, int ret, int err); + void (*after_vcpu_run)(struct kvm_vcpu *vcpu); } log_modes[LOG_MODE_NUM] = { { .name = "dirty-log", @@ -449,15 +430,6 @@ struct log_mode { }, }; -/* - * We use this bitmap to track some pages that should have its dirty - * bit set in the _next_ iteration. For example, if we detected the - * page value changed to current iteration but at the same time the - * page bit is cleared in the latest bitmap, then the system must - * report that write in the next get dirty log call. - */ -static unsigned long *host_bmap_track; - static void log_modes_dump(void) { int i; @@ -497,170 +469,109 @@ static void log_mode_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, mode->collect_dirty_pages(vcpu, slot, bitmap, num_pages, ring_buf_idx); } -static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) +static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu) { struct log_mode *mode = &log_modes[host_log_mode]; if (mode->after_vcpu_run) - mode->after_vcpu_run(vcpu, ret, err); + mode->after_vcpu_run(vcpu); } static void *vcpu_worker(void *data) { - int ret; struct kvm_vcpu *vcpu = data; - uint64_t pages_count = 0; - struct kvm_signal_mask *sigmask = alloca(offsetof(struct kvm_signal_mask, sigset) - + sizeof(sigset_t)); - sigset_t *sigset = (sigset_t *) &sigmask->sigset; - /* - * SIG_IPI is unblocked atomically while in KVM_RUN. It causes the - * ioctl to return with -EINTR, but it is still pending and we need - * to accept it with the sigwait. - */ - sigmask->len = 8; - pthread_sigmask(0, NULL, sigset); - sigdelset(sigset, SIG_IPI); - vcpu_ioctl(vcpu, KVM_SET_SIGNAL_MASK, sigmask); - - sigemptyset(sigset); - sigaddset(sigset, SIG_IPI); + sem_wait(&sem_vcpu_cont); while (!READ_ONCE(host_quit)) { - /* Clear any existing kick signals */ - pages_count += TEST_PAGES_PER_LOOP; /* Let the guest dirty the random pages */ - ret = __vcpu_run(vcpu); - if (ret == -1 && errno == EINTR) { - int sig = -1; - sigwait(sigset, &sig); - assert(sig == SIG_IPI); - } - log_mode_after_vcpu_run(vcpu, ret, errno); + vcpu_run(vcpu); + log_mode_after_vcpu_run(vcpu); } - pr_info("Dirtied %"PRIu64" pages\n", pages_count); - return NULL; } -static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap) +static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long **bmap) { + uint64_t page, nr_dirty_pages = 0, nr_clean_pages = 0; uint64_t step = vm_num_host_pages(mode, 1); - uint64_t page; - uint64_t *value_ptr; - uint64_t min_iter = 0; for (page = 0; page < host_num_pages; page += step) { - value_ptr = host_test_mem + page * host_page_size; - - /* If this is a special page that we were tracking... */ - if (__test_and_clear_bit_le(page, host_bmap_track)) { - host_track_next_count++; - TEST_ASSERT(test_bit_le(page, bmap), - "Page %"PRIu64" should have its dirty bit " - "set in this iteration but it is missing", - page); - } + uint64_t val = *(uint64_t *)(host_test_mem + page * host_page_size); + bool bmap0_dirty = __test_and_clear_bit_le(page, bmap[0]); - if (__test_and_clear_bit_le(page, bmap)) { - bool matched; - - host_dirty_count++; + /* + * Ensure both bitmaps are cleared, as a page can be written + * multiple times per iteration, i.e. can show up in both + * bitmaps, and the dirty ring is additive, i.e. doesn't purge + * bitmap entries from previous collections. + */ + if (__test_and_clear_bit_le(page, bmap[1]) || bmap0_dirty) { + nr_dirty_pages++; /* - * If the bit is set, the value written onto - * the corresponding page should be either the - * previous iteration number or the current one. + * If the page is dirty, the value written to memory + * should be the current iteration number. */ - matched = (*value_ptr == iteration || - *value_ptr == iteration - 1); - - if (host_log_mode == LOG_MODE_DIRTY_RING && !matched) { - if (*value_ptr == iteration - 2 && min_iter <= iteration - 2) { - /* - * Short answer: this case is special - * only for dirty ring test where the - * page is the last page before a kvm - * dirty ring full in iteration N-2. - * - * Long answer: Assuming ring size R, - * one possible condition is: - * - * main thr vcpu thr - * -------- -------- - * iter=1 - * write 1 to page 0~(R-1) - * full, vmexit - * collect 0~(R-1) - * kick vcpu - * write 1 to (R-1)~(2R-2) - * full, vmexit - * iter=2 - * collect (R-1)~(2R-2) - * kick vcpu - * write 1 to (2R-2) - * (NOTE!!! "1" cached in cpu reg) - * write 2 to (2R-1)~(3R-3) - * full, vmexit - * iter=3 - * collect (2R-2)~(3R-3) - * (here if we read value on page - * "2R-2" is 1, while iter=3!!!) - * - * This however can only happen once per iteration. - */ - min_iter = iteration - 1; + if (val == iteration) + continue; + + if (host_log_mode == LOG_MODE_DIRTY_RING) { + /* + * The last page in the ring from previous + * iteration can be written with the value + * from the previous iteration, as the value to + * be written may be cached in a CPU register. + */ + if (page == dirty_ring_prev_iteration_last_page && + val == iteration - 1) continue; - } else if (page == dirty_ring_last_page) { - /* - * Please refer to comments in - * dirty_ring_last_page. - */ + + /* + * Any value from a previous iteration is legal + * for the last entry, as the write may not yet + * have retired, i.e. the page may hold whatever + * it had before this iteration started. + */ + if (page == dirty_ring_last_page && + val < iteration) continue; - } + } else if (!val && iteration == 1 && bmap0_dirty) { + /* + * When testing get+clear, the dirty bitmap + * starts with all bits set, and so the first + * iteration can observe a "dirty" page that + * was never written, but only in the first + * bitmap (collecting the bitmap also clears + * all dirty pages). + */ + continue; } - TEST_ASSERT(matched, - "Set page %"PRIu64" value %"PRIu64 - " incorrect (iteration=%"PRIu64")", - page, *value_ptr, iteration); + TEST_FAIL("Dirty page %lu value (%lu) != iteration (%lu) " + "(last = %lu, prev_last = %lu)", + page, val, iteration, dirty_ring_last_page, + dirty_ring_prev_iteration_last_page); } else { - host_clear_count++; + nr_clean_pages++; /* * If cleared, the value written can be any - * value smaller or equals to the iteration - * number. Note that the value can be exactly - * (iteration-1) if that write can happen - * like this: - * - * (1) increase loop count to "iteration-1" - * (2) write to page P happens (with value - * "iteration-1") - * (3) get dirty log for "iteration-1"; we'll - * see that page P bit is set (dirtied), - * and not set the bit in host_bmap_track - * (4) increase loop count to "iteration" - * (which is current iteration) - * (5) get dirty log for current iteration, - * we'll see that page P is cleared, with - * value "iteration-1". + * value smaller than the iteration number. */ - TEST_ASSERT(*value_ptr <= iteration, - "Clear page %"PRIu64" value %"PRIu64 - " incorrect (iteration=%"PRIu64")", - page, *value_ptr, iteration); - if (*value_ptr == iteration) { - /* - * This page is _just_ modified; it - * should report its dirtyness in the - * next run - */ - __set_bit_le(page, host_bmap_track); - } + TEST_ASSERT(val < iteration, + "Clear page %lu value (%lu) >= iteration (%lu) " + "(last = %lu, prev_last = %lu)", + page, val, iteration, dirty_ring_last_page, + dirty_ring_prev_iteration_last_page); } } + + pr_info("Iteration %2ld: dirty: %-6lu clean: %-6lu writes: %-6lu\n", + iteration, nr_dirty_pages, nr_clean_pages, nr_writes); + + host_dirty_count += nr_dirty_pages; + host_clear_count += nr_clean_pages; } static struct kvm_vm *create_vm(enum vm_guest_mode mode, struct kvm_vcpu **vcpu, @@ -688,7 +599,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct test_params *p = arg; struct kvm_vcpu *vcpu; struct kvm_vm *vm; - unsigned long *bmap; + unsigned long *bmap[2]; uint32_t ring_buf_idx = 0; int sem_val; @@ -731,12 +642,21 @@ static void run_test(enum vm_guest_mode mode, void *arg) #ifdef __s390x__ /* Align to 1M (segment size) */ guest_test_phys_mem = align_down(guest_test_phys_mem, 1 << 20); + + /* + * The workaround in guest_code() to write all pages prior to the first + * iteration isn't compatible with the dirty ring, as the dirty ring + * support relies on the vCPU to actually stop when vcpu_stop is set so + * that the vCPU doesn't hang waiting for the dirty ring to be emptied. + */ + TEST_ASSERT(host_log_mode != LOG_MODE_DIRTY_RING, + "Test needs to be updated to support s390 dirty ring"); #endif pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); - bmap = bitmap_zalloc(host_num_pages); - host_bmap_track = bitmap_zalloc(host_num_pages); + bmap[0] = bitmap_zalloc(host_num_pages); + bmap[1] = bitmap_zalloc(host_num_pages); /* Add an extra memory slot for testing dirty logging */ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, @@ -757,14 +677,9 @@ static void run_test(enum vm_guest_mode mode, void *arg) sync_global_to_guest(vm, guest_test_virt_mem); sync_global_to_guest(vm, guest_num_pages); - /* Start the iterations */ - iteration = 1; - sync_global_to_guest(vm, iteration); - WRITE_ONCE(host_quit, false); host_dirty_count = 0; host_clear_count = 0; - host_track_next_count = 0; - WRITE_ONCE(dirty_ring_vcpu_ring_full, false); + WRITE_ONCE(host_quit, false); /* * Ensure the previous iteration didn't leave a dangling semaphore, i.e. @@ -776,21 +691,95 @@ static void run_test(enum vm_guest_mode mode, void *arg) sem_getvalue(&sem_vcpu_cont, &sem_val); TEST_ASSERT_EQ(sem_val, 0); + TEST_ASSERT_EQ(vcpu_stop, false); + pthread_create(&vcpu_thread, NULL, vcpu_worker, vcpu); - while (iteration < p->iterations) { - /* Give the vcpu thread some time to dirty some pages */ - usleep(p->interval * 1000); - log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX, - bmap, host_num_pages, - &ring_buf_idx); + for (iteration = 1; iteration <= p->iterations; iteration++) { + unsigned long i; + + sync_global_to_guest(vm, iteration); + + WRITE_ONCE(nr_writes, 0); + sync_global_to_guest(vm, nr_writes); + + dirty_ring_prev_iteration_last_page = dirty_ring_last_page; + WRITE_ONCE(dirty_ring_vcpu_ring_full, false); + + sem_post(&sem_vcpu_cont); + + /* + * Let the vCPU run beyond the configured interval until it has + * performed the minimum number of writes. This verifies the + * guest is making forward progress, e.g. isn't stuck because + * of a KVM bug, and puts a firm floor on test coverage. + */ + for (i = 0; i < p->interval || nr_writes < TEST_MIN_WRITES_PER_ITERATION; i++) { + /* + * Sleep in 1ms chunks to keep the interval math simple + * and so that the test doesn't run too far beyond the + * specified interval. + */ + usleep(1000); + + sync_global_from_guest(vm, nr_writes); + + /* + * Reap dirty pages while the guest is running so that + * dirty ring full events are resolved, i.e. so that a + * larger interval doesn't always end up with a vCPU + * that's effectively blocked. Collecting while the + * guest is running also verifies KVM doesn't lose any + * state. + * + * For bitmap modes, KVM overwrites the entire bitmap, + * i.e. collecting the bitmaps is destructive. Collect + * the bitmap only on the first pass, otherwise this + * test would lose track of dirty pages. + */ + if (i && host_log_mode != LOG_MODE_DIRTY_RING) + continue; + + /* + * For the dirty ring, empty the ring on subsequent + * passes only if the ring was filled at least once, + * to verify KVM's handling of a full ring (emptying + * the ring on every pass would make it unlikely the + * vCPU would ever fill the fing). + */ + if (i && !READ_ONCE(dirty_ring_vcpu_ring_full)) + continue; + + log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX, + bmap[0], host_num_pages, + &ring_buf_idx); + } + + /* + * Stop the vCPU prior to collecting and verifying the dirty + * log. If the vCPU is allowed to run during collection, then + * pages that are written during this iteration may be missed, + * i.e. collected in the next iteration. And if the vCPU is + * writing memory during verification, pages that this thread + * sees as clean may be written with this iteration's value. + */ + WRITE_ONCE(vcpu_stop, true); + sync_global_to_guest(vm, vcpu_stop); + sem_wait(&sem_vcpu_stop); /* - * See vcpu_sync_stop_requested definition for details on why - * we need to stop vcpu when verify data. + * Clear vcpu_stop after the vCPU thread has acknowledge the + * stop request and is waiting, i.e. is definitely not running! */ - atomic_set(&vcpu_sync_stop_requested, true); - sem_wait_until(&sem_vcpu_stop); + WRITE_ONCE(vcpu_stop, false); + sync_global_to_guest(vm, vcpu_stop); + + /* + * Sync the number of writes performed before verification, the + * info will be printed along with the dirty/clean page counts. + */ + sync_global_from_guest(vm, nr_writes); + /* * NOTE: for dirty ring, it's possible that we didn't stop at * GUEST_SYNC but instead we stopped because ring is full; @@ -798,32 +787,22 @@ static void run_test(enum vm_guest_mode mode, void *arg) * the flush of the last page, and since we handle the last * page specially verification will succeed anyway. */ - assert(host_log_mode == LOG_MODE_DIRTY_RING || - atomic_read(&vcpu_sync_stop_requested) == false); + log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX, + bmap[1], host_num_pages, + &ring_buf_idx); vm_dirty_log_verify(mode, bmap); - - /* - * Set host_quit before sem_vcpu_cont in the final iteration to - * ensure that the vCPU worker doesn't resume the guest. As - * above, the dirty ring test may stop and wait even when not - * explicitly request to do so, i.e. would hang waiting for a - * "continue" if it's allowed to resume the guest. - */ - if (++iteration == p->iterations) - WRITE_ONCE(host_quit, true); - - sem_post(&sem_vcpu_cont); - sync_global_to_guest(vm, iteration); } + WRITE_ONCE(host_quit, true); + sem_post(&sem_vcpu_cont); + pthread_join(vcpu_thread, NULL); - pr_info("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), " - "track_next (%"PRIu64")\n", host_dirty_count, host_clear_count, - host_track_next_count); + pr_info("Total bits checked: dirty (%lu), clear (%lu)\n", + host_dirty_count, host_clear_count); - free(bmap); - free(host_bmap_track); + free(bmap[0]); + free(bmap[1]); kvm_vm_free(vm); } @@ -857,7 +836,6 @@ int main(int argc, char *argv[]) .interval = TEST_HOST_LOOP_INTERVAL, }; int opt, i; - sigset_t sigset; sem_init(&sem_vcpu_stop, 0, 0); sem_init(&sem_vcpu_cont, 0, 0); @@ -908,19 +886,12 @@ int main(int argc, char *argv[]) } } - TEST_ASSERT(p.iterations > 2, "Iterations must be greater than two"); + TEST_ASSERT(p.iterations > 0, "Iterations must be greater than zero"); TEST_ASSERT(p.interval > 0, "Interval must be greater than zero"); pr_info("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n", p.iterations, p.interval); - srandom(time(0)); - - /* Ensure that vCPU threads start with SIG_IPI blocked. */ - sigemptyset(&sigset); - sigaddset(&sigset, SIG_IPI); - pthread_sigmask(SIG_BLOCK, &sigset, NULL); - if (host_log_mode_option == LOG_MODE_ALL) { /* Run each log mode */ for (i = 0; i < LOG_MODE_NUM; i++) { diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 4c4e5a847f67..373912464fb4 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -46,6 +46,12 @@ struct userspace_mem_region { struct hlist_node slot_node; }; +struct kvm_binary_stats { + int fd; + struct kvm_stats_header header; + struct kvm_stats_desc *desc; +}; + struct kvm_vcpu { struct list_head list; uint32_t id; @@ -55,6 +61,7 @@ struct kvm_vcpu { #ifdef __x86_64__ struct kvm_cpuid2 *cpuid; #endif + struct kvm_binary_stats stats; struct kvm_dirty_gfn *dirty_gfns; uint32_t fetch_index; uint32_t dirty_gfns_count; @@ -99,10 +106,7 @@ struct kvm_vm { struct kvm_vm_arch arch; - /* Cache of information for binary stats interface */ - int stats_fd; - struct kvm_stats_header stats_header; - struct kvm_stats_desc *stats_desc; + struct kvm_binary_stats stats; /* * KVM region slots. These are the default memslots used by page @@ -531,16 +535,19 @@ void read_stat_data(int stats_fd, struct kvm_stats_header *header, struct kvm_stats_desc *desc, uint64_t *data, size_t max_elements); -void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data, - size_t max_elements); +void kvm_get_stat(struct kvm_binary_stats *stats, const char *name, + uint64_t *data, size_t max_elements); -static inline uint64_t vm_get_stat(struct kvm_vm *vm, const char *stat_name) -{ - uint64_t data; +#define __get_stat(stats, stat) \ +({ \ + uint64_t data; \ + \ + kvm_get_stat(stats, #stat, &data, 1); \ + data; \ +}) - __vm_get_stat(vm, stat_name, &data, 1); - return data; -} +#define vm_get_stat(vm, stat) __get_stat(&(vm)->stats, stat) +#define vcpu_get_stat(vcpu, stat) __get_stat(&(vcpu)->stats, stat) void vm_create_irqchip(struct kvm_vm *vm); @@ -963,6 +970,8 @@ static inline struct kvm_vm *vm_create_shape_with_one_vcpu(struct vm_shape shape struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm); +void kvm_set_files_rlimit(uint32_t nr_vcpus); + void kvm_pin_this_task_to_pcpu(uint32_t pcpu); void kvm_print_vcpu_pinning_help(void); void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index 3e473058849f..77d13d7920cb 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -22,7 +22,7 @@ #define msecs_to_usecs(msec) ((msec) * 1000ULL) -static inline int _no_printf(const char *format, ...) { return 0; } +static inline __printf(1, 2) int _no_printf(const char *format, ...) { return 0; } #ifdef DEBUG #define pr_debug(...) printf(__VA_ARGS__) diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h index d60da8966772..32ab6ca7ec32 100644 --- a/tools/testing/selftests/kvm/include/x86/processor.h +++ b/tools/testing/selftests/kvm/include/x86/processor.h @@ -183,6 +183,9 @@ struct kvm_x86_cpu_feature { * Extended Leafs, a.k.a. AMD defined */ #define X86_FEATURE_SVM KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 2) +#define X86_FEATURE_PERFCTR_CORE KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 23) +#define X86_FEATURE_PERFCTR_NB KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 24) +#define X86_FEATURE_PERFCTR_LLC KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 28) #define X86_FEATURE_NX KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 20) #define X86_FEATURE_GBPAGES KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 26) #define X86_FEATURE_RDTSCP KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 27) @@ -197,8 +200,11 @@ struct kvm_x86_cpu_feature { #define X86_FEATURE_PAUSEFILTER KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 10) #define X86_FEATURE_PFTHRESHOLD KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 12) #define X86_FEATURE_VGIF KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 16) +#define X86_FEATURE_IDLE_HLT KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 30) #define X86_FEATURE_SEV KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 1) #define X86_FEATURE_SEV_ES KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 3) +#define X86_FEATURE_PERFMON_V2 KVM_X86_CPU_FEATURE(0x80000022, 0, EAX, 0) +#define X86_FEATURE_LBR_PMC_FREEZE KVM_X86_CPU_FEATURE(0x80000022, 0, EAX, 2) /* * KVM defined paravirt features. @@ -285,6 +291,8 @@ struct kvm_x86_cpu_property { #define X86_PROPERTY_GUEST_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 16, 23) #define X86_PROPERTY_SEV_C_BIT KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 0, 5) #define X86_PROPERTY_PHYS_ADDR_REDUCTION KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 6, 11) +#define X86_PROPERTY_NR_PERFCTR_CORE KVM_X86_CPU_PROPERTY(0x80000022, 0, EBX, 0, 3) +#define X86_PROPERTY_NR_PERFCTR_NB KVM_X86_CPU_PROPERTY(0x80000022, 0, EBX, 10, 15) #define X86_PROPERTY_MAX_CENTAUR_LEAF KVM_X86_CPU_PROPERTY(0xC0000000, 0, EAX, 0, 31) @@ -1244,7 +1252,7 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector, uint64_t ign_error_code; \ uint8_t vector; \ \ - asm volatile(KVM_ASM_SAFE(insn) \ + asm volatile(KVM_ASM_SAFE_FEP(insn) \ : KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code) \ : inputs \ : KVM_ASM_SAFE_CLOBBERS); \ @@ -1339,6 +1347,46 @@ static inline void kvm_hypercall_map_gpa_range(uint64_t gpa, uint64_t size, GUEST_ASSERT(!ret); } +/* + * Execute HLT in an STI interrupt shadow to ensure that a pending IRQ that's + * intended to be a wake event arrives *after* HLT is executed. Modern CPUs, + * except for a few oddballs that KVM is unlikely to run on, block IRQs for one + * instruction after STI, *if* RFLAGS.IF=0 before STI. Note, Intel CPUs may + * block other events beyond regular IRQs, e.g. may block NMIs and SMIs too. + */ +static inline void safe_halt(void) +{ + asm volatile("sti; hlt"); +} + +/* + * Enable interrupts and ensure that interrupts are evaluated upon return from + * this function, i.e. execute a nop to consume the STi interrupt shadow. + */ +static inline void sti_nop(void) +{ + asm volatile ("sti; nop"); +} + +/* + * Enable interrupts for one instruction (nop), to allow the CPU to process all + * interrupts that are already pending. + */ +static inline void sti_nop_cli(void) +{ + asm volatile ("sti; nop; cli"); +} + +static inline void sti(void) +{ + asm volatile("sti"); +} + +static inline void cli(void) +{ + asm volatile ("cli"); +} + void __vm_xsave_require_permission(uint64_t xfeature, const char *name); #define vm_xsave_require_permission(xfeature) \ diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c index c78f34699f73..c5310736ed06 100644 --- a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c +++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c @@ -10,7 +10,6 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> -#include <sys/resource.h> #include "test_util.h" @@ -39,36 +38,11 @@ int main(int argc, char *argv[]) { int kvm_max_vcpu_id = kvm_check_cap(KVM_CAP_MAX_VCPU_ID); int kvm_max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); - /* - * Number of file descriptors reqired, KVM_CAP_MAX_VCPUS for vCPU fds + - * an arbitrary number for everything else. - */ - int nr_fds_wanted = kvm_max_vcpus + 100; - struct rlimit rl; pr_info("KVM_CAP_MAX_VCPU_ID: %d\n", kvm_max_vcpu_id); pr_info("KVM_CAP_MAX_VCPUS: %d\n", kvm_max_vcpus); - /* - * Check that we're allowed to open nr_fds_wanted file descriptors and - * try raising the limits if needed. - */ - TEST_ASSERT(!getrlimit(RLIMIT_NOFILE, &rl), "getrlimit() failed!"); - - if (rl.rlim_cur < nr_fds_wanted) { - rl.rlim_cur = nr_fds_wanted; - if (rl.rlim_max < nr_fds_wanted) { - int old_rlim_max = rl.rlim_max; - rl.rlim_max = nr_fds_wanted; - - int r = setrlimit(RLIMIT_NOFILE, &rl); - __TEST_REQUIRE(r >= 0, - "RLIMIT_NOFILE hard limit is too low (%d, wanted %d)", - old_rlim_max, nr_fds_wanted); - } else { - TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!"); - } - } + kvm_set_files_rlimit(kvm_max_vcpus); /* * Upstream KVM prior to 4.8 does not support KVM_CAP_MAX_VCPU_ID. diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 33fefeb3ca44..279ad8946040 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -12,6 +12,7 @@ #include <assert.h> #include <sched.h> #include <sys/mman.h> +#include <sys/resource.h> #include <sys/types.h> #include <sys/stat.h> #include <unistd.h> @@ -196,6 +197,11 @@ static void vm_open(struct kvm_vm *vm) vm->fd = __kvm_ioctl(vm->kvm_fd, KVM_CREATE_VM, (void *)vm->type); TEST_ASSERT(vm->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm->fd)); + + if (kvm_has_cap(KVM_CAP_BINARY_STATS_FD)) + vm->stats.fd = vm_get_stats_fd(vm); + else + vm->stats.fd = -1; } const char *vm_guest_mode_string(uint32_t i) @@ -406,6 +412,38 @@ static uint64_t vm_nr_pages_required(enum vm_guest_mode mode, return vm_adjust_num_guest_pages(mode, nr_pages); } +void kvm_set_files_rlimit(uint32_t nr_vcpus) +{ + /* + * Each vCPU will open two file descriptors: the vCPU itself and the + * vCPU's binary stats file descriptor. Add an arbitrary amount of + * buffer for all other files a test may open. + */ + int nr_fds_wanted = nr_vcpus * 2 + 100; + struct rlimit rl; + + /* + * Check that we're allowed to open nr_fds_wanted file descriptors and + * try raising the limits if needed. + */ + TEST_ASSERT(!getrlimit(RLIMIT_NOFILE, &rl), "getrlimit() failed!"); + + if (rl.rlim_cur < nr_fds_wanted) { + rl.rlim_cur = nr_fds_wanted; + if (rl.rlim_max < nr_fds_wanted) { + int old_rlim_max = rl.rlim_max; + + rl.rlim_max = nr_fds_wanted; + __TEST_REQUIRE(setrlimit(RLIMIT_NOFILE, &rl) >= 0, + "RLIMIT_NOFILE hard limit is too low (%d, wanted %d)", + old_rlim_max, nr_fds_wanted); + } else { + TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!"); + } + } + +} + struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, uint64_t nr_extra_pages) { @@ -415,6 +453,8 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, struct kvm_vm *vm; int i; + kvm_set_files_rlimit(nr_runnable_vcpus); + pr_debug("%s: mode='%s' type='%d', pages='%ld'\n", __func__, vm_guest_mode_string(shape.mode), shape.type, nr_pages); @@ -657,6 +697,23 @@ userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end) return NULL; } +static void kvm_stats_release(struct kvm_binary_stats *stats) +{ + int ret; + + if (stats->fd < 0) + return; + + if (stats->desc) { + free(stats->desc); + stats->desc = NULL; + } + + ret = close(stats->fd); + TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); + stats->fd = -1; +} + __weak void vcpu_arch_free(struct kvm_vcpu *vcpu) { @@ -690,6 +747,8 @@ static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu) ret = close(vcpu->fd); TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); + kvm_stats_release(&vcpu->stats); + list_del(&vcpu->list); vcpu_arch_free(vcpu); @@ -709,6 +768,9 @@ void kvm_vm_release(struct kvm_vm *vmp) ret = close(vmp->kvm_fd); TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); + + /* Free cached stats metadata and close FD */ + kvm_stats_release(&vmp->stats); } static void __vm_mem_region_delete(struct kvm_vm *vm, @@ -748,12 +810,6 @@ void kvm_vm_free(struct kvm_vm *vmp) if (vmp == NULL) return; - /* Free cached stats metadata and close FD */ - if (vmp->stats_fd) { - free(vmp->stats_desc); - close(vmp->stats_fd); - } - /* Free userspace_mem_regions. */ hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node) __vm_mem_region_delete(vmp, region); @@ -1286,6 +1342,11 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) TEST_ASSERT(vcpu->run != MAP_FAILED, __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED)); + if (kvm_has_cap(KVM_CAP_BINARY_STATS_FD)) + vcpu->stats.fd = vcpu_get_stats_fd(vcpu); + else + vcpu->stats.fd = -1; + /* Add to linked-list of VCPUs. */ list_add(&vcpu->list, &vm->vcpus); @@ -2198,46 +2259,31 @@ void read_stat_data(int stats_fd, struct kvm_stats_header *header, desc->name, size, ret); } -/* - * Read the data of the named stat - * - * Input Args: - * vm - the VM for which the stat should be read - * stat_name - the name of the stat to read - * max_elements - the maximum number of 8-byte values to read into data - * - * Output Args: - * data - the buffer into which stat data should be read - * - * Read the data values of a specified stat from the binary stats interface. - */ -void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data, - size_t max_elements) +void kvm_get_stat(struct kvm_binary_stats *stats, const char *name, + uint64_t *data, size_t max_elements) { struct kvm_stats_desc *desc; size_t size_desc; int i; - if (!vm->stats_fd) { - vm->stats_fd = vm_get_stats_fd(vm); - read_stats_header(vm->stats_fd, &vm->stats_header); - vm->stats_desc = read_stats_descriptors(vm->stats_fd, - &vm->stats_header); + if (!stats->desc) { + read_stats_header(stats->fd, &stats->header); + stats->desc = read_stats_descriptors(stats->fd, &stats->header); } - size_desc = get_stats_descriptor_size(&vm->stats_header); + size_desc = get_stats_descriptor_size(&stats->header); - for (i = 0; i < vm->stats_header.num_desc; ++i) { - desc = (void *)vm->stats_desc + (i * size_desc); + for (i = 0; i < stats->header.num_desc; ++i) { + desc = (void *)stats->desc + (i * size_desc); - if (strcmp(desc->name, stat_name)) + if (strcmp(desc->name, name)) continue; - read_stat_data(vm->stats_fd, &vm->stats_header, desc, - data, max_elements); - - break; + read_stat_data(stats->fd, &stats->header, desc, data, max_elements); + return; } + + TEST_FAIL("Unable to find stat '%s'", name); } __weak void kvm_arch_vm_post_create(struct kvm_vm *vm) diff --git a/tools/testing/selftests/kvm/lib/userfaultfd_util.c b/tools/testing/selftests/kvm/lib/userfaultfd_util.c index 7c9de8414462..5bde176cedd5 100644 --- a/tools/testing/selftests/kvm/lib/userfaultfd_util.c +++ b/tools/testing/selftests/kvm/lib/userfaultfd_util.c @@ -114,7 +114,7 @@ struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n", is_minor ? "MINOR" : "MISSING", - is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY"); + is_minor ? "UFFDIO_CONTINUE" : "UFFDIO_COPY"); uffd_desc = malloc(sizeof(struct uffd_desc)); TEST_ASSERT(uffd_desc, "Failed to malloc uffd descriptor"); diff --git a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c index f45c0ecc902d..03406de4989d 100644 --- a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c +++ b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c @@ -39,7 +39,13 @@ static bool illegal_handler_invoked; #define SBI_PMU_TEST_SNAPSHOT BIT(2) #define SBI_PMU_TEST_OVERFLOW BIT(3) -static int disabled_tests; +#define SBI_PMU_OVERFLOW_IRQNUM_DEFAULT 5 +struct test_args { + int disabled_tests; + int overflow_irqnum; +}; + +static struct test_args targs; unsigned long pmu_csr_read_num(int csr_num) { @@ -118,8 +124,8 @@ static void stop_counter(unsigned long counter, unsigned long stop_flags) ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, counter, 1, stop_flags, 0, 0, 0); - __GUEST_ASSERT(ret.error == 0, "Unable to stop counter %ld error %ld\n", - counter, ret.error); + __GUEST_ASSERT(ret.error == 0 || ret.error == SBI_ERR_ALREADY_STOPPED, + "Unable to stop counter %ld error %ld\n", counter, ret.error); } static void guest_illegal_exception_handler(struct ex_regs *regs) @@ -137,7 +143,6 @@ static void guest_irq_handler(struct ex_regs *regs) unsigned int irq_num = regs->cause & ~CAUSE_IRQ_FLAG; struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva; unsigned long overflown_mask; - unsigned long counter_val = 0; /* Validate that we are in the correct irq handler */ GUEST_ASSERT_EQ(irq_num, IRQ_PMU_OVF); @@ -151,10 +156,6 @@ static void guest_irq_handler(struct ex_regs *regs) GUEST_ASSERT(overflown_mask & 0x01); WRITE_ONCE(vcpu_shared_irq_count, vcpu_shared_irq_count+1); - - counter_val = READ_ONCE(snapshot_data->ctr_values[0]); - /* Now start the counter to mimick the real driver behavior */ - start_counter(counter_in_use, SBI_PMU_START_FLAG_SET_INIT_VALUE, counter_val); } static unsigned long get_counter_index(unsigned long cbase, unsigned long cmask, @@ -479,7 +480,7 @@ static void test_pmu_events_snaphost(void) static void test_pmu_events_overflow(void) { - int num_counters = 0; + int num_counters = 0, i = 0; /* Verify presence of SBI PMU and minimum requrired SBI version */ verify_sbi_requirement_assert(); @@ -496,11 +497,15 @@ static void test_pmu_events_overflow(void) * Qemu supports overflow for cycle/instruction. * This test may fail on any platform that do not support overflow for these two events. */ - test_pmu_event_overflow(SBI_PMU_HW_CPU_CYCLES); - GUEST_ASSERT_EQ(vcpu_shared_irq_count, 1); + for (i = 0; i < targs.overflow_irqnum; i++) + test_pmu_event_overflow(SBI_PMU_HW_CPU_CYCLES); + GUEST_ASSERT_EQ(vcpu_shared_irq_count, targs.overflow_irqnum); + + vcpu_shared_irq_count = 0; - test_pmu_event_overflow(SBI_PMU_HW_INSTRUCTIONS); - GUEST_ASSERT_EQ(vcpu_shared_irq_count, 2); + for (i = 0; i < targs.overflow_irqnum; i++) + test_pmu_event_overflow(SBI_PMU_HW_INSTRUCTIONS); + GUEST_ASSERT_EQ(vcpu_shared_irq_count, targs.overflow_irqnum); GUEST_DONE(); } @@ -609,7 +614,11 @@ static void test_vm_events_overflow(void *guest_code) vcpu_init_vector_tables(vcpu); /* Initialize guest timer frequency. */ timer_freq = vcpu_get_reg(vcpu, RISCV_TIMER_REG(frequency)); + + /* Export the shared variables to the guest */ sync_global_to_guest(vm, timer_freq); + sync_global_to_guest(vm, vcpu_shared_irq_count); + sync_global_to_guest(vm, targs); run_vcpu(vcpu); @@ -618,28 +627,38 @@ static void test_vm_events_overflow(void *guest_code) static void test_print_help(char *name) { - pr_info("Usage: %s [-h] [-d <test name>]\n", name); - pr_info("\t-d: Test to disable. Available tests are 'basic', 'events', 'snapshot', 'overflow'\n"); + pr_info("Usage: %s [-h] [-t <test name>] [-n <number of LCOFI interrupt for overflow test>]\n", + name); + pr_info("\t-t: Test to run (default all). Available tests are 'basic', 'events', 'snapshot', 'overflow'\n"); + pr_info("\t-n: Number of LCOFI interrupt to trigger for each event in overflow test (default: %d)\n", + SBI_PMU_OVERFLOW_IRQNUM_DEFAULT); pr_info("\t-h: print this help screen\n"); } static bool parse_args(int argc, char *argv[]) { int opt; + int temp_disabled_tests = SBI_PMU_TEST_BASIC | SBI_PMU_TEST_EVENTS | SBI_PMU_TEST_SNAPSHOT | + SBI_PMU_TEST_OVERFLOW; + int overflow_interrupts = 0; - while ((opt = getopt(argc, argv, "hd:")) != -1) { + while ((opt = getopt(argc, argv, "ht:n:")) != -1) { switch (opt) { - case 'd': + case 't': if (!strncmp("basic", optarg, 5)) - disabled_tests |= SBI_PMU_TEST_BASIC; + temp_disabled_tests &= ~SBI_PMU_TEST_BASIC; else if (!strncmp("events", optarg, 6)) - disabled_tests |= SBI_PMU_TEST_EVENTS; + temp_disabled_tests &= ~SBI_PMU_TEST_EVENTS; else if (!strncmp("snapshot", optarg, 8)) - disabled_tests |= SBI_PMU_TEST_SNAPSHOT; + temp_disabled_tests &= ~SBI_PMU_TEST_SNAPSHOT; else if (!strncmp("overflow", optarg, 8)) - disabled_tests |= SBI_PMU_TEST_OVERFLOW; + temp_disabled_tests &= ~SBI_PMU_TEST_OVERFLOW; else goto done; + targs.disabled_tests = temp_disabled_tests; + break; + case 'n': + overflow_interrupts = atoi_positive("Number of LCOFI", optarg); break; case 'h': default: @@ -647,6 +666,15 @@ static bool parse_args(int argc, char *argv[]) } } + if (overflow_interrupts > 0) { + if (targs.disabled_tests & SBI_PMU_TEST_OVERFLOW) { + pr_info("-n option is only available for overflow test\n"); + goto done; + } else { + targs.overflow_irqnum = overflow_interrupts; + } + } + return true; done: test_print_help(argv[0]); @@ -655,25 +683,28 @@ done: int main(int argc, char *argv[]) { + targs.disabled_tests = 0; + targs.overflow_irqnum = SBI_PMU_OVERFLOW_IRQNUM_DEFAULT; + if (!parse_args(argc, argv)) exit(KSFT_SKIP); - if (!(disabled_tests & SBI_PMU_TEST_BASIC)) { + if (!(targs.disabled_tests & SBI_PMU_TEST_BASIC)) { test_vm_basic_test(test_pmu_basic_sanity); pr_info("SBI PMU basic test : PASS\n"); } - if (!(disabled_tests & SBI_PMU_TEST_EVENTS)) { + if (!(targs.disabled_tests & SBI_PMU_TEST_EVENTS)) { test_vm_events_test(test_pmu_events); pr_info("SBI PMU event verification test : PASS\n"); } - if (!(disabled_tests & SBI_PMU_TEST_SNAPSHOT)) { + if (!(targs.disabled_tests & SBI_PMU_TEST_SNAPSHOT)) { test_vm_events_snapshot_test(test_pmu_events_snaphost); pr_info("SBI PMU event verification with snapshot test : PASS\n"); } - if (!(disabled_tests & SBI_PMU_TEST_OVERFLOW)) { + if (!(targs.disabled_tests & SBI_PMU_TEST_OVERFLOW)) { test_vm_events_overflow(test_pmu_events_overflow); pr_info("SBI PMU event verification with overflow test : PASS\n"); } diff --git a/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c b/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c index 2929c067c207..b0d2b04a7ff2 100644 --- a/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c +++ b/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c @@ -41,9 +41,9 @@ struct kvm_page_stats { static void get_page_stats(struct kvm_vm *vm, struct kvm_page_stats *stats, const char *stage) { - stats->pages_4k = vm_get_stat(vm, "pages_4k"); - stats->pages_2m = vm_get_stat(vm, "pages_2m"); - stats->pages_1g = vm_get_stat(vm, "pages_1g"); + stats->pages_4k = vm_get_stat(vm, pages_4k); + stats->pages_2m = vm_get_stat(vm, pages_2m); + stats->pages_1g = vm_get_stat(vm, pages_1g); stats->hugepages = stats->pages_2m + stats->pages_1g; pr_debug("\nPage stats after %s: 4K: %ld 2M: %ld 1G: %ld huge: %ld\n", diff --git a/tools/testing/selftests/kvm/x86/hyperv_ipi.c b/tools/testing/selftests/kvm/x86/hyperv_ipi.c index 22c0c124582f..2b5b4bc6ef7e 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_ipi.c +++ b/tools/testing/selftests/kvm/x86/hyperv_ipi.c @@ -63,8 +63,10 @@ static void receiver_code(void *hcall_page, vm_vaddr_t pgs_gpa) /* Signal sender vCPU we're ready */ ipis_rcvd[vcpu_id] = (u64)-1; - for (;;) - asm volatile("sti; hlt; cli"); + for (;;) { + safe_halt(); + cli(); + } } static void guest_ipi_handler(struct ex_regs *regs) diff --git a/tools/testing/selftests/kvm/x86/nested_emulation_test.c b/tools/testing/selftests/kvm/x86/nested_emulation_test.c new file mode 100644 index 000000000000..abc824dba04f --- /dev/null +++ b/tools/testing/selftests/kvm/x86/nested_emulation_test.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" +#include "svm_util.h" + +enum { + SVM_F, + VMX_F, + NR_VIRTUALIZATION_FLAVORS, +}; + +struct emulated_instruction { + const char name[32]; + uint8_t opcode[15]; + uint32_t exit_reason[NR_VIRTUALIZATION_FLAVORS]; +}; + +static struct emulated_instruction instructions[] = { + { + .name = "pause", + .opcode = { 0xf3, 0x90 }, + .exit_reason = { SVM_EXIT_PAUSE, + EXIT_REASON_PAUSE_INSTRUCTION, } + }, + { + .name = "hlt", + .opcode = { 0xf4 }, + .exit_reason = { SVM_EXIT_HLT, + EXIT_REASON_HLT, } + }, +}; + +static uint8_t kvm_fep[] = { 0x0f, 0x0b, 0x6b, 0x76, 0x6d }; /* ud2 ; .ascii "kvm" */ +static uint8_t l2_guest_code[sizeof(kvm_fep) + 15]; +static uint8_t *l2_instruction = &l2_guest_code[sizeof(kvm_fep)]; + +static uint32_t get_instruction_length(struct emulated_instruction *insn) +{ + uint32_t i; + + for (i = 0; i < ARRAY_SIZE(insn->opcode) && insn->opcode[i]; i++) + ; + + return i; +} + +static void guest_code(void *test_data) +{ + int f = this_cpu_has(X86_FEATURE_SVM) ? SVM_F : VMX_F; + int i; + + memcpy(l2_guest_code, kvm_fep, sizeof(kvm_fep)); + + if (f == SVM_F) { + struct svm_test_data *svm = test_data; + struct vmcb *vmcb = svm->vmcb; + + generic_svm_setup(svm, NULL, NULL); + vmcb->save.idtr.limit = 0; + vmcb->save.rip = (u64)l2_guest_code; + + vmcb->control.intercept |= BIT_ULL(INTERCEPT_SHUTDOWN) | + BIT_ULL(INTERCEPT_PAUSE) | + BIT_ULL(INTERCEPT_HLT); + vmcb->control.intercept_exceptions = 0; + } else { + GUEST_ASSERT(prepare_for_vmx_operation(test_data)); + GUEST_ASSERT(load_vmcs(test_data)); + + prepare_vmcs(test_data, NULL, NULL); + GUEST_ASSERT(!vmwrite(GUEST_IDTR_LIMIT, 0)); + GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_guest_code)); + GUEST_ASSERT(!vmwrite(EXCEPTION_BITMAP, 0)); + + vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmreadz(CPU_BASED_VM_EXEC_CONTROL) | + CPU_BASED_PAUSE_EXITING | + CPU_BASED_HLT_EXITING); + } + + for (i = 0; i < ARRAY_SIZE(instructions); i++) { + struct emulated_instruction *insn = &instructions[i]; + uint32_t insn_len = get_instruction_length(insn); + uint32_t exit_insn_len; + u32 exit_reason; + + /* + * Copy the target instruction to the L2 code stream, and fill + * the remaining bytes with INT3s so that a missed intercept + * results in a consistent failure mode (SHUTDOWN). + */ + memcpy(l2_instruction, insn->opcode, insn_len); + memset(l2_instruction + insn_len, 0xcc, sizeof(insn->opcode) - insn_len); + + if (f == SVM_F) { + struct svm_test_data *svm = test_data; + struct vmcb *vmcb = svm->vmcb; + + run_guest(vmcb, svm->vmcb_gpa); + exit_reason = vmcb->control.exit_code; + exit_insn_len = vmcb->control.next_rip - vmcb->save.rip; + GUEST_ASSERT_EQ(vmcb->save.rip, (u64)l2_instruction); + } else { + GUEST_ASSERT_EQ(i ? vmresume() : vmlaunch(), 0); + exit_reason = vmreadz(VM_EXIT_REASON); + exit_insn_len = vmreadz(VM_EXIT_INSTRUCTION_LEN); + GUEST_ASSERT_EQ(vmreadz(GUEST_RIP), (u64)l2_instruction); + } + + __GUEST_ASSERT(exit_reason == insn->exit_reason[f], + "Wanted exit_reason '0x%x' for '%s', got '0x%x'", + insn->exit_reason[f], insn->name, exit_reason); + + __GUEST_ASSERT(exit_insn_len == insn_len, + "Wanted insn_len '%u' for '%s', got '%u'", + insn_len, insn->name, exit_insn_len); + } + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + vm_vaddr_t nested_test_data_gva; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + TEST_REQUIRE(is_forced_emulation_enabled); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX)); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + vm_enable_cap(vm, KVM_CAP_EXCEPTION_PAYLOAD, -2ul); + + if (kvm_cpu_has(X86_FEATURE_SVM)) + vcpu_alloc_svm(vm, &nested_test_data_gva); + else + vcpu_alloc_vmx(vm, &nested_test_data_gva); + + vcpu_args_set(vcpu, 1, nested_test_data_gva); + + vcpu_run(vcpu); + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); + + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c index e7efb2b35f8b..c0d84827f736 100644 --- a/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c +++ b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c @@ -73,7 +73,7 @@ static void check_2m_page_count(struct kvm_vm *vm, int expected_pages_2m) { int actual_pages_2m; - actual_pages_2m = vm_get_stat(vm, "pages_2m"); + actual_pages_2m = vm_get_stat(vm, pages_2m); TEST_ASSERT(actual_pages_2m == expected_pages_2m, "Unexpected 2m page count. Expected %d, got %d", @@ -84,7 +84,7 @@ static void check_split_count(struct kvm_vm *vm, int expected_splits) { int actual_splits; - actual_splits = vm_get_stat(vm, "nx_lpage_splits"); + actual_splits = vm_get_stat(vm, nx_lpage_splits); TEST_ASSERT(actual_splits == expected_splits, "Unexpected NX huge page split count. Expected %d, got %d", diff --git a/tools/testing/selftests/kvm/x86/pmu_counters_test.c b/tools/testing/selftests/kvm/x86/pmu_counters_test.c index 698cb36989db..8aaaf25b6111 100644 --- a/tools/testing/selftests/kvm/x86/pmu_counters_test.c +++ b/tools/testing/selftests/kvm/x86/pmu_counters_test.c @@ -17,7 +17,7 @@ * Number of instructions in each loop. 1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE, * 1 LOOP. */ -#define NUM_INSNS_PER_LOOP 3 +#define NUM_INSNS_PER_LOOP 4 /* * Number of "extra" instructions that will be counted, i.e. the number of @@ -29,10 +29,59 @@ /* Total number of instructions retired within the measured section. */ #define NUM_INSNS_RETIRED (NUM_LOOPS * NUM_INSNS_PER_LOOP + NUM_EXTRA_INSNS) +/* Track which architectural events are supported by hardware. */ +static uint32_t hardware_pmu_arch_events; static uint8_t kvm_pmu_version; static bool kvm_has_perf_caps; +#define X86_PMU_FEATURE_NULL \ +({ \ + struct kvm_x86_pmu_feature feature = {}; \ + \ + feature; \ +}) + +static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event) +{ + return !(*(u64 *)&event); +} + +struct kvm_intel_pmu_event { + struct kvm_x86_pmu_feature gp_event; + struct kvm_x86_pmu_feature fixed_event; +}; + +/* + * Wrap the array to appease the compiler, as the macros used to construct each + * kvm_x86_pmu_feature use syntax that's only valid in function scope, and the + * compiler often thinks the feature definitions aren't compile-time constants. + */ +static struct kvm_intel_pmu_event intel_event_to_feature(uint8_t idx) +{ + const struct kvm_intel_pmu_event __intel_event_to_feature[] = { + [INTEL_ARCH_CPU_CYCLES_INDEX] = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED }, + [INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX] = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED }, + /* + * Note, the fixed counter for reference cycles is NOT the same as the + * general purpose architectural event. The fixed counter explicitly + * counts at the same frequency as the TSC, whereas the GP event counts + * at a fixed, but uarch specific, frequency. Bundle them here for + * simplicity. + */ + [INTEL_ARCH_REFERENCE_CYCLES_INDEX] = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED }, + [INTEL_ARCH_LLC_REFERENCES_INDEX] = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_LLC_MISSES_INDEX] = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_BRANCHES_RETIRED_INDEX] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_TOPDOWN_SLOTS_INDEX] = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED }, + }; + + kvm_static_assert(ARRAY_SIZE(__intel_event_to_feature) == NR_INTEL_ARCH_EVENTS); + + return __intel_event_to_feature[idx]; +} + static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, void *guest_code, uint8_t pmu_version, @@ -42,6 +91,7 @@ static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, vm = vm_create_with_one_vcpu(vcpu, guest_code); sync_global_to_guest(vm, kvm_pmu_version); + sync_global_to_guest(vm, hardware_pmu_arch_events); /* * Set PERF_CAPABILITIES before PMU version as KVM disallows enabling @@ -98,14 +148,12 @@ static uint8_t guest_get_pmu_version(void) * Sanity check that in all cases, the event doesn't count when it's disabled, * and that KVM correctly emulates the write of an arbitrary value. */ -static void guest_assert_event_count(uint8_t idx, - struct kvm_x86_pmu_feature event, - uint32_t pmc, uint32_t pmc_msr) +static void guest_assert_event_count(uint8_t idx, uint32_t pmc, uint32_t pmc_msr) { uint64_t count; count = _rdpmc(pmc); - if (!this_pmu_has(event)) + if (!(hardware_pmu_arch_events & BIT(idx))) goto sanity_checks; switch (idx) { @@ -126,7 +174,9 @@ static void guest_assert_event_count(uint8_t idx, GUEST_ASSERT_NE(count, 0); break; case INTEL_ARCH_TOPDOWN_SLOTS_INDEX: - GUEST_ASSERT(count >= NUM_INSNS_RETIRED); + __GUEST_ASSERT(count >= NUM_INSNS_RETIRED, + "Expected top-down slots >= %u, got count = %lu", + NUM_INSNS_RETIRED, count); break; default: break; @@ -162,75 +212,42 @@ do { \ "1:\n\t" \ clflush "\n\t" \ "mfence\n\t" \ + "mov %[m], %%eax\n\t" \ FEP "loop 1b\n\t" \ FEP "mov %%edi, %%ecx\n\t" \ FEP "xor %%eax, %%eax\n\t" \ FEP "xor %%edx, %%edx\n\t" \ "wrmsr\n\t" \ :: "a"((uint32_t)_value), "d"(_value >> 32), \ - "c"(_msr), "D"(_msr) \ + "c"(_msr), "D"(_msr), [m]"m"(kvm_pmu_version) \ ); \ } while (0) -#define GUEST_TEST_EVENT(_idx, _event, _pmc, _pmc_msr, _ctrl_msr, _value, FEP) \ +#define GUEST_TEST_EVENT(_idx, _pmc, _pmc_msr, _ctrl_msr, _value, FEP) \ do { \ - wrmsr(pmc_msr, 0); \ + wrmsr(_pmc_msr, 0); \ \ if (this_cpu_has(X86_FEATURE_CLFLUSHOPT)) \ - GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt .", FEP); \ + GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt %[m]", FEP); \ else if (this_cpu_has(X86_FEATURE_CLFLUSH)) \ - GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush .", FEP); \ + GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush %[m]", FEP); \ else \ GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP); \ \ - guest_assert_event_count(_idx, _event, _pmc, _pmc_msr); \ + guest_assert_event_count(_idx, _pmc, _pmc_msr); \ } while (0) -static void __guest_test_arch_event(uint8_t idx, struct kvm_x86_pmu_feature event, - uint32_t pmc, uint32_t pmc_msr, +static void __guest_test_arch_event(uint8_t idx, uint32_t pmc, uint32_t pmc_msr, uint32_t ctrl_msr, uint64_t ctrl_msr_value) { - GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, ""); + GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, ""); if (is_forced_emulation_enabled) - GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP); -} - -#define X86_PMU_FEATURE_NULL \ -({ \ - struct kvm_x86_pmu_feature feature = {}; \ - \ - feature; \ -}) - -static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event) -{ - return !(*(u64 *)&event); + GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP); } static void guest_test_arch_event(uint8_t idx) { - const struct { - struct kvm_x86_pmu_feature gp_event; - struct kvm_x86_pmu_feature fixed_event; - } intel_event_to_feature[] = { - [INTEL_ARCH_CPU_CYCLES_INDEX] = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED }, - [INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX] = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED }, - /* - * Note, the fixed counter for reference cycles is NOT the same - * as the general purpose architectural event. The fixed counter - * explicitly counts at the same frequency as the TSC, whereas - * the GP event counts at a fixed, but uarch specific, frequency. - * Bundle them here for simplicity. - */ - [INTEL_ARCH_REFERENCE_CYCLES_INDEX] = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED }, - [INTEL_ARCH_LLC_REFERENCES_INDEX] = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL }, - [INTEL_ARCH_LLC_MISSES_INDEX] = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL }, - [INTEL_ARCH_BRANCHES_RETIRED_INDEX] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL }, - [INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL }, - [INTEL_ARCH_TOPDOWN_SLOTS_INDEX] = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED }, - }; - uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS); uint32_t pmu_version = guest_get_pmu_version(); /* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */ @@ -248,7 +265,7 @@ static void guest_test_arch_event(uint8_t idx) else base_pmc_msr = MSR_IA32_PERFCTR0; - gp_event = intel_event_to_feature[idx].gp_event; + gp_event = intel_event_to_feature(idx).gp_event; GUEST_ASSERT_EQ(idx, gp_event.f.bit); GUEST_ASSERT(nr_gp_counters); @@ -262,14 +279,14 @@ static void guest_test_arch_event(uint8_t idx) if (guest_has_perf_global_ctrl) wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i)); - __guest_test_arch_event(idx, gp_event, i, base_pmc_msr + i, + __guest_test_arch_event(idx, i, base_pmc_msr + i, MSR_P6_EVNTSEL0 + i, eventsel); } if (!guest_has_perf_global_ctrl) return; - fixed_event = intel_event_to_feature[idx].fixed_event; + fixed_event = intel_event_to_feature(idx).fixed_event; if (pmu_is_null_feature(fixed_event) || !this_pmu_has(fixed_event)) return; @@ -277,7 +294,7 @@ static void guest_test_arch_event(uint8_t idx) wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL)); - __guest_test_arch_event(idx, fixed_event, i | INTEL_RDPMC_FIXED, + __guest_test_arch_event(idx, i | INTEL_RDPMC_FIXED, MSR_CORE_PERF_FIXED_CTR0 + i, MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(i)); @@ -331,9 +348,9 @@ __GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector, \ expect_gp ? "#GP" : "no fault", msr, vector) \ #define GUEST_ASSERT_PMC_VALUE(insn, msr, val, expected) \ - __GUEST_ASSERT(val == expected_val, \ + __GUEST_ASSERT(val == expected, \ "Expected " #insn "(0x%x) to yield 0x%lx, got 0x%lx", \ - msr, expected_val, val); + msr, expected, val); static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success, uint64_t expected_val) @@ -545,7 +562,6 @@ static void test_fixed_counters(uint8_t pmu_version, uint64_t perf_capabilities, static void test_intel_counters(void) { - uint8_t nr_arch_events = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH); uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS); uint8_t nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS); uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION); @@ -567,18 +583,26 @@ static void test_intel_counters(void) /* * Detect the existence of events that aren't supported by selftests. - * This will (obviously) fail any time the kernel adds support for a - * new event, but it's worth paying that price to keep the test fresh. + * This will (obviously) fail any time hardware adds support for a new + * event, but it's worth paying that price to keep the test fresh. */ - TEST_ASSERT(nr_arch_events <= NR_INTEL_ARCH_EVENTS, + TEST_ASSERT(this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH) <= NR_INTEL_ARCH_EVENTS, "New architectural event(s) detected; please update this test (length = %u, mask = %x)", - nr_arch_events, kvm_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK)); + this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH), + this_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK)); /* - * Force iterating over known arch events regardless of whether or not - * KVM/hardware supports a given event. + * Iterate over known arch events irrespective of KVM/hardware support + * to verify that KVM doesn't reject programming of events just because + * the *architectural* encoding is unsupported. Track which events are + * supported in hardware; the guest side will validate supported events + * count correctly, even if *enumeration* of the event is unsupported + * by KVM and/or isn't exposed to the guest. */ - nr_arch_events = max_t(typeof(nr_arch_events), nr_arch_events, NR_INTEL_ARCH_EVENTS); + for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++) { + if (this_pmu_has(intel_event_to_feature(i).gp_event)) + hardware_pmu_arch_events |= BIT(i); + } for (v = 0; v <= max_pmu_version; v++) { for (i = 0; i < ARRAY_SIZE(perf_caps); i++) { @@ -594,8 +618,8 @@ static void test_intel_counters(void) * vector length. */ if (v == pmu_version) { - for (k = 1; k < (BIT(nr_arch_events) - 1); k++) - test_arch_events(v, perf_caps[i], nr_arch_events, k); + for (k = 1; k < (BIT(NR_INTEL_ARCH_EVENTS) - 1); k++) + test_arch_events(v, perf_caps[i], NR_INTEL_ARCH_EVENTS, k); } /* * Test single bits for all PMU version and lengths up @@ -604,11 +628,11 @@ static void test_intel_counters(void) * host length). Explicitly test a mask of '0' and all * ones i.e. all events being available and unavailable. */ - for (j = 0; j <= nr_arch_events + 1; j++) { + for (j = 0; j <= NR_INTEL_ARCH_EVENTS + 1; j++) { test_arch_events(v, perf_caps[i], j, 0); test_arch_events(v, perf_caps[i], j, 0xff); - for (k = 0; k < nr_arch_events; k++) + for (k = 0; k < NR_INTEL_ARCH_EVENTS; k++) test_arch_events(v, perf_caps[i], j, BIT(k)); } diff --git a/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c b/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c index 916e04248fbb..917b6066cfc1 100644 --- a/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c +++ b/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c @@ -42,10 +42,7 @@ static void l2_guest_code(struct svm_test_data *svm) x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT | INTR_IRQ_NUMBER); - __asm__ __volatile__( - "sti\n" - "nop\n" - ); + sti_nop(); GUEST_ASSERT(vintr_irq_called); GUEST_ASSERT(intr_irq_called); diff --git a/tools/testing/selftests/kvm/x86/ucna_injection_test.c b/tools/testing/selftests/kvm/x86/ucna_injection_test.c index 57f157c06b39..1e5e564523b3 100644 --- a/tools/testing/selftests/kvm/x86/ucna_injection_test.c +++ b/tools/testing/selftests/kvm/x86/ucna_injection_test.c @@ -86,7 +86,7 @@ static void ucna_injection_guest_code(void) wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN); /* Enables interrupt in guest. */ - asm volatile("sti"); + sti(); /* Let user space inject the first UCNA */ GUEST_SYNC(SYNC_FIRST_UCNA); diff --git a/tools/testing/selftests/kvm/x86/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c index a76078a08ff8..35cb9de54a82 100644 --- a/tools/testing/selftests/kvm/x86/xapic_ipi_test.c +++ b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c @@ -106,7 +106,8 @@ static void halter_guest_code(struct test_data_page *data) data->halter_tpr = xapic_read_reg(APIC_TASKPRI); data->halter_ppr = xapic_read_reg(APIC_PROCPRI); data->hlt_count++; - asm volatile("sti; hlt; cli"); + safe_halt(); + cli(); data->wake_count++; } } @@ -465,6 +466,19 @@ int main(int argc, char *argv[]) cancel_join_vcpu_thread(threads[0], params[0].vcpu); cancel_join_vcpu_thread(threads[1], params[1].vcpu); + /* + * If the host support Idle HLT, i.e. KVM *might* be using Idle HLT, + * then the number of HLT exits may be less than the number of HLTs + * that were executed, as Idle HLT elides the exit if the vCPU has an + * unmasked, pending IRQ (or NMI). + */ + if (this_cpu_has(X86_FEATURE_IDLE_HLT)) + TEST_ASSERT(data->hlt_count >= vcpu_get_stat(params[0].vcpu, halt_exits), + "HLT insns = %lu, HLT exits = %lu", + data->hlt_count, vcpu_get_stat(params[0].vcpu, halt_exits)); + else + TEST_ASSERT_EQ(data->hlt_count, vcpu_get_stat(params[0].vcpu, halt_exits)); + fprintf(stderr, "Test successful after running for %d seconds.\n" "Sending vCPU sent %lu IPIs to halting vCPU\n" diff --git a/tools/testing/selftests/kvm/x86/xapic_state_test.c b/tools/testing/selftests/kvm/x86/xapic_state_test.c index 88bcca188799..fdebff1165c7 100644 --- a/tools/testing/selftests/kvm/x86/xapic_state_test.c +++ b/tools/testing/selftests/kvm/x86/xapic_state_test.c @@ -18,7 +18,7 @@ struct xapic_vcpu { static void xapic_guest_code(void) { - asm volatile("cli"); + cli(); xapic_enable(); @@ -38,7 +38,7 @@ static void xapic_guest_code(void) static void x2apic_guest_code(void) { - asm volatile("cli"); + cli(); x2apic_enable(); diff --git a/tools/testing/selftests/kvm/x86/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c index a59b3c799bb2..287829f850f7 100644 --- a/tools/testing/selftests/kvm/x86/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c @@ -191,10 +191,7 @@ static void guest_code(void) struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR; int i; - __asm__ __volatile__( - "sti\n" - "nop\n" - ); + sti_nop(); /* Trigger an interrupt injection */ GUEST_SYNC(TEST_INJECT_VECTOR); diff --git a/tools/testing/selftests/landlock/.gitignore b/tools/testing/selftests/landlock/.gitignore index 335b2b1a3463..a820329cae0d 100644 --- a/tools/testing/selftests/landlock/.gitignore +++ b/tools/testing/selftests/landlock/.gitignore @@ -2,3 +2,4 @@ /sandbox-and-launch /true /wait-pipe +/wait-pipe-sandbox diff --git a/tools/testing/selftests/landlock/Makefile b/tools/testing/selftests/landlock/Makefile index 5cb0828f0514..a3f449914bf9 100644 --- a/tools/testing/selftests/landlock/Makefile +++ b/tools/testing/selftests/landlock/Makefile @@ -10,7 +10,11 @@ src_test := $(wildcard *_test.c) TEST_GEN_PROGS := $(src_test:.c=) -TEST_GEN_PROGS_EXTENDED := true sandbox-and-launch wait-pipe +TEST_GEN_PROGS_EXTENDED := \ + true \ + sandbox-and-launch \ + wait-pipe \ + wait-pipe-sandbox # Short targets: $(TEST_GEN_PROGS): LDLIBS += -lcap -lpthread diff --git a/tools/testing/selftests/landlock/audit.h b/tools/testing/selftests/landlock/audit.h new file mode 100644 index 000000000000..b9054086a0c9 --- /dev/null +++ b/tools/testing/selftests/landlock/audit.h @@ -0,0 +1,472 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Landlock audit helpers + * + * Copyright © 2024-2025 Microsoft Corporation + */ + +#define _GNU_SOURCE +#include <errno.h> +#include <linux/audit.h> +#include <linux/limits.h> +#include <linux/netlink.h> +#include <regex.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/socket.h> +#include <sys/time.h> +#include <unistd.h> + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) +#endif + +#ifndef __maybe_unused +#define __maybe_unused __attribute__((__unused__)) +#endif + +#define REGEX_LANDLOCK_PREFIX "^audit([0-9.:]\\+): domain=\\([0-9a-f]\\+\\)" + +struct audit_filter { + __u32 record_type; + size_t exe_len; + char exe[PATH_MAX]; +}; + +struct audit_message { + struct nlmsghdr header; + union { + struct audit_status status; + struct audit_features features; + struct audit_rule_data rule; + struct nlmsgerr err; + char data[PATH_MAX + 200]; + }; +}; + +static const struct timeval audit_tv_dom_drop = { + /* + * Because domain deallocation is tied to asynchronous credential + * freeing, receiving such event may take some time. In practice, + * on a small VM, it should not exceed 100k usec, but let's wait up + * to 1 second to be safe. + */ + .tv_sec = 1, +}; + +static const struct timeval audit_tv_default = { + .tv_usec = 1, +}; + +static int audit_send(const int fd, const struct audit_message *const msg) +{ + struct sockaddr_nl addr = { + .nl_family = AF_NETLINK, + }; + int ret; + + do { + ret = sendto(fd, msg, msg->header.nlmsg_len, 0, + (struct sockaddr *)&addr, sizeof(addr)); + } while (ret < 0 && errno == EINTR); + + if (ret < 0) + return -errno; + + if (ret != msg->header.nlmsg_len) + return -E2BIG; + + return 0; +} + +static int audit_recv(const int fd, struct audit_message *msg) +{ + struct sockaddr_nl addr; + socklen_t addrlen = sizeof(addr); + struct audit_message msg_tmp; + int err; + + if (!msg) + msg = &msg_tmp; + + do { + err = recvfrom(fd, msg, sizeof(*msg), 0, + (struct sockaddr *)&addr, &addrlen); + } while (err < 0 && errno == EINTR); + + if (err < 0) + return -errno; + + if (addrlen != sizeof(addr) || addr.nl_pid != 0) + return -EINVAL; + + /* Checks Netlink error or end of messages. */ + if (msg->header.nlmsg_type == NLMSG_ERROR) + return msg->err.error; + + return 0; +} + +static int audit_request(const int fd, + const struct audit_message *const request, + struct audit_message *reply) +{ + struct audit_message msg_tmp; + bool first_reply = true; + int err; + + err = audit_send(fd, request); + if (err) + return err; + + if (!reply) + reply = &msg_tmp; + + do { + if (first_reply) + first_reply = false; + else + reply = &msg_tmp; + + err = audit_recv(fd, reply); + if (err) + return err; + } while (reply->header.nlmsg_type != NLMSG_ERROR && + reply->err.msg.nlmsg_type != request->header.nlmsg_type); + + return reply->err.error; +} + +static int audit_filter_exe(const int audit_fd, + const struct audit_filter *const filter, + const __u16 type) +{ + struct audit_message msg = { + .header = { + .nlmsg_len = NLMSG_SPACE(sizeof(msg.rule)) + + NLMSG_ALIGN(filter->exe_len), + .nlmsg_type = type, + .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, + }, + .rule = { + .flags = AUDIT_FILTER_EXCLUDE, + .action = AUDIT_NEVER, + .field_count = 1, + .fields[0] = filter->record_type, + .fieldflags[0] = AUDIT_NOT_EQUAL, + .values[0] = filter->exe_len, + .buflen = filter->exe_len, + } + }; + + if (filter->record_type != AUDIT_EXE) + return -EINVAL; + + memcpy(msg.rule.buf, filter->exe, filter->exe_len); + return audit_request(audit_fd, &msg, NULL); +} + +static int audit_filter_drop(const int audit_fd, const __u16 type) +{ + struct audit_message msg = { + .header = { + .nlmsg_len = NLMSG_SPACE(sizeof(msg.rule)), + .nlmsg_type = type, + .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, + }, + .rule = { + .flags = AUDIT_FILTER_EXCLUDE, + .action = AUDIT_NEVER, + .field_count = 1, + .fields[0] = AUDIT_MSGTYPE, + .fieldflags[0] = AUDIT_NOT_EQUAL, + .values[0] = AUDIT_LANDLOCK_DOMAIN, + } + }; + + return audit_request(audit_fd, &msg, NULL); +} + +static int audit_set_status(int fd, __u32 key, __u32 val) +{ + const struct audit_message msg = { + .header = { + .nlmsg_len = NLMSG_SPACE(sizeof(msg.status)), + .nlmsg_type = AUDIT_SET, + .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, + }, + .status = { + .mask = key, + .enabled = key == AUDIT_STATUS_ENABLED ? val : 0, + .pid = key == AUDIT_STATUS_PID ? val : 0, + } + }; + + return audit_request(fd, &msg, NULL); +} + +/* Returns a pointer to the last filled character of @dst, which is `\0`. */ +static __maybe_unused char *regex_escape(const char *const src, char *dst, + size_t dst_size) +{ + char *d = dst; + + for (const char *s = src; *s; s++) { + switch (*s) { + case '$': + case '*': + case '.': + case '[': + case '\\': + case ']': + case '^': + if (d >= dst + dst_size - 2) + return (char *)-ENOMEM; + + *d++ = '\\'; + *d++ = *s; + break; + default: + if (d >= dst + dst_size - 1) + return (char *)-ENOMEM; + + *d++ = *s; + } + } + if (d >= dst + dst_size - 1) + return (char *)-ENOMEM; + + *d = '\0'; + return d; +} + +/* + * @domain_id: The domain ID extracted from the audit message (if the first part + * of @pattern is REGEX_LANDLOCK_PREFIX). It is set to 0 if the domain ID is + * not found. + */ +static int audit_match_record(int audit_fd, const __u16 type, + const char *const pattern, __u64 *domain_id) +{ + struct audit_message msg; + int ret, err = 0; + bool matches_record = !type; + regmatch_t matches[2]; + regex_t regex; + + ret = regcomp(®ex, pattern, 0); + if (ret) + return -EINVAL; + + do { + memset(&msg, 0, sizeof(msg)); + err = audit_recv(audit_fd, &msg); + if (err) + goto out; + + if (msg.header.nlmsg_type == type) + matches_record = true; + } while (!matches_record); + + ret = regexec(®ex, msg.data, ARRAY_SIZE(matches), matches, 0); + if (ret) { + printf("DATA: %s\n", msg.data); + printf("ERROR: no match for pattern: %s\n", pattern); + err = -ENOENT; + } + + if (domain_id) { + *domain_id = 0; + if (matches[1].rm_so != -1) { + int match_len = matches[1].rm_eo - matches[1].rm_so; + /* The maximal characters of a 2^64 hexadecimal number is 17. */ + char dom_id[18]; + + if (match_len > 0 && match_len < sizeof(dom_id)) { + memcpy(dom_id, msg.data + matches[1].rm_so, + match_len); + dom_id[match_len] = '\0'; + if (domain_id) + *domain_id = strtoull(dom_id, NULL, 16); + } + } + } + +out: + regfree(®ex); + return err; +} + +static int __maybe_unused matches_log_domain_allocated(int audit_fd, + __u64 *domain_id) +{ + return audit_match_record( + audit_fd, AUDIT_LANDLOCK_DOMAIN, + REGEX_LANDLOCK_PREFIX + " status=allocated mode=enforcing pid=[0-9]\\+ uid=[0-9]\\+" + " exe=\"[^\"]\\+\" comm=\".*_test\"$", + domain_id); +} + +static int __maybe_unused matches_log_domain_deallocated( + int audit_fd, unsigned int num_denials, __u64 *domain_id) +{ + static const char log_template[] = REGEX_LANDLOCK_PREFIX + " status=deallocated denials=%u$"; + char log_match[sizeof(log_template) + 10]; + int log_match_len; + + log_match_len = snprintf(log_match, sizeof(log_match), log_template, + num_denials); + if (log_match_len > sizeof(log_match)) + return -E2BIG; + + return audit_match_record(audit_fd, AUDIT_LANDLOCK_DOMAIN, log_match, + domain_id); +} + +struct audit_records { + size_t access; + size_t domain; +}; + +static int audit_count_records(int audit_fd, struct audit_records *records) +{ + struct audit_message msg; + int err; + + records->access = 0; + records->domain = 0; + + do { + memset(&msg, 0, sizeof(msg)); + err = audit_recv(audit_fd, &msg); + if (err) { + if (err == -EAGAIN) + return 0; + else + return err; + } + + switch (msg.header.nlmsg_type) { + case AUDIT_LANDLOCK_ACCESS: + records->access++; + break; + case AUDIT_LANDLOCK_DOMAIN: + records->domain++; + break; + } + } while (true); + + return 0; +} + +static int audit_init(void) +{ + int fd, err; + + fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_AUDIT); + if (fd < 0) + return -errno; + + err = audit_set_status(fd, AUDIT_STATUS_ENABLED, 1); + if (err) + return err; + + err = audit_set_status(fd, AUDIT_STATUS_PID, getpid()); + if (err) + return err; + + /* Sets a timeout for negative tests. */ + err = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &audit_tv_default, + sizeof(audit_tv_default)); + if (err) + return -errno; + + return fd; +} + +static int audit_init_filter_exe(struct audit_filter *filter, const char *path) +{ + char *absolute_path = NULL; + + /* It is assume that there is not already filtering rules. */ + filter->record_type = AUDIT_EXE; + if (!path) { + filter->exe_len = readlink("/proc/self/exe", filter->exe, + sizeof(filter->exe) - 1); + if (filter->exe_len < 0) + return -errno; + + return 0; + } + + absolute_path = realpath(path, NULL); + if (!absolute_path) + return -errno; + + /* No need for the terminating NULL byte. */ + filter->exe_len = strlen(absolute_path); + if (filter->exe_len > sizeof(filter->exe)) + return -E2BIG; + + memcpy(filter->exe, absolute_path, filter->exe_len); + free(absolute_path); + return 0; +} + +static int audit_cleanup(int audit_fd, struct audit_filter *filter) +{ + struct audit_filter new_filter; + + if (audit_fd < 0 || !filter) { + int err; + + /* + * Simulates audit_init_with_exe_filter() when called from + * FIXTURE_TEARDOWN_PARENT(). + */ + audit_fd = audit_init(); + if (audit_fd < 0) + return audit_fd; + + filter = &new_filter; + err = audit_init_filter_exe(filter, NULL); + if (err) + return err; + } + + /* Filters might not be in place. */ + audit_filter_exe(audit_fd, filter, AUDIT_DEL_RULE); + audit_filter_drop(audit_fd, AUDIT_DEL_RULE); + + /* + * Because audit_cleanup() might not be called by the test auditd + * process, it might not be possible to explicitly set it. Anyway, + * AUDIT_STATUS_ENABLED will implicitly be set to 0 when the auditd + * process will exit. + */ + return close(audit_fd); +} + +static int audit_init_with_exe_filter(struct audit_filter *filter) +{ + int fd, err; + + fd = audit_init(); + if (fd < 0) + return fd; + + err = audit_init_filter_exe(filter, NULL); + if (err) + return err; + + err = audit_filter_exe(fd, filter, AUDIT_ADD_RULE); + if (err) + return err; + + return fd; +} diff --git a/tools/testing/selftests/landlock/audit_test.c b/tools/testing/selftests/landlock/audit_test.c new file mode 100644 index 000000000000..a0643070c403 --- /dev/null +++ b/tools/testing/selftests/landlock/audit_test.c @@ -0,0 +1,551 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Landlock tests - Audit + * + * Copyright © 2024-2025 Microsoft Corporation + */ + +#define _GNU_SOURCE +#include <errno.h> +#include <limits.h> +#include <linux/landlock.h> +#include <stdlib.h> +#include <sys/mount.h> +#include <sys/prctl.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "audit.h" +#include "common.h" + +static int matches_log_signal(struct __test_metadata *const _metadata, + int audit_fd, const pid_t opid, __u64 *domain_id) +{ + static const char log_template[] = REGEX_LANDLOCK_PREFIX + " blockers=scope\\.signal opid=%d ocomm=\"audit_test\"$"; + char log_match[sizeof(log_template) + 10]; + int log_match_len; + + log_match_len = + snprintf(log_match, sizeof(log_match), log_template, opid); + if (log_match_len > sizeof(log_match)) + return -E2BIG; + + return audit_match_record(audit_fd, AUDIT_LANDLOCK_ACCESS, log_match, + domain_id); +} + +FIXTURE(audit) +{ + struct audit_filter audit_filter; + int audit_fd; + __u64(*domain_stack)[16]; +}; + +FIXTURE_SETUP(audit) +{ + disable_caps(_metadata); + set_cap(_metadata, CAP_AUDIT_CONTROL); + self->audit_fd = audit_init_with_exe_filter(&self->audit_filter); + EXPECT_LE(0, self->audit_fd) + { + const char *error_msg; + + /* kill "$(auditctl -s | sed -ne 's/^pid \([0-9]\+\)$/\1/p')" */ + if (self->audit_fd == -EEXIST) + error_msg = "socket already in use (e.g. auditd)"; + else + error_msg = strerror(-self->audit_fd); + TH_LOG("Failed to initialize audit: %s", error_msg); + } + clear_cap(_metadata, CAP_AUDIT_CONTROL); + + self->domain_stack = mmap(NULL, sizeof(*self->domain_stack), + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(MAP_FAILED, self->domain_stack); + memset(self->domain_stack, 0, sizeof(*self->domain_stack)); +} + +FIXTURE_TEARDOWN(audit) +{ + EXPECT_EQ(0, munmap(self->domain_stack, sizeof(*self->domain_stack))); + + set_cap(_metadata, CAP_AUDIT_CONTROL); + EXPECT_EQ(0, audit_cleanup(self->audit_fd, &self->audit_filter)); + clear_cap(_metadata, CAP_AUDIT_CONTROL); +} + +TEST_F(audit, layers) +{ + const struct landlock_ruleset_attr ruleset_attr = { + .scoped = LANDLOCK_SCOPE_SIGNAL, + }; + int status, ruleset_fd, i; + __u64 prev_dom = 3; + pid_t child; + + ruleset_fd = + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd); + EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); + + child = fork(); + ASSERT_LE(0, child); + if (child == 0) { + for (i = 0; i < ARRAY_SIZE(*self->domain_stack); i++) { + __u64 denial_dom = 1; + __u64 allocated_dom = 2; + + EXPECT_EQ(0, landlock_restrict_self(ruleset_fd, 0)); + + /* Creates a denial to get the domain ID. */ + EXPECT_EQ(-1, kill(getppid(), 0)); + EXPECT_EQ(EPERM, errno); + EXPECT_EQ(0, + matches_log_signal(_metadata, self->audit_fd, + getppid(), &denial_dom)); + EXPECT_EQ(0, matches_log_domain_allocated( + self->audit_fd, &allocated_dom)); + EXPECT_NE(denial_dom, 1); + EXPECT_NE(denial_dom, 0); + EXPECT_EQ(denial_dom, allocated_dom); + + /* Checks that the new domain is younger than the previous one. */ + EXPECT_GT(allocated_dom, prev_dom); + prev_dom = allocated_dom; + (*self->domain_stack)[i] = allocated_dom; + } + + /* Checks that we reached the maximum number of layers. */ + EXPECT_EQ(-1, landlock_restrict_self(ruleset_fd, 0)); + EXPECT_EQ(E2BIG, errno); + + /* Updates filter rules to match the drop record. */ + set_cap(_metadata, CAP_AUDIT_CONTROL); + EXPECT_EQ(0, audit_filter_drop(self->audit_fd, AUDIT_ADD_RULE)); + EXPECT_EQ(0, + audit_filter_exe(self->audit_fd, &self->audit_filter, + AUDIT_DEL_RULE)); + clear_cap(_metadata, CAP_AUDIT_CONTROL); + + _exit(_metadata->exit_code); + return; + } + + ASSERT_EQ(child, waitpid(child, &status, 0)); + if (WIFSIGNALED(status) || !WIFEXITED(status) || + WEXITSTATUS(status) != EXIT_SUCCESS) + _metadata->exit_code = KSFT_FAIL; + + /* Purges log from deallocated domains. */ + EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO, + &audit_tv_dom_drop, sizeof(audit_tv_dom_drop))); + for (i = ARRAY_SIZE(*self->domain_stack) - 1; i >= 0; i--) { + __u64 deallocated_dom = 2; + + EXPECT_EQ(0, matches_log_domain_deallocated(self->audit_fd, 1, + &deallocated_dom)); + EXPECT_EQ((*self->domain_stack)[i], deallocated_dom) + { + TH_LOG("Failed to match domain %llx (#%d)", + (*self->domain_stack)[i], i); + } + } + EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO, + &audit_tv_default, sizeof(audit_tv_default))); + + EXPECT_EQ(0, close(ruleset_fd)); +} + +FIXTURE(audit_flags) +{ + struct audit_filter audit_filter; + int audit_fd; + __u64 *domain_id; +}; + +FIXTURE_VARIANT(audit_flags) +{ + const int restrict_flags; +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(audit_flags, default) { + /* clang-format on */ + .restrict_flags = 0, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(audit_flags, same_exec_off) { + /* clang-format on */ + .restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(audit_flags, subdomains_off) { + /* clang-format on */ + .restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(audit_flags, cross_exec_on) { + /* clang-format on */ + .restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON, +}; + +FIXTURE_SETUP(audit_flags) +{ + disable_caps(_metadata); + set_cap(_metadata, CAP_AUDIT_CONTROL); + self->audit_fd = audit_init_with_exe_filter(&self->audit_filter); + EXPECT_LE(0, self->audit_fd) + { + const char *error_msg; + + /* kill "$(auditctl -s | sed -ne 's/^pid \([0-9]\+\)$/\1/p')" */ + if (self->audit_fd == -EEXIST) + error_msg = "socket already in use (e.g. auditd)"; + else + error_msg = strerror(-self->audit_fd); + TH_LOG("Failed to initialize audit: %s", error_msg); + } + clear_cap(_metadata, CAP_AUDIT_CONTROL); + + self->domain_id = mmap(NULL, sizeof(*self->domain_id), + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(MAP_FAILED, self->domain_id); + /* Domain IDs are greater or equal to 2^32. */ + *self->domain_id = 1; +} + +FIXTURE_TEARDOWN(audit_flags) +{ + EXPECT_EQ(0, munmap(self->domain_id, sizeof(*self->domain_id))); + + set_cap(_metadata, CAP_AUDIT_CONTROL); + EXPECT_EQ(0, audit_cleanup(self->audit_fd, &self->audit_filter)); + clear_cap(_metadata, CAP_AUDIT_CONTROL); +} + +TEST_F(audit_flags, signal) +{ + int status; + pid_t child; + struct audit_records records; + __u64 deallocated_dom = 2; + + child = fork(); + ASSERT_LE(0, child); + if (child == 0) { + const struct landlock_ruleset_attr ruleset_attr = { + .scoped = LANDLOCK_SCOPE_SIGNAL, + }; + int ruleset_fd; + + /* Add filesystem restrictions. */ + ruleset_fd = landlock_create_ruleset(&ruleset_attr, + sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd); + EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); + ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, + variant->restrict_flags)); + EXPECT_EQ(0, close(ruleset_fd)); + + /* First signal checks to test log entries. */ + EXPECT_EQ(-1, kill(getppid(), 0)); + EXPECT_EQ(EPERM, errno); + + if (variant->restrict_flags & + LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF) { + EXPECT_EQ(-EAGAIN, matches_log_signal( + _metadata, self->audit_fd, + getppid(), self->domain_id)); + EXPECT_EQ(*self->domain_id, 1); + } else { + __u64 allocated_dom = 3; + + EXPECT_EQ(0, matches_log_signal( + _metadata, self->audit_fd, + getppid(), self->domain_id)); + + /* Checks domain information records. */ + EXPECT_EQ(0, matches_log_domain_allocated( + self->audit_fd, &allocated_dom)); + EXPECT_NE(*self->domain_id, 1); + EXPECT_NE(*self->domain_id, 0); + EXPECT_EQ(*self->domain_id, allocated_dom); + } + + /* Second signal checks to test audit_count_records(). */ + EXPECT_EQ(-1, kill(getppid(), 0)); + EXPECT_EQ(EPERM, errno); + + /* Makes sure there is no superfluous logged records. */ + EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); + if (variant->restrict_flags & + LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF) { + EXPECT_EQ(0, records.access); + } else { + EXPECT_EQ(1, records.access); + } + EXPECT_EQ(0, records.domain); + + /* Updates filter rules to match the drop record. */ + set_cap(_metadata, CAP_AUDIT_CONTROL); + EXPECT_EQ(0, audit_filter_drop(self->audit_fd, AUDIT_ADD_RULE)); + EXPECT_EQ(0, + audit_filter_exe(self->audit_fd, &self->audit_filter, + AUDIT_DEL_RULE)); + clear_cap(_metadata, CAP_AUDIT_CONTROL); + + _exit(_metadata->exit_code); + return; + } + + ASSERT_EQ(child, waitpid(child, &status, 0)); + if (WIFSIGNALED(status) || !WIFEXITED(status) || + WEXITSTATUS(status) != EXIT_SUCCESS) + _metadata->exit_code = KSFT_FAIL; + + if (variant->restrict_flags & + LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF) { + EXPECT_EQ(-EAGAIN, + matches_log_domain_deallocated(self->audit_fd, 0, + &deallocated_dom)); + EXPECT_EQ(deallocated_dom, 2); + } else { + EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO, + &audit_tv_dom_drop, + sizeof(audit_tv_dom_drop))); + EXPECT_EQ(0, matches_log_domain_deallocated(self->audit_fd, 2, + &deallocated_dom)); + EXPECT_NE(deallocated_dom, 2); + EXPECT_NE(deallocated_dom, 0); + EXPECT_EQ(deallocated_dom, *self->domain_id); + EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO, + &audit_tv_default, + sizeof(audit_tv_default))); + } +} + +static int matches_log_fs_read_root(int audit_fd) +{ + return audit_match_record( + audit_fd, AUDIT_LANDLOCK_ACCESS, + REGEX_LANDLOCK_PREFIX + " blockers=fs\\.read_dir path=\"/\" dev=\"[^\"]\\+\" ino=[0-9]\\+$", + NULL); +} + +FIXTURE(audit_exec) +{ + struct audit_filter audit_filter; + int audit_fd; +}; + +FIXTURE_VARIANT(audit_exec) +{ + const int restrict_flags; +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(audit_exec, default) { + /* clang-format on */ + .restrict_flags = 0, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(audit_exec, same_exec_off) { + /* clang-format on */ + .restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(audit_exec, subdomains_off) { + /* clang-format on */ + .restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(audit_exec, cross_exec_on) { + /* clang-format on */ + .restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(audit_exec, subdomains_off_and_cross_exec_on) { + /* clang-format on */ + .restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF | + LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON, +}; + +FIXTURE_SETUP(audit_exec) +{ + disable_caps(_metadata); + set_cap(_metadata, CAP_AUDIT_CONTROL); + + self->audit_fd = audit_init(); + EXPECT_LE(0, self->audit_fd) + { + const char *error_msg; + + /* kill "$(auditctl -s | sed -ne 's/^pid \([0-9]\+\)$/\1/p')" */ + if (self->audit_fd == -EEXIST) + error_msg = "socket already in use (e.g. auditd)"; + else + error_msg = strerror(-self->audit_fd); + TH_LOG("Failed to initialize audit: %s", error_msg); + } + + /* Applies test filter for the bin_wait_pipe_sandbox program. */ + EXPECT_EQ(0, audit_init_filter_exe(&self->audit_filter, + bin_wait_pipe_sandbox)); + EXPECT_EQ(0, audit_filter_exe(self->audit_fd, &self->audit_filter, + AUDIT_ADD_RULE)); + + clear_cap(_metadata, CAP_AUDIT_CONTROL); +} + +FIXTURE_TEARDOWN(audit_exec) +{ + set_cap(_metadata, CAP_AUDIT_CONTROL); + EXPECT_EQ(0, audit_filter_exe(self->audit_fd, &self->audit_filter, + AUDIT_DEL_RULE)); + clear_cap(_metadata, CAP_AUDIT_CONTROL); + EXPECT_EQ(0, close(self->audit_fd)); +} + +TEST_F(audit_exec, signal_and_open) +{ + struct audit_records records; + int pipe_child[2], pipe_parent[2]; + char buf_parent; + pid_t child; + int status; + + ASSERT_EQ(0, pipe2(pipe_child, 0)); + ASSERT_EQ(0, pipe2(pipe_parent, 0)); + + child = fork(); + ASSERT_LE(0, child); + if (child == 0) { + const struct landlock_ruleset_attr layer1 = { + .scoped = LANDLOCK_SCOPE_SIGNAL, + }; + char pipe_child_str[12], pipe_parent_str[12]; + char *const argv[] = { (char *)bin_wait_pipe_sandbox, + pipe_child_str, pipe_parent_str, NULL }; + int ruleset_fd; + + /* Passes the pipe FDs to the executed binary. */ + EXPECT_EQ(0, close(pipe_child[0])); + EXPECT_EQ(0, close(pipe_parent[1])); + snprintf(pipe_child_str, sizeof(pipe_child_str), "%d", + pipe_child[1]); + snprintf(pipe_parent_str, sizeof(pipe_parent_str), "%d", + pipe_parent[0]); + + ruleset_fd = + landlock_create_ruleset(&layer1, sizeof(layer1), 0); + if (ruleset_fd < 0) { + perror("Failed to create a ruleset"); + _exit(1); + } + prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + if (landlock_restrict_self(ruleset_fd, + variant->restrict_flags)) { + perror("Failed to restrict self"); + _exit(1); + } + close(ruleset_fd); + + ASSERT_EQ(0, execve(argv[0], argv, NULL)) + { + TH_LOG("Failed to execute \"%s\": %s", argv[0], + strerror(errno)); + }; + _exit(1); + return; + } + + EXPECT_EQ(0, close(pipe_child[1])); + EXPECT_EQ(0, close(pipe_parent[0])); + + /* Waits for the child. */ + EXPECT_EQ(1, read(pipe_child[0], &buf_parent, 1)); + + /* Tests that there was no denial until now. */ + EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); + EXPECT_EQ(0, records.access); + EXPECT_EQ(0, records.domain); + + /* + * Wait for the child to do a first denied action by layer1 and + * sandbox itself with layer2. + */ + EXPECT_EQ(1, write(pipe_parent[1], ".", 1)); + EXPECT_EQ(1, read(pipe_child[0], &buf_parent, 1)); + + /* Tests that the audit record only matches the child. */ + if (variant->restrict_flags & LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON) { + /* Matches the current domain. */ + EXPECT_EQ(0, matches_log_signal(_metadata, self->audit_fd, + getpid(), NULL)); + } + + /* Checks that we didn't miss anything. */ + EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); + EXPECT_EQ(0, records.access); + + /* + * Wait for the child to do a second denied action by layer1 and + * layer2, and sandbox itself with layer3. + */ + EXPECT_EQ(1, write(pipe_parent[1], ".", 1)); + EXPECT_EQ(1, read(pipe_child[0], &buf_parent, 1)); + + /* Tests that the audit record only matches the child. */ + if (variant->restrict_flags & LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON) { + /* Matches the current domain. */ + EXPECT_EQ(0, matches_log_signal(_metadata, self->audit_fd, + getpid(), NULL)); + } + + if (!(variant->restrict_flags & + LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF)) { + /* Matches the child domain. */ + EXPECT_EQ(0, matches_log_fs_read_root(self->audit_fd)); + } + + /* Checks that we didn't miss anything. */ + EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); + EXPECT_EQ(0, records.access); + + /* Waits for the child to terminate. */ + EXPECT_EQ(1, write(pipe_parent[1], ".", 1)); + ASSERT_EQ(child, waitpid(child, &status, 0)); + ASSERT_EQ(1, WIFEXITED(status)); + ASSERT_EQ(0, WEXITSTATUS(status)); + + /* Tests that the audit record only matches the child. */ + if (!(variant->restrict_flags & + LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF)) { + /* + * Matches the child domains, which tests that the + * llcred->domain_exec bitmask is correctly updated with a new + * domain. + */ + EXPECT_EQ(0, matches_log_fs_read_root(self->audit_fd)); + EXPECT_EQ(0, matches_log_signal(_metadata, self->audit_fd, + getpid(), NULL)); + } + + /* Checks that we didn't miss anything. */ + EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); + EXPECT_EQ(0, records.access); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c index 1bc16fde2e8a..7b69002239d7 100644 --- a/tools/testing/selftests/landlock/base_test.c +++ b/tools/testing/selftests/landlock/base_test.c @@ -76,7 +76,7 @@ TEST(abi_version) const struct landlock_ruleset_attr ruleset_attr = { .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE, }; - ASSERT_EQ(6, landlock_create_ruleset(NULL, 0, + ASSERT_EQ(7, landlock_create_ruleset(NULL, 0, LANDLOCK_CREATE_RULESET_VERSION)); ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0, @@ -98,10 +98,54 @@ TEST(abi_version) ASSERT_EQ(EINVAL, errno); } +/* + * Old source trees might not have the set of Kselftest fixes related to kernel + * UAPI headers. + */ +#ifndef LANDLOCK_CREATE_RULESET_ERRATA +#define LANDLOCK_CREATE_RULESET_ERRATA (1U << 1) +#endif + +TEST(errata) +{ + const struct landlock_ruleset_attr ruleset_attr = { + .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE, + }; + int errata; + + errata = landlock_create_ruleset(NULL, 0, + LANDLOCK_CREATE_RULESET_ERRATA); + /* The errata bitmask will not be backported to tests. */ + ASSERT_LE(0, errata); + TH_LOG("errata: 0x%x", errata); + + ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0, + LANDLOCK_CREATE_RULESET_ERRATA)); + ASSERT_EQ(EINVAL, errno); + + ASSERT_EQ(-1, landlock_create_ruleset(NULL, sizeof(ruleset_attr), + LANDLOCK_CREATE_RULESET_ERRATA)); + ASSERT_EQ(EINVAL, errno); + + ASSERT_EQ(-1, + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), + LANDLOCK_CREATE_RULESET_ERRATA)); + ASSERT_EQ(EINVAL, errno); + + ASSERT_EQ(-1, landlock_create_ruleset( + NULL, 0, + LANDLOCK_CREATE_RULESET_VERSION | + LANDLOCK_CREATE_RULESET_ERRATA)); + ASSERT_EQ(-1, landlock_create_ruleset(NULL, 0, + LANDLOCK_CREATE_RULESET_ERRATA | + 1 << 31)); + ASSERT_EQ(EINVAL, errno); +} + /* Tests ordering of syscall argument checks. */ TEST(create_ruleset_checks_ordering) { - const int last_flag = LANDLOCK_CREATE_RULESET_VERSION; + const int last_flag = LANDLOCK_CREATE_RULESET_ERRATA; const int invalid_flag = last_flag << 1; int ruleset_fd; const struct landlock_ruleset_attr ruleset_attr = { @@ -233,6 +277,88 @@ TEST(restrict_self_checks_ordering) ASSERT_EQ(0, close(ruleset_fd)); } +TEST(restrict_self_fd) +{ + int fd; + + fd = open("/dev/null", O_RDONLY | O_CLOEXEC); + ASSERT_LE(0, fd); + + EXPECT_EQ(-1, landlock_restrict_self(fd, 0)); + EXPECT_EQ(EBADFD, errno); +} + +TEST(restrict_self_fd_flags) +{ + int fd; + + fd = open("/dev/null", O_RDONLY | O_CLOEXEC); + ASSERT_LE(0, fd); + + /* + * LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF accepts -1 but not any file + * descriptor. + */ + EXPECT_EQ(-1, landlock_restrict_self( + fd, LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF)); + EXPECT_EQ(EBADFD, errno); +} + +TEST(restrict_self_flags) +{ + const __u32 last_flag = LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF; + + /* Tests invalid flag combinations. */ + + EXPECT_EQ(-1, landlock_restrict_self(-1, last_flag << 1)); + EXPECT_EQ(EINVAL, errno); + + EXPECT_EQ(-1, landlock_restrict_self(-1, -1)); + EXPECT_EQ(EINVAL, errno); + + /* Tests valid flag combinations. */ + + EXPECT_EQ(-1, landlock_restrict_self(-1, 0)); + EXPECT_EQ(EBADF, errno); + + EXPECT_EQ(-1, landlock_restrict_self( + -1, LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF)); + EXPECT_EQ(EBADF, errno); + + EXPECT_EQ(-1, + landlock_restrict_self( + -1, + LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF | + LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF)); + EXPECT_EQ(EBADF, errno); + + EXPECT_EQ(-1, + landlock_restrict_self( + -1, + LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON | + LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF)); + EXPECT_EQ(EBADF, errno); + + EXPECT_EQ(-1, landlock_restrict_self( + -1, LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON)); + EXPECT_EQ(EBADF, errno); + + EXPECT_EQ(-1, + landlock_restrict_self( + -1, LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF | + LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON)); + EXPECT_EQ(EBADF, errno); + + /* Tests with an invalid ruleset_fd. */ + + EXPECT_EQ(-1, landlock_restrict_self( + -2, LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF)); + EXPECT_EQ(EBADF, errno); + + EXPECT_EQ(0, landlock_restrict_self( + -1, LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF)); +} + TEST(ruleset_fd_io) { struct landlock_ruleset_attr ruleset_attr = { diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h index 6064c9ac0532..88a3c78f5d98 100644 --- a/tools/testing/selftests/landlock/common.h +++ b/tools/testing/selftests/landlock/common.h @@ -11,6 +11,7 @@ #include <errno.h> #include <linux/securebits.h> #include <sys/capability.h> +#include <sys/prctl.h> #include <sys/socket.h> #include <sys/un.h> #include <sys/wait.h> @@ -30,6 +31,7 @@ static const char bin_sandbox_and_launch[] = "./sandbox-and-launch"; static const char bin_wait_pipe[] = "./wait-pipe"; +static const char bin_wait_pipe_sandbox[] = "./wait-pipe-sandbox"; static void _init_caps(struct __test_metadata *const _metadata, bool drop_all) { @@ -37,10 +39,12 @@ static void _init_caps(struct __test_metadata *const _metadata, bool drop_all) /* Only these three capabilities are useful for the tests. */ const cap_value_t caps[] = { /* clang-format off */ + CAP_AUDIT_CONTROL, CAP_DAC_OVERRIDE, CAP_MKNOD, CAP_NET_ADMIN, CAP_NET_BIND_SERVICE, + CAP_SETUID, CAP_SYS_ADMIN, CAP_SYS_CHROOT, /* clang-format on */ @@ -204,6 +208,22 @@ enforce_ruleset(struct __test_metadata *const _metadata, const int ruleset_fd) } } +static void __maybe_unused +drop_access_rights(struct __test_metadata *const _metadata, + const struct landlock_ruleset_attr *const ruleset_attr) +{ + int ruleset_fd; + + ruleset_fd = + landlock_create_ruleset(ruleset_attr, sizeof(*ruleset_attr), 0); + EXPECT_LE(0, ruleset_fd) + { + TH_LOG("Failed to create a ruleset: %s", strerror(errno)); + } + enforce_ruleset(_metadata, ruleset_fd); + EXPECT_EQ(0, close(ruleset_fd)); +} + struct protocol_variant { int domain; int type; diff --git a/tools/testing/selftests/landlock/config b/tools/testing/selftests/landlock/config index 425de4c20271..8fe9b461b1fd 100644 --- a/tools/testing/selftests/landlock/config +++ b/tools/testing/selftests/landlock/config @@ -1,4 +1,5 @@ CONFIG_AF_UNIX_OOB=y +CONFIG_AUDIT=y CONFIG_CGROUPS=y CONFIG_CGROUP_SCHED=y CONFIG_INET=y diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c index aa6f2c1cbec7..f819011a8798 100644 --- a/tools/testing/selftests/landlock/fs_test.c +++ b/tools/testing/selftests/landlock/fs_test.c @@ -41,6 +41,7 @@ #define _ASM_GENERIC_FCNTL_H #include <linux/fcntl.h> +#include "audit.h" #include "common.h" #ifndef renameat2 @@ -5554,4 +5555,597 @@ TEST_F_FORK(layout3_fs, release_inodes) ASSERT_EQ(EACCES, test_open(TMP_DIR, O_RDONLY)); } +static int matches_log_fs_extra(struct __test_metadata *const _metadata, + int audit_fd, const char *const blockers, + const char *const path, const char *const extra) +{ + static const char log_template[] = REGEX_LANDLOCK_PREFIX + " blockers=fs\\.%s path=\"%s\" dev=\"[^\"]\\+\" ino=[0-9]\\+$"; + char *absolute_path = NULL; + size_t log_match_remaining = sizeof(log_template) + strlen(blockers) + + PATH_MAX * 2 + + (extra ? strlen(extra) : 0) + 1; + char log_match[log_match_remaining]; + char *log_match_cursor = log_match; + size_t chunk_len; + + chunk_len = snprintf(log_match_cursor, log_match_remaining, + REGEX_LANDLOCK_PREFIX " blockers=%s path=\"", + blockers); + if (chunk_len < 0 || chunk_len >= log_match_remaining) + return -E2BIG; + + /* + * It is assume that absolute_path does not contain control characters nor + * spaces, see audit_string_contains_control(). + */ + absolute_path = realpath(path, NULL); + if (!absolute_path) + return -errno; + + log_match_remaining -= chunk_len; + log_match_cursor += chunk_len; + log_match_cursor = regex_escape(absolute_path, log_match_cursor, + log_match_remaining); + free(absolute_path); + if (log_match_cursor < 0) + return (long long)log_match_cursor; + + log_match_remaining -= log_match_cursor - log_match; + chunk_len = snprintf(log_match_cursor, log_match_remaining, + "\" dev=\"[^\"]\\+\" ino=[0-9]\\+%s$", + extra ?: ""); + if (chunk_len < 0 || chunk_len >= log_match_remaining) + return -E2BIG; + + return audit_match_record(audit_fd, AUDIT_LANDLOCK_ACCESS, log_match, + NULL); +} + +static int matches_log_fs(struct __test_metadata *const _metadata, int audit_fd, + const char *const blockers, const char *const path) +{ + return matches_log_fs_extra(_metadata, audit_fd, blockers, path, NULL); +} + +FIXTURE(audit_layout1) +{ + struct audit_filter audit_filter; + int audit_fd; +}; + +FIXTURE_SETUP(audit_layout1) +{ + prepare_layout(_metadata); + + create_layout1(_metadata); + + set_cap(_metadata, CAP_AUDIT_CONTROL); + self->audit_fd = audit_init_with_exe_filter(&self->audit_filter); + EXPECT_LE(0, self->audit_fd); + disable_caps(_metadata); +} + +FIXTURE_TEARDOWN_PARENT(audit_layout1) +{ + remove_layout1(_metadata); + + cleanup_layout(_metadata); + + EXPECT_EQ(0, audit_cleanup(-1, NULL)); +} + +TEST_F(audit_layout1, execute_make) +{ + struct audit_records records; + + copy_file(_metadata, bin_true, file1_s1d1); + test_execute(_metadata, 0, file1_s1d1); + test_check_exec(_metadata, 0, file1_s1d1); + + drop_access_rights(_metadata, + &(struct landlock_ruleset_attr){ + .handled_access_fs = + LANDLOCK_ACCESS_FS_EXECUTE, + }); + + test_execute(_metadata, EACCES, file1_s1d1); + EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.execute", + file1_s1d1)); + test_check_exec(_metadata, EACCES, file1_s1d1); + EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.execute", + file1_s1d1)); + + EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); + EXPECT_EQ(0, records.access); + EXPECT_EQ(0, records.domain); +} + +/* + * Using a set of handled/denied access rights make it possible to check that + * only the blocked ones are logged. + */ + +/* clang-format off */ +static const __u64 access_fs_16 = + LANDLOCK_ACCESS_FS_EXECUTE | + LANDLOCK_ACCESS_FS_WRITE_FILE | + LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_READ_DIR | + LANDLOCK_ACCESS_FS_REMOVE_DIR | + LANDLOCK_ACCESS_FS_REMOVE_FILE | + LANDLOCK_ACCESS_FS_MAKE_CHAR | + LANDLOCK_ACCESS_FS_MAKE_DIR | + LANDLOCK_ACCESS_FS_MAKE_REG | + LANDLOCK_ACCESS_FS_MAKE_SOCK | + LANDLOCK_ACCESS_FS_MAKE_FIFO | + LANDLOCK_ACCESS_FS_MAKE_BLOCK | + LANDLOCK_ACCESS_FS_MAKE_SYM | + LANDLOCK_ACCESS_FS_REFER | + LANDLOCK_ACCESS_FS_TRUNCATE | + LANDLOCK_ACCESS_FS_IOCTL_DEV; +/* clang-format on */ + +TEST_F(audit_layout1, execute_read) +{ + struct audit_records records; + + copy_file(_metadata, bin_true, file1_s1d1); + test_execute(_metadata, 0, file1_s1d1); + test_check_exec(_metadata, 0, file1_s1d1); + + drop_access_rights(_metadata, &(struct landlock_ruleset_attr){ + .handled_access_fs = access_fs_16, + }); + + /* + * The only difference with the previous audit_layout1.execute_read test is + * the extra ",fs\\.read_file" blocked by the executable file. + */ + test_execute(_metadata, EACCES, file1_s1d1); + EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, + "fs\\.execute,fs\\.read_file", file1_s1d1)); + test_check_exec(_metadata, EACCES, file1_s1d1); + EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, + "fs\\.execute,fs\\.read_file", file1_s1d1)); + + EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); + EXPECT_EQ(0, records.access); + EXPECT_EQ(0, records.domain); +} + +TEST_F(audit_layout1, write_file) +{ + struct audit_records records; + + drop_access_rights(_metadata, &(struct landlock_ruleset_attr){ + .handled_access_fs = access_fs_16, + }); + + EXPECT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY)); + EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, + "fs\\.write_file", file1_s1d1)); + + EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); + EXPECT_EQ(0, records.access); + EXPECT_EQ(1, records.domain); +} + +TEST_F(audit_layout1, read_file) +{ + struct audit_records records; + + drop_access_rights(_metadata, &(struct landlock_ruleset_attr){ + .handled_access_fs = access_fs_16, + }); + + EXPECT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY)); + EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.read_file", + file1_s1d1)); + + EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); + EXPECT_EQ(0, records.access); + EXPECT_EQ(1, records.domain); +} + +TEST_F(audit_layout1, read_dir) +{ + struct audit_records records; + + drop_access_rights(_metadata, &(struct landlock_ruleset_attr){ + .handled_access_fs = access_fs_16, + }); + + EXPECT_EQ(EACCES, test_open(dir_s1d1, O_DIRECTORY)); + EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.read_dir", + dir_s1d1)); + + EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); + EXPECT_EQ(0, records.access); + EXPECT_EQ(1, records.domain); +} + +TEST_F(audit_layout1, remove_dir) +{ + struct audit_records records; + + EXPECT_EQ(0, unlink(file1_s1d3)); + EXPECT_EQ(0, unlink(file2_s1d3)); + + drop_access_rights(_metadata, &(struct landlock_ruleset_attr){ + .handled_access_fs = access_fs_16, + }); + + EXPECT_EQ(-1, rmdir(dir_s1d3)); + EXPECT_EQ(EACCES, errno); + EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, + "fs\\.remove_dir", dir_s1d2)); + + EXPECT_EQ(-1, unlinkat(AT_FDCWD, dir_s1d3, AT_REMOVEDIR)); + EXPECT_EQ(EACCES, errno); + EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, + "fs\\.remove_dir", dir_s1d2)); + + EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); + EXPECT_EQ(0, records.access); + EXPECT_EQ(0, records.domain); +} + +TEST_F(audit_layout1, remove_file) +{ + struct audit_records records; + + drop_access_rights(_metadata, &(struct landlock_ruleset_attr){ + .handled_access_fs = access_fs_16, + }); + + EXPECT_EQ(-1, unlink(file1_s1d3)); + EXPECT_EQ(EACCES, errno); + EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, + "fs\\.remove_file", dir_s1d3)); + + EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); + EXPECT_EQ(0, records.access); + EXPECT_EQ(1, records.domain); +} + +TEST_F(audit_layout1, make_char) +{ + struct audit_records records; + + EXPECT_EQ(0, unlink(file1_s1d3)); + + drop_access_rights(_metadata, &(struct landlock_ruleset_attr){ + .handled_access_fs = access_fs_16, + }); + + EXPECT_EQ(-1, mknod(file1_s1d3, S_IFCHR | 0644, 0)); + EXPECT_EQ(EACCES, errno); + EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.make_char", + dir_s1d3)); + + EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); + EXPECT_EQ(0, records.access); + EXPECT_EQ(1, records.domain); +} + +TEST_F(audit_layout1, make_dir) +{ + struct audit_records records; + + EXPECT_EQ(0, unlink(file1_s1d3)); + + drop_access_rights(_metadata, &(struct landlock_ruleset_attr){ + .handled_access_fs = access_fs_16, + }); + + EXPECT_EQ(-1, mkdir(file1_s1d3, 0755)); + EXPECT_EQ(EACCES, errno); + EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.make_dir", + dir_s1d3)); + + EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); + EXPECT_EQ(0, records.access); + EXPECT_EQ(1, records.domain); +} + +TEST_F(audit_layout1, make_reg) +{ + struct audit_records records; + + EXPECT_EQ(0, unlink(file1_s1d3)); + + drop_access_rights(_metadata, &(struct landlock_ruleset_attr){ + .handled_access_fs = access_fs_16, + }); + + EXPECT_EQ(-1, mknod(file1_s1d3, S_IFREG | 0644, 0)); + EXPECT_EQ(EACCES, errno); + EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.make_reg", + dir_s1d3)); + + EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); + EXPECT_EQ(0, records.access); + EXPECT_EQ(1, records.domain); +} + +TEST_F(audit_layout1, make_sock) +{ + struct audit_records records; + + EXPECT_EQ(0, unlink(file1_s1d3)); + + drop_access_rights(_metadata, &(struct landlock_ruleset_attr){ + .handled_access_fs = access_fs_16, + }); + + EXPECT_EQ(-1, mknod(file1_s1d3, S_IFSOCK | 0644, 0)); + EXPECT_EQ(EACCES, errno); + EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.make_sock", + dir_s1d3)); + + EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); + EXPECT_EQ(0, records.access); + EXPECT_EQ(1, records.domain); +} + +TEST_F(audit_layout1, make_fifo) +{ + struct audit_records records; + + EXPECT_EQ(0, unlink(file1_s1d3)); + + drop_access_rights(_metadata, &(struct landlock_ruleset_attr){ + .handled_access_fs = access_fs_16, + }); + + EXPECT_EQ(-1, mknod(file1_s1d3, S_IFIFO | 0644, 0)); + EXPECT_EQ(EACCES, errno); + EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.make_fifo", + dir_s1d3)); + + EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); + EXPECT_EQ(0, records.access); + EXPECT_EQ(1, records.domain); +} + +TEST_F(audit_layout1, make_block) +{ + struct audit_records records; + + EXPECT_EQ(0, unlink(file1_s1d3)); + + drop_access_rights(_metadata, &(struct landlock_ruleset_attr){ + .handled_access_fs = access_fs_16, + }); + + EXPECT_EQ(-1, mknod(file1_s1d3, S_IFBLK | 0644, 0)); + EXPECT_EQ(EACCES, errno); + EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, + "fs\\.make_block", dir_s1d3)); + + EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); + EXPECT_EQ(0, records.access); + EXPECT_EQ(1, records.domain); +} + +TEST_F(audit_layout1, make_sym) +{ + struct audit_records records; + + EXPECT_EQ(0, unlink(file1_s1d3)); + + drop_access_rights(_metadata, &(struct landlock_ruleset_attr){ + .handled_access_fs = access_fs_16, + }); + + EXPECT_EQ(-1, symlink("target", file1_s1d3)); + EXPECT_EQ(EACCES, errno); + EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.make_sym", + dir_s1d3)); + + EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); + EXPECT_EQ(0, records.access); + EXPECT_EQ(1, records.domain); +} + +TEST_F(audit_layout1, refer_handled) +{ + struct audit_records records; + + EXPECT_EQ(0, unlink(file1_s1d3)); + + drop_access_rights(_metadata, &(struct landlock_ruleset_attr){ + .handled_access_fs = + LANDLOCK_ACCESS_FS_REFER, + }); + + EXPECT_EQ(-1, link(file1_s1d1, file1_s1d3)); + EXPECT_EQ(EXDEV, errno); + EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.refer", + dir_s1d1)); + EXPECT_EQ(0, matches_log_domain_allocated(self->audit_fd, NULL)); + EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.refer", + dir_s1d3)); + + EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); + EXPECT_EQ(0, records.access); + EXPECT_EQ(0, records.domain); +} + +TEST_F(audit_layout1, refer_make) +{ + struct audit_records records; + + EXPECT_EQ(0, unlink(file1_s1d3)); + + drop_access_rights(_metadata, + &(struct landlock_ruleset_attr){ + .handled_access_fs = + LANDLOCK_ACCESS_FS_MAKE_REG | + LANDLOCK_ACCESS_FS_REFER, + }); + + EXPECT_EQ(-1, link(file1_s1d1, file1_s1d3)); + EXPECT_EQ(EACCES, errno); + EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.refer", + dir_s1d1)); + EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, + "fs\\.make_reg,fs\\.refer", dir_s1d3)); + + EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); + EXPECT_EQ(0, records.access); + EXPECT_EQ(0, records.domain); +} + +TEST_F(audit_layout1, refer_rename) +{ + struct audit_records records; + + EXPECT_EQ(0, unlink(file1_s1d3)); + + drop_access_rights(_metadata, &(struct landlock_ruleset_attr){ + .handled_access_fs = access_fs_16, + }); + + EXPECT_EQ(EACCES, test_rename(file1_s1d2, file1_s2d3)); + EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, + "fs\\.remove_file,fs\\.refer", dir_s1d2)); + EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, + "fs\\.remove_file,fs\\.make_reg,fs\\.refer", + dir_s2d3)); + + EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); + EXPECT_EQ(0, records.access); + EXPECT_EQ(0, records.domain); +} + +TEST_F(audit_layout1, refer_exchange) +{ + struct audit_records records; + + EXPECT_EQ(0, unlink(file1_s1d3)); + + drop_access_rights(_metadata, &(struct landlock_ruleset_attr){ + .handled_access_fs = access_fs_16, + }); + + /* + * The only difference with the previous audit_layout1.refer_rename test is + * the extra ",fs\\.make_reg" blocked by the source directory. + */ + EXPECT_EQ(EACCES, test_exchange(file1_s1d2, file1_s2d3)); + EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, + "fs\\.remove_file,fs\\.make_reg,fs\\.refer", + dir_s1d2)); + EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, + "fs\\.remove_file,fs\\.make_reg,fs\\.refer", + dir_s2d3)); + + EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); + EXPECT_EQ(0, records.access); + EXPECT_EQ(0, records.domain); +} + +/* + * This test checks that the audit record is correctly generated when the + * operation is only partially denied. This is the case for rename(2) when the + * source file is allowed to be referenced but the destination directory is not. + * + * This is also a regression test for commit d617f0d72d80 ("landlock: Optimize + * file path walks and prepare for audit support") and commit 058518c20920 + * ("landlock: Align partial refer access checks with final ones"). + */ +TEST_F(audit_layout1, refer_rename_half) +{ + struct audit_records records; + const struct rule layer1[] = { + { + .path = dir_s2d2, + .access = LANDLOCK_ACCESS_FS_REFER, + }, + {}, + }; + int ruleset_fd = + create_ruleset(_metadata, LANDLOCK_ACCESS_FS_REFER, layer1); + + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + ASSERT_EQ(-1, rename(dir_s1d2, dir_s2d3)); + ASSERT_EQ(EXDEV, errno); + + /* Only half of the request is denied. */ + EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.refer", + dir_s1d1)); + + EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); + EXPECT_EQ(0, records.access); + EXPECT_EQ(1, records.domain); +} + +TEST_F(audit_layout1, truncate) +{ + struct audit_records records; + + drop_access_rights(_metadata, &(struct landlock_ruleset_attr){ + .handled_access_fs = access_fs_16, + }); + + EXPECT_EQ(-1, truncate(file1_s1d3, 0)); + EXPECT_EQ(EACCES, errno); + EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.truncate", + file1_s1d3)); + + EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); + EXPECT_EQ(0, records.access); + EXPECT_EQ(1, records.domain); +} + +TEST_F(audit_layout1, ioctl_dev) +{ + struct audit_records records; + int fd; + + drop_access_rights(_metadata, + &(struct landlock_ruleset_attr){ + .handled_access_fs = + access_fs_16 & + ~LANDLOCK_ACCESS_FS_READ_FILE, + }); + + fd = open("/dev/null", O_RDONLY | O_CLOEXEC); + ASSERT_LE(0, fd); + EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FIONREAD)); + EXPECT_EQ(0, matches_log_fs_extra(_metadata, self->audit_fd, + "fs\\.ioctl_dev", "/dev/null", + " ioctlcmd=0x541b")); + + EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); + EXPECT_EQ(0, records.access); + EXPECT_EQ(1, records.domain); +} + +TEST_F(audit_layout1, mount) +{ + struct audit_records records; + + drop_access_rights(_metadata, + &(struct landlock_ruleset_attr){ + .handled_access_fs = + LANDLOCK_ACCESS_FS_EXECUTE, + }); + + set_cap(_metadata, CAP_SYS_ADMIN); + EXPECT_EQ(-1, mount(NULL, dir_s3d2, NULL, MS_RDONLY, NULL)); + EXPECT_EQ(EPERM, errno); + clear_cap(_metadata, CAP_SYS_ADMIN); + EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, + "fs\\.change_topology", dir_s3d2)); + EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); + EXPECT_EQ(0, records.access); + EXPECT_EQ(1, records.domain); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/landlock/net_test.c b/tools/testing/selftests/landlock/net_test.c index d9de0ee49ebc..2a45208551e6 100644 --- a/tools/testing/selftests/landlock/net_test.c +++ b/tools/testing/selftests/landlock/net_test.c @@ -20,6 +20,7 @@ #include <sys/syscall.h> #include <sys/un.h> +#include "audit.h" #include "common.h" const short sock_port_start = (1 << 10); @@ -1868,4 +1869,135 @@ TEST_F(port_specific, bind_connect_1023) EXPECT_EQ(0, close(bind_fd)); } +static int matches_log_tcp(const int audit_fd, const char *const blockers, + const char *const dir_addr, const char *const addr, + const char *const dir_port) +{ + static const char log_template[] = REGEX_LANDLOCK_PREFIX + " blockers=%s %s=%s %s=1024$"; + /* + * Max strlen(blockers): 16 + * Max strlen(dir_addr): 5 + * Max strlen(addr): 12 + * Max strlen(dir_port): 4 + */ + char log_match[sizeof(log_template) + 37]; + int log_match_len; + + log_match_len = snprintf(log_match, sizeof(log_match), log_template, + blockers, dir_addr, addr, dir_port); + if (log_match_len > sizeof(log_match)) + return -E2BIG; + + return audit_match_record(audit_fd, AUDIT_LANDLOCK_ACCESS, log_match, + NULL); +} + +FIXTURE(audit) +{ + struct service_fixture srv0; + struct audit_filter audit_filter; + int audit_fd; +}; + +FIXTURE_VARIANT(audit) +{ + const char *const addr; + const struct protocol_variant prot; +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(audit, ipv4) { + /* clang-format on */ + .addr = "127\\.0\\.0\\.1", + .prot = { + .domain = AF_INET, + .type = SOCK_STREAM, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(audit, ipv6) { + /* clang-format on */ + .addr = "::1", + .prot = { + .domain = AF_INET6, + .type = SOCK_STREAM, + }, +}; + +FIXTURE_SETUP(audit) +{ + ASSERT_EQ(0, set_service(&self->srv0, variant->prot, 0)); + setup_loopback(_metadata); + + set_cap(_metadata, CAP_AUDIT_CONTROL); + self->audit_fd = audit_init_with_exe_filter(&self->audit_filter); + EXPECT_LE(0, self->audit_fd); + disable_caps(_metadata); +}; + +FIXTURE_TEARDOWN(audit) +{ + set_cap(_metadata, CAP_AUDIT_CONTROL); + EXPECT_EQ(0, audit_cleanup(self->audit_fd, &self->audit_filter)); + clear_cap(_metadata, CAP_AUDIT_CONTROL); +} + +TEST_F(audit, bind) +{ + const struct landlock_ruleset_attr ruleset_attr = { + .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP | + LANDLOCK_ACCESS_NET_CONNECT_TCP, + }; + struct audit_records records; + int ruleset_fd, sock_fd; + + ruleset_fd = + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + EXPECT_EQ(0, close(ruleset_fd)); + + sock_fd = socket_variant(&self->srv0); + ASSERT_LE(0, sock_fd); + EXPECT_EQ(-EACCES, bind_variant(sock_fd, &self->srv0)); + EXPECT_EQ(0, matches_log_tcp(self->audit_fd, "net\\.bind_tcp", "saddr", + variant->addr, "src")); + + EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); + EXPECT_EQ(0, records.access); + EXPECT_EQ(1, records.domain); + + EXPECT_EQ(0, close(sock_fd)); +} + +TEST_F(audit, connect) +{ + const struct landlock_ruleset_attr ruleset_attr = { + .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP | + LANDLOCK_ACCESS_NET_CONNECT_TCP, + }; + struct audit_records records; + int ruleset_fd, sock_fd; + + ruleset_fd = + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + EXPECT_EQ(0, close(ruleset_fd)); + + sock_fd = socket_variant(&self->srv0); + ASSERT_LE(0, sock_fd); + EXPECT_EQ(-EACCES, connect_variant(sock_fd, &self->srv0)); + EXPECT_EQ(0, matches_log_tcp(self->audit_fd, "net\\.connect_tcp", + "daddr", variant->addr, "dest")); + + EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); + EXPECT_EQ(0, records.access); + EXPECT_EQ(1, records.domain); + + EXPECT_EQ(0, close(sock_fd)); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/landlock/ptrace_test.c b/tools/testing/selftests/landlock/ptrace_test.c index 8f31b673ff2d..4e356334ecb7 100644 --- a/tools/testing/selftests/landlock/ptrace_test.c +++ b/tools/testing/selftests/landlock/ptrace_test.c @@ -4,6 +4,7 @@ * * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net> * Copyright © 2019-2020 ANSSI + * Copyright © 2024-2025 Microsoft Corporation */ #define _GNU_SOURCE @@ -17,6 +18,7 @@ #include <sys/wait.h> #include <unistd.h> +#include "audit.h" #include "common.h" /* Copied from security/yama/yama_lsm.c */ @@ -434,4 +436,142 @@ TEST_F(hierarchy, trace) _metadata->exit_code = KSFT_FAIL; } +static int matches_log_ptrace(struct __test_metadata *const _metadata, + int audit_fd, const pid_t opid) +{ + static const char log_template[] = REGEX_LANDLOCK_PREFIX + " blockers=ptrace opid=%d ocomm=\"ptrace_test\"$"; + char log_match[sizeof(log_template) + 10]; + int log_match_len; + + log_match_len = + snprintf(log_match, sizeof(log_match), log_template, opid); + if (log_match_len > sizeof(log_match)) + return -E2BIG; + + return audit_match_record(audit_fd, AUDIT_LANDLOCK_ACCESS, log_match, + NULL); +} + +FIXTURE(audit) +{ + struct audit_filter audit_filter; + int audit_fd; +}; + +FIXTURE_SETUP(audit) +{ + disable_caps(_metadata); + set_cap(_metadata, CAP_AUDIT_CONTROL); + self->audit_fd = audit_init_with_exe_filter(&self->audit_filter); + EXPECT_LE(0, self->audit_fd); + clear_cap(_metadata, CAP_AUDIT_CONTROL); +} + +FIXTURE_TEARDOWN_PARENT(audit) +{ + EXPECT_EQ(0, audit_cleanup(-1, NULL)); +} + +/* Test PTRACE_TRACEME and PTRACE_ATTACH for parent and child. */ +TEST_F(audit, trace) +{ + pid_t child; + int status; + int pipe_child[2], pipe_parent[2]; + int yama_ptrace_scope; + char buf_parent; + struct audit_records records; + + /* Makes sure there is no superfluous logged records. */ + EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); + EXPECT_EQ(0, records.access); + EXPECT_EQ(0, records.domain); + + yama_ptrace_scope = get_yama_ptrace_scope(); + ASSERT_LE(0, yama_ptrace_scope); + + if (yama_ptrace_scope > YAMA_SCOPE_DISABLED) + TH_LOG("Incomplete tests due to Yama restrictions (scope %d)", + yama_ptrace_scope); + + /* + * Removes all effective and permitted capabilities to not interfere + * with cap_ptrace_access_check() in case of PTRACE_MODE_FSCREDS. + */ + drop_caps(_metadata); + + ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC)); + ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC)); + + child = fork(); + ASSERT_LE(0, child); + if (child == 0) { + char buf_child; + + ASSERT_EQ(0, close(pipe_parent[1])); + ASSERT_EQ(0, close(pipe_child[0])); + + /* Waits for the parent to be in a domain, if any. */ + ASSERT_EQ(1, read(pipe_parent[0], &buf_child, 1)); + + /* Tests child PTRACE_TRACEME. */ + EXPECT_EQ(-1, ptrace(PTRACE_TRACEME)); + EXPECT_EQ(EPERM, errno); + /* We should see the child process. */ + EXPECT_EQ(0, matches_log_ptrace(_metadata, self->audit_fd, + getpid())); + + EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); + EXPECT_EQ(0, records.access); + /* Checks for a domain creation. */ + EXPECT_EQ(1, records.domain); + + /* + * Signals that the PTRACE_ATTACH test is done and the + * PTRACE_TRACEME test is ongoing. + */ + ASSERT_EQ(1, write(pipe_child[1], ".", 1)); + + /* Waits for the parent PTRACE_ATTACH test. */ + ASSERT_EQ(1, read(pipe_parent[0], &buf_child, 1)); + _exit(_metadata->exit_code); + return; + } + + ASSERT_EQ(0, close(pipe_child[1])); + ASSERT_EQ(0, close(pipe_parent[0])); + create_domain(_metadata); + + /* Signals that the parent is in a domain. */ + ASSERT_EQ(1, write(pipe_parent[1], ".", 1)); + + /* + * Waits for the child to test PTRACE_ATTACH on the parent and start + * testing PTRACE_TRACEME. + */ + ASSERT_EQ(1, read(pipe_child[0], &buf_parent, 1)); + + /* The child should not be traced by the parent. */ + EXPECT_EQ(-1, ptrace(PTRACE_DETACH, child, NULL, 0)); + EXPECT_EQ(ESRCH, errno); + + /* Tests PTRACE_ATTACH on the child. */ + EXPECT_EQ(-1, ptrace(PTRACE_ATTACH, child, NULL, 0)); + EXPECT_EQ(EPERM, errno); + EXPECT_EQ(0, matches_log_ptrace(_metadata, self->audit_fd, child)); + + /* Signals that the parent PTRACE_ATTACH test is done. */ + ASSERT_EQ(1, write(pipe_parent[1], ".", 1)); + ASSERT_EQ(child, waitpid(child, &status, 0)); + if (WIFSIGNALED(status) || !WIFEXITED(status) || + WEXITSTATUS(status) != EXIT_SUCCESS) + _metadata->exit_code = KSFT_FAIL; + + /* Makes sure there is no superfluous logged records. */ + EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); + EXPECT_EQ(0, records.access); + EXPECT_EQ(0, records.domain); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/landlock/scoped_abstract_unix_test.c b/tools/testing/selftests/landlock/scoped_abstract_unix_test.c index a6b59d2ab1b4..6825082c079c 100644 --- a/tools/testing/selftests/landlock/scoped_abstract_unix_test.c +++ b/tools/testing/selftests/landlock/scoped_abstract_unix_test.c @@ -20,6 +20,7 @@ #include <sys/wait.h> #include <unistd.h> +#include "audit.h" #include "common.h" #include "scoped_common.h" @@ -267,6 +268,116 @@ TEST_F(scoped_domains, connect_to_child) _metadata->exit_code = KSFT_FAIL; } +FIXTURE(scoped_audit) +{ + struct service_fixture dgram_address; + struct audit_filter audit_filter; + int audit_fd; +}; + +FIXTURE_SETUP(scoped_audit) +{ + disable_caps(_metadata); + + memset(&self->dgram_address, 0, sizeof(self->dgram_address)); + set_unix_address(&self->dgram_address, 1); + + set_cap(_metadata, CAP_AUDIT_CONTROL); + self->audit_fd = audit_init_with_exe_filter(&self->audit_filter); + EXPECT_LE(0, self->audit_fd); + drop_caps(_metadata); +} + +FIXTURE_TEARDOWN_PARENT(scoped_audit) +{ + EXPECT_EQ(0, audit_cleanup(-1, NULL)); +} + +/* python -c 'print(b"\0selftests-landlock-abstract-unix-".hex().upper())' */ +#define ABSTRACT_SOCKET_PATH_PREFIX \ + "0073656C6674657374732D6C616E646C6F636B2D61627374726163742D756E69782D" + +/* + * Simpler version of scoped_domains.connect_to_child, but with audit tests. + */ +TEST_F(scoped_audit, connect_to_child) +{ + pid_t child; + int err_dgram, status; + int pipe_child[2], pipe_parent[2]; + char buf; + int dgram_client; + struct audit_records records; + + /* Makes sure there is no superfluous logged records. */ + EXPECT_EQ(0, audit_count_records(self->audit_fd, &records)); + EXPECT_EQ(0, records.access); + EXPECT_EQ(0, records.domain); + + ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC)); + ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC)); + + child = fork(); + ASSERT_LE(0, child); + if (child == 0) { + int dgram_server; + + EXPECT_EQ(0, close(pipe_parent[1])); + EXPECT_EQ(0, close(pipe_child[0])); + + /* Waits for the parent to be in a domain. */ + ASSERT_EQ(1, read(pipe_parent[0], &buf, 1)); + + dgram_server = socket(AF_UNIX, SOCK_DGRAM, 0); + ASSERT_LE(0, dgram_server); + ASSERT_EQ(0, bind(dgram_server, &self->dgram_address.unix_addr, + self->dgram_address.unix_addr_len)); + + /* Signals to the parent that child is listening. */ + ASSERT_EQ(1, write(pipe_child[1], ".", 1)); + + /* Waits to connect. */ + ASSERT_EQ(1, read(pipe_parent[0], &buf, 1)); + EXPECT_EQ(0, close(dgram_server)); + _exit(_metadata->exit_code); + return; + } + EXPECT_EQ(0, close(pipe_child[1])); + EXPECT_EQ(0, close(pipe_parent[0])); + + create_scoped_domain(_metadata, LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET); + + /* Signals that the parent is in a domain, if any. */ + ASSERT_EQ(1, write(pipe_parent[1], ".", 1)); + + dgram_client = socket(AF_UNIX, SOCK_DGRAM, 0); + ASSERT_LE(0, dgram_client); + + /* Waits for the child to listen */ + ASSERT_EQ(1, read(pipe_child[0], &buf, 1)); + err_dgram = connect(dgram_client, &self->dgram_address.unix_addr, + self->dgram_address.unix_addr_len); + EXPECT_EQ(-1, err_dgram); + EXPECT_EQ(EPERM, errno); + + EXPECT_EQ( + 0, + audit_match_record( + self->audit_fd, AUDIT_LANDLOCK_ACCESS, + REGEX_LANDLOCK_PREFIX + " blockers=scope\\.abstract_unix_socket path=" ABSTRACT_SOCKET_PATH_PREFIX + "[0-9A-F]\\+$", + NULL)); + + ASSERT_EQ(1, write(pipe_parent[1], ".", 1)); + EXPECT_EQ(0, close(dgram_client)); + + ASSERT_EQ(child, waitpid(child, &status, 0)); + if (WIFSIGNALED(status) || !WIFEXITED(status) || + WEXITSTATUS(status) != EXIT_SUCCESS) + _metadata->exit_code = KSFT_FAIL; +} + FIXTURE(scoped_vs_unscoped) { struct service_fixture parent_stream_address, parent_dgram_address, diff --git a/tools/testing/selftests/landlock/scoped_signal_test.c b/tools/testing/selftests/landlock/scoped_signal_test.c index 475ee62a832d..d8bf33417619 100644 --- a/tools/testing/selftests/landlock/scoped_signal_test.c +++ b/tools/testing/selftests/landlock/scoped_signal_test.c @@ -249,47 +249,67 @@ TEST_F(scoped_domains, check_access_signal) _metadata->exit_code = KSFT_FAIL; } -static int thread_pipe[2]; - enum thread_return { THREAD_INVALID = 0, THREAD_SUCCESS = 1, THREAD_ERROR = 2, + THREAD_TEST_FAILED = 3, }; -void *thread_func(void *arg) +static void *thread_sync(void *arg) { + const int pipe_read = *(int *)arg; char buf; - if (read(thread_pipe[0], &buf, 1) != 1) + if (read(pipe_read, &buf, 1) != 1) return (void *)THREAD_ERROR; return (void *)THREAD_SUCCESS; } -TEST(signal_scoping_threads) +TEST(signal_scoping_thread_before) { - pthread_t no_sandbox_thread, scoped_thread; + pthread_t no_sandbox_thread; enum thread_return ret = THREAD_INVALID; + int thread_pipe[2]; drop_caps(_metadata); ASSERT_EQ(0, pipe2(thread_pipe, O_CLOEXEC)); - ASSERT_EQ(0, - pthread_create(&no_sandbox_thread, NULL, thread_func, NULL)); + ASSERT_EQ(0, pthread_create(&no_sandbox_thread, NULL, thread_sync, + &thread_pipe[0])); - /* Restricts the domain after creating the first thread. */ + /* Enforces restriction after creating the thread. */ create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL); - ASSERT_EQ(EPERM, pthread_kill(no_sandbox_thread, 0)); - ASSERT_EQ(1, write(thread_pipe[1], ".", 1)); - - ASSERT_EQ(0, pthread_create(&scoped_thread, NULL, thread_func, NULL)); - ASSERT_EQ(0, pthread_kill(scoped_thread, 0)); - ASSERT_EQ(1, write(thread_pipe[1], ".", 1)); + EXPECT_EQ(0, pthread_kill(no_sandbox_thread, 0)); + EXPECT_EQ(1, write(thread_pipe[1], ".", 1)); EXPECT_EQ(0, pthread_join(no_sandbox_thread, (void **)&ret)); EXPECT_EQ(THREAD_SUCCESS, ret); + + EXPECT_EQ(0, close(thread_pipe[0])); + EXPECT_EQ(0, close(thread_pipe[1])); +} + +TEST(signal_scoping_thread_after) +{ + pthread_t scoped_thread; + enum thread_return ret = THREAD_INVALID; + int thread_pipe[2]; + + drop_caps(_metadata); + ASSERT_EQ(0, pipe2(thread_pipe, O_CLOEXEC)); + + /* Enforces restriction before creating the thread. */ + create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL); + + ASSERT_EQ(0, pthread_create(&scoped_thread, NULL, thread_sync, + &thread_pipe[0])); + + EXPECT_EQ(0, pthread_kill(scoped_thread, 0)); + EXPECT_EQ(1, write(thread_pipe[1], ".", 1)); + EXPECT_EQ(0, pthread_join(scoped_thread, (void **)&ret)); EXPECT_EQ(THREAD_SUCCESS, ret); @@ -297,6 +317,64 @@ TEST(signal_scoping_threads) EXPECT_EQ(0, close(thread_pipe[1])); } +struct thread_setuid_args { + int pipe_read, new_uid; +}; + +void *thread_setuid(void *ptr) +{ + const struct thread_setuid_args *arg = ptr; + char buf; + + if (read(arg->pipe_read, &buf, 1) != 1) + return (void *)THREAD_ERROR; + + /* libc's setuid() should update all thread's credentials. */ + if (getuid() != arg->new_uid) + return (void *)THREAD_TEST_FAILED; + + return (void *)THREAD_SUCCESS; +} + +TEST(signal_scoping_thread_setuid) +{ + struct thread_setuid_args arg; + pthread_t no_sandbox_thread; + enum thread_return ret = THREAD_INVALID; + int pipe_parent[2]; + int prev_uid; + + disable_caps(_metadata); + + /* This test does not need to be run as root. */ + prev_uid = getuid(); + arg.new_uid = prev_uid + 1; + EXPECT_LT(0, arg.new_uid); + + ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC)); + arg.pipe_read = pipe_parent[0]; + + /* Capabilities must be set before creating a new thread. */ + set_cap(_metadata, CAP_SETUID); + ASSERT_EQ(0, pthread_create(&no_sandbox_thread, NULL, thread_setuid, + &arg)); + + /* Enforces restriction after creating the thread. */ + create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL); + + EXPECT_NE(arg.new_uid, getuid()); + EXPECT_EQ(0, setuid(arg.new_uid)); + EXPECT_EQ(arg.new_uid, getuid()); + EXPECT_EQ(1, write(pipe_parent[1], ".", 1)); + + EXPECT_EQ(0, pthread_join(no_sandbox_thread, (void **)&ret)); + EXPECT_EQ(THREAD_SUCCESS, ret); + + clear_cap(_metadata, CAP_SETUID); + EXPECT_EQ(0, close(pipe_parent[0])); + EXPECT_EQ(0, close(pipe_parent[1])); +} + const short backlog = 10; static volatile sig_atomic_t signal_received; diff --git a/tools/testing/selftests/landlock/wait-pipe-sandbox.c b/tools/testing/selftests/landlock/wait-pipe-sandbox.c new file mode 100644 index 000000000000..87dbc9164430 --- /dev/null +++ b/tools/testing/selftests/landlock/wait-pipe-sandbox.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Write in a pipe, wait, sandbox itself, test sandboxing, and wait again. + * + * Used by audit_exec.flags from audit_test.c + * + * Copyright © 2024-2025 Microsoft Corporation + */ + +#define _GNU_SOURCE +#include <fcntl.h> +#include <linux/landlock.h> +#include <linux/prctl.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/prctl.h> +#include <unistd.h> + +#include "wrappers.h" + +static int sync_with(int pipe_child, int pipe_parent) +{ + char buf; + + /* Signals that we are waiting. */ + if (write(pipe_child, ".", 1) != 1) { + perror("Failed to write to first argument"); + return 1; + } + + /* Waits for the parent do its test. */ + if (read(pipe_parent, &buf, 1) != 1) { + perror("Failed to write to the second argument"); + return 1; + } + + return 0; +} + +int main(int argc, char *argv[]) +{ + const struct landlock_ruleset_attr layer2 = { + .handled_access_fs = LANDLOCK_ACCESS_FS_READ_DIR, + }; + const struct landlock_ruleset_attr layer3 = { + .scoped = LANDLOCK_SCOPE_SIGNAL, + }; + int err, pipe_child, pipe_parent, ruleset_fd; + + /* The first argument must be the file descriptor number of a pipe. */ + if (argc != 3) { + fprintf(stderr, "Wrong number of arguments (not two)\n"); + return 1; + } + + pipe_child = atoi(argv[1]); + pipe_parent = atoi(argv[2]); + /* PR_SET_NO_NEW_PRIVS already set by parent. */ + + /* First step to test parent's layer1. */ + err = sync_with(pipe_child, pipe_parent); + if (err) + return err; + + /* Tries to send a signal, denied by layer1. */ + if (!kill(getppid(), 0)) { + fprintf(stderr, "Successfully sent a signal to the parent"); + return 1; + } + + /* Second step to test parent's layer1 and our layer2. */ + err = sync_with(pipe_child, pipe_parent); + if (err) + return err; + + ruleset_fd = landlock_create_ruleset(&layer2, sizeof(layer2), 0); + if (ruleset_fd < 0) { + perror("Failed to create the layer2 ruleset"); + return 1; + } + + if (landlock_restrict_self(ruleset_fd, 0)) { + perror("Failed to restrict self"); + return 1; + } + close(ruleset_fd); + + /* Tries to send a signal, denied by layer1. */ + if (!kill(getppid(), 0)) { + fprintf(stderr, "Successfully sent a signal to the parent"); + return 1; + } + + /* Tries to open ., denied by layer2. */ + if (open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC) >= 0) { + fprintf(stderr, "Successfully opened /"); + return 1; + } + + /* Third step to test our layer2 and layer3. */ + err = sync_with(pipe_child, pipe_parent); + if (err) + return err; + + ruleset_fd = landlock_create_ruleset(&layer3, sizeof(layer3), 0); + if (ruleset_fd < 0) { + perror("Failed to create the layer3 ruleset"); + return 1; + } + + if (landlock_restrict_self(ruleset_fd, 0)) { + perror("Failed to restrict self"); + return 1; + } + close(ruleset_fd); + + /* Tries to open ., denied by layer2. */ + if (open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC) >= 0) { + fprintf(stderr, "Successfully opened /"); + return 1; + } + + /* Tries to send a signal, denied by layer3. */ + if (!kill(getppid(), 0)) { + fprintf(stderr, "Successfully sent a signal to the parent"); + return 1; + } + + return 0; +} diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index d6edcfcb5be8..530390033929 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -228,4 +228,7 @@ $(OUTPUT)/%:%.S $(LINK.S) $^ $(LDLIBS) -o $@ endif -.PHONY: run_tests all clean install emit_tests gen_mods_dir clean_mods_dir +headers: + $(Q)$(MAKE) -C $(top_srcdir) headers + +.PHONY: run_tests all clean install emit_tests gen_mods_dir clean_mods_dir headers diff --git a/tools/testing/selftests/lib/Makefile b/tools/testing/selftests/lib/Makefile index c52fe3ad8e98..f876bf4744e1 100644 --- a/tools/testing/selftests/lib/Makefile +++ b/tools/testing/selftests/lib/Makefile @@ -4,5 +4,5 @@ # No binaries, but make sure arg-less "make" doesn't trigger "run_tests" all: -TEST_PROGS := printf.sh bitmap.sh prime_numbers.sh scanf.sh +TEST_PROGS := bitmap.sh include ../lib.mk diff --git a/tools/testing/selftests/lib/config b/tools/testing/selftests/lib/config index dc15aba8d0a3..81a1f64a22e8 100644 --- a/tools/testing/selftests/lib/config +++ b/tools/testing/selftests/lib/config @@ -1,5 +1,2 @@ -CONFIG_TEST_PRINTF=m -CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m -CONFIG_PRIME_NUMBERS=m CONFIG_TEST_BITOPS=m diff --git a/tools/testing/selftests/lib/prime_numbers.sh b/tools/testing/selftests/lib/prime_numbers.sh deleted file mode 100755 index 370b79a9cb2e..000000000000 --- a/tools/testing/selftests/lib/prime_numbers.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 -# Checks fast/slow prime_number generation for inconsistencies -$(dirname $0)/../kselftest/module.sh "prime numbers" prime_numbers selftest=65536 diff --git a/tools/testing/selftests/lib/printf.sh b/tools/testing/selftests/lib/printf.sh deleted file mode 100755 index 05f4544e87f9..000000000000 --- a/tools/testing/selftests/lib/printf.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 -# Tests the printf infrastructure using test_printf kernel module. -$(dirname $0)/../kselftest/module.sh "printf" test_printf diff --git a/tools/testing/selftests/lib/scanf.sh b/tools/testing/selftests/lib/scanf.sh deleted file mode 100755 index b59b8ba561c3..000000000000 --- a/tools/testing/selftests/lib/scanf.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 -# Tests the scanf infrastructure using test_scanf kernel module. -$(dirname $0)/../kselftest/module.sh "scanf" test_scanf diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh index 15601402dee6..46991a029f7c 100644 --- a/tools/testing/selftests/livepatch/functions.sh +++ b/tools/testing/selftests/livepatch/functions.sh @@ -10,6 +10,7 @@ SYSFS_KERNEL_DIR="/sys/kernel" SYSFS_KLP_DIR="$SYSFS_KERNEL_DIR/livepatch" SYSFS_DEBUG_DIR="$SYSFS_KERNEL_DIR/debug" SYSFS_KPROBES_DIR="$SYSFS_DEBUG_DIR/kprobes" +SYSFS_TRACING_DIR="$SYSFS_DEBUG_DIR/tracing" # Kselftest framework requirement - SKIP code is 4 ksft_skip=4 @@ -62,6 +63,9 @@ function push_config() { awk -F'[: ]' '{print "file " $1 " line " $2 " " $4}') FTRACE_ENABLED=$(sysctl --values kernel.ftrace_enabled) KPROBE_ENABLED=$(cat "$SYSFS_KPROBES_DIR/enabled") + TRACING_ON=$(cat "$SYSFS_TRACING_DIR/tracing_on") + CURRENT_TRACER=$(cat "$SYSFS_TRACING_DIR/current_tracer") + FTRACE_FILTER=$(cat "$SYSFS_TRACING_DIR/set_ftrace_filter") } function pop_config() { @@ -74,6 +78,17 @@ function pop_config() { if [[ -n "$KPROBE_ENABLED" ]]; then echo "$KPROBE_ENABLED" > "$SYSFS_KPROBES_DIR/enabled" fi + if [[ -n "$TRACING_ON" ]]; then + echo "$TRACING_ON" > "$SYSFS_TRACING_DIR/tracing_on" + fi + if [[ -n "$CURRENT_TRACER" ]]; then + echo "$CURRENT_TRACER" > "$SYSFS_TRACING_DIR/current_tracer" + fi + if [[ -n "$FTRACE_FILTER" ]]; then + echo "$FTRACE_FILTER" \ + | sed -e "/#### all functions enabled ####/d" \ + > "$SYSFS_TRACING_DIR/set_ftrace_filter" + fi } function set_dynamic_debug() { @@ -352,3 +367,37 @@ function check_sysfs_value() { die "Unexpected value in $path: $expected_value vs. $value" fi } + +# cleanup_tracing() - stop and clean up function tracing +function cleanup_tracing() { + echo 0 > "$SYSFS_TRACING_DIR/tracing_on" + echo "" > "$SYSFS_TRACING_DIR/set_ftrace_filter" + echo "nop" > "$SYSFS_TRACING_DIR/current_tracer" + echo "" > "$SYSFS_TRACING_DIR/trace" +} + +# trace_function(function) - start tracing of a function +# function - to be traced function +function trace_function() { + local function="$1"; shift + + cleanup_tracing + + echo "function" > "$SYSFS_TRACING_DIR/current_tracer" + echo "$function" > "$SYSFS_TRACING_DIR/set_ftrace_filter" + echo 1 > "$SYSFS_TRACING_DIR/tracing_on" +} + +# check_traced_functions(functions...) - check whether each function appeared in the trace log +# functions - list of functions to be checked +function check_traced_functions() { + local function + + for function in "$@"; do + if ! grep -Fwq "$function" "$SYSFS_TRACING_DIR/trace" ; then + die "Function ($function) did not appear in the trace" + fi + done + + cleanup_tracing +} diff --git a/tools/testing/selftests/livepatch/test-ftrace.sh b/tools/testing/selftests/livepatch/test-ftrace.sh index fe14f248913a..094176f1a46a 100755 --- a/tools/testing/selftests/livepatch/test-ftrace.sh +++ b/tools/testing/selftests/livepatch/test-ftrace.sh @@ -61,4 +61,38 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete % rmmod $MOD_LIVEPATCH" +# - verify livepatch can load +# - check if traces have a patched function +# - reset trace and unload livepatch + +start_test "trace livepatched function and check that the live patch remains in effect" + +FUNCTION_NAME="livepatch_cmdline_proc_show" + +load_lp $MOD_LIVEPATCH +trace_function "$FUNCTION_NAME" + +if [[ "$(cat /proc/cmdline)" == "$MOD_LIVEPATCH: this has been live patched" ]] ; then + log "livepatch: ok" +fi + +check_traced_functions "$FUNCTION_NAME" + +disable_lp $MOD_LIVEPATCH +unload_lp $MOD_LIVEPATCH + +check_result "% insmod test_modules/$MOD_LIVEPATCH.ko +livepatch: enabling patch '$MOD_LIVEPATCH' +livepatch: '$MOD_LIVEPATCH': initializing patching transition +livepatch: '$MOD_LIVEPATCH': starting patching transition +livepatch: '$MOD_LIVEPATCH': completing patching transition +livepatch: '$MOD_LIVEPATCH': patching complete +livepatch: ok +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled +livepatch: '$MOD_LIVEPATCH': initializing unpatching transition +livepatch: '$MOD_LIVEPATCH': starting unpatching transition +livepatch: '$MOD_LIVEPATCH': completing unpatching transition +livepatch: '$MOD_LIVEPATCH': unpatching complete +% rmmod $MOD_LIVEPATCH" + exit 0 diff --git a/tools/testing/selftests/livepatch/test-kprobe.sh b/tools/testing/selftests/livepatch/test-kprobe.sh index 115065156016..b67dfad03d97 100755 --- a/tools/testing/selftests/livepatch/test-kprobe.sh +++ b/tools/testing/selftests/livepatch/test-kprobe.sh @@ -5,6 +5,8 @@ . $(dirname $0)/functions.sh +grep -q kprobe_ftrace_ops /proc/kallsyms || skip "test-kprobe requires CONFIG_KPROBES_ON_FTRACE" + MOD_LIVEPATCH=test_klp_livepatch MOD_KPROBE=test_klp_kprobe diff --git a/tools/testing/selftests/mm/mseal_test.c b/tools/testing/selftests/mm/mseal_test.c index ad17005521a8..005f29c86484 100644 --- a/tools/testing/selftests/mm/mseal_test.c +++ b/tools/testing/selftests/mm/mseal_test.c @@ -218,7 +218,7 @@ bool seal_support(void) bool pkey_supported(void) { #if defined(__i386__) || defined(__x86_64__) /* arch */ - int pkey = sys_pkey_alloc(0, 0); + int pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED); if (pkey > 0) return true; @@ -1671,7 +1671,7 @@ static void test_seal_discard_ro_anon_on_pkey(bool seal) setup_single_address_rw(size, &ptr); FAIL_TEST_IF_FALSE(ptr != (void *)-1); - pkey = sys_pkey_alloc(0, 0); + pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED); FAIL_TEST_IF_FALSE(pkey > 0); ret = sys_mprotect_pkey((void *)ptr, size, PROT_READ | PROT_WRITE, pkey); @@ -1683,7 +1683,7 @@ static void test_seal_discard_ro_anon_on_pkey(bool seal) } /* sealing doesn't take effect if PKRU allow write. */ - set_pkey(pkey, 0); + set_pkey(pkey, PKEY_UNRESTRICTED); ret = sys_madvise(ptr, size, MADV_DONTNEED); FAIL_TEST_IF_FALSE(!ret); diff --git a/tools/testing/selftests/mm/pkey-helpers.h b/tools/testing/selftests/mm/pkey-helpers.h index f080e97b39be..ea404f80e6cb 100644 --- a/tools/testing/selftests/mm/pkey-helpers.h +++ b/tools/testing/selftests/mm/pkey-helpers.h @@ -13,6 +13,7 @@ #include <ucontext.h> #include <sys/mman.h> +#include <linux/mman.h> #include <linux/types.h> #include "../kselftest.h" @@ -193,7 +194,7 @@ static inline u32 *siginfo_get_pkey_ptr(siginfo_t *si) static inline int kernel_has_pkeys(void) { /* try allocating a key and see if it succeeds */ - int ret = sys_pkey_alloc(0, 0); + int ret = sys_pkey_alloc(0, PKEY_UNRESTRICTED); if (ret <= 0) { return 0; } diff --git a/tools/testing/selftests/mm/pkey_sighandler_tests.c b/tools/testing/selftests/mm/pkey_sighandler_tests.c index 1ac8c8809880..b5e076a564c9 100644 --- a/tools/testing/selftests/mm/pkey_sighandler_tests.c +++ b/tools/testing/selftests/mm/pkey_sighandler_tests.c @@ -311,7 +311,7 @@ static void test_sigsegv_handler_with_different_pkey_for_stack(void) __write_pkey_reg(pkey_reg); /* Protect the new stack with MPK 1 */ - pkey = sys_pkey_alloc(0, 0); + pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED); sys_mprotect_pkey(stack, STACK_SIZE, PROT_READ | PROT_WRITE, pkey); /* Set up alternate signal stack that will use the default MPK */ @@ -484,7 +484,7 @@ static void test_pkru_sigreturn(void) __write_pkey_reg(pkey_reg); /* Protect the stack with MPK 2 */ - pkey = sys_pkey_alloc(0, 0); + pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED); sys_mprotect_pkey(stack, STACK_SIZE, PROT_READ | PROT_WRITE, pkey); /* Set up alternate signal stack that will use the default MPK */ diff --git a/tools/testing/selftests/mm/protection_keys.c b/tools/testing/selftests/mm/protection_keys.c index 35565af308af..23ebec367015 100644 --- a/tools/testing/selftests/mm/protection_keys.c +++ b/tools/testing/selftests/mm/protection_keys.c @@ -463,7 +463,7 @@ static pid_t fork_lazy_child(void) static int alloc_pkey(void) { int ret; - unsigned long init_val = 0x0; + unsigned long init_val = PKEY_UNRESTRICTED; dprintf1("%s()::%d, pkey_reg: 0x%016llx shadow: %016llx\n", __func__, __LINE__, __read_pkey_reg(), shadow_pkey_reg); diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 28a715a8ef2b..679542f565a4 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -21,6 +21,7 @@ msg_oob msg_zerocopy netlink-dumps nettest +proc_net_pktgen psock_fanout psock_snd psock_tpacket @@ -42,6 +43,7 @@ socket so_incoming_cpu so_netns_cookie so_txtime +so_rcv_listener stress_reuseport_listen tap tcp_fastopen_backup_key diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 8f32b4f01aee..6d718b478ed8 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -7,7 +7,7 @@ CFLAGS += -I../../../../usr/include/ $(KHDR_INCLUDES) CFLAGS += -I../ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh \ - rtnetlink.sh xfrm_policy.sh test_blackhole_dev.sh + rtnetlink.sh xfrm_policy.sh TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh @@ -33,9 +33,12 @@ TEST_PROGS += gro.sh TEST_PROGS += gre_gso.sh TEST_PROGS += cmsg_so_mark.sh TEST_PROGS += cmsg_so_priority.sh -TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh +TEST_PROGS += test_so_rcv.sh +TEST_PROGS += cmsg_time.sh cmsg_ip.sh TEST_PROGS += netns-name.sh +TEST_PROGS += link_netns.py TEST_PROGS += nl_netdev.py +TEST_PROGS += rtnetlink.py TEST_PROGS += srv6_end_dt46_l3vpn_test.sh TEST_PROGS += srv6_end_dt4_l3vpn_test.sh TEST_PROGS += srv6_end_dt6_l3vpn_test.sh @@ -75,6 +78,7 @@ TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls tun tap epoll_busy_ TEST_GEN_FILES += toeplitz TEST_GEN_FILES += cmsg_sender TEST_GEN_FILES += stress_reuseport_listen +TEST_GEN_FILES += so_rcv_listener TEST_PROGS += test_vxlan_vnifiltering.sh TEST_GEN_FILES += io_uring_zerocopy_tx TEST_PROGS += io_uring_zerocopy_tx.sh @@ -100,6 +104,7 @@ TEST_PROGS += vlan_bridge_binding.sh TEST_PROGS += bpf_offload.py TEST_PROGS += ipv6_route_update_soft_lockup.sh TEST_PROGS += busy_poll_test.sh +TEST_GEN_PROGS += proc_net_pktgen TEST_PROGS += lwt_dst_cache_ref_loop.sh # YNL files, must be before "include ..lib.mk" diff --git a/tools/testing/selftests/net/bpf_offload.py b/tools/testing/selftests/net/bpf_offload.py index fd0d959914e4..b2c271b79240 100755 --- a/tools/testing/selftests/net/bpf_offload.py +++ b/tools/testing/selftests/net/bpf_offload.py @@ -207,9 +207,11 @@ def bpftool_prog_list_wait(expected=0, n_retry=20): raise Exception("Time out waiting for program counts to stabilize want %d, have %d" % (expected, nprogs)) def bpftool_map_list_wait(expected=0, n_retry=20, ns=""): + nmaps = None for i in range(n_retry): maps = bpftool_map_list(ns=ns) - if len(maps) == expected: + nmaps = len(maps) + if nmaps == expected: return maps time.sleep(0.05) raise Exception("Time out waiting for map counts to stabilize want %d, have %d" % (expected, nmaps)) @@ -710,6 +712,7 @@ _, base_maps = bpftool("map") base_map_names = [ 'pid_iter.rodata', # created on each bpftool invocation 'libbpf_det_bind', # created on each bpftool invocation + 'libbpf_global', ] # Check netdevsim diff --git a/tools/testing/selftests/net/cmsg_ip.sh b/tools/testing/selftests/net/cmsg_ip.sh new file mode 100755 index 000000000000..b55680e081ad --- /dev/null +++ b/tools/testing/selftests/net/cmsg_ip.sh @@ -0,0 +1,187 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +IP4=172.16.0.1/24 +TGT4=172.16.0.2 +IP6=2001:db8:1::1/64 +TGT6=2001:db8:1::2 +TMPF=$(mktemp --suffix ".pcap") + +cleanup() +{ + rm -f $TMPF + cleanup_ns $NS +} + +trap cleanup EXIT + +tcpdump -h | grep immediate-mode >> /dev/null +if [ $? -ne 0 ]; then + echo "SKIP - tcpdump with --immediate-mode option required" + exit $ksft_skip +fi + +# Namespaces +setup_ns NS +NSEXE="ip netns exec $NS" + +$NSEXE sysctl -w net.ipv4.ping_group_range='0 2147483647' > /dev/null + +# Connectivity +ip -netns $NS link add type dummy +ip -netns $NS link set dev dummy0 up +ip -netns $NS addr add $IP4 dev dummy0 +ip -netns $NS addr add $IP6 dev dummy0 + +# Test +BAD=0 +TOTAL=0 + +check_result() { + ((TOTAL++)) + if [ $1 -ne $2 ]; then + echo " Case $3 returned $1, expected $2" + ((BAD++)) + fi +} + +# IPV6_DONTFRAG +for ovr in setsock cmsg both diff; do + for df in 0 1; do + for p in u U i r; do + [ $p == "u" ] && prot=UDP + [ $p == "U" ] && prot=UDP + [ $p == "i" ] && prot=ICMP + [ $p == "r" ] && prot=RAW + + [ $ovr == "setsock" ] && m="-F $df" + [ $ovr == "cmsg" ] && m="-f $df" + [ $ovr == "both" ] && m="-F $df -f $df" + [ $ovr == "diff" ] && m="-F $((1 - df)) -f $df" + + $NSEXE ./cmsg_sender -s -S 2000 -6 -p $p $m $TGT6 1234 + check_result $? $df "DONTFRAG $prot $ovr" + done + done +done + +# IP_TOS + IPV6_TCLASS + +test_dscp() { + local -r IPVER=$1 + local -r TGT=$2 + local -r MATCH=$3 + + local -r TOS=0x10 + local -r TOS2=0x20 + local -r ECN=0x3 + + ip $IPVER -netns $NS rule add tos $TOS lookup 300 + ip $IPVER -netns $NS route add table 300 prohibit any + + for ovr in setsock cmsg both diff; do + for p in u U i r; do + [ $p == "u" ] && prot=UDP + [ $p == "U" ] && prot=UDP + [ $p == "i" ] && prot=ICMP + [ $p == "r" ] && prot=RAW + + [ $ovr == "setsock" ] && m="-C" + [ $ovr == "cmsg" ] && m="-c" + [ $ovr == "both" ] && m="-C $((TOS2)) -c" + [ $ovr == "diff" ] && m="-C $((TOS )) -c" + + $NSEXE nohup tcpdump --immediate-mode -p -ni dummy0 -w $TMPF -c 4 2> /dev/null & + BG=$! + sleep 0.05 + + $NSEXE ./cmsg_sender $IPVER -p $p $m $((TOS2)) $TGT 1234 + check_result $? 0 "$MATCH $prot $ovr - pass" + + while [ -d /proc/$BG ]; do + $NSEXE ./cmsg_sender $IPVER -p $p $m $((TOS2)) $TGT 1234 + done + + tcpdump -r $TMPF -v 2>&1 | grep "$MATCH $TOS2" >> /dev/null + check_result $? 0 "$MATCH $prot $ovr - packet data" + rm $TMPF + + [ $ovr == "both" ] && m="-C $((TOS )) -c" + [ $ovr == "diff" ] && m="-C $((TOS2)) -c" + + # Match prohibit rule: expect failure + $NSEXE ./cmsg_sender $IPVER -p $p $m $((TOS)) -s $TGT 1234 + check_result $? 1 "$MATCH $prot $ovr - rejection" + + # Match prohibit rule: IPv4 masks ECN: expect failure + if [[ "$IPVER" == "-4" ]]; then + $NSEXE ./cmsg_sender $IPVER -p $p $m "$((TOS | ECN))" -s $TGT 1234 + check_result $? 1 "$MATCH $prot $ovr - rejection (ECN)" + fi + done + done +} + +test_dscp -4 $TGT4 tos +test_dscp -6 $TGT6 class + +# IP_TTL + IPV6_HOPLIMIT +test_ttl_hoplimit() { + local -r IPVER=$1 + local -r TGT=$2 + local -r MATCH=$3 + + local -r LIM=4 + + for ovr in setsock cmsg both diff; do + for p in u U i r; do + [ $p == "u" ] && prot=UDP + [ $p == "U" ] && prot=UDP + [ $p == "i" ] && prot=ICMP + [ $p == "r" ] && prot=RAW + + [ $ovr == "setsock" ] && m="-L" + [ $ovr == "cmsg" ] && m="-l" + [ $ovr == "both" ] && m="-L $LIM -l" + [ $ovr == "diff" ] && m="-L $((LIM + 1)) -l" + + $NSEXE nohup tcpdump --immediate-mode -p -ni dummy0 -w $TMPF -c 4 2> /dev/null & + BG=$! + sleep 0.05 + + $NSEXE ./cmsg_sender $IPVER -p $p $m $LIM $TGT 1234 + check_result $? 0 "$MATCH $prot $ovr - pass" + + while [ -d /proc/$BG ]; do + $NSEXE ./cmsg_sender $IPVER -p $p $m $LIM $TGT 1234 + done + + tcpdump -r $TMPF -v 2>&1 | grep "$MATCH $LIM[^0-9]" >> /dev/null + check_result $? 0 "$MATCH $prot $ovr - packet data" + rm $TMPF + done + done +} + +test_ttl_hoplimit -4 $TGT4 ttl +test_ttl_hoplimit -6 $TGT6 hlim + +# IPV6 exthdr +for p in u U i r; do + # Very basic "does it crash" test + for h in h d r; do + $NSEXE ./cmsg_sender -p $p -6 -H $h $TGT6 1234 + check_result $? 0 "ExtHdr $prot $ovr - pass" + done +done + +# Summary +if [ $BAD -ne 0 ]; then + echo "FAIL - $BAD/$TOTAL cases failed" + exit 1 +else + echo "OK" + exit 0 +fi diff --git a/tools/testing/selftests/net/cmsg_ipv6.sh b/tools/testing/selftests/net/cmsg_ipv6.sh deleted file mode 100755 index 8bc23fb4c82b..000000000000 --- a/tools/testing/selftests/net/cmsg_ipv6.sh +++ /dev/null @@ -1,154 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -source lib.sh - -IP6=2001:db8:1::1/64 -TGT6=2001:db8:1::2 -TMPF=$(mktemp --suffix ".pcap") - -cleanup() -{ - rm -f $TMPF - cleanup_ns $NS -} - -trap cleanup EXIT - -tcpdump -h | grep immediate-mode >> /dev/null -if [ $? -ne 0 ]; then - echo "SKIP - tcpdump with --immediate-mode option required" - exit $ksft_skip -fi - -# Namespaces -setup_ns NS -NSEXE="ip netns exec $NS" - -$NSEXE sysctl -w net.ipv4.ping_group_range='0 2147483647' > /dev/null - -# Connectivity -ip -netns $NS link add type dummy -ip -netns $NS link set dev dummy0 up -ip -netns $NS addr add $IP6 dev dummy0 - -# Test -BAD=0 -TOTAL=0 - -check_result() { - ((TOTAL++)) - if [ $1 -ne $2 ]; then - echo " Case $3 returned $1, expected $2" - ((BAD++)) - fi -} - -# IPV6_DONTFRAG -for ovr in setsock cmsg both diff; do - for df in 0 1; do - for p in u i r; do - [ $p == "u" ] && prot=UDP - [ $p == "i" ] && prot=ICMP - [ $p == "r" ] && prot=RAW - - [ $ovr == "setsock" ] && m="-F $df" - [ $ovr == "cmsg" ] && m="-f $df" - [ $ovr == "both" ] && m="-F $df -f $df" - [ $ovr == "diff" ] && m="-F $((1 - df)) -f $df" - - $NSEXE ./cmsg_sender -s -S 2000 -6 -p $p $m $TGT6 1234 - check_result $? $df "DONTFRAG $prot $ovr" - done - done -done - -# IPV6_TCLASS -TOS=0x10 -TOS2=0x20 - -ip -6 -netns $NS rule add tos $TOS lookup 300 -ip -6 -netns $NS route add table 300 prohibit any - -for ovr in setsock cmsg both diff; do - for p in u i r; do - [ $p == "u" ] && prot=UDP - [ $p == "i" ] && prot=ICMP - [ $p == "r" ] && prot=RAW - - [ $ovr == "setsock" ] && m="-C" - [ $ovr == "cmsg" ] && m="-c" - [ $ovr == "both" ] && m="-C $((TOS2)) -c" - [ $ovr == "diff" ] && m="-C $((TOS )) -c" - - $NSEXE nohup tcpdump --immediate-mode -p -ni dummy0 -w $TMPF -c 4 2> /dev/null & - BG=$! - sleep 0.05 - - $NSEXE ./cmsg_sender -6 -p $p $m $((TOS2)) $TGT6 1234 - check_result $? 0 "TCLASS $prot $ovr - pass" - - while [ -d /proc/$BG ]; do - $NSEXE ./cmsg_sender -6 -p $p $m $((TOS2)) $TGT6 1234 - done - - tcpdump -r $TMPF -v 2>&1 | grep "class $TOS2" >> /dev/null - check_result $? 0 "TCLASS $prot $ovr - packet data" - rm $TMPF - - [ $ovr == "both" ] && m="-C $((TOS )) -c" - [ $ovr == "diff" ] && m="-C $((TOS2)) -c" - - $NSEXE ./cmsg_sender -6 -p $p $m $((TOS)) -s $TGT6 1234 - check_result $? 1 "TCLASS $prot $ovr - rejection" - done -done - -# IPV6_HOPLIMIT -LIM=4 - -for ovr in setsock cmsg both diff; do - for p in u i r; do - [ $p == "u" ] && prot=UDP - [ $p == "i" ] && prot=ICMP - [ $p == "r" ] && prot=RAW - - [ $ovr == "setsock" ] && m="-L" - [ $ovr == "cmsg" ] && m="-l" - [ $ovr == "both" ] && m="-L $LIM -l" - [ $ovr == "diff" ] && m="-L $((LIM + 1)) -l" - - $NSEXE nohup tcpdump --immediate-mode -p -ni dummy0 -w $TMPF -c 4 2> /dev/null & - BG=$! - sleep 0.05 - - $NSEXE ./cmsg_sender -6 -p $p $m $LIM $TGT6 1234 - check_result $? 0 "HOPLIMIT $prot $ovr - pass" - - while [ -d /proc/$BG ]; do - $NSEXE ./cmsg_sender -6 -p $p $m $LIM $TGT6 1234 - done - - tcpdump -r $TMPF -v 2>&1 | grep "hlim $LIM[^0-9]" >> /dev/null - check_result $? 0 "HOPLIMIT $prot $ovr - packet data" - rm $TMPF - done -done - -# IPV6 exthdr -for p in u i r; do - # Very basic "does it crash" test - for h in h d r; do - $NSEXE ./cmsg_sender -p $p -6 -H $h $TGT6 1234 - check_result $? 0 "ExtHdr $prot $ovr - pass" - done -done - -# Summary -if [ $BAD -ne 0 ]; then - echo "FAIL - $BAD/$TOTAL cases failed" - exit 1 -else - echo "OK" - exit 0 -fi diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c index bc314382e4e1..a825e628aee7 100644 --- a/tools/testing/selftests/net/cmsg_sender.c +++ b/tools/testing/selftests/net/cmsg_sender.c @@ -33,6 +33,7 @@ enum { ERN_RECVERR, ERN_CMSG_RD, ERN_CMSG_RCV, + ERN_SEND_MORE, }; struct option_cmsg_u32 { @@ -46,6 +47,7 @@ struct options { const char *service; unsigned int size; unsigned int num_pkt; + bool msg_more; struct { unsigned int mark; unsigned int dontfrag; @@ -72,7 +74,7 @@ struct options { struct option_cmsg_u32 tclass; struct option_cmsg_u32 hlimit; struct option_cmsg_u32 exthdr; - } v6; + } cmsg; } opt = { .size = 13, .num_pkt = 1, @@ -94,7 +96,8 @@ static void __attribute__((noreturn)) cs_usage(const char *bin) "\t\t-S send() size\n" "\t\t-4/-6 Force IPv4 / IPv6 only\n" "\t\t-p prot Socket protocol\n" - "\t\t (u = UDP (default); i = ICMP; r = RAW)\n" + "\t\t (u = UDP (default); i = ICMP; r = RAW;\n" + "\t\t U = UDP with MSG_MORE)\n" "\n" "\t\t-m val Set SO_MARK with given value\n" "\t\t-M val Set SO_MARK via setsockopt\n" @@ -104,13 +107,13 @@ static void __attribute__((noreturn)) cs_usage(const char *bin) "\t\t-t Enable time stamp reporting\n" "\t\t-f val Set don't fragment via cmsg\n" "\t\t-F val Set don't fragment via setsockopt\n" - "\t\t-c val Set TCLASS via cmsg\n" - "\t\t-C val Set TCLASS via setsockopt\n" - "\t\t-l val Set HOPLIMIT via cmsg\n" - "\t\t-L val Set HOPLIMIT via setsockopt\n" + "\t\t-c val Set TOS/TCLASS via cmsg\n" + "\t\t-C val Set TOS/TCLASS via setsockopt\n" + "\t\t-l val Set TTL/HOPLIMIT via cmsg\n" + "\t\t-L val Set TTL/HOPLIMIT via setsockopt\n" "\t\t-H type Add an IPv6 header option\n" - "\t\t (h = HOP; d = DST; r = RTDST)" - ""); + "\t\t (h = HOP; d = DST; r = RTDST)\n" + "\n"); exit(ERN_HELP); } @@ -133,8 +136,11 @@ static void cs_parse_args(int argc, char *argv[]) opt.sock.family = AF_INET6; break; case 'p': - if (*optarg == 'u' || *optarg == 'U') { + if (*optarg == 'u') { opt.sock.proto = IPPROTO_UDP; + } else if (*optarg == 'U') { + opt.sock.proto = IPPROTO_UDP; + opt.msg_more = true; } else if (*optarg == 'i' || *optarg == 'I') { opt.sock.proto = IPPROTO_ICMP; } else if (*optarg == 'r') { @@ -169,37 +175,37 @@ static void cs_parse_args(int argc, char *argv[]) opt.ts.ena = true; break; case 'f': - opt.v6.dontfrag.ena = true; - opt.v6.dontfrag.val = atoi(optarg); + opt.cmsg.dontfrag.ena = true; + opt.cmsg.dontfrag.val = atoi(optarg); break; case 'F': opt.sockopt.dontfrag = atoi(optarg); break; case 'c': - opt.v6.tclass.ena = true; - opt.v6.tclass.val = atoi(optarg); + opt.cmsg.tclass.ena = true; + opt.cmsg.tclass.val = atoi(optarg); break; case 'C': opt.sockopt.tclass = atoi(optarg); break; case 'l': - opt.v6.hlimit.ena = true; - opt.v6.hlimit.val = atoi(optarg); + opt.cmsg.hlimit.ena = true; + opt.cmsg.hlimit.val = atoi(optarg); break; case 'L': opt.sockopt.hlimit = atoi(optarg); break; case 'H': - opt.v6.exthdr.ena = true; + opt.cmsg.exthdr.ena = true; switch (optarg[0]) { case 'h': - opt.v6.exthdr.val = IPV6_HOPOPTS; + opt.cmsg.exthdr.val = IPV6_HOPOPTS; break; case 'd': - opt.v6.exthdr.val = IPV6_DSTOPTS; + opt.cmsg.exthdr.val = IPV6_DSTOPTS; break; case 'r': - opt.v6.exthdr.val = IPV6_RTHDRDSTOPTS; + opt.cmsg.exthdr.val = IPV6_RTHDRDSTOPTS; break; default: printf("Error: hdr type: %s\n", optarg); @@ -261,12 +267,20 @@ cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz) SOL_SOCKET, SO_MARK, &opt.mark); ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, SOL_SOCKET, SO_PRIORITY, &opt.priority); - ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, - SOL_IPV6, IPV6_DONTFRAG, &opt.v6.dontfrag); - ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, - SOL_IPV6, IPV6_TCLASS, &opt.v6.tclass); - ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, - SOL_IPV6, IPV6_HOPLIMIT, &opt.v6.hlimit); + + if (opt.sock.family == AF_INET) { + ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, + SOL_IP, IP_TOS, &opt.cmsg.tclass); + ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, + SOL_IP, IP_TTL, &opt.cmsg.hlimit); + } else { + ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, + SOL_IPV6, IPV6_DONTFRAG, &opt.cmsg.dontfrag); + ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, + SOL_IPV6, IPV6_TCLASS, &opt.cmsg.tclass); + ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, + SOL_IPV6, IPV6_HOPLIMIT, &opt.cmsg.hlimit); + } if (opt.txtime.ena) { __u64 txtime; @@ -297,14 +311,14 @@ cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz) *(__u32 *)CMSG_DATA(cmsg) = SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE; } - if (opt.v6.exthdr.ena) { + if (opt.cmsg.exthdr.ena) { cmsg = (struct cmsghdr *)(cbuf + cmsg_len); cmsg_len += CMSG_SPACE(8); if (cbuf_sz < cmsg_len) error(ERN_CMSG_WR, EFAULT, "cmsg buffer too small"); cmsg->cmsg_level = SOL_IPV6; - cmsg->cmsg_type = opt.v6.exthdr.val; + cmsg->cmsg_type = opt.cmsg.exthdr.val; cmsg->cmsg_len = CMSG_LEN(8); *(__u64 *)CMSG_DATA(cmsg) = 0; } @@ -405,23 +419,35 @@ static void ca_set_sockopts(int fd) setsockopt(fd, SOL_SOCKET, SO_MARK, &opt.sockopt.mark, sizeof(opt.sockopt.mark))) error(ERN_SOCKOPT, errno, "setsockopt SO_MARK"); - if (opt.sockopt.dontfrag && - setsockopt(fd, SOL_IPV6, IPV6_DONTFRAG, - &opt.sockopt.dontfrag, sizeof(opt.sockopt.dontfrag))) - error(ERN_SOCKOPT, errno, "setsockopt IPV6_DONTFRAG"); - if (opt.sockopt.tclass && - setsockopt(fd, SOL_IPV6, IPV6_TCLASS, - &opt.sockopt.tclass, sizeof(opt.sockopt.tclass))) - error(ERN_SOCKOPT, errno, "setsockopt IPV6_TCLASS"); - if (opt.sockopt.hlimit && - setsockopt(fd, SOL_IPV6, IPV6_UNICAST_HOPS, - &opt.sockopt.hlimit, sizeof(opt.sockopt.hlimit))) - error(ERN_SOCKOPT, errno, "setsockopt IPV6_HOPLIMIT"); if (opt.sockopt.priority && setsockopt(fd, SOL_SOCKET, SO_PRIORITY, &opt.sockopt.priority, sizeof(opt.sockopt.priority))) error(ERN_SOCKOPT, errno, "setsockopt SO_PRIORITY"); + if (opt.sock.family == AF_INET) { + if (opt.sockopt.tclass && + setsockopt(fd, SOL_IP, IP_TOS, + &opt.sockopt.tclass, sizeof(opt.sockopt.tclass))) + error(ERN_SOCKOPT, errno, "setsockopt IP_TOS"); + if (opt.sockopt.hlimit && + setsockopt(fd, SOL_IP, IP_TTL, + &opt.sockopt.hlimit, sizeof(opt.sockopt.hlimit))) + error(ERN_SOCKOPT, errno, "setsockopt IP_TTL"); + } else { + if (opt.sockopt.dontfrag && + setsockopt(fd, SOL_IPV6, IPV6_DONTFRAG, + &opt.sockopt.dontfrag, sizeof(opt.sockopt.dontfrag))) + error(ERN_SOCKOPT, errno, "setsockopt IPV6_DONTFRAG"); + if (opt.sockopt.tclass && + setsockopt(fd, SOL_IPV6, IPV6_TCLASS, + &opt.sockopt.tclass, sizeof(opt.sockopt.tclass))) + error(ERN_SOCKOPT, errno, "setsockopt IPV6_TCLASS"); + if (opt.sockopt.hlimit && + setsockopt(fd, SOL_IPV6, IPV6_UNICAST_HOPS, + &opt.sockopt.hlimit, sizeof(opt.sockopt.hlimit))) + error(ERN_SOCKOPT, errno, "setsockopt IPV6_HOPLIMIT"); + } + if (opt.txtime.ena) { struct sock_txtime so_txtime = { .clockid = CLOCK_MONOTONIC, @@ -511,7 +537,7 @@ int main(int argc, char *argv[]) cs_write_cmsg(fd, &msg, cbuf, sizeof(cbuf)); for (i = 0; i < opt.num_pkt; i++) { - err = sendmsg(fd, &msg, 0); + err = sendmsg(fd, &msg, opt.msg_more ? MSG_MORE : 0); if (err < 0) { if (!opt.silent_send) fprintf(stderr, "send failed: %s\n", strerror(errno)); @@ -522,6 +548,14 @@ int main(int argc, char *argv[]) err = ERN_SEND_SHORT; goto err_out; } + if (opt.msg_more) { + err = write(fd, NULL, 0); + if (err < 0) { + fprintf(stderr, "send more: %s\n", strerror(errno)); + err = ERN_SEND_MORE; + goto err_out; + } + } } err = ERN_SUCCESS; diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 61e5116987f3..130d532b7e67 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -18,6 +18,8 @@ CONFIG_DUMMY=y CONFIG_BRIDGE_VLAN_FILTERING=y CONFIG_BRIDGE=y CONFIG_CRYPTO_CHACHA20POLY1305=m +CONFIG_DEBUG_INFO_BTF=y +CONFIG_DEBUG_INFO_BTF_MODULES=n CONFIG_VLAN_8021Q=y CONFIG_GENEVE=m CONFIG_IFB=y @@ -107,5 +109,11 @@ CONFIG_XFRM_INTERFACE=m CONFIG_XFRM_USER=m CONFIG_IP_NF_MATCH_RPFILTER=m CONFIG_IP6_NF_MATCH_RPFILTER=m +CONFIG_IPVLAN=m +CONFIG_CAN=m +CONFIG_CAN_DEV=m +CONFIG_CAN_VXCAN=m +CONFIG_NETKIT=y +CONFIG_NET_PKTGEN=m CONFIG_IPV6_ILA=m CONFIG_IPV6_RPL_LWTUNNEL=y diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 899dbad0104b..4fcc38907e48 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -3667,7 +3667,7 @@ ipv6_addr_bind_novrf() # when it really should not a=${NSA_LO_IP6} log_start - show_hint "Tecnically should fail since address is not on device but kernel allows" + show_hint "Technically should fail since address is not on device but kernel allows" run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b log_test_addr ${a} $? 0 "TCP socket bind to out of scope local address" } @@ -3724,7 +3724,7 @@ ipv6_addr_bind_vrf() # passes when it really should not a=${VRF_IP6} log_start - show_hint "Tecnically should fail since address is not on device but kernel allows" + show_hint "Technically should fail since address is not on device but kernel allows" run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b log_test_addr ${a} $? 0 "TCP socket bind to VRF address with device bind" diff --git a/tools/testing/selftests/net/fdb_flush.sh b/tools/testing/selftests/net/fdb_flush.sh index d5e3abb8658c..9931a1e36e3d 100755 --- a/tools/testing/selftests/net/fdb_flush.sh +++ b/tools/testing/selftests/net/fdb_flush.sh @@ -583,7 +583,7 @@ vxlan_test_flush_by_remote_attributes() $IP link del dev vx10 $IP link add name vx10 type vxlan dstport "$VXPORT" external - # For multicat FDB entries, the VXLAN driver stores a linked list of + # For multicast FDB entries, the VXLAN driver stores a linked list of # remotes for a given key. Verify that only the expected remotes are # flushed. multicast_fdb_entries_add diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index 77c83d9508d3..b39f748c2572 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -76,11 +76,13 @@ log_test() printf "TEST: %-60s [ OK ]\n" "${msg}" nsuccess=$((nsuccess+1)) else - ret=1 - nfail=$((nfail+1)) if [[ $rc -eq $ksft_skip ]]; then + [[ $ret -eq 0 ]] && ret=$ksft_skip + nskip=$((nskip+1)) printf "TEST: %-60s [SKIP]\n" "${msg}" else + ret=1 + nfail=$((nfail+1)) printf "TEST: %-60s [FAIL]\n" "${msg}" fi @@ -741,7 +743,7 @@ ipv6_fcnal() run_cmd "$IP nexthop add id 52 via 2001:db8:92::3" log_test $? 2 "Create nexthop - gw only" - # gw is not reachable throught given dev + # gw is not reachable through given dev run_cmd "$IP nexthop add id 53 via 2001:db8:3::3 dev veth1" log_test $? 2 "Create nexthop - invalid gw+dev combination" @@ -2528,6 +2530,7 @@ done if [ "$TESTS" != "none" ]; then printf "\nTests passed: %3d\n" ${nsuccess} printf "Tests failed: %3d\n" ${nfail} + printf "Tests skipped: %2d\n" ${nskip} fi exit $ret diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index 847936363a12..b866bab1d92a 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -256,6 +256,24 @@ fib_rule6_test() fib_rule6_test_match_n_redirect "$match" "$match" \ "$getnomatch" "sport and dport redirect to table" \ "sport and dport no redirect to table" + + match="sport 100-200 dport 300-400" + getmatch="sport 100 dport 400" + getnomatch="sport 100 dport 401" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" \ + "sport and dport range redirect to table" \ + "sport and dport range no redirect to table" + fi + + ip rule help 2>&1 | grep sport | grep -q MASK + if [ $? -eq 0 ]; then + match="sport 0x0f00/0xff00 dport 0x000f/0x00ff" + getmatch="sport 0x0f11 dport 0x220f" + getnomatch="sport 0x1f11 dport 0x221f" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "sport and dport masked redirect to table" \ + "sport and dport masked no redirect to table" fi fib_check_iproute_support "ipproto" "ipproto" @@ -292,6 +310,25 @@ fib_rule6_test() "iif dscp no redirect to table" fi + ip rule help 2>&1 | grep -q "DSCP\[/MASK\]" + if [ $? -eq 0 ]; then + match="dscp 0x0f/0x0f" + tosmatch=$(printf 0x"%x" $((0x1f << 2))) + tosnomatch=$(printf 0x"%x" $((0x1e << 2))) + getmatch="tos $tosmatch" + getnomatch="tos $tosnomatch" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "dscp masked redirect to table" \ + "dscp masked no redirect to table" + + match="dscp 0x0f/0x0f" + getmatch="from $SRC_IP6 iif $DEV tos $tosmatch" + getnomatch="from $SRC_IP6 iif $DEV tos $tosnomatch" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "iif dscp masked redirect to table" \ + "iif dscp masked no redirect to table" + fi + fib_check_iproute_support "flowlabel" "flowlabel" if [ $? -eq 0 ]; then match="flowlabel 0xfffff" @@ -525,6 +562,24 @@ fib_rule4_test() fib_rule4_test_match_n_redirect "$match" "$match" \ "$getnomatch" "sport and dport redirect to table" \ "sport and dport no redirect to table" + + match="sport 100-200 dport 300-400" + getmatch="sport 100 dport 400" + getnomatch="sport 100 dport 401" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" \ + "sport and dport range redirect to table" \ + "sport and dport range no redirect to table" + fi + + ip rule help 2>&1 | grep sport | grep -q MASK + if [ $? -eq 0 ]; then + match="sport 0x0f00/0xff00 dport 0x000f/0x00ff" + getmatch="sport 0x0f11 dport 0x220f" + getnomatch="sport 0x1f11 dport 0x221f" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "sport and dport masked redirect to table" \ + "sport and dport masked no redirect to table" fi fib_check_iproute_support "ipproto" "ipproto" @@ -561,6 +616,25 @@ fib_rule4_test() "$getnomatch" "iif dscp redirect to table" \ "iif dscp no redirect to table" fi + + ip rule help 2>&1 | grep -q "DSCP\[/MASK\]" + if [ $? -eq 0 ]; then + match="dscp 0x0f/0x0f" + tosmatch=$(printf 0x"%x" $((0x1f << 2))) + tosnomatch=$(printf 0x"%x" $((0x1e << 2))) + getmatch="tos $tosmatch" + getnomatch="tos $tosnomatch" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "dscp masked redirect to table" \ + "dscp masked no redirect to table" + + match="dscp 0x0f/0x0f" + getmatch="from $SRC_IP iif $DEV tos $tosmatch" + getnomatch="from $SRC_IP iif $DEV tos $tosnomatch" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "iif dscp masked redirect to table" \ + "iif dscp masked no redirect to table" + fi } fib_rule4_vrf_test() diff --git a/tools/testing/selftests/net/forwarding/README b/tools/testing/selftests/net/forwarding/README index a652429bfd53..7b41cff993ad 100644 --- a/tools/testing/selftests/net/forwarding/README +++ b/tools/testing/selftests/net/forwarding/README @@ -6,7 +6,7 @@ to easily create and test complex environments. Unfortunately, these namespaces can not be used with actual switching ASICs, as their ports can not be migrated to other network namespaces -(dev->netns_local) and most of them probably do not support the +(dev->netns_immutable) and most of them probably do not support the L1-separation provided by namespaces. However, a similar kind of flexibility can be achieved by using VRFs and diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh index d9d587454d20..8c1597ebc2d3 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh @@ -149,7 +149,7 @@ cfg_test_host_common() check_err $? "Failed to add $name host entry" bridge mdb replace dev br0 port br0 grp $grp $state vid 10 &> /dev/null - check_fail $? "Managed to replace $name host entry" + check_err $? "Failed to replace $name host entry" bridge mdb del dev br0 port br0 grp $grp $state vid 10 bridge mdb get dev br0 grp $grp vid 10 &> /dev/null diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 8de80acf249e..508f3c700d71 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -291,16 +291,6 @@ if [[ "$CHECK_TC" = "yes" ]]; then check_tc_version fi -require_command() -{ - local cmd=$1; shift - - if [[ ! -x "$(command -v "$cmd")" ]]; then - echo "SKIP: $cmd not installed" - exit $ksft_skip - fi -} - # IPv6 support was added in v3.0 check_mtools_version() { diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh index 3f9d50f1ef9e..b43816dd998c 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh @@ -428,6 +428,14 @@ __test_flood() test_flood() { __test_flood de:ad:be:ef:13:37 192.0.2.100 "flood" + + # Add an entry with arbitrary destination IP. Verify that packets are + # not duplicated (this can happen if hardware floods the packets, and + # then traps them due to misconfiguration, so software data path repeats + # flooding and resends packets). + bridge fdb append dev vx1 00:00:00:00:00:00 dst 198.51.100.1 self + __test_flood de:ad:be:ef:13:37 192.0.2.100 "flood, unresolved FDB entry" + bridge fdb del dev vx1 00:00:00:00:00:00 dst 198.51.100.1 self } vxlan_fdb_add_del() @@ -740,6 +748,8 @@ test_learning() vxlan_flood_test $mac $dst 0 10 0 + # The entry should age out when it only forwards traffic + $MZ $h1 -c 50 -d 1sec -p 64 -b $mac -B $dst -t icmp -q & sleep 60 bridge fdb show brport vx1 | grep $mac | grep -q self diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh index fb9a34cb50c6..afc65647f673 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh @@ -539,6 +539,21 @@ test_flood() 10 10 0 10 0 __test_flood ca:fe:be:ef:13:37 198.51.100.100 20 "flood vlan 20" \ 10 0 10 0 10 + + # Add entries with arbitrary destination IP. Verify that packets are + # not duplicated (this can happen if hardware floods the packets, and + # then traps them due to misconfiguration, so software data path repeats + # flooding and resends packets). + bridge fdb append dev vx10 00:00:00:00:00:00 dst 203.0.113.1 self + bridge fdb append dev vx20 00:00:00:00:00:00 dst 203.0.113.2 self + + __test_flood de:ad:be:ef:13:37 192.0.2.100 10 \ + "flood vlan 10, unresolved FDB entry" 10 10 0 10 0 + __test_flood ca:fe:be:ef:13:37 198.51.100.100 20 \ + "flood vlan 20, unresolved FDB entry" 10 0 10 0 10 + + bridge fdb del dev vx20 00:00:00:00:00:00 dst 203.0.113.2 self + bridge fdb del dev vx10 00:00:00:00:00:00 dst 203.0.113.1 self } vxlan_fdb_add_del() diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c index b2184847e388..d5824eadea10 100644 --- a/tools/testing/selftests/net/gro.c +++ b/tools/testing/selftests/net/gro.c @@ -1318,11 +1318,13 @@ int main(int argc, char **argv) read_MAC(src_mac, smac); read_MAC(dst_mac, dmac); - if (tx_socket) + if (tx_socket) { gro_sender(); - else + } else { + /* Only the receiver exit status determines test success. */ gro_receiver(); + fprintf(stderr, "Gro::%s test passed.\n", testname); + } - fprintf(stderr, "Gro::%s test passed.\n", testname); return 0; } diff --git a/tools/testing/selftests/net/gro.sh b/tools/testing/selftests/net/gro.sh index 02c21ff4ca81..9e3f186bc2a1 100755 --- a/tools/testing/selftests/net/gro.sh +++ b/tools/testing/selftests/net/gro.sh @@ -18,10 +18,10 @@ run_test() { "--smac" "${CLIENT_MAC}" "--test" "${test}" "--verbose" ) setup_ns - # Each test is run 3 times to deflake, because given the receive timing, + # Each test is run 6 times to deflake, because given the receive timing, # not all packets that should coalesce will be considered in the same flow # on every try. - for tries in {1..3}; do + for tries in {1..6}; do # Actual test starts here ip netns exec $server_ns ./gro "${ARGS[@]}" "--rx" "--iface" "server" \ 1>>log.txt & @@ -100,5 +100,6 @@ trap cleanup EXIT if [[ "${test}" == "all" ]]; then run_all_tests else - run_test "${proto}" "${test}" + exit_code=$(run_test "${proto}" "${test}") + exit $exit_code fi; diff --git a/tools/testing/selftests/net/ip_local_port_range.sh b/tools/testing/selftests/net/ip_local_port_range.sh index 6c6ad346eaa0..4ff746db1256 100755 --- a/tools/testing/selftests/net/ip_local_port_range.sh +++ b/tools/testing/selftests/net/ip_local_port_range.sh @@ -2,4 +2,6 @@ # SPDX-License-Identifier: GPL-2.0 ./in_netns.sh \ - sh -c 'sysctl -q -w net.ipv4.ip_local_port_range="40000 49999" && ./ip_local_port_range' + sh -c 'sysctl -q -w net.mptcp.enabled=1 && \ + sysctl -q -w net.ipv4.ip_local_port_range="40000 49999" && \ + ./ip_local_port_range' diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index 0bd9a038a1f0..975be4fdbcdb 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -450,6 +450,25 @@ kill_process() { kill $pid && wait $pid; } 2>/dev/null } +check_command() +{ + local cmd=$1; shift + + if [[ ! -x "$(command -v "$cmd")" ]]; then + log_test_skip "$cmd not installed" + return $EXIT_STATUS + fi +} + +require_command() +{ + local cmd=$1; shift + + if ! check_command "$cmd"; then + exit $EXIT_STATUS + fi +} + ip_link_add() { local name=$1; shift diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py index 54d8f5eba810..8697bd27dc30 100644 --- a/tools/testing/selftests/net/lib/py/__init__.py +++ b/tools/testing/selftests/net/lib/py/__init__.py @@ -2,8 +2,8 @@ from .consts import KSRC from .ksft import * -from .netns import NetNS +from .netns import NetNS, NetNSEnter from .nsim import * from .utils import * -from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily +from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily, RtnlAddrFamily from .ynl import NetshaperFamily diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py index 3efe005436cd..3cfad0fd4570 100644 --- a/tools/testing/selftests/net/lib/py/ksft.py +++ b/tools/testing/selftests/net/lib/py/ksft.py @@ -71,6 +71,11 @@ def ksft_in(a, b, comment=""): _fail("Check failed", a, "not in", b, comment) +def ksft_not_in(a, b, comment=""): + if a in b: + _fail("Check failed", a, "in", b, comment) + + def ksft_is(a, b, comment=""): if a is not b: _fail("Check failed", a, "is not", b, comment) @@ -202,7 +207,7 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()): totals = {"pass": 0, "fail": 0, "skip": 0, "xfail": 0} - print("KTAP version 1") + print("TAP version 13") print("1.." + str(len(cases))) global KSFT_RESULT diff --git a/tools/testing/selftests/net/lib/py/netns.py b/tools/testing/selftests/net/lib/py/netns.py index ecff85f9074f..8e9317044eef 100644 --- a/tools/testing/selftests/net/lib/py/netns.py +++ b/tools/testing/selftests/net/lib/py/netns.py @@ -1,9 +1,12 @@ # SPDX-License-Identifier: GPL-2.0 from .utils import ip +import ctypes import random import string +libc = ctypes.cdll.LoadLibrary('libc.so.6') + class NetNS: def __init__(self, name=None): @@ -29,3 +32,18 @@ class NetNS: def __repr__(self): return f"NetNS({self.name})" + + +class NetNSEnter: + def __init__(self, ns_name): + self.ns_path = f"/run/netns/{ns_name}" + + def __enter__(self): + self.saved = open("/proc/thread-self/ns/net") + with open(self.ns_path) as ns_file: + libc.setns(ns_file.fileno(), 0) + return self + + def __exit__(self, exc_type, exc_value, traceback): + libc.setns(self.saved.fileno(), 0) + self.saved.close() diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py index 9e3bcddcf3e8..34470d65d871 100644 --- a/tools/testing/selftests/net/lib/py/utils.py +++ b/tools/testing/selftests/net/lib/py/utils.py @@ -2,8 +2,10 @@ import errno import json as _json +import os import random import re +import select import socket import subprocess import time @@ -15,21 +17,56 @@ class CmdExitFailure(Exception): self.cmd = cmd_obj +def fd_read_timeout(fd, timeout): + rlist, _, _ = select.select([fd], [], [], timeout) + if rlist: + return os.read(fd, 1024) + else: + raise TimeoutError("Timeout waiting for fd read") + + class cmd: - def __init__(self, comm, shell=True, fail=True, ns=None, background=False, host=None, timeout=5): + """ + Execute a command on local or remote host. + + Use bkg() instead to run a command in the background. + """ + def __init__(self, comm, shell=True, fail=True, ns=None, background=False, + host=None, timeout=5, ksft_wait=None): if ns: comm = f'ip netns exec {ns} ' + comm self.stdout = None self.stderr = None self.ret = None + self.ksft_term_fd = None self.comm = comm if host: self.proc = host.cmd(comm) else: + # ksft_wait lets us wait for the background process to fully start, + # we pass an FD to the child process, and wait for it to write back. + # Similarly term_fd tells child it's time to exit. + pass_fds = () + env = os.environ.copy() + if ksft_wait is not None: + rfd, ready_fd = os.pipe() + wait_fd, self.ksft_term_fd = os.pipe() + pass_fds = (ready_fd, wait_fd, ) + env["KSFT_READY_FD"] = str(ready_fd) + env["KSFT_WAIT_FD"] = str(wait_fd) + self.proc = subprocess.Popen(comm, shell=shell, stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + stderr=subprocess.PIPE, pass_fds=pass_fds, + env=env) + if ksft_wait is not None: + os.close(ready_fd) + os.close(wait_fd) + msg = fd_read_timeout(rfd, ksft_wait) + os.close(rfd) + if not msg: + raise Exception("Did not receive ready message") if not background: self.process(terminate=False, fail=fail, timeout=timeout) @@ -37,6 +74,8 @@ class cmd: if fail is None: fail = not terminate + if self.ksft_term_fd: + os.write(self.ksft_term_fd, b"1") if terminate: self.proc.terminate() stdout, stderr = self.proc.communicate(timeout) @@ -54,13 +93,36 @@ class cmd: class bkg(cmd): + """ + Run a command in the background. + + Examples usage: + + Run a command on remote host, and wait for it to finish. + This is usually paired with wait_port_listen() to make sure + the command has initialized: + + with bkg("socat ...", exit_wait=True, host=cfg.remote) as nc: + ... + + Run a command and expect it to let us know that it's ready + by writing to a special file descriptor passed via KSFT_READY_FD. + Command will be terminated when we exit the context manager: + + with bkg("my_binary", ksft_wait=5): + """ def __init__(self, comm, shell=True, fail=None, ns=None, host=None, - exit_wait=False): + exit_wait=False, ksft_wait=None): super().__init__(comm, background=True, - shell=shell, fail=fail, ns=ns, host=host) - self.terminate = not exit_wait + shell=shell, fail=fail, ns=ns, host=host, + ksft_wait=ksft_wait) + self.terminate = not exit_wait and not ksft_wait self.check_fail = fail + if shell and self.terminate: + print("# Warning: combining shell and terminate is risky!") + print("# SIGTERM may not reach the child on zsh/ksh!") + def __enter__(self): return self @@ -123,20 +185,13 @@ def ethtool(args, json=None, ns=None, host=None): return tool('ethtool', args, json=json, ns=ns, host=host) -def rand_port(): +def rand_port(type=socket.SOCK_STREAM): """ - Get a random unprivileged port, try to make sure it's not already used. + Get a random unprivileged port. """ - for _ in range(1000): - port = random.randint(10000, 65535) - try: - with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: - s.bind(("", port)) - return port - except OSError as e: - if e.errno != errno.EADDRINUSE: - raise - raise Exception("Can't find any free unprivileged port") + with socket.socket(socket.AF_INET6, type) as s: + s.bind(("", 0)) + return s.getsockname()[1] def wait_port_listen(port, proto="tcp", ns=None, host=None, sleep=0.005, deadline=5): diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py index ad1e36baee2a..8986c584cb37 100644 --- a/tools/testing/selftests/net/lib/py/ynl.py +++ b/tools/testing/selftests/net/lib/py/ynl.py @@ -42,6 +42,10 @@ class RtnlFamily(YnlFamily): super().__init__((SPEC_PATH / Path('rt_link.yaml')).as_posix(), schema='', recv_size=recv_size) +class RtnlAddrFamily(YnlFamily): + def __init__(self, recv_size=0): + super().__init__((SPEC_PATH / Path('rt_addr.yaml')).as_posix(), + schema='', recv_size=recv_size) class NetdevFamily(YnlFamily): def __init__(self, recv_size=0): diff --git a/tools/testing/selftests/net/link_netns.py b/tools/testing/selftests/net/link_netns.py new file mode 100755 index 000000000000..aab043c59d69 --- /dev/null +++ b/tools/testing/selftests/net/link_netns.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import time + +from lib.py import ksft_run, ksft_exit, ksft_true +from lib.py import ip +from lib.py import NetNS, NetNSEnter +from lib.py import RtnlFamily + + +LINK_NETNSID = 100 + + +def test_event() -> None: + with NetNS() as ns1, NetNS() as ns2: + with NetNSEnter(str(ns2)): + rtnl = RtnlFamily() + + rtnl.ntf_subscribe("rtnlgrp-link") + + ip(f"netns set {ns2} {LINK_NETNSID}", ns=str(ns1)) + ip(f"link add netns {ns1} link-netnsid {LINK_NETNSID} dummy1 type dummy") + ip(f"link add netns {ns1} dummy2 type dummy", ns=str(ns2)) + + ip("link del dummy1", ns=str(ns1)) + ip("link del dummy2", ns=str(ns1)) + + time.sleep(1) + rtnl.check_ntf() + ksft_true(rtnl.async_msg_queue.empty(), + "Received unexpected link notification") + + +def validate_link_netns(netns, ifname, link_netnsid) -> bool: + link_info = ip(f"-d link show dev {ifname}", ns=netns, json=True) + if not link_info: + return False + return link_info[0].get("link_netnsid") == link_netnsid + + +def test_link_net() -> None: + configs = [ + # type, common args, type args, fallback to dev_net + ("ipvlan", "link dummy1", "", False), + ("macsec", "link dummy1", "", False), + ("macvlan", "link dummy1", "", False), + ("macvtap", "link dummy1", "", False), + ("vlan", "link dummy1", "id 100", False), + ("gre", "", "local 192.0.2.1", True), + ("vti", "", "local 192.0.2.1", True), + ("ipip", "", "local 192.0.2.1", True), + ("ip6gre", "", "local 2001:db8::1", True), + ("ip6tnl", "", "local 2001:db8::1", True), + ("vti6", "", "local 2001:db8::1", True), + ("sit", "", "local 192.0.2.1", True), + ("xfrm", "", "if_id 1", True), + ] + + with NetNS() as ns1, NetNS() as ns2, NetNS() as ns3: + net1, net2, net3 = str(ns1), str(ns2), str(ns3) + + # prepare link netnsid and a dummy link needed by certain drivers + ip(f"netns set {net3} {LINK_NETNSID}", ns=str(net2)) + ip("link add dummy1 type dummy", ns=net3) + + cases = [ + # source, "netns", "link-netns", expected link-netns + (net3, None, None, None, None), + (net3, net2, None, None, LINK_NETNSID), + (net2, None, net3, LINK_NETNSID, LINK_NETNSID), + (net1, net2, net3, LINK_NETNSID, LINK_NETNSID), + ] + + for src_net, netns, link_netns, exp1, exp2 in cases: + tgt_net = netns or src_net + for typ, cargs, targs, fb_dev_net in configs: + cmd = "link add" + if netns: + cmd += f" netns {netns}" + if link_netns: + cmd += f" link-netns {link_netns}" + cmd += f" {cargs} foo type {typ} {targs}" + ip(cmd, ns=src_net) + if fb_dev_net: + ksft_true(validate_link_netns(tgt_net, "foo", exp1), + f"{typ} link_netns validation failed") + else: + ksft_true(validate_link_netns(tgt_net, "foo", exp2), + f"{typ} link_netns validation failed") + ip(f"link del foo", ns=tgt_net) + + +def test_peer_net() -> None: + types = [ + "vxcan", + "netkit", + "veth", + ] + + with NetNS() as ns1, NetNS() as ns2, NetNS() as ns3, NetNS() as ns4: + net1, net2, net3, net4 = str(ns1), str(ns2), str(ns3), str(ns4) + + ip(f"netns set {net3} {LINK_NETNSID}", ns=str(net2)) + + cases = [ + # source, "netns", "link-netns", "peer netns", expected + (net1, None, None, None, None), + (net1, net2, None, None, None), + (net2, None, net3, None, LINK_NETNSID), + (net1, net2, net3, None, None), + (net2, None, None, net3, LINK_NETNSID), + (net1, net2, None, net3, LINK_NETNSID), + (net2, None, net2, net3, LINK_NETNSID), + (net1, net2, net4, net3, LINK_NETNSID), + ] + + for src_net, netns, link_netns, peer_netns, exp in cases: + tgt_net = netns or src_net + for typ in types: + cmd = "link add" + if netns: + cmd += f" netns {netns}" + if link_netns: + cmd += f" link-netns {link_netns}" + cmd += f" foo type {typ}" + if peer_netns: + cmd += f" peer netns {peer_netns}" + ip(cmd, ns=src_net) + ksft_true(validate_link_netns(tgt_net, "foo", exp), + f"{typ} peer_netns validation failed") + ip(f"link del foo", ns=tgt_net) + + +def main() -> None: + ksft_run([test_event, test_link_net, test_peer_net]) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile index c76525fe2b84..340e1a777e16 100644 --- a/tools/testing/selftests/net/mptcp/Makefile +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -7,7 +7,7 @@ CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INC TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \ simult_flows.sh mptcp_sockopt.sh userspace_pm.sh -TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq +TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq mptcp_diag TEST_FILES := mptcp_lib.sh settings diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 2bd0c1eb70c5..4f55477ffe08 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -200,6 +200,32 @@ chk_msk_cestab() "${expected}" "${msg}" "" } +chk_dump_one() +{ + local ss_token + local token + local msg + + ss_token="$(ss -inmHMN $ns | grep 'token:' |\ + head -n 1 |\ + sed 's/.*token:\([0-9a-f]*\).*/\1/')" + + token="$(ip netns exec $ns ./mptcp_diag -t $ss_token |\ + awk -F':[ \t]+' '/^token/ {print $2}')" + + msg="....chk dump_one" + + mptcp_lib_print_title "$msg" + if [ -n "$ss_token" ] && [ "$ss_token" = "$token" ]; then + mptcp_lib_pr_ok + mptcp_lib_result_pass "${msg}" + else + mptcp_lib_pr_fail "expected $ss_token found $token" + mptcp_lib_result_fail "${msg}" + ret=${KSFT_FAIL} + fi +} + msk_info_get_value() { local port="${1}" @@ -290,6 +316,7 @@ chk_msk_remote_key_nr 2 "....chk remote_key" chk_msk_fallback_nr 0 "....chk no fallback" chk_msk_inuse 2 chk_msk_cestab 2 +chk_dump_one flush_pids chk_msk_inuse 0 "2->0" diff --git a/tools/testing/selftests/net/mptcp/mptcp_diag.c b/tools/testing/selftests/net/mptcp/mptcp_diag.c new file mode 100644 index 000000000000..284286c524cf --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_diag.c @@ -0,0 +1,272 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025, Kylin Software */ + +#include <linux/sock_diag.h> +#include <linux/rtnetlink.h> +#include <linux/inet_diag.h> +#include <linux/netlink.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <linux/tcp.h> + +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <stdio.h> + +#ifndef IPPROTO_MPTCP +#define IPPROTO_MPTCP 262 +#endif + +struct mptcp_info { + __u8 mptcpi_subflows; + __u8 mptcpi_add_addr_signal; + __u8 mptcpi_add_addr_accepted; + __u8 mptcpi_subflows_max; + __u8 mptcpi_add_addr_signal_max; + __u8 mptcpi_add_addr_accepted_max; + __u32 mptcpi_flags; + __u32 mptcpi_token; + __u64 mptcpi_write_seq; + __u64 mptcpi_snd_una; + __u64 mptcpi_rcv_nxt; + __u8 mptcpi_local_addr_used; + __u8 mptcpi_local_addr_max; + __u8 mptcpi_csum_enabled; + __u32 mptcpi_retransmits; + __u64 mptcpi_bytes_retrans; + __u64 mptcpi_bytes_sent; + __u64 mptcpi_bytes_received; + __u64 mptcpi_bytes_acked; + __u8 mptcpi_subflows_total; + __u8 reserved[3]; + __u32 mptcpi_last_data_sent; + __u32 mptcpi_last_data_recv; + __u32 mptcpi_last_ack_recv; +}; + +static void die_perror(const char *msg) +{ + perror(msg); + exit(1); +} + +static void die_usage(int r) +{ + fprintf(stderr, "Usage: mptcp_diag -t\n"); + exit(r); +} + +static void send_query(int fd, __u32 token) +{ + struct sockaddr_nl nladdr = { + .nl_family = AF_NETLINK + }; + struct { + struct nlmsghdr nlh; + struct inet_diag_req_v2 r; + } req = { + .nlh = { + .nlmsg_len = sizeof(req), + .nlmsg_type = SOCK_DIAG_BY_FAMILY, + .nlmsg_flags = NLM_F_REQUEST + }, + .r = { + .sdiag_family = AF_INET, + /* Real proto is set via INET_DIAG_REQ_PROTOCOL */ + .sdiag_protocol = IPPROTO_TCP, + .id.idiag_cookie[0] = token, + } + }; + struct rtattr rta_proto; + struct iovec iov[6]; + int iovlen = 1; + __u32 proto; + + req.r.idiag_ext |= (1 << (INET_DIAG_INFO - 1)); + proto = IPPROTO_MPTCP; + rta_proto.rta_type = INET_DIAG_REQ_PROTOCOL; + rta_proto.rta_len = RTA_LENGTH(sizeof(proto)); + + iov[0] = (struct iovec) { + .iov_base = &req, + .iov_len = sizeof(req) + }; + iov[iovlen] = (struct iovec){ &rta_proto, sizeof(rta_proto)}; + iov[iovlen + 1] = (struct iovec){ &proto, sizeof(proto)}; + req.nlh.nlmsg_len += RTA_LENGTH(sizeof(proto)); + iovlen += 2; + struct msghdr msg = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = iov, + .msg_iovlen = iovlen + }; + + for (;;) { + if (sendmsg(fd, &msg, 0) < 0) { + if (errno == EINTR) + continue; + die_perror("sendmsg"); + } + break; + } +} + +static void parse_rtattr_flags(struct rtattr *tb[], int max, struct rtattr *rta, + int len, unsigned short flags) +{ + unsigned short type; + + memset(tb, 0, sizeof(struct rtattr *) * (max + 1)); + while (RTA_OK(rta, len)) { + type = rta->rta_type & ~flags; + if (type <= max && !tb[type]) + tb[type] = rta; + rta = RTA_NEXT(rta, len); + } +} + +static void print_info_msg(struct mptcp_info *info) +{ + printf("Token & Flags\n"); + printf("token: %x\n", info->mptcpi_token); + printf("flags: %x\n", info->mptcpi_flags); + printf("csum_enabled: %u\n", info->mptcpi_csum_enabled); + + printf("\nBasic Info\n"); + printf("subflows: %u\n", info->mptcpi_subflows); + printf("subflows_max: %u\n", info->mptcpi_subflows_max); + printf("subflows_total: %u\n", info->mptcpi_subflows_total); + printf("local_addr_used: %u\n", info->mptcpi_local_addr_used); + printf("local_addr_max: %u\n", info->mptcpi_local_addr_max); + printf("add_addr_signal: %u\n", info->mptcpi_add_addr_signal); + printf("add_addr_accepted: %u\n", info->mptcpi_add_addr_accepted); + printf("add_addr_signal_max: %u\n", info->mptcpi_add_addr_signal_max); + printf("add_addr_accepted_max: %u\n", info->mptcpi_add_addr_accepted_max); + + printf("\nTransmission Info\n"); + printf("write_seq: %llu\n", info->mptcpi_write_seq); + printf("snd_una: %llu\n", info->mptcpi_snd_una); + printf("rcv_nxt: %llu\n", info->mptcpi_rcv_nxt); + printf("last_data_sent: %u\n", info->mptcpi_last_data_sent); + printf("last_data_recv: %u\n", info->mptcpi_last_data_recv); + printf("last_ack_recv: %u\n", info->mptcpi_last_ack_recv); + printf("retransmits: %u\n", info->mptcpi_retransmits); + printf("retransmit bytes: %llu\n", info->mptcpi_bytes_retrans); + printf("bytes_sent: %llu\n", info->mptcpi_bytes_sent); + printf("bytes_received: %llu\n", info->mptcpi_bytes_received); + printf("bytes_acked: %llu\n", info->mptcpi_bytes_acked); +} + +static void parse_nlmsg(struct nlmsghdr *nlh) +{ + struct inet_diag_msg *r = NLMSG_DATA(nlh); + struct rtattr *tb[INET_DIAG_MAX + 1]; + + parse_rtattr_flags(tb, INET_DIAG_MAX, (struct rtattr *)(r + 1), + nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r)), + NLA_F_NESTED); + + if (tb[INET_DIAG_INFO]) { + int len = RTA_PAYLOAD(tb[INET_DIAG_INFO]); + struct mptcp_info *info; + + /* workaround fort older kernels with less fields */ + if (len < sizeof(*info)) { + info = alloca(sizeof(*info)); + memcpy(info, RTA_DATA(tb[INET_DIAG_INFO]), len); + memset((char *)info + len, 0, sizeof(*info) - len); + } else { + info = RTA_DATA(tb[INET_DIAG_INFO]); + } + print_info_msg(info); + } +} + +static void recv_nlmsg(int fd, struct nlmsghdr *nlh) +{ + char rcv_buff[8192]; + struct sockaddr_nl rcv_nladdr = { + .nl_family = AF_NETLINK + }; + struct iovec rcv_iov = { + .iov_base = rcv_buff, + .iov_len = sizeof(rcv_buff) + }; + struct msghdr rcv_msg = { + .msg_name = &rcv_nladdr, + .msg_namelen = sizeof(rcv_nladdr), + .msg_iov = &rcv_iov, + .msg_iovlen = 1 + }; + int len; + + len = recvmsg(fd, &rcv_msg, 0); + nlh = (struct nlmsghdr *)rcv_buff; + + while (NLMSG_OK(nlh, len)) { + if (nlh->nlmsg_type == NLMSG_DONE) { + printf("NLMSG_DONE\n"); + break; + } else if (nlh->nlmsg_type == NLMSG_ERROR) { + struct nlmsgerr *err; + + err = (struct nlmsgerr *)NLMSG_DATA(nlh); + printf("Error %d:%s\n", + -(err->error), strerror(-(err->error))); + break; + } + parse_nlmsg(nlh); + nlh = NLMSG_NEXT(nlh, len); + } +} + +static void get_mptcpinfo(__u32 token) +{ + struct nlmsghdr *nlh = NULL; + int fd; + + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG); + if (fd < 0) + die_perror("Netlink socket"); + + send_query(fd, token); + recv_nlmsg(fd, nlh); + + close(fd); +} + +static void parse_opts(int argc, char **argv, __u32 *target_token) +{ + int c; + + if (argc < 2) + die_usage(1); + + while ((c = getopt(argc, argv, "ht:")) != -1) { + switch (c) { + case 'h': + die_usage(0); + break; + case 't': + sscanf(optarg, "%x", target_token); + break; + default: + die_usage(1); + break; + } + } +} + +int main(int argc, char *argv[]) +{ + __u32 target_token; + + parse_opts(argc, argv, &target_token); + get_mptcpinfo(target_token); + + return 0; +} + diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index 9c2a415976cb..2329c2f8519b 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -28,7 +28,7 @@ size=0 usage() { echo "Usage: $0 [ -b ] [ -c ] [ -d ] [ -i]" - echo -e "\t-b: bail out after first error, otherwise runs al testcases" + echo -e "\t-b: bail out after first error, otherwise runs all testcases" echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)" echo -e "\t-d: debug this script" echo -e "\t-i: use 'ip mptcp' instead of 'pm_nl_ctl'" diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 3651f73451cf..333064b0b5ac 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -117,7 +117,36 @@ cleanup() trap cleanup EXIT # Create and configure network namespaces for testing +print_title "Init" mptcp_lib_ns_init ns1 ns2 + +# check path_manager and pm_type sysctl mapping +if [ -f /proc/sys/net/mptcp/path_manager ]; then + ip netns exec "$ns1" sysctl -q net.mptcp.path_manager=userspace + pm_type="$(ip netns exec "$ns1" sysctl -n net.mptcp.pm_type)" + if [ "${pm_type}" != "1" ]; then + test_fail "unexpected pm_type: ${pm_type}" + mptcp_lib_result_print_all_tap + exit ${KSFT_FAIL} + fi + + ip netns exec "$ns1" sysctl -q net.mptcp.path_manager=error 2>/dev/null + pm_type="$(ip netns exec "$ns1" sysctl -n net.mptcp.pm_type)" + if [ "${pm_type}" != "1" ]; then + test_fail "unexpected pm_type after error: ${pm_type}" + mptcp_lib_result_print_all_tap + exit ${KSFT_FAIL} + fi + + ip netns exec "$ns1" sysctl -q net.mptcp.pm_type=0 + pm_name="$(ip netns exec "$ns1" sysctl -n net.mptcp.path_manager)" + if [ "${pm_name}" != "kernel" ]; then + test_fail "unexpected path-manager: ${pm_name}" + mptcp_lib_result_print_all_tap + exit ${KSFT_FAIL} + fi +fi + for i in "$ns1" "$ns2" ;do ip netns exec "$i" sysctl -q net.mptcp.pm_type=1 done @@ -152,7 +181,6 @@ mptcp_lib_events "${ns1}" "${server_evts}" server_evts_pid sleep 0.5 mptcp_lib_subtests_last_ts_reset -print_title "Init" print_test "Created network namespaces ns1, ns2" test_pass diff --git a/tools/testing/selftests/net/netns-name.sh b/tools/testing/selftests/net/netns-name.sh index 6974474c26f3..0be1905d1f2f 100755 --- a/tools/testing/selftests/net/netns-name.sh +++ b/tools/testing/selftests/net/netns-name.sh @@ -78,6 +78,16 @@ ip -netns $NS link show dev $ALT_NAME 2> /dev/null && fail "Can still find alt-name after move" ip -netns $test_ns link del $DEV || fail +# +# Test no conflict of the same name/ifindex in different netns +# +ip -netns $NS link add name $DEV index 100 type dummy || fail +ip -netns $NS link add netns $test_ns name $DEV index 100 type dummy || + fail "Can create in netns without moving" +ip -netns $test_ns link show dev $DEV >> /dev/null || fail "Device not found" +ip -netns $NS link del $DEV || fail +ip -netns $test_ns link del $DEV || fail + echo -ne "$(basename $0) \t\t\t\t" if [ $RET_CODE -eq 0 ]; then echo "[ OK ]" diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py index 93e8cb671c3d..beaee5e4e2aa 100755 --- a/tools/testing/selftests/net/nl_netdev.py +++ b/tools/testing/selftests/net/nl_netdev.py @@ -35,6 +35,21 @@ def napi_list_check(nf) -> None: comment=f"queue count after reset queue {q} mode {i}") +def nsim_rxq_reset_down(nf) -> None: + """ + Test that the queue API supports resetting a queue + while the interface is down. We should convert this + test to testing real HW once more devices support + queue API. + """ + with NetdevSimDev(queue_count=4) as nsimdev: + nsim = nsimdev.nsims[0] + + ip(f"link set dev {nsim.ifname} down") + for i in [0, 2, 3]: + nsim.dfs_write("queue_reset", f"1 {i}") + + def page_pool_check(nf) -> None: with NetdevSimDev() as nsimdev: nsim = nsimdev.nsims[0] @@ -106,7 +121,8 @@ def page_pool_check(nf) -> None: def main() -> None: nf = NetdevFamily() - ksft_run([empty_check, lo_check, page_pool_check, napi_list_check], + ksft_run([empty_check, lo_check, page_pool_check, napi_list_check, + nsim_rxq_reset_down], args=(nf, )) ksft_exit() diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh index 960e1ab4dd04..3c8d3455d8e7 100755 --- a/tools/testing/selftests/net/openvswitch/openvswitch.sh +++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh @@ -330,6 +330,11 @@ test_psample() { # - drop packets and verify the right drop reason is reported test_drop_reason() { which perf >/dev/null 2>&1 || return $ksft_skip + which pahole >/dev/null 2>&1 || return $ksft_skip + + ovs_drop_subsys=$(pahole -C skb_drop_reason_subsys | + awk '/OPENVSWITCH/ { print $3; }' | + tr -d ,) sbx_add "test_drop_reason" || return $? @@ -373,7 +378,7 @@ test_drop_reason() { "in_port(2),eth(),eth_type(0x0800),ipv4(src=172.31.110.20,proto=1),icmp()" 'drop' ovs_drop_record_and_run "test_drop_reason" ip netns exec client ping -c 2 172.31.110.20 - ovs_drop_reason_count 0x30001 # OVS_DROP_FLOW_ACTION + ovs_drop_reason_count 0x${ovs_drop_subsys}0001 # OVS_DROP_FLOW_ACTION if [[ "$?" -ne "2" ]]; then info "Did not detect expected drops: $?" return 1 @@ -390,7 +395,7 @@ test_drop_reason() { ovs_drop_record_and_run \ "test_drop_reason" ip netns exec client nc -i 1 -zuv 172.31.110.20 6000 - ovs_drop_reason_count 0x30004 # OVS_DROP_EXPLICIT_ACTION_ERROR + ovs_drop_reason_count 0x${ovs_drop_subsys}0004 # OVS_DROP_EXPLICIT_ACTION_ERROR if [[ "$?" -ne "1" ]]; then info "Did not detect expected explicit error drops: $?" return 1 @@ -398,7 +403,7 @@ test_drop_reason() { ovs_drop_record_and_run \ "test_drop_reason" ip netns exec client nc -i 1 -zuv 172.31.110.20 7000 - ovs_drop_reason_count 0x30003 # OVS_DROP_EXPLICIT_ACTION + ovs_drop_reason_count 0x${ovs_drop_subsys}0003 # OVS_DROP_EXPLICIT_ACTION if [[ "$?" -ne "1" ]]; then info "Did not detect expected explicit drops: $?" return 1 diff --git a/tools/testing/selftests/net/proc_net_pktgen.c b/tools/testing/selftests/net/proc_net_pktgen.c new file mode 100644 index 000000000000..69444fb29577 --- /dev/null +++ b/tools/testing/selftests/net/proc_net_pktgen.c @@ -0,0 +1,690 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * proc_net_pktgen: kselftest for /proc/net/pktgen interface + * + * Copyright (c) 2025 Peter Seiderer <ps.report@gmx.net> + * + */ +#include <errno.h> +#include <fcntl.h> +#include <stdlib.h> +#include <unistd.h> + +#include "../kselftest_harness.h" + +static const char ctrl_cmd_stop[] = "stop"; +static const char ctrl_cmd_start[] = "start"; +static const char ctrl_cmd_reset[] = "reset"; + +static const char wrong_ctrl_cmd[] = "0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789"; + +static const char thr_cmd_add_loopback_0[] = "add_device lo@0"; +static const char thr_cmd_rm_loopback_0[] = "rem_device_all"; + +static const char wrong_thr_cmd[] = "forsureawrongcommand"; +static const char legacy_thr_cmd[] = "max_before_softirq"; + +static const char wrong_dev_cmd[] = "forsurewrongcommand"; +static const char dev_cmd_min_pkt_size_0[] = "min_pkt_size"; +static const char dev_cmd_min_pkt_size_1[] = "min_pkt_size "; +static const char dev_cmd_min_pkt_size_2[] = "min_pkt_size 0"; +static const char dev_cmd_min_pkt_size_3[] = "min_pkt_size 1"; +static const char dev_cmd_min_pkt_size_4[] = "min_pkt_size 100"; +static const char dev_cmd_min_pkt_size_5[] = "min_pkt_size=1001"; +static const char dev_cmd_min_pkt_size_6[] = "min_pkt_size =2002"; +static const char dev_cmd_min_pkt_size_7[] = "min_pkt_size= 3003"; +static const char dev_cmd_min_pkt_size_8[] = "min_pkt_size = 4004"; +static const char dev_cmd_max_pkt_size_0[] = "max_pkt_size 200"; +static const char dev_cmd_pkt_size_0[] = "pkt_size 300"; +static const char dev_cmd_imix_weights_0[] = "imix_weights 0,7 576,4 1500,1"; +static const char dev_cmd_imix_weights_1[] = "imix_weights 101,1 102,2 103,3 104,4 105,5 106,6 107,7 108,8 109,9 110,10 111,11 112,12 113,13 114,14 115,15 116,16 117,17 118,18 119,19 120,20"; +static const char dev_cmd_imix_weights_2[] = "imix_weights 100,1 102,2 103,3 104,4 105,5 106,6 107,7 108,8 109,9 110,10 111,11 112,12 113,13 114,14 115,15 116,16 117,17 118,18 119,19 120,20 121,21"; +static const char dev_cmd_imix_weights_3[] = "imix_weights"; +static const char dev_cmd_imix_weights_4[] = "imix_weights "; +static const char dev_cmd_imix_weights_5[] = "imix_weights 0"; +static const char dev_cmd_imix_weights_6[] = "imix_weights 0,"; +static const char dev_cmd_debug_0[] = "debug 1"; +static const char dev_cmd_debug_1[] = "debug 0"; +static const char dev_cmd_frags_0[] = "frags 100"; +static const char dev_cmd_delay_0[] = "delay 100"; +static const char dev_cmd_delay_1[] = "delay 2147483647"; +static const char dev_cmd_rate_0[] = "rate 0"; +static const char dev_cmd_rate_1[] = "rate 100"; +static const char dev_cmd_ratep_0[] = "ratep 0"; +static const char dev_cmd_ratep_1[] = "ratep 200"; +static const char dev_cmd_udp_src_min_0[] = "udp_src_min 1"; +static const char dev_cmd_udp_dst_min_0[] = "udp_dst_min 2"; +static const char dev_cmd_udp_src_max_0[] = "udp_src_max 3"; +static const char dev_cmd_udp_dst_max_0[] = "udp_dst_max 4"; +static const char dev_cmd_clone_skb_0[] = "clone_skb 1"; +static const char dev_cmd_clone_skb_1[] = "clone_skb 0"; +static const char dev_cmd_count_0[] = "count 100"; +static const char dev_cmd_src_mac_count_0[] = "src_mac_count 100"; +static const char dev_cmd_dst_mac_count_0[] = "dst_mac_count 100"; +static const char dev_cmd_burst_0[] = "burst 0"; +static const char dev_cmd_node_0[] = "node 100"; +static const char dev_cmd_xmit_mode_0[] = "xmit_mode start_xmit"; +static const char dev_cmd_xmit_mode_1[] = "xmit_mode netif_receive"; +static const char dev_cmd_xmit_mode_2[] = "xmit_mode queue_xmit"; +static const char dev_cmd_xmit_mode_3[] = "xmit_mode nonsense"; +static const char dev_cmd_flag_0[] = "flag UDPCSUM"; +static const char dev_cmd_flag_1[] = "flag !UDPCSUM"; +static const char dev_cmd_flag_2[] = "flag nonsense"; +static const char dev_cmd_dst_min_0[] = "dst_min 101.102.103.104"; +static const char dev_cmd_dst_0[] = "dst 101.102.103.104"; +static const char dev_cmd_dst_max_0[] = "dst_max 201.202.203.204"; +static const char dev_cmd_dst6_0[] = "dst6 2001:db38:1234:0000:0000:0000:0000:0000"; +static const char dev_cmd_dst6_min_0[] = "dst6_min 2001:db8:1234:0000:0000:0000:0000:0000"; +static const char dev_cmd_dst6_max_0[] = "dst6_max 2001:db8:1234:0000:0000:0000:0000:0000"; +static const char dev_cmd_src6_0[] = "src6 2001:db38:1234:0000:0000:0000:0000:0000"; +static const char dev_cmd_src_min_0[] = "src_min 101.102.103.104"; +static const char dev_cmd_src_max_0[] = "src_max 201.202.203.204"; +static const char dev_cmd_dst_mac_0[] = "dst_mac 01:02:03:04:05:06"; +static const char dev_cmd_src_mac_0[] = "src_mac 11:12:13:14:15:16"; +static const char dev_cmd_clear_counters_0[] = "clear_counters"; +static const char dev_cmd_flows_0[] = "flows 100"; +static const char dev_cmd_spi_0[] = "spi 100"; +static const char dev_cmd_flowlen_0[] = "flowlen 100"; +static const char dev_cmd_queue_map_min_0[] = "queue_map_min 1"; +static const char dev_cmd_queue_map_max_0[] = "queue_map_max 2"; +static const char dev_cmd_mpls_0[] = "mpls 00000001"; +static const char dev_cmd_mpls_1[] = "mpls 00000001,000000f2"; +static const char dev_cmd_mpls_2[] = "mpls 00000f00,00000f01,00000f02,00000f03,00000f04,00000f05,00000f06,00000f07,00000f08,00000f09,00000f0a,00000f0b,00000f0c,00000f0d,00000f0e,00000f0f"; +static const char dev_cmd_mpls_3[] = "mpls 00000f00,00000f01,00000f02,00000f03,00000f04,00000f05,00000f06,00000f07,00000f08,00000f09,00000f0a,00000f0b,00000f0c,00000f0d,00000f0e,00000f0f,00000f10"; +static const char dev_cmd_vlan_id_0[] = "vlan_id 1"; +static const char dev_cmd_vlan_p_0[] = "vlan_p 1"; +static const char dev_cmd_vlan_cfi_0[] = "vlan_cfi 1"; +static const char dev_cmd_vlan_id_1[] = "vlan_id 4096"; +static const char dev_cmd_svlan_id_0[] = "svlan_id 1"; +static const char dev_cmd_svlan_p_0[] = "svlan_p 1"; +static const char dev_cmd_svlan_cfi_0[] = "svlan_cfi 1"; +static const char dev_cmd_svlan_id_1[] = "svlan_id 4096"; +static const char dev_cmd_tos_0[] = "tos 0"; +static const char dev_cmd_tos_1[] = "tos 0f"; +static const char dev_cmd_tos_2[] = "tos 0ff"; +static const char dev_cmd_traffic_class_0[] = "traffic_class f0"; +static const char dev_cmd_skb_priority_0[] = "skb_priority 999"; + +FIXTURE(proc_net_pktgen) { + int ctrl_fd; + int thr_fd; + int dev_fd; +}; + +FIXTURE_SETUP(proc_net_pktgen) { + int r; + ssize_t len; + + r = system("modprobe pktgen"); + ASSERT_EQ(r, 0) TH_LOG("CONFIG_NET_PKTGEN not enabled, module pktgen not loaded?"); + + self->ctrl_fd = open("/proc/net/pktgen/pgctrl", O_RDWR); + ASSERT_GE(self->ctrl_fd, 0) TH_LOG("CONFIG_NET_PKTGEN not enabled, module pktgen not loaded?"); + + self->thr_fd = open("/proc/net/pktgen/kpktgend_0", O_RDWR); + ASSERT_GE(self->thr_fd, 0) TH_LOG("CONFIG_NET_PKTGEN not enabled, module pktgen not loaded?"); + + len = write(self->thr_fd, thr_cmd_add_loopback_0, sizeof(thr_cmd_add_loopback_0)); + ASSERT_EQ(len, sizeof(thr_cmd_add_loopback_0)) TH_LOG("device lo@0 already registered?"); + + self->dev_fd = open("/proc/net/pktgen/lo@0", O_RDWR); + ASSERT_GE(self->dev_fd, 0) TH_LOG("device entry for lo@0 missing?"); +} + +FIXTURE_TEARDOWN(proc_net_pktgen) { + int ret; + ssize_t len; + + ret = close(self->dev_fd); + EXPECT_EQ(ret, 0); + + len = write(self->thr_fd, thr_cmd_rm_loopback_0, sizeof(thr_cmd_rm_loopback_0)); + EXPECT_EQ(len, sizeof(thr_cmd_rm_loopback_0)); + + ret = close(self->thr_fd); + EXPECT_EQ(ret, 0); + + ret = close(self->ctrl_fd); + EXPECT_EQ(ret, 0); +} + +TEST_F(proc_net_pktgen, wrong_ctrl_cmd) { + for (int i = 0; i <= sizeof(wrong_ctrl_cmd); i++) { + ssize_t len; + + len = write(self->ctrl_fd, wrong_ctrl_cmd, i); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + } +} + +TEST_F(proc_net_pktgen, ctrl_cmd) { + ssize_t len; + + len = write(self->ctrl_fd, ctrl_cmd_stop, sizeof(ctrl_cmd_stop)); + EXPECT_EQ(len, sizeof(ctrl_cmd_stop)); + + len = write(self->ctrl_fd, ctrl_cmd_stop, sizeof(ctrl_cmd_stop) - 1); + EXPECT_EQ(len, sizeof(ctrl_cmd_stop) - 1); + + len = write(self->ctrl_fd, ctrl_cmd_start, sizeof(ctrl_cmd_start)); + EXPECT_EQ(len, sizeof(ctrl_cmd_start)); + + len = write(self->ctrl_fd, ctrl_cmd_start, sizeof(ctrl_cmd_start) - 1); + EXPECT_EQ(len, sizeof(ctrl_cmd_start) - 1); + + len = write(self->ctrl_fd, ctrl_cmd_reset, sizeof(ctrl_cmd_reset)); + EXPECT_EQ(len, sizeof(ctrl_cmd_reset)); + + len = write(self->ctrl_fd, ctrl_cmd_reset, sizeof(ctrl_cmd_reset) - 1); + EXPECT_EQ(len, sizeof(ctrl_cmd_reset) - 1); +} + +TEST_F(proc_net_pktgen, wrong_thr_cmd) { + for (int i = 0; i <= sizeof(wrong_thr_cmd); i++) { + ssize_t len; + + len = write(self->thr_fd, wrong_thr_cmd, i); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + } +} + +TEST_F(proc_net_pktgen, legacy_thr_cmd) { + for (int i = 0; i <= sizeof(legacy_thr_cmd); i++) { + ssize_t len; + + len = write(self->thr_fd, legacy_thr_cmd, i); + if (i < (sizeof(legacy_thr_cmd) - 1)) { + /* incomplete command string */ + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + } else { + /* complete command string without/with trailing '\0' */ + EXPECT_EQ(len, i); + } + } +} + +TEST_F(proc_net_pktgen, wrong_dev_cmd) { + for (int i = 0; i <= sizeof(wrong_dev_cmd); i++) { + ssize_t len; + + len = write(self->dev_fd, wrong_dev_cmd, i); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + } +} + +TEST_F(proc_net_pktgen, dev_cmd_min_pkt_size) { + ssize_t len; + + /* with trailing '\0' */ + len = write(self->dev_fd, dev_cmd_min_pkt_size_0, sizeof(dev_cmd_min_pkt_size_0)); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_0)); + + /* without trailing '\0' */ + len = write(self->dev_fd, dev_cmd_min_pkt_size_0, sizeof(dev_cmd_min_pkt_size_0) - 1); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_0) - 1); + + /* with trailing '\0' */ + len = write(self->dev_fd, dev_cmd_min_pkt_size_1, sizeof(dev_cmd_min_pkt_size_1)); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_1)); + + /* without trailing '\0' */ + len = write(self->dev_fd, dev_cmd_min_pkt_size_1, sizeof(dev_cmd_min_pkt_size_1) - 1); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_1) - 1); + + /* with trailing '\0' */ + len = write(self->dev_fd, dev_cmd_min_pkt_size_2, sizeof(dev_cmd_min_pkt_size_2)); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_2)); + + /* without trailing '\0' */ + len = write(self->dev_fd, dev_cmd_min_pkt_size_2, sizeof(dev_cmd_min_pkt_size_2) - 1); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_2) - 1); + + len = write(self->dev_fd, dev_cmd_min_pkt_size_3, sizeof(dev_cmd_min_pkt_size_3)); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_3)); + + len = write(self->dev_fd, dev_cmd_min_pkt_size_4, sizeof(dev_cmd_min_pkt_size_4)); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_4)); + + len = write(self->dev_fd, dev_cmd_min_pkt_size_5, sizeof(dev_cmd_min_pkt_size_5)); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_5)); + + len = write(self->dev_fd, dev_cmd_min_pkt_size_6, sizeof(dev_cmd_min_pkt_size_6)); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_6)); + + len = write(self->dev_fd, dev_cmd_min_pkt_size_7, sizeof(dev_cmd_min_pkt_size_7)); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_7)); + + len = write(self->dev_fd, dev_cmd_min_pkt_size_8, sizeof(dev_cmd_min_pkt_size_8)); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_8)); +} + +TEST_F(proc_net_pktgen, dev_cmd_max_pkt_size) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_max_pkt_size_0, sizeof(dev_cmd_max_pkt_size_0)); + EXPECT_EQ(len, sizeof(dev_cmd_max_pkt_size_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_pkt_size) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_pkt_size_0, sizeof(dev_cmd_pkt_size_0)); + EXPECT_EQ(len, sizeof(dev_cmd_pkt_size_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_imix_weights) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_imix_weights_0, sizeof(dev_cmd_imix_weights_0)); + EXPECT_EQ(len, sizeof(dev_cmd_imix_weights_0)); + + len = write(self->dev_fd, dev_cmd_imix_weights_1, sizeof(dev_cmd_imix_weights_1)); + EXPECT_EQ(len, sizeof(dev_cmd_imix_weights_1)); + + len = write(self->dev_fd, dev_cmd_imix_weights_2, sizeof(dev_cmd_imix_weights_2)); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, E2BIG); + + /* with trailing '\0' */ + len = write(self->dev_fd, dev_cmd_imix_weights_3, sizeof(dev_cmd_imix_weights_3)); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + + /* without trailing '\0' */ + len = write(self->dev_fd, dev_cmd_imix_weights_3, sizeof(dev_cmd_imix_weights_3) - 1); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + + /* with trailing '\0' */ + len = write(self->dev_fd, dev_cmd_imix_weights_4, sizeof(dev_cmd_imix_weights_4)); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + + /* without trailing '\0' */ + len = write(self->dev_fd, dev_cmd_imix_weights_4, sizeof(dev_cmd_imix_weights_4) - 1); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + + /* with trailing '\0' */ + len = write(self->dev_fd, dev_cmd_imix_weights_5, sizeof(dev_cmd_imix_weights_5)); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + + /* without trailing '\0' */ + len = write(self->dev_fd, dev_cmd_imix_weights_5, sizeof(dev_cmd_imix_weights_5) - 1); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + + /* with trailing '\0' */ + len = write(self->dev_fd, dev_cmd_imix_weights_6, sizeof(dev_cmd_imix_weights_6)); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + + /* without trailing '\0' */ + len = write(self->dev_fd, dev_cmd_imix_weights_6, sizeof(dev_cmd_imix_weights_6) - 1); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); +} + +TEST_F(proc_net_pktgen, dev_cmd_debug) { + ssize_t len; + + /* debug on */ + len = write(self->dev_fd, dev_cmd_debug_0, sizeof(dev_cmd_debug_0)); + EXPECT_EQ(len, sizeof(dev_cmd_debug_0)); + + /* debug off */ + len = write(self->dev_fd, dev_cmd_debug_1, sizeof(dev_cmd_debug_1)); + EXPECT_EQ(len, sizeof(dev_cmd_debug_1)); +} + +TEST_F(proc_net_pktgen, dev_cmd_frags) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_frags_0, sizeof(dev_cmd_frags_0)); + EXPECT_EQ(len, sizeof(dev_cmd_frags_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_delay) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_delay_0, sizeof(dev_cmd_delay_0)); + EXPECT_EQ(len, sizeof(dev_cmd_delay_0)); + + len = write(self->dev_fd, dev_cmd_delay_1, sizeof(dev_cmd_delay_1)); + EXPECT_EQ(len, sizeof(dev_cmd_delay_1)); +} + +TEST_F(proc_net_pktgen, dev_cmd_rate) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_rate_0, sizeof(dev_cmd_rate_0)); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + + len = write(self->dev_fd, dev_cmd_rate_1, sizeof(dev_cmd_rate_1)); + EXPECT_EQ(len, sizeof(dev_cmd_rate_1)); +} + +TEST_F(proc_net_pktgen, dev_cmd_ratep) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_ratep_0, sizeof(dev_cmd_ratep_0)); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + + len = write(self->dev_fd, dev_cmd_ratep_1, sizeof(dev_cmd_ratep_1)); + EXPECT_EQ(len, sizeof(dev_cmd_ratep_1)); +} + +TEST_F(proc_net_pktgen, dev_cmd_udp_src_min) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_udp_src_min_0, sizeof(dev_cmd_udp_src_min_0)); + EXPECT_EQ(len, sizeof(dev_cmd_udp_src_min_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_udp_dst_min) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_udp_dst_min_0, sizeof(dev_cmd_udp_dst_min_0)); + EXPECT_EQ(len, sizeof(dev_cmd_udp_dst_min_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_udp_src_max) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_udp_src_max_0, sizeof(dev_cmd_udp_src_max_0)); + EXPECT_EQ(len, sizeof(dev_cmd_udp_src_max_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_udp_dst_max) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_udp_dst_max_0, sizeof(dev_cmd_udp_dst_max_0)); + EXPECT_EQ(len, sizeof(dev_cmd_udp_dst_max_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_clone_skb) { + ssize_t len; + + /* clone_skb on (gives EOPNOTSUPP on lo device) */ + len = write(self->dev_fd, dev_cmd_clone_skb_0, sizeof(dev_cmd_clone_skb_0)); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EOPNOTSUPP); + + /* clone_skb off */ + len = write(self->dev_fd, dev_cmd_clone_skb_1, sizeof(dev_cmd_clone_skb_1)); + EXPECT_EQ(len, sizeof(dev_cmd_clone_skb_1)); +} + +TEST_F(proc_net_pktgen, dev_cmd_count) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_count_0, sizeof(dev_cmd_count_0)); + EXPECT_EQ(len, sizeof(dev_cmd_count_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_src_mac_count) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_src_mac_count_0, sizeof(dev_cmd_src_mac_count_0)); + EXPECT_EQ(len, sizeof(dev_cmd_src_mac_count_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_dst_mac_count) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_dst_mac_count_0, sizeof(dev_cmd_dst_mac_count_0)); + EXPECT_EQ(len, sizeof(dev_cmd_dst_mac_count_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_burst) { + ssize_t len; + + /* burst off */ + len = write(self->dev_fd, dev_cmd_burst_0, sizeof(dev_cmd_burst_0)); + EXPECT_EQ(len, sizeof(dev_cmd_burst_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_node) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_node_0, sizeof(dev_cmd_node_0)); + EXPECT_EQ(len, sizeof(dev_cmd_node_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_xmit_mode) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_xmit_mode_0, sizeof(dev_cmd_xmit_mode_0)); + EXPECT_EQ(len, sizeof(dev_cmd_xmit_mode_0)); + + len = write(self->dev_fd, dev_cmd_xmit_mode_1, sizeof(dev_cmd_xmit_mode_1)); + EXPECT_EQ(len, sizeof(dev_cmd_xmit_mode_1)); + + len = write(self->dev_fd, dev_cmd_xmit_mode_2, sizeof(dev_cmd_xmit_mode_2)); + EXPECT_EQ(len, sizeof(dev_cmd_xmit_mode_2)); + + len = write(self->dev_fd, dev_cmd_xmit_mode_3, sizeof(dev_cmd_xmit_mode_3)); + EXPECT_EQ(len, sizeof(dev_cmd_xmit_mode_3)); +} + +TEST_F(proc_net_pktgen, dev_cmd_flag) { + ssize_t len; + + /* flag UDPCSUM on */ + len = write(self->dev_fd, dev_cmd_flag_0, sizeof(dev_cmd_flag_0)); + EXPECT_EQ(len, sizeof(dev_cmd_flag_0)); + + /* flag UDPCSUM off */ + len = write(self->dev_fd, dev_cmd_flag_1, sizeof(dev_cmd_flag_1)); + EXPECT_EQ(len, sizeof(dev_cmd_flag_1)); + + /* flag invalid */ + len = write(self->dev_fd, dev_cmd_flag_2, sizeof(dev_cmd_flag_2)); + EXPECT_EQ(len, sizeof(dev_cmd_flag_2)); +} + +TEST_F(proc_net_pktgen, dev_cmd_dst_min) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_dst_min_0, sizeof(dev_cmd_dst_min_0)); + EXPECT_EQ(len, sizeof(dev_cmd_dst_min_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_dst) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_dst_0, sizeof(dev_cmd_dst_0)); + EXPECT_EQ(len, sizeof(dev_cmd_dst_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_dst_max) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_dst_max_0, sizeof(dev_cmd_dst_max_0)); + EXPECT_EQ(len, sizeof(dev_cmd_dst_max_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_dst6) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_dst6_0, sizeof(dev_cmd_dst6_0)); + EXPECT_EQ(len, sizeof(dev_cmd_dst6_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_dst6_min) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_dst6_min_0, sizeof(dev_cmd_dst6_min_0)); + EXPECT_EQ(len, sizeof(dev_cmd_dst6_min_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_dst6_max) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_dst6_max_0, sizeof(dev_cmd_dst6_max_0)); + EXPECT_EQ(len, sizeof(dev_cmd_dst6_max_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_src6) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_src6_0, sizeof(dev_cmd_src6_0)); + EXPECT_EQ(len, sizeof(dev_cmd_src6_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_src_min) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_src_min_0, sizeof(dev_cmd_src_min_0)); + EXPECT_EQ(len, sizeof(dev_cmd_src_min_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_src_max) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_src_max_0, sizeof(dev_cmd_src_max_0)); + EXPECT_EQ(len, sizeof(dev_cmd_src_max_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_dst_mac) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_dst_mac_0, sizeof(dev_cmd_dst_mac_0)); + EXPECT_EQ(len, sizeof(dev_cmd_dst_mac_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_src_mac) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_src_mac_0, sizeof(dev_cmd_src_mac_0)); + EXPECT_EQ(len, sizeof(dev_cmd_src_mac_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_clear_counters) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_clear_counters_0, sizeof(dev_cmd_clear_counters_0)); + EXPECT_EQ(len, sizeof(dev_cmd_clear_counters_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_flows) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_flows_0, sizeof(dev_cmd_flows_0)); + EXPECT_EQ(len, sizeof(dev_cmd_flows_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_spi) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_spi_0, sizeof(dev_cmd_spi_0)); + EXPECT_EQ(len, sizeof(dev_cmd_spi_0)) TH_LOG("CONFIG_XFRM not enabled?"); +} + +TEST_F(proc_net_pktgen, dev_cmd_flowlen) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_flowlen_0, sizeof(dev_cmd_flowlen_0)); + EXPECT_EQ(len, sizeof(dev_cmd_flowlen_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_queue_map_min) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_queue_map_min_0, sizeof(dev_cmd_queue_map_min_0)); + EXPECT_EQ(len, sizeof(dev_cmd_queue_map_min_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_queue_map_max) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_queue_map_max_0, sizeof(dev_cmd_queue_map_max_0)); + EXPECT_EQ(len, sizeof(dev_cmd_queue_map_max_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_mpls) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_mpls_0, sizeof(dev_cmd_mpls_0)); + EXPECT_EQ(len, sizeof(dev_cmd_mpls_0)); + + len = write(self->dev_fd, dev_cmd_mpls_1, sizeof(dev_cmd_mpls_1)); + EXPECT_EQ(len, sizeof(dev_cmd_mpls_1)); + + len = write(self->dev_fd, dev_cmd_mpls_2, sizeof(dev_cmd_mpls_2)); + EXPECT_EQ(len, sizeof(dev_cmd_mpls_2)); + + len = write(self->dev_fd, dev_cmd_mpls_3, sizeof(dev_cmd_mpls_3)); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, E2BIG); +} + +TEST_F(proc_net_pktgen, dev_cmd_vlan_id) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_vlan_id_0, sizeof(dev_cmd_vlan_id_0)); + EXPECT_EQ(len, sizeof(dev_cmd_vlan_id_0)); + + len = write(self->dev_fd, dev_cmd_vlan_p_0, sizeof(dev_cmd_vlan_p_0)); + EXPECT_EQ(len, sizeof(dev_cmd_vlan_p_0)); + + len = write(self->dev_fd, dev_cmd_vlan_cfi_0, sizeof(dev_cmd_vlan_cfi_0)); + EXPECT_EQ(len, sizeof(dev_cmd_vlan_cfi_0)); + + len = write(self->dev_fd, dev_cmd_vlan_id_1, sizeof(dev_cmd_vlan_id_1)); + EXPECT_EQ(len, sizeof(dev_cmd_vlan_id_1)); +} + +TEST_F(proc_net_pktgen, dev_cmd_svlan_id) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_svlan_id_0, sizeof(dev_cmd_svlan_id_0)); + EXPECT_EQ(len, sizeof(dev_cmd_svlan_id_0)); + + len = write(self->dev_fd, dev_cmd_svlan_p_0, sizeof(dev_cmd_svlan_p_0)); + EXPECT_EQ(len, sizeof(dev_cmd_svlan_p_0)); + + len = write(self->dev_fd, dev_cmd_svlan_cfi_0, sizeof(dev_cmd_svlan_cfi_0)); + EXPECT_EQ(len, sizeof(dev_cmd_svlan_cfi_0)); + + len = write(self->dev_fd, dev_cmd_svlan_id_1, sizeof(dev_cmd_svlan_id_1)); + EXPECT_EQ(len, sizeof(dev_cmd_svlan_id_1)); +} + + +TEST_F(proc_net_pktgen, dev_cmd_tos) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_tos_0, sizeof(dev_cmd_tos_0)); + EXPECT_EQ(len, sizeof(dev_cmd_tos_0)); + + len = write(self->dev_fd, dev_cmd_tos_1, sizeof(dev_cmd_tos_1)); + EXPECT_EQ(len, sizeof(dev_cmd_tos_1)); + + len = write(self->dev_fd, dev_cmd_tos_2, sizeof(dev_cmd_tos_2)); + EXPECT_EQ(len, sizeof(dev_cmd_tos_2)); +} + + +TEST_F(proc_net_pktgen, dev_cmd_traffic_class) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_traffic_class_0, sizeof(dev_cmd_traffic_class_0)); + EXPECT_EQ(len, sizeof(dev_cmd_traffic_class_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_skb_priority) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_skb_priority_0, sizeof(dev_cmd_skb_priority_0)); + EXPECT_EQ(len, sizeof(dev_cmd_skb_priority_0)); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/psock_tpacket.c b/tools/testing/selftests/net/psock_tpacket.c index 404a2ce759ab..221270cee3ea 100644 --- a/tools/testing/selftests/net/psock_tpacket.c +++ b/tools/testing/selftests/net/psock_tpacket.c @@ -12,7 +12,7 @@ * * Datapath: * Open a pair of packet sockets and send resp. receive an a priori known - * packet pattern accross the sockets and check if it was received resp. + * packet pattern across the sockets and check if it was received resp. * sent correctly. Fanout in combination with RX_RING is currently not * tested here. * diff --git a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c index 066efd30e294..7b9bf8a7bbe1 100644 --- a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c +++ b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c @@ -112,7 +112,7 @@ TEST(reuseaddr_ports_exhausted_reusable_same_euid) ASSERT_NE(-1, fd[0]) TH_LOG("failed to bind."); if (opts->reuseport[0] && opts->reuseport[1]) { - EXPECT_EQ(-1, fd[1]) TH_LOG("should fail to bind because both sockets succeed to be listened."); + EXPECT_EQ(-1, fd[1]) TH_LOG("should fail to bind because both sockets successfully listened."); } else { EXPECT_NE(-1, fd[1]) TH_LOG("should succeed to bind to connect to different destinations."); } diff --git a/tools/testing/selftests/net/rtnetlink.py b/tools/testing/selftests/net/rtnetlink.py new file mode 100755 index 000000000000..80950888800b --- /dev/null +++ b/tools/testing/selftests/net/rtnetlink.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +from lib.py import ksft_exit, ksft_run, ksft_ge, RtnlAddrFamily +import socket + +IPV4_ALL_HOSTS_MULTICAST = b'\xe0\x00\x00\x01' + +def dump_mcaddr_check(rtnl: RtnlAddrFamily) -> None: + """ + Verify that at least one interface has the IPv4 all-hosts multicast address. + At least the loopback interface should have this address. + """ + + addresses = rtnl.getmaddrs({"ifa-family": socket.AF_INET}, dump=True) + + all_host_multicasts = [ + addr for addr in addresses if addr['ifa-multicast'] == IPV4_ALL_HOSTS_MULTICAST + ] + + ksft_ge(len(all_host_multicasts), 1, + "No interface found with the IPv4 all-hosts multicast address") + +def main() -> None: + rtnl = RtnlAddrFamily() + ksft_run([dump_mcaddr_check], args=(rtnl, )) + ksft_exit() + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/net/setup_veth.sh b/tools/testing/selftests/net/setup_veth.sh index 1f78a87f6f37..152bf4c65747 100644 --- a/tools/testing/selftests/net/setup_veth.sh +++ b/tools/testing/selftests/net/setup_veth.sh @@ -11,7 +11,8 @@ setup_veth_ns() { local -r ns_mac="$4" [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}" - echo 1000000 > "/sys/class/net/${ns_dev}/gro_flush_timeout" + echo 200000 > "/sys/class/net/${ns_dev}/gro_flush_timeout" + echo 1 > "/sys/class/net/${ns_dev}/napi_defer_hard_irqs" ip link set dev "${ns_dev}" netns "${ns_name}" mtu 65535 ip -netns "${ns_name}" link set dev "${ns_dev}" up diff --git a/tools/testing/selftests/net/so_rcv_listener.c b/tools/testing/selftests/net/so_rcv_listener.c new file mode 100644 index 000000000000..bc5841192aa6 --- /dev/null +++ b/tools/testing/selftests/net/so_rcv_listener.c @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <errno.h> +#include <netdb.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <linux/types.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> + +#ifndef SO_RCVPRIORITY +#define SO_RCVPRIORITY 82 +#endif + +struct options { + __u32 val; + int name; + int rcvname; + const char *host; + const char *service; +} opt; + +static void __attribute__((noreturn)) usage(const char *bin) +{ + printf("Usage: %s [opts] <dst host> <dst port / service>\n", bin); + printf("Options:\n" + "\t\t-M val Test SO_RCVMARK\n" + "\t\t-P val Test SO_RCVPRIORITY\n" + ""); + exit(EXIT_FAILURE); +} + +static void parse_args(int argc, char *argv[]) +{ + int o; + + while ((o = getopt(argc, argv, "M:P:")) != -1) { + switch (o) { + case 'M': + opt.val = atoi(optarg); + opt.name = SO_MARK; + opt.rcvname = SO_RCVMARK; + break; + case 'P': + opt.val = atoi(optarg); + opt.name = SO_PRIORITY; + opt.rcvname = SO_RCVPRIORITY; + break; + default: + usage(argv[0]); + break; + } + } + + if (optind != argc - 2) + usage(argv[0]); + + opt.host = argv[optind]; + opt.service = argv[optind + 1]; +} + +int main(int argc, char *argv[]) +{ + int err = 0; + int recv_fd = -1; + int ret_value = 0; + __u32 recv_val; + struct cmsghdr *cmsg; + char cbuf[CMSG_SPACE(sizeof(__u32))]; + char recv_buf[CMSG_SPACE(sizeof(__u32))]; + struct iovec iov[1]; + struct msghdr msg; + struct sockaddr_in recv_addr4; + struct sockaddr_in6 recv_addr6; + + parse_args(argc, argv); + + int family = strchr(opt.host, ':') ? AF_INET6 : AF_INET; + + recv_fd = socket(family, SOCK_DGRAM, IPPROTO_UDP); + if (recv_fd < 0) { + perror("Can't open recv socket"); + ret_value = -errno; + goto cleanup; + } + + err = setsockopt(recv_fd, SOL_SOCKET, opt.rcvname, &opt.val, sizeof(opt.val)); + if (err < 0) { + perror("Recv setsockopt error"); + ret_value = -errno; + goto cleanup; + } + + if (family == AF_INET) { + memset(&recv_addr4, 0, sizeof(recv_addr4)); + recv_addr4.sin_family = family; + recv_addr4.sin_port = htons(atoi(opt.service)); + + if (inet_pton(family, opt.host, &recv_addr4.sin_addr) <= 0) { + perror("Invalid IPV4 address"); + ret_value = -errno; + goto cleanup; + } + + err = bind(recv_fd, (struct sockaddr *)&recv_addr4, sizeof(recv_addr4)); + } else { + memset(&recv_addr6, 0, sizeof(recv_addr6)); + recv_addr6.sin6_family = family; + recv_addr6.sin6_port = htons(atoi(opt.service)); + + if (inet_pton(family, opt.host, &recv_addr6.sin6_addr) <= 0) { + perror("Invalid IPV6 address"); + ret_value = -errno; + goto cleanup; + } + + err = bind(recv_fd, (struct sockaddr *)&recv_addr6, sizeof(recv_addr6)); + } + + if (err < 0) { + perror("Recv bind error"); + ret_value = -errno; + goto cleanup; + } + + iov[0].iov_base = recv_buf; + iov[0].iov_len = sizeof(recv_buf); + + memset(&msg, 0, sizeof(msg)); + msg.msg_iov = iov; + msg.msg_iovlen = 1; + msg.msg_control = cbuf; + msg.msg_controllen = sizeof(cbuf); + + err = recvmsg(recv_fd, &msg, 0); + if (err < 0) { + perror("Message receive error"); + ret_value = -errno; + goto cleanup; + } + + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; cmsg = CMSG_NXTHDR(&msg, cmsg)) { + if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == opt.name) { + recv_val = *(__u32 *)CMSG_DATA(cmsg); + printf("Received value: %u\n", recv_val); + + if (recv_val != opt.val) { + fprintf(stderr, "Error: expected value: %u, got: %u\n", + opt.val, recv_val); + ret_value = -EINVAL; + } + goto cleanup; + } + } + + fprintf(stderr, "Error: No matching cmsg received\n"); + ret_value = -ENOMSG; + +cleanup: + if (recv_fd >= 0) + close(recv_fd); + + return ret_value; +} diff --git a/tools/testing/selftests/net/tcp_ao/connect-deny.c b/tools/testing/selftests/net/tcp_ao/connect-deny.c index d418162d335f..93b61e9a36f1 100644 --- a/tools/testing/selftests/net/tcp_ao/connect-deny.c +++ b/tools/testing/selftests/net/tcp_ao/connect-deny.c @@ -4,6 +4,7 @@ #include "aolib.h" #define fault(type) (inj == FAULT_ ## type) +static volatile int sk_pair; static inline int test_add_key_maclen(int sk, const char *key, uint8_t maclen, union tcp_addr in_addr, uint8_t prefix, @@ -34,10 +35,10 @@ static void try_accept(const char *tst_name, unsigned int port, const char *pwd, const char *cnt_name, test_cnt cnt_expected, fault_t inj) { - struct tcp_ao_counters ao_cnt1, ao_cnt2; + struct tcp_counters cnt1, cnt2; uint64_t before_cnt = 0, after_cnt = 0; /* silence GCC */ + test_cnt poll_cnt = (cnt_expected == TEST_CNT_GOOD) ? 0 : cnt_expected; int lsk, err, sk = 0; - time_t timeout; lsk = test_listen_socket(this_ip_addr, port, 1); @@ -46,21 +47,24 @@ static void try_accept(const char *tst_name, unsigned int port, const char *pwd, if (cnt_name) before_cnt = netstat_get_one(cnt_name, NULL); - if (pwd && test_get_tcp_ao_counters(lsk, &ao_cnt1)) - test_error("test_get_tcp_ao_counters()"); + if (pwd && test_get_tcp_counters(lsk, &cnt1)) + test_error("test_get_tcp_counters()"); synchronize_threads(); /* preparations done */ - timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; - err = test_wait_fd(lsk, timeout, 0); + err = test_skpair_wait_poll(lsk, 0, poll_cnt, &sk_pair); if (err == -ETIMEDOUT) { + sk_pair = err; if (!fault(TIMEOUT)) - test_fail("timed out for accept()"); + test_fail("%s: timed out for accept()", tst_name); + } else if (err == -EKEYREJECTED) { + if (!fault(KEYREJECT)) + test_fail("%s: key was rejected", tst_name); } else if (err < 0) { - test_error("test_wait_fd()"); + test_error("test_skpair_wait_poll()"); } else { if (fault(TIMEOUT)) - test_fail("ready to accept"); + test_fail("%s: ready to accept", tst_name); sk = accept(lsk, NULL, NULL); if (sk < 0) { @@ -72,13 +76,13 @@ static void try_accept(const char *tst_name, unsigned int port, const char *pwd, } synchronize_threads(); /* before counter checks */ - if (pwd && test_get_tcp_ao_counters(lsk, &ao_cnt2)) - test_error("test_get_tcp_ao_counters()"); + if (pwd && test_get_tcp_counters(lsk, &cnt2)) + test_error("test_get_tcp_counters()"); close(lsk); if (pwd) - test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected); + test_assert_counters(tst_name, &cnt1, &cnt2, cnt_expected); if (!cnt_name) goto out; @@ -109,7 +113,7 @@ static void *server_fn(void *arg) try_accept("Non-AO server + AO client", port++, NULL, this_ip_dest, -1, 100, 100, 0, - "TCPAOKeyNotFound", 0, FAULT_TIMEOUT); + "TCPAOKeyNotFound", TEST_CNT_NS_KEY_NOT_FOUND, FAULT_TIMEOUT); try_accept("AO server + Non-AO client", port++, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100, 0, @@ -135,8 +139,9 @@ static void *server_fn(void *arg) wrong_addr, -1, 100, 100, 0, "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND, FAULT_TIMEOUT); + /* Key rejected by the other side, failing short through skpair */ try_accept("Client: Wrong addr", port++, NULL, - this_ip_dest, -1, 100, 100, 0, NULL, 0, FAULT_TIMEOUT); + this_ip_dest, -1, 100, 100, 0, NULL, 0, FAULT_KEYREJECT); try_accept("rcv id != snd id", port++, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 200, 100, 0, @@ -163,8 +168,7 @@ static void try_connect(const char *tst_name, unsigned int port, uint8_t sndid, uint8_t rcvid, test_cnt cnt_expected, fault_t inj) { - struct tcp_ao_counters ao_cnt1, ao_cnt2; - time_t timeout; + struct tcp_counters cnt1, cnt2; int sk, ret; sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); @@ -174,16 +178,15 @@ static void try_connect(const char *tst_name, unsigned int port, if (pwd && test_add_key(sk, pwd, addr, prefix, sndid, rcvid)) test_error("setsockopt(TCP_AO_ADD_KEY)"); - if (pwd && test_get_tcp_ao_counters(sk, &ao_cnt1)) - test_error("test_get_tcp_ao_counters()"); + if (pwd && test_get_tcp_counters(sk, &cnt1)) + test_error("test_get_tcp_counters()"); synchronize_threads(); /* preparations done */ - timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; - ret = _test_connect_socket(sk, this_ip_dest, port, timeout); - + ret = test_skpair_connect_poll(sk, this_ip_dest, port, cnt_expected, &sk_pair); synchronize_threads(); /* before counter checks */ if (ret < 0) { + sk_pair = ret; if (fault(KEYREJECT) && ret == -EKEYREJECTED) { test_ok("%s: connect() was prevented", tst_name); } else if (ret == -ETIMEDOUT && fault(TIMEOUT)) { @@ -202,9 +205,11 @@ static void try_connect(const char *tst_name, unsigned int port, else test_ok("%s: connected", tst_name); if (pwd && ret > 0) { - if (test_get_tcp_ao_counters(sk, &ao_cnt2)) - test_error("test_get_tcp_ao_counters()"); - test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected); + if (test_get_tcp_counters(sk, &cnt2)) + test_error("test_get_tcp_counters()"); + test_assert_counters(tst_name, &cnt1, &cnt2, cnt_expected); + } else if (pwd) { + test_tcp_counters_free(&cnt1); } out: synchronize_threads(); /* close() */ @@ -241,6 +246,11 @@ static void *client_fn(void *arg) try_connect("Wrong rcv id", port++, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + /* + * XXX: The test doesn't increase any counters, see tcp_make_synack(). + * Potentially, it can be speed up by setting sk_pair = -ETIMEDOUT + * but the price would be increased complexity of the tracer thread. + */ trace_ao_event_sk_expect(TCP_AO_SYNACK_NO_KEY, this_ip_dest, addr_any, port, 0, 100, 100); try_connect("Wrong snd id", port++, DEFAULT_TEST_PASSWORD, diff --git a/tools/testing/selftests/net/tcp_ao/connect.c b/tools/testing/selftests/net/tcp_ao/connect.c index f1d8d29e393f..340f00e979ea 100644 --- a/tools/testing/selftests/net/tcp_ao/connect.c +++ b/tools/testing/selftests/net/tcp_ao/connect.c @@ -35,7 +35,7 @@ static void *client_fn(void *arg) uint64_t before_aogood, after_aogood; const size_t nr_packets = 20; struct netstat *ns_before, *ns_after; - struct tcp_ao_counters ao1, ao2; + struct tcp_counters ao1, ao2; if (sk < 0) test_error("socket()"); @@ -50,18 +50,18 @@ static void *client_fn(void *arg) ns_before = netstat_read(); before_aogood = netstat_get(ns_before, "TCPAOGood", NULL); - if (test_get_tcp_ao_counters(sk, &ao1)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &ao1)) + test_error("test_get_tcp_counters()"); - if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) { + if (test_client_verify(sk, 100, nr_packets)) { test_fail("verify failed"); return NULL; } ns_after = netstat_read(); after_aogood = netstat_get(ns_after, "TCPAOGood", NULL); - if (test_get_tcp_ao_counters(sk, &ao2)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &ao2)) + test_error("test_get_tcp_counters()"); netstat_print_diff(ns_before, ns_after); netstat_free(ns_before); netstat_free(ns_after); @@ -71,14 +71,14 @@ static void *client_fn(void *arg) nr_packets, after_aogood, before_aogood); return NULL; } - if (test_tcp_ao_counters_cmp("connect", &ao1, &ao2, TEST_CNT_GOOD)) + if (test_assert_counters("connect", &ao1, &ao2, TEST_CNT_GOOD)) return NULL; test_ok("connect TCPAOGood %" PRIu64 "/%" PRIu64 "/%" PRIu64 " => %" PRIu64 "/%" PRIu64 "/%" PRIu64 ", sent %zu", - before_aogood, ao1.ao_info_pkt_good, - ao1.key_cnts[0].pkt_good, - after_aogood, ao2.ao_info_pkt_good, - ao2.key_cnts[0].pkt_good, + before_aogood, ao1.ao.ao_info_pkt_good, + ao1.ao.key_cnts[0].pkt_good, + after_aogood, ao2.ao.ao_info_pkt_good, + ao2.ao.key_cnts[0].pkt_good, nr_packets); return NULL; } diff --git a/tools/testing/selftests/net/tcp_ao/icmps-discard.c b/tools/testing/selftests/net/tcp_ao/icmps-discard.c index a1614f0d8c44..85c1a1e958c6 100644 --- a/tools/testing/selftests/net/tcp_ao/icmps-discard.c +++ b/tools/testing/selftests/net/tcp_ao/icmps-discard.c @@ -53,7 +53,7 @@ static void serve_interfered(int sk) ssize_t test_quota = packet_size * packets_nr * 10; uint64_t dest_unreach_a, dest_unreach_b; uint64_t icmp_ignored_a, icmp_ignored_b; - struct tcp_ao_counters ao_cnt1, ao_cnt2; + struct tcp_counters cnt1, cnt2; bool counter_not_found; struct netstat *ns_after, *ns_before; ssize_t bytes; @@ -61,16 +61,16 @@ static void serve_interfered(int sk) ns_before = netstat_read(); dest_unreach_a = netstat_get(ns_before, dst_unreach, NULL); icmp_ignored_a = netstat_get(ns_before, tcpao_icmps, NULL); - if (test_get_tcp_ao_counters(sk, &ao_cnt1)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &cnt1)) + test_error("test_get_tcp_counters()"); bytes = test_server_run(sk, test_quota, 0); ns_after = netstat_read(); netstat_print_diff(ns_before, ns_after); dest_unreach_b = netstat_get(ns_after, dst_unreach, NULL); icmp_ignored_b = netstat_get(ns_after, tcpao_icmps, &counter_not_found); - if (test_get_tcp_ao_counters(sk, &ao_cnt2)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &cnt2)) + test_error("test_get_tcp_counters()"); netstat_free(ns_before); netstat_free(ns_after); @@ -91,9 +91,9 @@ static void serve_interfered(int sk) return; } #ifdef TEST_ICMPS_ACCEPT - test_tcp_ao_counters_cmp(NULL, &ao_cnt1, &ao_cnt2, TEST_CNT_GOOD); + test_assert_counters(NULL, &cnt1, &cnt2, TEST_CNT_GOOD); #else - test_tcp_ao_counters_cmp(NULL, &ao_cnt1, &ao_cnt2, TEST_CNT_GOOD | TEST_CNT_AO_DROPPED_ICMP); + test_assert_counters(NULL, &cnt1, &cnt2, TEST_CNT_GOOD | TEST_CNT_AO_DROPPED_ICMP); #endif if (icmp_ignored_a >= icmp_ignored_b) { test_icmps_fail("%s counter didn't change: %" PRIu64 " >= %" PRIu64, @@ -395,7 +395,6 @@ static void icmp_interfere(const size_t nr, uint32_t rcv_nxt, void *src, void *d static void send_interfered(int sk) { - const unsigned int timeout = TEST_TIMEOUT_SEC; struct sockaddr_in6 src, dst; socklen_t addr_sz; @@ -409,7 +408,7 @@ static void send_interfered(int sk) while (1) { uint32_t rcv_nxt; - if (test_client_verify(sk, packet_size, packets_nr, timeout)) { + if (test_client_verify(sk, packet_size, packets_nr)) { test_fail("client: connection is broken"); return; } diff --git a/tools/testing/selftests/net/tcp_ao/key-management.c b/tools/testing/selftests/net/tcp_ao/key-management.c index d4385b52c10b..69d9a7a05d5c 100644 --- a/tools/testing/selftests/net/tcp_ao/key-management.c +++ b/tools/testing/selftests/net/tcp_ao/key-management.c @@ -629,11 +629,11 @@ static int key_collection_socket(bool server, unsigned int port) } static void verify_counters(const char *tst_name, bool is_listen_sk, bool server, - struct tcp_ao_counters *a, struct tcp_ao_counters *b) + struct tcp_counters *a, struct tcp_counters *b) { unsigned int i; - __test_tcp_ao_counters_cmp(tst_name, a, b, TEST_CNT_GOOD); + test_assert_counters_sk(tst_name, a, b, TEST_CNT_GOOD); for (i = 0; i < collection.nr_keys; i++) { struct test_key *key = &collection.keys[i]; @@ -652,12 +652,12 @@ static void verify_counters(const char *tst_name, bool is_listen_sk, bool server rx_cnt_expected = key->used_on_server_tx; } - test_tcp_ao_key_counters_cmp(tst_name, a, b, - rx_cnt_expected ? TEST_CNT_KEY_GOOD : 0, - sndid, rcvid); + test_assert_counters_key(tst_name, &a->ao, &b->ao, + rx_cnt_expected ? TEST_CNT_KEY_GOOD : 0, + sndid, rcvid); } - test_tcp_ao_counters_free(a); - test_tcp_ao_counters_free(b); + test_tcp_counters_free(a); + test_tcp_counters_free(b); test_ok("%s: passed counters checks", tst_name); } @@ -791,17 +791,17 @@ out: } static int start_server(const char *tst_name, unsigned int port, size_t quota, - struct tcp_ao_counters *begin, + struct tcp_counters *begin, unsigned int current_index, unsigned int rnext_index) { - struct tcp_ao_counters lsk_c1, lsk_c2; + struct tcp_counters lsk_c1, lsk_c2; ssize_t bytes; int sk, lsk; synchronize_threads(); /* 1: key collection initialized */ lsk = key_collection_socket(true, port); - if (test_get_tcp_ao_counters(lsk, &lsk_c1)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(lsk, &lsk_c1)) + test_error("test_get_tcp_counters()"); synchronize_threads(); /* 2: MKTs added => connect() */ if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0)) test_error("test_wait_fd()"); @@ -809,12 +809,12 @@ static int start_server(const char *tst_name, unsigned int port, size_t quota, sk = accept(lsk, NULL, NULL); if (sk < 0) test_error("accept()"); - if (test_get_tcp_ao_counters(sk, begin)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, begin)) + test_error("test_get_tcp_counters()"); synchronize_threads(); /* 3: accepted => send data */ - if (test_get_tcp_ao_counters(lsk, &lsk_c2)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(lsk, &lsk_c2)) + test_error("test_get_tcp_counters()"); verify_keys(tst_name, lsk, true, true); close(lsk); @@ -830,12 +830,12 @@ static int start_server(const char *tst_name, unsigned int port, size_t quota, } static void end_server(const char *tst_name, int sk, - struct tcp_ao_counters *begin) + struct tcp_counters *begin) { - struct tcp_ao_counters end; + struct tcp_counters end; - if (test_get_tcp_ao_counters(sk, &end)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &end)) + test_error("test_get_tcp_counters()"); verify_keys(tst_name, sk, false, true); synchronize_threads(); /* 4: verified => closed */ @@ -848,7 +848,7 @@ static void end_server(const char *tst_name, int sk, static void try_server_run(const char *tst_name, unsigned int port, size_t quota, unsigned int current_index, unsigned int rnext_index) { - struct tcp_ao_counters tmp; + struct tcp_counters tmp; int sk; sk = start_server(tst_name, port, quota, &tmp, @@ -860,7 +860,7 @@ static void server_rotations(const char *tst_name, unsigned int port, size_t quota, unsigned int rotations, unsigned int current_index, unsigned int rnext_index) { - struct tcp_ao_counters tmp; + struct tcp_counters tmp; unsigned int i; int sk; @@ -886,7 +886,7 @@ static void server_rotations(const char *tst_name, unsigned int port, static int run_client(const char *tst_name, unsigned int port, unsigned int nr_keys, int current_index, int rnext_index, - struct tcp_ao_counters *before, + struct tcp_counters *before, const size_t msg_sz, const size_t msg_nr) { int sk; @@ -904,8 +904,8 @@ static int run_client(const char *tst_name, unsigned int port, if (test_set_key(sk, sndid, rcvid)) test_error("failed to set current/rnext keys"); } - if (before && test_get_tcp_ao_counters(sk, before)) - test_error("test_get_tcp_ao_counters()"); + if (before && test_get_tcp_counters(sk, before)) + test_error("test_get_tcp_counters()"); synchronize_threads(); /* 2: MKTs added => connect() */ if (test_connect_socket(sk, this_ip_dest, port++) <= 0) @@ -918,11 +918,11 @@ static int run_client(const char *tst_name, unsigned int port, collection.keys[rnext_index].used_on_server_tx = 1; synchronize_threads(); /* 3: accepted => send data */ - if (test_client_verify(sk, msg_sz, msg_nr, TEST_TIMEOUT_SEC)) { + if (test_client_verify(sk, msg_sz, msg_nr)) { test_fail("verify failed"); close(sk); if (before) - test_tcp_ao_counters_free(before); + test_tcp_counters_free(before); return -1; } @@ -931,7 +931,7 @@ static int run_client(const char *tst_name, unsigned int port, static int start_client(const char *tst_name, unsigned int port, unsigned int nr_keys, int current_index, int rnext_index, - struct tcp_ao_counters *before, + struct tcp_counters *before, const size_t msg_sz, const size_t msg_nr) { if (init_default_key_collection(nr_keys, true)) @@ -943,9 +943,9 @@ static int start_client(const char *tst_name, unsigned int port, static void end_client(const char *tst_name, int sk, unsigned int nr_keys, int current_index, int rnext_index, - struct tcp_ao_counters *start) + struct tcp_counters *start) { - struct tcp_ao_counters end; + struct tcp_counters end; /* Some application may become dependent on this kernel choice */ if (current_index < 0) @@ -955,8 +955,8 @@ static void end_client(const char *tst_name, int sk, unsigned int nr_keys, verify_current_rnext(tst_name, sk, collection.keys[current_index].client_keyid, collection.keys[rnext_index].server_keyid); - if (start && test_get_tcp_ao_counters(sk, &end)) - test_error("test_get_tcp_ao_counters()"); + if (start && test_get_tcp_counters(sk, &end)) + test_error("test_get_tcp_counters()"); verify_keys(tst_name, sk, false, false); synchronize_threads(); /* 4: verify => closed */ close(sk); @@ -1016,7 +1016,7 @@ static void try_unmatched_keys(int sk, int *rnext_index, unsigned int port) trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, this_ip_addr, this_ip_dest, -1, port, 0, -1, -1, -1, -1, -1, -1, key->server_keyid, -1); - if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) + if (test_client_verify(sk, msg_len, nr_packets)) test_fail("verify failed"); *rnext_index = i; } @@ -1048,7 +1048,7 @@ static void check_current_back(const char *tst_name, unsigned int port, unsigned int current_index, unsigned int rnext_index, unsigned int rotate_to_index) { - struct tcp_ao_counters tmp; + struct tcp_counters tmp; int sk; sk = start_client(tst_name, port, nr_keys, current_index, rnext_index, @@ -1061,7 +1061,7 @@ static void check_current_back(const char *tst_name, unsigned int port, port, -1, 0, -1, -1, -1, -1, -1, collection.keys[rotate_to_index].client_keyid, collection.keys[current_index].client_keyid, -1); - if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) + if (test_client_verify(sk, msg_len, nr_packets)) test_fail("verify failed"); /* There is a race here: between setting the current_key with * setsockopt(TCP_AO_INFO) and starting to send some data - there @@ -1081,7 +1081,7 @@ static void roll_over_keys(const char *tst_name, unsigned int port, unsigned int nr_keys, unsigned int rotations, unsigned int current_index, unsigned int rnext_index) { - struct tcp_ao_counters tmp; + struct tcp_counters tmp; unsigned int i; int sk; @@ -1099,10 +1099,10 @@ static void roll_over_keys(const char *tst_name, unsigned int port, collection.keys[i].server_keyid, -1); if (test_set_key(sk, -1, collection.keys[i].server_keyid)) test_error("Can't change the Rnext key"); - if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) { + if (test_client_verify(sk, msg_len, nr_packets)) { test_fail("verify failed"); close(sk); - test_tcp_ao_counters_free(&tmp); + test_tcp_counters_free(&tmp); return; } verify_current_rnext(tst_name, sk, -1, @@ -1116,7 +1116,7 @@ static void roll_over_keys(const char *tst_name, unsigned int port, static void try_client_run(const char *tst_name, unsigned int port, unsigned int nr_keys, int current_index, int rnext_index) { - struct tcp_ao_counters tmp; + struct tcp_counters tmp; int sk; sk = start_client(tst_name, port, nr_keys, current_index, rnext_index, diff --git a/tools/testing/selftests/net/tcp_ao/lib/aolib.h b/tools/testing/selftests/net/tcp_ao/lib/aolib.h index 5db2f65cddc4..ebb2899c12fe 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/aolib.h +++ b/tools/testing/selftests/net/tcp_ao/lib/aolib.h @@ -289,7 +289,7 @@ extern int link_set_up(const char *intf); extern const unsigned int test_server_port; extern int test_wait_fd(int sk, time_t sec, bool write); extern int __test_connect_socket(int sk, const char *device, - void *addr, size_t addr_sz, time_t timeout); + void *addr, size_t addr_sz, bool async); extern int __test_listen_socket(int backlog, void *addr, size_t addr_sz); static inline int test_listen_socket(const union tcp_addr taddr, @@ -331,25 +331,26 @@ static inline int test_listen_socket(const union tcp_addr taddr, * If set to 0 - kernel will try to retransmit SYN number of times, set in * /proc/sys/net/ipv4/tcp_syn_retries * By default set to 1 to make tests pass faster on non-busy machine. + * [in process of removal, don't use in new tests] */ #ifndef TEST_RETRANSMIT_SEC #define TEST_RETRANSMIT_SEC 1 #endif static inline int _test_connect_socket(int sk, const union tcp_addr taddr, - unsigned int port, time_t timeout) + unsigned int port, bool async) { sockaddr_af addr; tcp_addr_to_sockaddr_in(&addr, &taddr, htons(port)); return __test_connect_socket(sk, veth_name, - (void *)&addr, sizeof(addr), timeout); + (void *)&addr, sizeof(addr), async); } static inline int test_connect_socket(int sk, const union tcp_addr taddr, unsigned int port) { - return _test_connect_socket(sk, taddr, port, TEST_TIMEOUT_SEC); + return _test_connect_socket(sk, taddr, port, false); } extern int __test_set_md5(int sk, void *addr, size_t addr_sz, @@ -483,10 +484,7 @@ static inline int test_set_ao_flags(int sk, bool ao_required, bool accept_icmps) } extern ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec); -extern ssize_t test_client_loop(int sk, char *buf, size_t buf_sz, - const size_t msg_len, time_t timeout_sec); -extern int test_client_verify(int sk, const size_t msg_len, const size_t nr, - time_t timeout_sec); +extern int test_client_verify(int sk, const size_t msg_len, const size_t nr); struct tcp_ao_key_counters { uint8_t sndid; @@ -512,7 +510,15 @@ struct tcp_ao_counters { size_t nr_keys; struct tcp_ao_key_counters *key_cnts; }; -extern int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out); + +struct tcp_counters { + struct tcp_ao_counters ao; + uint64_t netns_md5_notfound; + uint64_t netns_md5_unexpected; + uint64_t netns_md5_failure; +}; + +extern int test_get_tcp_counters(int sk, struct tcp_counters *out); #define TEST_CNT_KEY_GOOD BIT(0) #define TEST_CNT_KEY_BAD BIT(1) @@ -526,8 +532,31 @@ extern int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out); #define TEST_CNT_NS_KEY_NOT_FOUND BIT(9) #define TEST_CNT_NS_AO_REQUIRED BIT(10) #define TEST_CNT_NS_DROPPED_ICMP BIT(11) +#define TEST_CNT_NS_MD5_NOT_FOUND BIT(12) +#define TEST_CNT_NS_MD5_UNEXPECTED BIT(13) +#define TEST_CNT_NS_MD5_FAILURE BIT(14) typedef uint16_t test_cnt; +#define _for_each_counter(f) \ +do { \ + /* per-netns */ \ + f(ao.netns_ao_good, TEST_CNT_NS_GOOD); \ + f(ao.netns_ao_bad, TEST_CNT_NS_BAD); \ + f(ao.netns_ao_key_not_found, TEST_CNT_NS_KEY_NOT_FOUND); \ + f(ao.netns_ao_required, TEST_CNT_NS_AO_REQUIRED); \ + f(ao.netns_ao_dropped_icmp, TEST_CNT_NS_DROPPED_ICMP); \ + /* per-socket */ \ + f(ao.ao_info_pkt_good, TEST_CNT_SOCK_GOOD); \ + f(ao.ao_info_pkt_bad, TEST_CNT_SOCK_BAD); \ + f(ao.ao_info_pkt_key_not_found, TEST_CNT_SOCK_KEY_NOT_FOUND); \ + f(ao.ao_info_pkt_ao_required, TEST_CNT_SOCK_AO_REQUIRED); \ + f(ao.ao_info_pkt_dropped_icmp, TEST_CNT_SOCK_DROPPED_ICMP); \ + /* non-AO */ \ + f(netns_md5_notfound, TEST_CNT_NS_MD5_NOT_FOUND); \ + f(netns_md5_unexpected, TEST_CNT_NS_MD5_UNEXPECTED); \ + f(netns_md5_failure, TEST_CNT_NS_MD5_FAILURE); \ +} while (0) + #define TEST_CNT_AO_GOOD (TEST_CNT_SOCK_GOOD | TEST_CNT_NS_GOOD) #define TEST_CNT_AO_BAD (TEST_CNT_SOCK_BAD | TEST_CNT_NS_BAD) #define TEST_CNT_AO_KEY_NOT_FOUND (TEST_CNT_SOCK_KEY_NOT_FOUND | \ @@ -539,34 +568,71 @@ typedef uint16_t test_cnt; #define TEST_CNT_GOOD (TEST_CNT_KEY_GOOD | TEST_CNT_AO_GOOD) #define TEST_CNT_BAD (TEST_CNT_KEY_BAD | TEST_CNT_AO_BAD) -extern int __test_tcp_ao_counters_cmp(const char *tst_name, - struct tcp_ao_counters *before, struct tcp_ao_counters *after, +extern test_cnt test_cmp_counters(struct tcp_counters *before, + struct tcp_counters *after); +extern int test_assert_counters_sk(const char *tst_name, + struct tcp_counters *before, struct tcp_counters *after, test_cnt expected); -extern int test_tcp_ao_key_counters_cmp(const char *tst_name, +extern int test_assert_counters_key(const char *tst_name, struct tcp_ao_counters *before, struct tcp_ao_counters *after, test_cnt expected, int sndid, int rcvid); -extern void test_tcp_ao_counters_free(struct tcp_ao_counters *cnts); +extern void test_tcp_counters_free(struct tcp_counters *cnts); + +/* + * Polling for netns and socket counters during select()/connect() and also + * client/server messaging. Instead of constant timeout on underlying select(), + * check the counters and return early. This allows to pass the tests where + * timeout is expected without waiting for that fixing timeout (tests speed-up). + * Previously shorter timeouts were used for tests expecting to time out, + * but that leaded to sporadic false positives on counter checks failures, + * as one second timeouts aren't enough for TCP retransmit. + * + * Two sides of the socketpair (client/server) should synchronize failures + * using a shared variable *err, so that they can detect the other side's + * failure. + */ +extern int test_skpair_wait_poll(int sk, bool write, test_cnt cond, + volatile int *err); +extern int _test_skpair_connect_poll(int sk, const char *device, + void *addr, size_t addr_sz, + test_cnt cond, volatile int *err); +static inline int test_skpair_connect_poll(int sk, const union tcp_addr taddr, + unsigned int port, + test_cnt cond, volatile int *err) +{ + sockaddr_af addr; + + tcp_addr_to_sockaddr_in(&addr, &taddr, htons(port)); + return _test_skpair_connect_poll(sk, veth_name, + (void *)&addr, sizeof(addr), cond, err); +} + +extern int test_skpair_client(int sk, const size_t msg_len, const size_t nr, + test_cnt cond, volatile int *err); +extern int test_skpair_server(int sk, ssize_t quota, + test_cnt cond, volatile int *err); + /* - * Frees buffers allocated in test_get_tcp_ao_counters(). + * Frees buffers allocated in test_get_tcp_counters(). * The function doesn't expect new keys or keys removed between calls - * to test_get_tcp_ao_counters(). Check key counters manually if they + * to test_get_tcp_counters(). Check key counters manually if they * may change. */ -static inline int test_tcp_ao_counters_cmp(const char *tst_name, - struct tcp_ao_counters *before, - struct tcp_ao_counters *after, - test_cnt expected) +static inline int test_assert_counters(const char *tst_name, + struct tcp_counters *before, + struct tcp_counters *after, + test_cnt expected) { int ret; - ret = __test_tcp_ao_counters_cmp(tst_name, before, after, expected); + ret = test_assert_counters_sk(tst_name, before, after, expected); if (ret) goto out; - ret = test_tcp_ao_key_counters_cmp(tst_name, before, after, - expected, -1, -1); + ret = test_assert_counters_key(tst_name, &before->ao, &after->ao, + expected, -1, -1); out: - test_tcp_ao_counters_free(before); - test_tcp_ao_counters_free(after); + test_tcp_counters_free(before); + test_tcp_counters_free(after); return ret; } diff --git a/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c b/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c index 24380c68fec6..27403f875054 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c +++ b/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c @@ -427,11 +427,8 @@ static void dump_trace_event(struct expected_trace_point *e) test_print("trace event filter %s [%s:%d => %s:%d, L3index %d, flags: %s%s%s%s%s, keyid: %d, rnext: %d, maclen: %d, sne: %d] = %zu", trace_event_names[e->type], src, e->src_port, dst, e->dst_port, e->L3index, - (e->fin > 0) ? "F" : (e->fin == 0) ? "!F" : "", - (e->syn > 0) ? "S" : (e->syn == 0) ? "!S" : "", - (e->rst > 0) ? "R" : (e->rst == 0) ? "!R" : "", - (e->psh > 0) ? "P" : (e->psh == 0) ? "!P" : "", - (e->ack > 0) ? "." : (e->ack == 0) ? "!." : "", + e->fin ? "F" : "", e->syn ? "S" : "", e->rst ? "R" : "", + e->psh ? "P" : "", e->ack ? "." : "", e->keyid, e->rnext, e->maclen, e->sne, e->matched); } diff --git a/tools/testing/selftests/net/tcp_ao/lib/sock.c b/tools/testing/selftests/net/tcp_ao/lib/sock.c index 0ffda966c677..ef8e9031d47a 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/sock.c +++ b/tools/testing/selftests/net/tcp_ao/lib/sock.c @@ -34,10 +34,8 @@ int __test_listen_socket(int backlog, void *addr, size_t addr_sz) return sk; } -int test_wait_fd(int sk, time_t sec, bool write) +static int __test_wait_fd(int sk, struct timeval *tv, bool write) { - struct timeval tv = { .tv_sec = sec }; - struct timeval *ptv = NULL; fd_set fds, efds; int ret; socklen_t slen = sizeof(ret); @@ -47,14 +45,11 @@ int test_wait_fd(int sk, time_t sec, bool write) FD_ZERO(&efds); FD_SET(sk, &efds); - if (sec) - ptv = &tv; - errno = 0; if (write) - ret = select(sk + 1, NULL, &fds, &efds, ptv); + ret = select(sk + 1, NULL, &fds, &efds, tv); else - ret = select(sk + 1, &fds, NULL, &efds, ptv); + ret = select(sk + 1, &fds, NULL, &efds, tv); if (ret < 0) return -errno; if (ret == 0) { @@ -69,8 +64,54 @@ int test_wait_fd(int sk, time_t sec, bool write) return 0; } +int test_wait_fd(int sk, time_t sec, bool write) +{ + struct timeval tv = { .tv_sec = sec, }; + + return __test_wait_fd(sk, sec ? &tv : NULL, write); +} + +static bool __skpair_poll_should_stop(int sk, struct tcp_counters *c, + test_cnt condition) +{ + struct tcp_counters c2; + test_cnt diff; + + if (test_get_tcp_counters(sk, &c2)) + test_error("test_get_tcp_counters()"); + + diff = test_cmp_counters(c, &c2); + test_tcp_counters_free(&c2); + return (diff & condition) == condition; +} + +/* How often wake up and check netns counters & paired (*err) */ +#define POLL_USEC 150 +static int __test_skpair_poll(int sk, bool write, uint64_t timeout, + struct tcp_counters *c, test_cnt cond, + volatile int *err) +{ + uint64_t t; + + for (t = 0; t <= timeout * 1000000; t += POLL_USEC) { + struct timeval tv = { .tv_usec = POLL_USEC, }; + int ret; + + ret = __test_wait_fd(sk, &tv, write); + if (ret != -ETIMEDOUT) + return ret; + if (c && cond && __skpair_poll_should_stop(sk, c, cond)) + break; + if (err && *err) + return *err; + } + if (err) + *err = -ETIMEDOUT; + return -ETIMEDOUT; +} + int __test_connect_socket(int sk, const char *device, - void *addr, size_t addr_sz, time_t timeout) + void *addr, size_t addr_sz, bool async) { long flags; int err; @@ -82,15 +123,6 @@ int __test_connect_socket(int sk, const char *device, test_error("setsockopt(SO_BINDTODEVICE, %s)", device); } - if (!timeout) { - err = connect(sk, addr, addr_sz); - if (err) { - err = -errno; - goto out; - } - return 0; - } - flags = fcntl(sk, F_GETFL); if ((flags < 0) || (fcntl(sk, F_SETFL, flags | O_NONBLOCK) < 0)) test_error("fcntl()"); @@ -100,9 +132,9 @@ int __test_connect_socket(int sk, const char *device, err = -errno; goto out; } - if (timeout < 0) + if (async) return sk; - err = test_wait_fd(sk, timeout, 1); + err = test_wait_fd(sk, TEST_TIMEOUT_SEC, 1); if (err) goto out; } @@ -113,6 +145,45 @@ out: return err; } +int test_skpair_wait_poll(int sk, bool write, + test_cnt cond, volatile int *err) +{ + struct tcp_counters c; + int ret; + + *err = 0; + if (test_get_tcp_counters(sk, &c)) + test_error("test_get_tcp_counters()"); + synchronize_threads(); /* 1: init skpair & read nscounters */ + + ret = __test_skpair_poll(sk, write, TEST_TIMEOUT_SEC, &c, cond, err); + test_tcp_counters_free(&c); + return ret; +} + +int _test_skpair_connect_poll(int sk, const char *device, + void *addr, size_t addr_sz, + test_cnt condition, volatile int *err) +{ + struct tcp_counters c; + int ret; + + *err = 0; + if (test_get_tcp_counters(sk, &c)) + test_error("test_get_tcp_counters()"); + synchronize_threads(); /* 1: init skpair & read nscounters */ + ret = __test_connect_socket(sk, device, addr, addr_sz, true); + if (ret < 0) { + test_tcp_counters_free(&c); + return (*err = ret); + } + ret = __test_skpair_poll(sk, 1, TEST_TIMEOUT_SEC, &c, condition, err); + if (ret < 0) + close(sk); + test_tcp_counters_free(&c); + return ret; +} + int __test_set_md5(int sk, void *addr, size_t addr_sz, uint8_t prefix, int vrf, const char *password) { @@ -333,12 +404,12 @@ do { \ return 0; } -int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out) +int test_get_tcp_counters(int sk, struct tcp_counters *out) { struct tcp_ao_getsockopt *key_dump; socklen_t key_dump_sz = sizeof(*key_dump); struct tcp_ao_info_opt info = {}; - bool c1, c2, c3, c4, c5; + bool c1, c2, c3, c4, c5, c6, c7, c8; struct netstat *ns; int err, nr_keys; @@ -346,25 +417,30 @@ int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out) /* per-netns */ ns = netstat_read(); - out->netns_ao_good = netstat_get(ns, "TCPAOGood", &c1); - out->netns_ao_bad = netstat_get(ns, "TCPAOBad", &c2); - out->netns_ao_key_not_found = netstat_get(ns, "TCPAOKeyNotFound", &c3); - out->netns_ao_required = netstat_get(ns, "TCPAORequired", &c4); - out->netns_ao_dropped_icmp = netstat_get(ns, "TCPAODroppedIcmps", &c5); + out->ao.netns_ao_good = netstat_get(ns, "TCPAOGood", &c1); + out->ao.netns_ao_bad = netstat_get(ns, "TCPAOBad", &c2); + out->ao.netns_ao_key_not_found = netstat_get(ns, "TCPAOKeyNotFound", &c3); + out->ao.netns_ao_required = netstat_get(ns, "TCPAORequired", &c4); + out->ao.netns_ao_dropped_icmp = netstat_get(ns, "TCPAODroppedIcmps", &c5); + out->netns_md5_notfound = netstat_get(ns, "TCPMD5NotFound", &c6); + out->netns_md5_unexpected = netstat_get(ns, "TCPMD5Unexpected", &c7); + out->netns_md5_failure = netstat_get(ns, "TCPMD5Failure", &c8); netstat_free(ns); - if (c1 || c2 || c3 || c4 || c5) + if (c1 || c2 || c3 || c4 || c5 || c6 || c7 || c8) return -EOPNOTSUPP; err = test_get_ao_info(sk, &info); + if (err == -ENOENT) + return 0; if (err) return err; /* per-socket */ - out->ao_info_pkt_good = info.pkt_good; - out->ao_info_pkt_bad = info.pkt_bad; - out->ao_info_pkt_key_not_found = info.pkt_key_not_found; - out->ao_info_pkt_ao_required = info.pkt_ao_required; - out->ao_info_pkt_dropped_icmp = info.pkt_dropped_icmp; + out->ao.ao_info_pkt_good = info.pkt_good; + out->ao.ao_info_pkt_bad = info.pkt_bad; + out->ao.ao_info_pkt_key_not_found = info.pkt_key_not_found; + out->ao.ao_info_pkt_ao_required = info.pkt_ao_required; + out->ao.ao_info_pkt_dropped_icmp = info.pkt_dropped_icmp; /* per-key */ nr_keys = test_get_ao_keys_nr(sk); @@ -372,7 +448,7 @@ int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out) return nr_keys; if (nr_keys == 0) test_error("test_get_ao_keys_nr() == 0"); - out->nr_keys = (size_t)nr_keys; + out->ao.nr_keys = (size_t)nr_keys; key_dump = calloc(nr_keys, key_dump_sz); if (!key_dump) return -errno; @@ -386,72 +462,84 @@ int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out) return -errno; } - out->key_cnts = calloc(nr_keys, sizeof(out->key_cnts[0])); - if (!out->key_cnts) { + out->ao.key_cnts = calloc(nr_keys, sizeof(out->ao.key_cnts[0])); + if (!out->ao.key_cnts) { free(key_dump); return -errno; } while (nr_keys--) { - out->key_cnts[nr_keys].sndid = key_dump[nr_keys].sndid; - out->key_cnts[nr_keys].rcvid = key_dump[nr_keys].rcvid; - out->key_cnts[nr_keys].pkt_good = key_dump[nr_keys].pkt_good; - out->key_cnts[nr_keys].pkt_bad = key_dump[nr_keys].pkt_bad; + out->ao.key_cnts[nr_keys].sndid = key_dump[nr_keys].sndid; + out->ao.key_cnts[nr_keys].rcvid = key_dump[nr_keys].rcvid; + out->ao.key_cnts[nr_keys].pkt_good = key_dump[nr_keys].pkt_good; + out->ao.key_cnts[nr_keys].pkt_bad = key_dump[nr_keys].pkt_bad; } free(key_dump); return 0; } -int __test_tcp_ao_counters_cmp(const char *tst_name, - struct tcp_ao_counters *before, - struct tcp_ao_counters *after, - test_cnt expected) +test_cnt test_cmp_counters(struct tcp_counters *before, + struct tcp_counters *after) +{ +#define __cmp(cnt, e_cnt) \ +do { \ + if (before->cnt > after->cnt) \ + test_error("counter " __stringify(cnt) " decreased"); \ + if (before->cnt != after->cnt) \ + ret |= e_cnt; \ +} while (0) + + test_cnt ret = 0; + size_t i; + + if (before->ao.nr_keys != after->ao.nr_keys) + test_error("the number of keys has changed"); + + _for_each_counter(__cmp); + + i = before->ao.nr_keys; + while (i--) { + __cmp(ao.key_cnts[i].pkt_good, TEST_CNT_KEY_GOOD); + __cmp(ao.key_cnts[i].pkt_bad, TEST_CNT_KEY_BAD); + } +#undef __cmp + return ret; +} + +int test_assert_counters_sk(const char *tst_name, + struct tcp_counters *before, + struct tcp_counters *after, + test_cnt expected) { -#define __cmp_ao(cnt, expecting_inc) \ +#define __cmp_ao(cnt, e_cnt) \ do { \ if (before->cnt > after->cnt) { \ test_fail("%s: Decreased counter " __stringify(cnt) " %" PRIu64 " > %" PRIu64, \ - tst_name ?: "", before->cnt, after->cnt); \ + tst_name ?: "", before->cnt, after->cnt); \ return -1; \ } \ - if ((before->cnt != after->cnt) != (expecting_inc)) { \ + if ((before->cnt != after->cnt) != !!(expected & e_cnt)) { \ test_fail("%s: Counter " __stringify(cnt) " was %sexpected to increase %" PRIu64 " => %" PRIu64, \ - tst_name ?: "", (expecting_inc) ? "" : "not ", \ + tst_name ?: "", (expected & e_cnt) ? "" : "not ", \ before->cnt, after->cnt); \ return -1; \ } \ -} while(0) +} while (0) errno = 0; - /* per-netns */ - __cmp_ao(netns_ao_good, !!(expected & TEST_CNT_NS_GOOD)); - __cmp_ao(netns_ao_bad, !!(expected & TEST_CNT_NS_BAD)); - __cmp_ao(netns_ao_key_not_found, - !!(expected & TEST_CNT_NS_KEY_NOT_FOUND)); - __cmp_ao(netns_ao_required, !!(expected & TEST_CNT_NS_AO_REQUIRED)); - __cmp_ao(netns_ao_dropped_icmp, - !!(expected & TEST_CNT_NS_DROPPED_ICMP)); - /* per-socket */ - __cmp_ao(ao_info_pkt_good, !!(expected & TEST_CNT_SOCK_GOOD)); - __cmp_ao(ao_info_pkt_bad, !!(expected & TEST_CNT_SOCK_BAD)); - __cmp_ao(ao_info_pkt_key_not_found, - !!(expected & TEST_CNT_SOCK_KEY_NOT_FOUND)); - __cmp_ao(ao_info_pkt_ao_required, !!(expected & TEST_CNT_SOCK_AO_REQUIRED)); - __cmp_ao(ao_info_pkt_dropped_icmp, - !!(expected & TEST_CNT_SOCK_DROPPED_ICMP)); + _for_each_counter(__cmp_ao); return 0; #undef __cmp_ao } -int test_tcp_ao_key_counters_cmp(const char *tst_name, - struct tcp_ao_counters *before, - struct tcp_ao_counters *after, - test_cnt expected, - int sndid, int rcvid) +int test_assert_counters_key(const char *tst_name, + struct tcp_ao_counters *before, + struct tcp_ao_counters *after, + test_cnt expected, int sndid, int rcvid) { size_t i; -#define __cmp_ao(i, cnt, expecting_inc) \ +#define __cmp_ao(i, cnt, e_cnt) \ do { \ if (before->key_cnts[i].cnt > after->key_cnts[i].cnt) { \ test_fail("%s: Decreased counter " __stringify(cnt) " %" PRIu64 " > %" PRIu64 " for key %u:%u", \ @@ -461,16 +549,16 @@ do { \ before->key_cnts[i].rcvid); \ return -1; \ } \ - if ((before->key_cnts[i].cnt != after->key_cnts[i].cnt) != (expecting_inc)) { \ + if ((before->key_cnts[i].cnt != after->key_cnts[i].cnt) != !!(expected & e_cnt)) { \ test_fail("%s: Counter " __stringify(cnt) " was %sexpected to increase %" PRIu64 " => %" PRIu64 " for key %u:%u", \ - tst_name ?: "", (expecting_inc) ? "" : "not ",\ + tst_name ?: "", (expected & e_cnt) ? "" : "not ",\ before->key_cnts[i].cnt, \ after->key_cnts[i].cnt, \ before->key_cnts[i].sndid, \ before->key_cnts[i].rcvid); \ return -1; \ } \ -} while(0) +} while (0) if (before->nr_keys != after->nr_keys) { test_fail("%s: Keys changed on the socket %zu != %zu", @@ -485,20 +573,22 @@ do { \ continue; if (rcvid >= 0 && before->key_cnts[i].rcvid != rcvid) continue; - __cmp_ao(i, pkt_good, !!(expected & TEST_CNT_KEY_GOOD)); - __cmp_ao(i, pkt_bad, !!(expected & TEST_CNT_KEY_BAD)); + __cmp_ao(i, pkt_good, TEST_CNT_KEY_GOOD); + __cmp_ao(i, pkt_bad, TEST_CNT_KEY_BAD); } return 0; #undef __cmp_ao } -void test_tcp_ao_counters_free(struct tcp_ao_counters *cnts) +void test_tcp_counters_free(struct tcp_counters *cnts) { - free(cnts->key_cnts); + free(cnts->ao.key_cnts); } #define TEST_BUF_SIZE 4096 -ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec) +static ssize_t _test_server_run(int sk, ssize_t quota, struct tcp_counters *c, + test_cnt cond, volatile int *err, + time_t timeout_sec) { ssize_t total = 0; @@ -507,7 +597,7 @@ ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec) ssize_t bytes, sent; int ret; - ret = test_wait_fd(sk, timeout_sec, 0); + ret = __test_skpair_poll(sk, 0, timeout_sec, c, cond, err); if (ret) return ret; @@ -518,7 +608,7 @@ ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec) if (bytes == 0) break; - ret = test_wait_fd(sk, timeout_sec, 1); + ret = __test_skpair_poll(sk, 1, timeout_sec, c, cond, err); if (ret) return ret; @@ -533,13 +623,41 @@ ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec) return total; } -ssize_t test_client_loop(int sk, char *buf, size_t buf_sz, - const size_t msg_len, time_t timeout_sec) +ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec) +{ + return _test_server_run(sk, quota, NULL, 0, NULL, + timeout_sec ?: TEST_TIMEOUT_SEC); +} + +int test_skpair_server(int sk, ssize_t quota, test_cnt cond, volatile int *err) +{ + struct tcp_counters c; + ssize_t ret; + + *err = 0; + if (test_get_tcp_counters(sk, &c)) + test_error("test_get_tcp_counters()"); + synchronize_threads(); /* 1: init skpair & read nscounters */ + + ret = _test_server_run(sk, quota, &c, cond, err, TEST_TIMEOUT_SEC); + test_tcp_counters_free(&c); + return ret; +} + +static ssize_t test_client_loop(int sk, size_t buf_sz, const size_t msg_len, + struct tcp_counters *c, test_cnt cond, + volatile int *err) { char msg[msg_len]; int nodelay = 1; + char *buf; size_t i; + buf = alloca(buf_sz); + if (!buf) + return -ENOMEM; + randomize_buffer(buf, buf_sz); + if (setsockopt(sk, IPPROTO_TCP, TCP_NODELAY, &nodelay, sizeof(nodelay))) test_error("setsockopt(TCP_NODELAY)"); @@ -547,7 +665,7 @@ ssize_t test_client_loop(int sk, char *buf, size_t buf_sz, size_t sent, bytes = min(msg_len, buf_sz - i); int ret; - ret = test_wait_fd(sk, timeout_sec, 1); + ret = __test_skpair_poll(sk, 1, TEST_TIMEOUT_SEC, c, cond, err); if (ret) return ret; @@ -561,7 +679,8 @@ ssize_t test_client_loop(int sk, char *buf, size_t buf_sz, do { ssize_t got; - ret = test_wait_fd(sk, timeout_sec, 0); + ret = __test_skpair_poll(sk, 0, TEST_TIMEOUT_SEC, + c, cond, err); if (ret) return ret; @@ -580,15 +699,31 @@ ssize_t test_client_loop(int sk, char *buf, size_t buf_sz, return i; } -int test_client_verify(int sk, const size_t msg_len, const size_t nr, - time_t timeout_sec) +int test_client_verify(int sk, const size_t msg_len, const size_t nr) { size_t buf_sz = msg_len * nr; - char *buf = alloca(buf_sz); ssize_t ret; - randomize_buffer(buf, buf_sz); - ret = test_client_loop(sk, buf, buf_sz, msg_len, timeout_sec); + ret = test_client_loop(sk, buf_sz, msg_len, NULL, 0, NULL); + if (ret < 0) + return (int)ret; + return ret != buf_sz ? -1 : 0; +} + +int test_skpair_client(int sk, const size_t msg_len, const size_t nr, + test_cnt cond, volatile int *err) +{ + struct tcp_counters c; + size_t buf_sz = msg_len * nr; + ssize_t ret; + + *err = 0; + if (test_get_tcp_counters(sk, &c)) + test_error("test_get_tcp_counters()"); + synchronize_threads(); /* 1: init skpair & read nscounters */ + + ret = test_client_loop(sk, buf_sz, msg_len, &c, cond, err); + test_tcp_counters_free(&c); if (ret < 0) return (int)ret; return ret != buf_sz ? -1 : 0; diff --git a/tools/testing/selftests/net/tcp_ao/restore.c b/tools/testing/selftests/net/tcp_ao/restore.c index ecc6f1e3a414..9a059b6c4523 100644 --- a/tools/testing/selftests/net/tcp_ao/restore.c +++ b/tools/testing/selftests/net/tcp_ao/restore.c @@ -16,11 +16,11 @@ const size_t quota = nr_packets * msg_len; static void try_server_run(const char *tst_name, unsigned int port, fault_t inj, test_cnt cnt_expected) { + test_cnt poll_cnt = (cnt_expected == TEST_CNT_GOOD) ? 0 : cnt_expected; const char *cnt_name = "TCPAOGood"; - struct tcp_ao_counters ao1, ao2; + struct tcp_counters cnt1, cnt2; uint64_t before_cnt, after_cnt; - int sk, lsk; - time_t timeout; + int sk, lsk, dummy; ssize_t bytes; if (fault(TIMEOUT)) @@ -48,11 +48,10 @@ static void try_server_run(const char *tst_name, unsigned int port, } before_cnt = netstat_get_one(cnt_name, NULL); - if (test_get_tcp_ao_counters(sk, &ao1)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &cnt1)) + test_error("test_get_tcp_counters()"); - timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; - bytes = test_server_run(sk, quota, timeout); + bytes = test_skpair_server(sk, quota, poll_cnt, &dummy); if (fault(TIMEOUT)) { if (bytes > 0) test_fail("%s: server served: %zd", tst_name, bytes); @@ -65,17 +64,17 @@ static void try_server_run(const char *tst_name, unsigned int port, test_ok("%s: server alive", tst_name); } synchronize_threads(); /* 3: counters checks */ - if (test_get_tcp_ao_counters(sk, &ao2)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &cnt2)) + test_error("test_get_tcp_counters()"); after_cnt = netstat_get_one(cnt_name, NULL); - test_tcp_ao_counters_cmp(tst_name, &ao1, &ao2, cnt_expected); + test_assert_counters(tst_name, &cnt1, &cnt2, cnt_expected); if (after_cnt <= before_cnt) { - test_fail("%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64, - tst_name, cnt_name, after_cnt, before_cnt); + test_fail("%s(server): %s counter did not increase: %" PRIu64 " <= %" PRIu64, + tst_name, cnt_name, after_cnt, before_cnt); } else { - test_ok("%s: counter %s increased %" PRIu64 " => %" PRIu64, + test_ok("%s(server): counter %s increased %" PRIu64 " => %" PRIu64, tst_name, cnt_name, before_cnt, after_cnt); } @@ -92,16 +91,16 @@ static void *server_fn(void *arg) { unsigned int port = test_server_port; - try_server_run("TCP-AO migrate to another socket", port++, + try_server_run("TCP-AO migrate to another socket (server)", port++, 0, TEST_CNT_GOOD); - try_server_run("TCP-AO with wrong send ISN", port++, + try_server_run("TCP-AO with wrong send ISN (server)", port++, FAULT_TIMEOUT, TEST_CNT_BAD); - try_server_run("TCP-AO with wrong receive ISN", port++, + try_server_run("TCP-AO with wrong receive ISN (server)", port++, FAULT_TIMEOUT, TEST_CNT_BAD); - try_server_run("TCP-AO with wrong send SEQ ext number", port++, + try_server_run("TCP-AO with wrong send SEQ ext number (server)", port++, FAULT_TIMEOUT, TEST_CNT_BAD); - try_server_run("TCP-AO with wrong receive SEQ ext number", port++, - FAULT_TIMEOUT, TEST_CNT_NS_BAD | TEST_CNT_GOOD); + try_server_run("TCP-AO with wrong receive SEQ ext number (server)", + port++, FAULT_TIMEOUT, TEST_CNT_NS_BAD | TEST_CNT_GOOD); synchronize_threads(); /* don't race to exit: client exits */ return NULL; @@ -125,7 +124,7 @@ static void test_get_sk_checkpoint(unsigned int server_port, sockaddr_af *saddr, test_error("failed to connect()"); synchronize_threads(); /* 2: accepted => send data */ - if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) + if (test_client_verify(sk, msg_len, nr_packets)) test_fail("pre-migrate verify failed"); test_enable_repair(sk); @@ -139,11 +138,11 @@ static void test_sk_restore(const char *tst_name, unsigned int server_port, struct tcp_ao_repair *ao_img, fault_t inj, test_cnt cnt_expected) { + test_cnt poll_cnt = (cnt_expected == TEST_CNT_GOOD) ? 0 : cnt_expected; const char *cnt_name = "TCPAOGood"; - struct tcp_ao_counters ao1, ao2; + struct tcp_counters cnt1, cnt2; uint64_t before_cnt, after_cnt; - time_t timeout; - int sk; + int sk, dummy; if (fault(TIMEOUT)) cnt_name = "TCPAOBad"; @@ -159,30 +158,30 @@ static void test_sk_restore(const char *tst_name, unsigned int server_port, test_error("setsockopt(TCP_AO_ADD_KEY)"); test_ao_restore(sk, ao_img); - if (test_get_tcp_ao_counters(sk, &ao1)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &cnt1)) + test_error("test_get_tcp_counters()"); test_disable_repair(sk); test_sock_state_free(img); - timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; - if (test_client_verify(sk, msg_len, nr_packets, timeout)) { + if (test_skpair_client(sk, msg_len, nr_packets, poll_cnt, &dummy)) { if (fault(TIMEOUT)) test_ok("%s: post-migrate connection is broken", tst_name); else test_fail("%s: post-migrate connection is working", tst_name); } else { if (fault(TIMEOUT)) - test_fail("%s: post-migrate connection still working", tst_name); + test_fail("%s: post-migrate connection is working", tst_name); else test_ok("%s: post-migrate connection is alive", tst_name); } + synchronize_threads(); /* 3: counters checks */ - if (test_get_tcp_ao_counters(sk, &ao2)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &cnt2)) + test_error("test_get_tcp_counters()"); after_cnt = netstat_get_one(cnt_name, NULL); - test_tcp_ao_counters_cmp(tst_name, &ao1, &ao2, cnt_expected); + test_assert_counters(tst_name, &cnt1, &cnt2, cnt_expected); if (after_cnt <= before_cnt) { test_fail("%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64, @@ -203,7 +202,7 @@ static void *client_fn(void *arg) sockaddr_af saddr; test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); - test_sk_restore("TCP-AO migrate to another socket", port++, + test_sk_restore("TCP-AO migrate to another socket (client)", port++, &saddr, &tcp_img, &ao_img, 0, TEST_CNT_GOOD); test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); @@ -212,7 +211,7 @@ static void *client_fn(void *arg) -1, port, 0, -1, -1, -1, -1, -1, 100, 100, -1); trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_dest, this_ip_addr, port, -1, 0, -1, -1, -1, -1, -1, 100, 100, -1); - test_sk_restore("TCP-AO with wrong send ISN", port++, + test_sk_restore("TCP-AO with wrong send ISN (client)", port++, &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_BAD); test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); @@ -221,7 +220,7 @@ static void *client_fn(void *arg) -1, port, 0, -1, -1, -1, -1, -1, 100, 100, -1); trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_dest, this_ip_addr, port, -1, 0, -1, -1, -1, -1, -1, 100, 100, -1); - test_sk_restore("TCP-AO with wrong receive ISN", port++, + test_sk_restore("TCP-AO with wrong receive ISN (client)", port++, &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_BAD); test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); @@ -229,8 +228,8 @@ static void *client_fn(void *arg) trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_addr, this_ip_dest, -1, port, 0, -1, -1, -1, -1, -1, 100, 100, -1); /* not expecting server => client mismatches as only snd sne is broken */ - test_sk_restore("TCP-AO with wrong send SEQ ext number", port++, - &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, + test_sk_restore("TCP-AO with wrong send SEQ ext number (client)", + port++, &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_NS_BAD | TEST_CNT_GOOD); test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); @@ -238,8 +237,8 @@ static void *client_fn(void *arg) /* not expecting client => server mismatches as only rcv sne is broken */ trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_dest, this_ip_addr, port, -1, 0, -1, -1, -1, -1, -1, 100, 100, -1); - test_sk_restore("TCP-AO with wrong receive SEQ ext number", port++, - &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, + test_sk_restore("TCP-AO with wrong receive SEQ ext number (client)", + port++, &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_NS_GOOD | TEST_CNT_BAD); return NULL; diff --git a/tools/testing/selftests/net/tcp_ao/rst.c b/tools/testing/selftests/net/tcp_ao/rst.c index 6364facaa63e..883cddf377cf 100644 --- a/tools/testing/selftests/net/tcp_ao/rst.c +++ b/tools/testing/selftests/net/tcp_ao/rst.c @@ -84,15 +84,15 @@ static void close_forced(int sk) static void test_server_active_rst(unsigned int port) { - struct tcp_ao_counters cnt1, cnt2; + struct tcp_counters cnt1, cnt2; ssize_t bytes; int sk, lsk; lsk = test_listen_socket(this_ip_addr, port, backlog); if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) test_error("setsockopt(TCP_AO_ADD_KEY)"); - if (test_get_tcp_ao_counters(lsk, &cnt1)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(lsk, &cnt1)) + test_error("test_get_tcp_counters()"); synchronize_threads(); /* 1: MKT added */ if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0)) @@ -103,8 +103,8 @@ static void test_server_active_rst(unsigned int port) test_error("accept()"); synchronize_threads(); /* 2: connection accept()ed, another queued */ - if (test_get_tcp_ao_counters(lsk, &cnt2)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(lsk, &cnt2)) + test_error("test_get_tcp_counters()"); synchronize_threads(); /* 3: close listen socket */ close(lsk); @@ -120,7 +120,7 @@ static void test_server_active_rst(unsigned int port) synchronize_threads(); /* 5: closed active sk */ synchronize_threads(); /* 6: counters checks */ - if (test_tcp_ao_counters_cmp("active RST server", &cnt1, &cnt2, TEST_CNT_GOOD)) + if (test_assert_counters("active RST server", &cnt1, &cnt2, TEST_CNT_GOOD)) test_fail("MKT counters (server) have not only good packets"); else test_ok("MKT counters are good on server"); @@ -128,7 +128,7 @@ static void test_server_active_rst(unsigned int port) static void test_server_passive_rst(unsigned int port) { - struct tcp_ao_counters ao1, ao2; + struct tcp_counters cnt1, cnt2; int sk, lsk; ssize_t bytes; @@ -147,8 +147,8 @@ static void test_server_passive_rst(unsigned int port) synchronize_threads(); /* 2: accepted => send data */ close(lsk); - if (test_get_tcp_ao_counters(sk, &ao1)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &cnt1)) + test_error("test_get_tcp_counters()"); bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC); if (bytes != quota) { @@ -160,12 +160,12 @@ static void test_server_passive_rst(unsigned int port) synchronize_threads(); /* 3: checkpoint the client */ synchronize_threads(); /* 4: close the server, creating twsk */ - if (test_get_tcp_ao_counters(sk, &ao2)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &cnt2)) + test_error("test_get_tcp_counters()"); close(sk); synchronize_threads(); /* 5: restore the socket, send more data */ - test_tcp_ao_counters_cmp("passive RST server", &ao1, &ao2, TEST_CNT_GOOD); + test_assert_counters("passive RST server", &cnt1, &cnt2, TEST_CNT_GOOD); synchronize_threads(); /* 6: server exits */ } @@ -271,8 +271,7 @@ static void test_client_active_rst(unsigned int port) synchronize_threads(); /* 1: MKT added */ for (i = 0; i < last; i++) { - err = _test_connect_socket(sk[i], this_ip_dest, port, - (i == 0) ? TEST_TIMEOUT_SEC : -1); + err = _test_connect_socket(sk[i], this_ip_dest, port, i != 0); if (err < 0) test_error("failed to connect()"); } @@ -283,12 +282,12 @@ static void test_client_active_rst(unsigned int port) test_error("test_wait_fds(): %d", err); /* async connect() with third sk to get into request_sock_queue */ - err = _test_connect_socket(sk[last], this_ip_dest, port, -1); + err = _test_connect_socket(sk[last], this_ip_dest, port, 1); if (err < 0) test_error("failed to connect()"); synchronize_threads(); /* 3: close listen socket */ - if (test_client_verify(sk[0], packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC)) + if (test_client_verify(sk[0], packet_sz, quota / packet_sz)) test_fail("Failed to send data on connected socket"); else test_ok("Verified established tcp connection"); @@ -323,7 +322,7 @@ static void test_client_active_rst(unsigned int port) static void test_client_passive_rst(unsigned int port) { - struct tcp_ao_counters ao1, ao2; + struct tcp_counters cnt1, cnt2; struct tcp_ao_repair ao_img; struct tcp_sock_state img; sockaddr_af saddr; @@ -341,7 +340,7 @@ static void test_client_passive_rst(unsigned int port) test_error("failed to connect()"); synchronize_threads(); /* 2: accepted => send data */ - if (test_client_verify(sk, packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC)) + if (test_client_verify(sk, packet_sz, quota / packet_sz)) test_fail("Failed to send data on connected socket"); else test_ok("Verified established tcp connection"); @@ -397,8 +396,8 @@ static void test_client_passive_rst(unsigned int port) test_error("setsockopt(TCP_AO_ADD_KEY)"); test_ao_restore(sk, &ao_img); - if (test_get_tcp_ao_counters(sk, &ao1)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &cnt1)) + test_error("test_get_tcp_counters()"); test_disable_repair(sk); test_sock_state_free(&img); @@ -417,7 +416,7 @@ static void test_client_passive_rst(unsigned int port) * IP 10.0.254.1.7011 > 10.0.1.1.59772: Flags [R], seq 3215596252, win 0, * options [tcp-ao keyid 100 rnextkeyid 100 mac 0x0bcfbbf497bce844312304b2], length 0 */ - err = test_client_verify(sk, packet_sz, quota / packet_sz, 2 * TEST_TIMEOUT_SEC); + err = test_client_verify(sk, packet_sz, quota / packet_sz); /* Make sure that the connection was reset, not timeouted */ if (err && err == -ECONNRESET) test_ok("client sock was passively reset post-seq-adjust"); @@ -426,12 +425,12 @@ static void test_client_passive_rst(unsigned int port) else test_fail("client sock is yet connected post-seq-adjust"); - if (test_get_tcp_ao_counters(sk, &ao2)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &cnt2)) + test_error("test_get_tcp_counters()"); synchronize_threads(); /* 6: server exits */ close(sk); - test_tcp_ao_counters_cmp("client passive RST", &ao1, &ao2, TEST_CNT_GOOD); + test_assert_counters("client passive RST", &cnt1, &cnt2, TEST_CNT_GOOD); } static void *client_fn(void *arg) diff --git a/tools/testing/selftests/net/tcp_ao/self-connect.c b/tools/testing/selftests/net/tcp_ao/self-connect.c index 3ecd2b58de6a..73b2f2276f3f 100644 --- a/tools/testing/selftests/net/tcp_ao/self-connect.c +++ b/tools/testing/selftests/net/tcp_ao/self-connect.c @@ -30,7 +30,7 @@ static void setup_lo_intf(const char *lo_intf) static void tcp_self_connect(const char *tst, unsigned int port, bool different_keyids, bool check_restore) { - struct tcp_ao_counters before_ao, after_ao; + struct tcp_counters before, after; uint64_t before_aogood, after_aogood; struct netstat *ns_before, *ns_after; const size_t nr_packets = 20; @@ -60,17 +60,17 @@ static void tcp_self_connect(const char *tst, unsigned int port, ns_before = netstat_read(); before_aogood = netstat_get(ns_before, "TCPAOGood", NULL); - if (test_get_tcp_ao_counters(sk, &before_ao)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &before)) + test_error("test_get_tcp_counters()"); if (__test_connect_socket(sk, "lo", (struct sockaddr *)&addr, - sizeof(addr), TEST_TIMEOUT_SEC) < 0) { + sizeof(addr), 0) < 0) { ns_after = netstat_read(); netstat_print_diff(ns_before, ns_after); test_error("failed to connect()"); } - if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) { + if (test_client_verify(sk, 100, nr_packets)) { test_fail("%s: tcp connection verify failed", tst); close(sk); return; @@ -78,8 +78,8 @@ static void tcp_self_connect(const char *tst, unsigned int port, ns_after = netstat_read(); after_aogood = netstat_get(ns_after, "TCPAOGood", NULL); - if (test_get_tcp_ao_counters(sk, &after_ao)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &after)) + test_error("test_get_tcp_counters()"); if (!check_restore) { /* to debug: netstat_print_diff(ns_before, ns_after); */ netstat_free(ns_before); @@ -93,7 +93,7 @@ static void tcp_self_connect(const char *tst, unsigned int port, return; } - if (test_tcp_ao_counters_cmp(tst, &before_ao, &after_ao, TEST_CNT_GOOD)) { + if (test_assert_counters(tst, &before, &after, TEST_CNT_GOOD)) { close(sk); return; } @@ -136,7 +136,7 @@ static void tcp_self_connect(const char *tst, unsigned int port, test_ao_restore(sk, &ao_img); test_disable_repair(sk); test_sock_state_free(&img); - if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) { + if (test_client_verify(sk, 100, nr_packets)) { test_fail("%s: tcp connection verify failed", tst); close(sk); return; diff --git a/tools/testing/selftests/net/tcp_ao/seq-ext.c b/tools/testing/selftests/net/tcp_ao/seq-ext.c index 8901a6785dc8..f00245263b20 100644 --- a/tools/testing/selftests/net/tcp_ao/seq-ext.c +++ b/tools/testing/selftests/net/tcp_ao/seq-ext.c @@ -40,7 +40,7 @@ static void test_adjust_seqs(struct tcp_sock_state *img, static int test_sk_restore(struct tcp_sock_state *img, struct tcp_ao_repair *ao_img, sockaddr_af *saddr, const union tcp_addr daddr, unsigned int dport, - struct tcp_ao_counters *cnt) + struct tcp_counters *cnt) { int sk; @@ -54,8 +54,8 @@ static int test_sk_restore(struct tcp_sock_state *img, test_error("setsockopt(TCP_AO_ADD_KEY)"); test_ao_restore(sk, ao_img); - if (test_get_tcp_ao_counters(sk, cnt)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, cnt)) + test_error("test_get_tcp_counters()"); test_disable_repair(sk); test_sock_state_free(img); @@ -65,7 +65,7 @@ static int test_sk_restore(struct tcp_sock_state *img, static void *server_fn(void *arg) { uint64_t before_good, after_good, after_bad; - struct tcp_ao_counters ao1, ao2; + struct tcp_counters cnt1, cnt2; struct tcp_sock_state img; struct tcp_ao_repair ao_img; sockaddr_af saddr; @@ -114,7 +114,7 @@ static void *server_fn(void *arg) test_adjust_seqs(&img, &ao_img, true); synchronize_threads(); /* 4: dump finished */ sk = test_sk_restore(&img, &ao_img, &saddr, this_ip_dest, - client_new_port, &ao1); + client_new_port, &cnt1); trace_ao_event_sne_expect(TCP_AO_SND_SNE_UPDATE, this_ip_addr, this_ip_dest, test_server_port + 1, client_new_port, 1); @@ -136,11 +136,11 @@ static void *server_fn(void *arg) } synchronize_threads(); /* 6: verify counters after SEQ-number rollover */ - if (test_get_tcp_ao_counters(sk, &ao2)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &cnt2)) + test_error("test_get_tcp_counters()"); after_good = netstat_get_one("TCPAOGood", NULL); - test_tcp_ao_counters_cmp(NULL, &ao1, &ao2, TEST_CNT_GOOD); + test_assert_counters(NULL, &cnt1, &cnt2, TEST_CNT_GOOD); if (after_good <= before_good) { test_fail("TCPAOGood counter did not increase: %" PRIu64 " <= %" PRIu64, @@ -173,7 +173,7 @@ out: static void *client_fn(void *arg) { uint64_t before_good, after_good, after_bad; - struct tcp_ao_counters ao1, ao2; + struct tcp_counters cnt1, cnt2; struct tcp_sock_state img; struct tcp_ao_repair ao_img; sockaddr_af saddr; @@ -191,7 +191,7 @@ static void *client_fn(void *arg) test_error("failed to connect()"); synchronize_threads(); /* 2: accepted => send data */ - if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) { + if (test_client_verify(sk, msg_len, nr_packets)) { test_fail("pre-migrate verify failed"); return NULL; } @@ -213,20 +213,20 @@ static void *client_fn(void *arg) test_adjust_seqs(&img, &ao_img, false); synchronize_threads(); /* 4: dump finished */ sk = test_sk_restore(&img, &ao_img, &saddr, this_ip_dest, - test_server_port + 1, &ao1); + test_server_port + 1, &cnt1); synchronize_threads(); /* 5: verify the connection during SEQ-number rollover */ - if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) + if (test_client_verify(sk, msg_len, nr_packets)) test_fail("post-migrate verify failed"); else test_ok("post-migrate connection alive"); synchronize_threads(); /* 5: verify counters after SEQ-number rollover */ - if (test_get_tcp_ao_counters(sk, &ao2)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &cnt2)) + test_error("test_get_tcp_counters()"); after_good = netstat_get_one("TCPAOGood", NULL); - test_tcp_ao_counters_cmp(NULL, &ao1, &ao2, TEST_CNT_GOOD); + test_assert_counters(NULL, &cnt1, &cnt2, TEST_CNT_GOOD); if (after_good <= before_good) { test_fail("TCPAOGood counter did not increase: %" PRIu64 " <= %" PRIu64, diff --git a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c index f779e5892bc1..a1467b64390a 100644 --- a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c +++ b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c @@ -6,6 +6,7 @@ #define fault(type) (inj == FAULT_ ## type) static const char *md5_password = "Some evil genius, enemy to mankind, must have been the first contriver."; static const char *ao_password = DEFAULT_TEST_PASSWORD; +static volatile int sk_pair; static union tcp_addr client2; static union tcp_addr client3; @@ -41,10 +42,10 @@ static void try_accept(const char *tst_name, unsigned int port, const char *cnt_name, test_cnt cnt_expected, int needs_tcp_md5, fault_t inj) { - struct tcp_ao_counters ao_cnt1, ao_cnt2; + struct tcp_counters cnt1, cnt2; uint64_t before_cnt = 0, after_cnt = 0; /* silence GCC */ - int lsk, err, sk = 0; - time_t timeout; + test_cnt poll_cnt = (cnt_expected == TEST_CNT_GOOD) ? 0 : cnt_expected; + int lsk, err, sk = -1; if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5)) return; @@ -63,22 +64,25 @@ static void try_accept(const char *tst_name, unsigned int port, if (cnt_name) before_cnt = netstat_get_one(cnt_name, NULL); - if (ao_addr && test_get_tcp_ao_counters(lsk, &ao_cnt1)) - test_error("test_get_tcp_ao_counters()"); + if (ao_addr && test_get_tcp_counters(lsk, &cnt1)) + test_error("test_get_tcp_counters()"); synchronize_threads(); /* preparations done */ - timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; - err = test_wait_fd(lsk, timeout, 0); + err = test_skpair_wait_poll(lsk, 0, poll_cnt, &sk_pair); synchronize_threads(); /* connect()/accept() timeouts */ if (err == -ETIMEDOUT) { + sk_pair = err; if (!fault(TIMEOUT)) - test_fail("timed out for accept()"); + test_fail("%s: timed out for accept()", tst_name); + } else if (err == -EKEYREJECTED) { + if (!fault(KEYREJECT)) + test_fail("%s: key was rejected", tst_name); } else if (err < 0) { - test_error("test_wait_fd()"); + test_error("test_skpair_wait_poll()"); } else { if (fault(TIMEOUT)) - test_fail("ready to accept"); + test_fail("%s: ready to accept", tst_name); sk = accept(lsk, NULL, NULL); if (sk < 0) { @@ -89,8 +93,8 @@ static void try_accept(const char *tst_name, unsigned int port, } } - if (ao_addr && test_get_tcp_ao_counters(lsk, &ao_cnt2)) - test_error("test_get_tcp_ao_counters()"); + if (ao_addr && test_get_tcp_counters(lsk, &cnt2)) + test_error("test_get_tcp_counters()"); close(lsk); if (!cnt_name) { @@ -108,11 +112,11 @@ static void try_accept(const char *tst_name, unsigned int port, tst_name, cnt_name, before_cnt, after_cnt); } if (ao_addr) - test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected); + test_assert_counters(tst_name, &cnt1, &cnt2, cnt_expected); out: synchronize_threads(); /* test_kill_sk() */ - if (sk > 0) + if (sk >= 0) test_kill_sk(sk); } @@ -153,78 +157,82 @@ static void *server_fn(void *arg) server_add_routes(); - try_accept("AO server (INADDR_ANY): AO client", port++, NULL, 0, + try_accept("[server] AO server (INADDR_ANY): AO client", port++, NULL, 0, &addr_any, 0, 0, 100, 100, 0, "TCPAOGood", TEST_CNT_GOOD, 0, 0); - try_accept("AO server (INADDR_ANY): MD5 client", port++, NULL, 0, + try_accept("[server] AO server (INADDR_ANY): MD5 client", port++, NULL, 0, &addr_any, 0, 0, 100, 100, 0, "TCPMD5Unexpected", - 0, 1, FAULT_TIMEOUT); - try_accept("AO server (INADDR_ANY): no sign client", port++, NULL, 0, + TEST_CNT_NS_MD5_UNEXPECTED, 1, FAULT_TIMEOUT); + try_accept("[server] AO server (INADDR_ANY): no sign client", port++, NULL, 0, &addr_any, 0, 0, 100, 100, 0, "TCPAORequired", TEST_CNT_AO_REQUIRED, 0, FAULT_TIMEOUT); - try_accept("AO server (AO_REQUIRED): AO client", port++, NULL, 0, + try_accept("[server] AO server (AO_REQUIRED): AO client", port++, NULL, 0, &this_ip_dest, TEST_PREFIX, true, 100, 100, 0, "TCPAOGood", TEST_CNT_GOOD, 0, 0); - try_accept("AO server (AO_REQUIRED): unsigned client", port++, NULL, 0, + try_accept("[server] AO server (AO_REQUIRED): unsigned client", port++, NULL, 0, &this_ip_dest, TEST_PREFIX, true, 100, 100, 0, "TCPAORequired", TEST_CNT_AO_REQUIRED, 0, FAULT_TIMEOUT); - try_accept("MD5 server (INADDR_ANY): AO client", port++, &addr_any, 0, + try_accept("[server] MD5 server (INADDR_ANY): AO client", port++, &addr_any, 0, NULL, 0, 0, 0, 0, 0, "TCPAOKeyNotFound", - 0, 1, FAULT_TIMEOUT); - try_accept("MD5 server (INADDR_ANY): MD5 client", port++, &addr_any, 0, + TEST_CNT_NS_KEY_NOT_FOUND, 1, FAULT_TIMEOUT); + try_accept("[server] MD5 server (INADDR_ANY): MD5 client", port++, &addr_any, 0, NULL, 0, 0, 0, 0, 0, NULL, 0, 1, 0); - try_accept("MD5 server (INADDR_ANY): no sign client", port++, &addr_any, + try_accept("[server] MD5 server (INADDR_ANY): no sign client", port++, &addr_any, 0, NULL, 0, 0, 0, 0, 0, "TCPMD5NotFound", - 0, 1, FAULT_TIMEOUT); + TEST_CNT_NS_MD5_NOT_FOUND, 1, FAULT_TIMEOUT); - try_accept("no sign server: AO client", port++, NULL, 0, + try_accept("[server] no sign server: AO client", port++, NULL, 0, NULL, 0, 0, 0, 0, 0, "TCPAOKeyNotFound", - TEST_CNT_AO_KEY_NOT_FOUND, 0, FAULT_TIMEOUT); - try_accept("no sign server: MD5 client", port++, NULL, 0, + TEST_CNT_NS_KEY_NOT_FOUND, 0, FAULT_TIMEOUT); + try_accept("[server] no sign server: MD5 client", port++, NULL, 0, NULL, 0, 0, 0, 0, 0, "TCPMD5Unexpected", - 0, 1, FAULT_TIMEOUT); - try_accept("no sign server: no sign client", port++, NULL, 0, + TEST_CNT_NS_MD5_UNEXPECTED, 1, FAULT_TIMEOUT); + try_accept("[server] no sign server: no sign client", port++, NULL, 0, NULL, 0, 0, 0, 0, 0, "CurrEstab", 0, 0, 0); - try_accept("AO+MD5 server: AO client (matching)", port++, + try_accept("[server] AO+MD5 server: AO client (matching)", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, 100, 100, 0, "TCPAOGood", TEST_CNT_GOOD, 1, 0); - try_accept("AO+MD5 server: AO client (misconfig, matching MD5)", port++, + try_accept("[server] AO+MD5 server: AO client (misconfig, matching MD5)", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, 100, 100, 0, "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND, 1, FAULT_TIMEOUT); - try_accept("AO+MD5 server: AO client (misconfig, non-matching)", port++, + try_accept("[server] AO+MD5 server: AO client (misconfig, non-matching)", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, 100, 100, 0, "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND, 1, FAULT_TIMEOUT); - try_accept("AO+MD5 server: MD5 client (matching)", port++, + try_accept("[server] AO+MD5 server: MD5 client (matching)", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, 100, 100, 0, NULL, 0, 1, 0); - try_accept("AO+MD5 server: MD5 client (misconfig, matching AO)", port++, + try_accept("[server] AO+MD5 server: MD5 client (misconfig, matching AO)", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, - 100, 100, 0, "TCPMD5Unexpected", 0, 1, FAULT_TIMEOUT); - try_accept("AO+MD5 server: MD5 client (misconfig, non-matching)", port++, + 100, 100, 0, "TCPMD5Unexpected", + TEST_CNT_NS_MD5_UNEXPECTED, 1, FAULT_TIMEOUT); + try_accept("[server] AO+MD5 server: MD5 client (misconfig, non-matching)", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, - 100, 100, 0, "TCPMD5Unexpected", 0, 1, FAULT_TIMEOUT); - try_accept("AO+MD5 server: no sign client (unmatched)", port++, + 100, 100, 0, "TCPMD5Unexpected", + TEST_CNT_NS_MD5_UNEXPECTED, 1, FAULT_TIMEOUT); + try_accept("[server] AO+MD5 server: no sign client (unmatched)", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, 100, 100, 0, "CurrEstab", 0, 1, 0); - try_accept("AO+MD5 server: no sign client (misconfig, matching AO)", + try_accept("[server] AO+MD5 server: no sign client (misconfig, matching AO)", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, 100, 100, 0, "TCPAORequired", TEST_CNT_AO_REQUIRED, 1, FAULT_TIMEOUT); - try_accept("AO+MD5 server: no sign client (misconfig, matching MD5)", + try_accept("[server] AO+MD5 server: no sign client (misconfig, matching MD5)", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, - 100, 100, 0, "TCPMD5NotFound", 0, 1, FAULT_TIMEOUT); + 100, 100, 0, "TCPMD5NotFound", + TEST_CNT_NS_MD5_NOT_FOUND, 1, FAULT_TIMEOUT); - try_accept("AO+MD5 server: client with both [TCP-MD5] and TCP-AO keys", + /* Key rejected by the other side, failing short through skpair */ + try_accept("[server] AO+MD5 server: client with both [TCP-MD5] and TCP-AO keys", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, - 100, 100, 0, NULL, 0, 1, FAULT_TIMEOUT); - try_accept("AO+MD5 server: client with both TCP-MD5 and [TCP-AO] keys", + 100, 100, 0, NULL, 0, 1, FAULT_KEYREJECT); + try_accept("[server] AO+MD5 server: client with both TCP-MD5 and [TCP-AO] keys", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, - 100, 100, 0, NULL, 0, 1, FAULT_TIMEOUT); + 100, 100, 0, NULL, 0, 1, FAULT_KEYREJECT); server_add_fail_tests(&port); @@ -259,7 +267,6 @@ static void try_connect(const char *tst_name, unsigned int port, uint8_t sndid, uint8_t rcvid, uint8_t vrf, fault_t inj, int needs_tcp_md5, union tcp_addr *bind_addr) { - time_t timeout; int sk, ret; if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5)) @@ -281,11 +288,10 @@ static void try_connect(const char *tst_name, unsigned int port, synchronize_threads(); /* preparations done */ - timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; - ret = _test_connect_socket(sk, this_ip_dest, port, timeout); - + ret = test_skpair_connect_poll(sk, this_ip_dest, port, 0, &sk_pair); synchronize_threads(); /* connect()/accept() timeouts */ if (ret < 0) { + sk_pair = ret; if (fault(KEYREJECT) && ret == -EKEYREJECTED) test_ok("%s: connect() was prevented", tst_name); else if (ret == -ETIMEDOUT && fault(TIMEOUT)) @@ -305,8 +311,7 @@ static void try_connect(const char *tst_name, unsigned int port, out: synchronize_threads(); /* test_kill_sk() */ - /* _test_connect_socket() cleans up on failure */ - if (ret > 0) + if (ret > 0) /* test_skpair_connect_poll() cleans up on failure */ test_kill_sk(sk); } @@ -437,7 +442,6 @@ static void try_to_add(const char *tst_name, unsigned int port, int ao_vrf, uint8_t sndid, uint8_t rcvid, int needs_tcp_md5, fault_t inj) { - time_t timeout; int sk, ret; if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5)) @@ -450,11 +454,10 @@ static void try_to_add(const char *tst_name, unsigned int port, synchronize_threads(); /* preparations done */ - timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; - ret = _test_connect_socket(sk, this_ip_dest, port, timeout); + ret = test_skpair_connect_poll(sk, this_ip_dest, port, 0, &sk_pair); synchronize_threads(); /* connect()/accept() timeouts */ - if (ret <= 0) { + if (ret < 0) { test_error("%s: connect() returned %d", tst_name, ret); goto out; } @@ -490,8 +493,7 @@ static void try_to_add(const char *tst_name, unsigned int port, out: synchronize_threads(); /* test_kill_sk() */ - /* _test_connect_socket() cleans up on failure */ - if (ret > 0) + if (ret > 0) /* test_skpair_connect_poll() cleans up on failure */ test_kill_sk(sk); } diff --git a/tools/testing/selftests/net/test_blackhole_dev.sh b/tools/testing/selftests/net/test_blackhole_dev.sh deleted file mode 100755 index 3119b80e711f..000000000000 --- a/tools/testing/selftests/net/test_blackhole_dev.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 -# Runs blackhole-dev test using blackhole-dev kernel module - -if /sbin/modprobe -q test_blackhole_dev ; then - /sbin/modprobe -q -r test_blackhole_dev; - echo "test_blackhole_dev: ok"; -else - echo "test_blackhole_dev: [FAIL]"; - exit 1; -fi diff --git a/tools/testing/selftests/net/test_so_rcv.sh b/tools/testing/selftests/net/test_so_rcv.sh new file mode 100755 index 000000000000..d8aa4362879d --- /dev/null +++ b/tools/testing/selftests/net/test_so_rcv.sh @@ -0,0 +1,73 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +HOSTS=("127.0.0.1" "::1") +PORT=1234 +TOTAL_TESTS=0 +FAILED_TESTS=0 + +declare -A TESTS=( + ["SO_RCVPRIORITY"]="-P 2" + ["SO_RCVMARK"]="-M 3" +) + +check_result() { + ((TOTAL_TESTS++)) + if [ "$1" -ne 0 ]; then + ((FAILED_TESTS++)) + fi +} + +cleanup() +{ + cleanup_ns $NS +} + +trap cleanup EXIT + +setup_ns NS + +for HOST in "${HOSTS[@]}"; do + PROTOCOL="IPv4" + if [[ "$HOST" == "::1" ]]; then + PROTOCOL="IPv6" + fi + for test_name in "${!TESTS[@]}"; do + echo "Running $test_name test, $PROTOCOL" + arg=${TESTS[$test_name]} + + ip netns exec $NS ./so_rcv_listener $arg $HOST $PORT & + LISTENER_PID=$! + + sleep 0.5 + + if ! ip netns exec $NS ./cmsg_sender $arg $HOST $PORT; then + echo "Sender failed for $test_name, $PROTOCOL" + kill "$LISTENER_PID" 2>/dev/null + wait "$LISTENER_PID" + check_result 1 + continue + fi + + wait "$LISTENER_PID" + LISTENER_EXIT_CODE=$? + + if [ "$LISTENER_EXIT_CODE" -eq 0 ]; then + echo "Rcv test OK for $test_name, $PROTOCOL" + check_result 0 + else + echo "Rcv test FAILED for $test_name, $PROTOCOL" + check_result 1 + fi + done +done + +if [ "$FAILED_TESTS" -ne 0 ]; then + echo "FAIL - $FAILED_TESTS/$TOTAL_TESTS tests failed" + exit ${KSFT_FAIL} +else + echo "OK - All $TOTAL_TESTS tests passed" + exit ${KSFT_PASS} +fi diff --git a/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh b/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh index 2d442cdab11e..062f957950af 100755 --- a/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh +++ b/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh @@ -1,29 +1,114 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -# Check FDB default-remote handling across "ip link set". +ALL_TESTS=" + test_set_remote + test_change_mc_remote +" +source lib.sh check_remotes() { local what=$1; shift local N=$(bridge fdb sh dev vx | grep 00:00:00:00:00:00 | wc -l) - echo -ne "expected two remotes after $what\t" - if [[ $N != 2 ]]; then - echo "[FAIL]" - EXIT_STATUS=1 + ((N == 2)) + check_err $? "expected 2 remotes after $what, got $N" +} + +# Check FDB default-remote handling across "ip link set". +test_set_remote() +{ + RET=0 + + ip_link_add vx up type vxlan id 2000 dstport 4789 + bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.20 self permanent + bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.30 self permanent + check_remotes "fdb append" + + ip link set dev vx type vxlan remote 192.0.2.30 + check_remotes "link set" + + log_test 'FDB default-remote handling across "ip link set"' +} + +fmt_remote() +{ + local addr=$1; shift + + if [[ $addr == 224.* ]]; then + echo "group $addr" else - echo "[ OK ]" + echo "remote $addr" fi } -ip link add name vx up type vxlan id 2000 dstport 4789 -bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.20 self permanent -bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.30 self permanent -check_remotes "fdb append" +change_remote() +{ + local remote=$1; shift + + ip link set dev vx type vxlan $(fmt_remote $remote) dev v1 +} + +check_membership() +{ + local check_vec=("$@") + + local memberships + memberships=$( + netstat -n --groups | + sed -n '/^v1\b/p' | + grep -o '[^ ]*$' + ) + check_err $? "Couldn't obtain group memberships" + + local item + for item in "${check_vec[@]}"; do + eval "local $item" + echo "$memberships" | grep -q "\b$group\b" + check_err_fail $fail $? "$group is_ex reported in IGMP query response" + done +} + +test_change_mc_remote() +{ + check_command netstat || return + + ip_link_add v1 up type veth peer name v2 + ip_link_set_up v2 + + RET=0 + + ip_link_add vx up type vxlan dstport 4789 \ + local 192.0.2.1 $(fmt_remote 224.1.1.1) dev v1 vni 1000 + + check_membership "group=224.1.1.1 fail=0" \ + "group=224.1.1.2 fail=1" \ + "group=224.1.1.3 fail=1" + + log_test "MC group report after VXLAN creation" + + RET=0 + + change_remote 224.1.1.2 + check_membership "group=224.1.1.1 fail=1" \ + "group=224.1.1.2 fail=0" \ + "group=224.1.1.3 fail=1" + + log_test "MC group report after changing VXLAN remote MC->MC" + + RET=0 + + change_remote 192.0.2.2 + check_membership "group=224.1.1.1 fail=1" \ + "group=224.1.1.2 fail=1" \ + "group=224.1.1.3 fail=1" + + log_test "MC group report after changing VXLAN remote MC->UC" +} + +trap defer_scopes_cleanup EXIT -ip link set dev vx type vxlan remote 192.0.2.30 -check_remotes "link set" +tests_run -ip link del dev vx exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/ynl.mk b/tools/testing/selftests/net/ynl.mk index 12e7cae251be..e907c2751956 100644 --- a/tools/testing/selftests/net/ynl.mk +++ b/tools/testing/selftests/net/ynl.mk @@ -27,7 +27,8 @@ $(OUTPUT)/.libynl-$(YNL_GENS_HASH).sig: $(OUTPUT)/libynl.a: $(YNL_SPECS) $(OUTPUT)/.libynl-$(YNL_GENS_HASH).sig $(Q)rm -f $(top_srcdir)/tools/net/ynl/libynl.a - $(Q)$(MAKE) -C $(top_srcdir)/tools/net/ynl GENS="$(YNL_GENS)" libynl.a + $(Q)$(MAKE) -C $(top_srcdir)/tools/net/ynl \ + GENS="$(YNL_GENS)" RSTS="" libynl.a $(Q)cp $(top_srcdir)/tools/net/ynl/libynl.a $(OUTPUT)/libynl.a EXTRA_CLEAN += \ diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile index 7d14a7c0cb62..58bcbbd029bc 100644 --- a/tools/testing/selftests/nolibc/Makefile +++ b/tools/testing/selftests/nolibc/Makefile @@ -47,6 +47,7 @@ XARCH_riscv = riscv64 XARCH = $(or $(XARCH_$(ARCH)),$(ARCH)) # map from user input variants to their kernel supported architectures +ARCH_armthumb = arm ARCH_ppc = powerpc ARCH_ppc64 = powerpc ARCH_ppc64le = powerpc @@ -54,6 +55,7 @@ ARCH_mips32le = mips ARCH_mips32be = mips ARCH_riscv32 = riscv ARCH_riscv64 = riscv +ARCH_s390x = s390 ARCH := $(or $(ARCH_$(XARCH)),$(XARCH)) # kernel image names by architecture @@ -62,6 +64,7 @@ IMAGE_x86_64 = arch/x86/boot/bzImage IMAGE_x86 = arch/x86/boot/bzImage IMAGE_arm64 = arch/arm64/boot/Image IMAGE_arm = arch/arm/boot/zImage +IMAGE_armthumb = arch/arm/boot/zImage IMAGE_mips32le = vmlinuz IMAGE_mips32be = vmlinuz IMAGE_ppc = vmlinux @@ -70,6 +73,7 @@ IMAGE_ppc64le = arch/powerpc/boot/zImage IMAGE_riscv = arch/riscv/boot/Image IMAGE_riscv32 = arch/riscv/boot/Image IMAGE_riscv64 = arch/riscv/boot/Image +IMAGE_s390x = arch/s390/boot/bzImage IMAGE_s390 = arch/s390/boot/bzImage IMAGE_loongarch = arch/loongarch/boot/vmlinuz.efi IMAGE = $(objtree)/$(IMAGE_$(XARCH)) @@ -81,19 +85,20 @@ DEFCONFIG_x86_64 = defconfig DEFCONFIG_x86 = defconfig DEFCONFIG_arm64 = defconfig DEFCONFIG_arm = multi_v7_defconfig +DEFCONFIG_armthumb = multi_v7_defconfig DEFCONFIG_mips32le = malta_defconfig -DEFCONFIG_mips32be = malta_defconfig +DEFCONFIG_mips32be = malta_defconfig generic/eb.config DEFCONFIG_ppc = pmac32_defconfig DEFCONFIG_ppc64 = powernv_be_defconfig DEFCONFIG_ppc64le = powernv_defconfig DEFCONFIG_riscv = defconfig DEFCONFIG_riscv32 = rv32_defconfig DEFCONFIG_riscv64 = defconfig -DEFCONFIG_s390 = defconfig +DEFCONFIG_s390x = defconfig +DEFCONFIG_s390 = defconfig compat.config DEFCONFIG_loongarch = defconfig DEFCONFIG = $(DEFCONFIG_$(XARCH)) -EXTRACONFIG_mips32be = -d CONFIG_CPU_LITTLE_ENDIAN -e CONFIG_CPU_BIG_ENDIAN EXTRACONFIG = $(EXTRACONFIG_$(XARCH)) # optional tests to run (default = all) @@ -105,6 +110,7 @@ QEMU_ARCH_x86_64 = x86_64 QEMU_ARCH_x86 = x86_64 QEMU_ARCH_arm64 = aarch64 QEMU_ARCH_arm = arm +QEMU_ARCH_armthumb = arm QEMU_ARCH_mips32le = mipsel # works with malta_defconfig QEMU_ARCH_mips32be = mips QEMU_ARCH_ppc = ppc @@ -113,6 +119,7 @@ QEMU_ARCH_ppc64le = ppc64 QEMU_ARCH_riscv = riscv64 QEMU_ARCH_riscv32 = riscv32 QEMU_ARCH_riscv64 = riscv64 +QEMU_ARCH_s390x = s390x QEMU_ARCH_s390 = s390x QEMU_ARCH_loongarch = loongarch64 QEMU_ARCH = $(QEMU_ARCH_$(XARCH)) @@ -133,6 +140,7 @@ QEMU_ARGS_x86_64 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $( QEMU_ARGS_x86 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_arm64 = -M virt -cpu cortex-a53 -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_arm = -M virt -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_armthumb = -M virt -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_mips32le = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_mips32be = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_ppc = -M g3beige -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" @@ -141,6 +149,7 @@ QEMU_ARGS_ppc64le = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC QEMU_ARGS_riscv = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_riscv32 = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_riscv64 = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_s390x = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_s390 = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_loongarch = -M virt -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS = -m 1G $(QEMU_ARGS_$(XARCH)) $(QEMU_ARGS_BIOS) $(QEMU_ARGS_EXTRA) @@ -156,15 +165,18 @@ Q=@ endif CFLAGS_i386 = $(call cc-option,-m32) +CFLAGS_arm = -marm +CFLAGS_armthumb = -mthumb -march=armv6t2 CFLAGS_ppc = -m32 -mbig-endian -mno-vsx $(call cc-option,-mmultiple) CFLAGS_ppc64 = -m64 -mbig-endian -mno-vsx $(call cc-option,-mmultiple) CFLAGS_ppc64le = -m64 -mlittle-endian -mno-vsx $(call cc-option,-mabi=elfv2) -CFLAGS_s390 = -m64 +CFLAGS_s390x = -m64 +CFLAGS_s390 = -m31 CFLAGS_mips32le = -EL -mabi=32 -fPIC CFLAGS_mips32be = -EB -mabi=32 CFLAGS_STACKPROTECTOR ?= $(call cc-option,-mstack-protector-guard=global $(call cc-option,-fstack-protector-all)) CFLAGS ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 -W -Wall -Wextra \ - $(call cc-option,-fno-stack-protector) \ + $(call cc-option,-fno-stack-protector) $(call cc-option,-Wmissing-prototypes) \ $(CFLAGS_$(XARCH)) $(CFLAGS_STACKPROTECTOR) $(CFLAGS_EXTRA) LDFLAGS := @@ -220,7 +232,7 @@ all: run sysroot: sysroot/$(ARCH)/include -sysroot/$(ARCH)/include: +sysroot/$(ARCH)/include: | defconfig $(Q)rm -rf sysroot/$(ARCH) sysroot/sysroot $(QUIET_MKDIR)mkdir -p sysroot $(Q)$(MAKE) -C $(srctree) outputmakefile @@ -264,16 +276,16 @@ initramfs: nolibc-test $(Q)cp nolibc-test initramfs/init defconfig: - $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) mrproper $(DEFCONFIG) prepare + $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(DEFCONFIG) $(Q)if [ -n "$(EXTRACONFIG)" ]; then \ $(srctree)/scripts/config --file $(objtree)/.config $(EXTRACONFIG); \ $(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) olddefconfig < /dev/null; \ fi -kernel: +kernel: | defconfig $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) < /dev/null -kernel-standalone: initramfs +kernel-standalone: initramfs | defconfig $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) CONFIG_INITRAMFS_SOURCE=$(CURDIR)/initramfs < /dev/null # run the tests after building the kernel diff --git a/tools/testing/selftests/nolibc/nolibc-test-linkage.c b/tools/testing/selftests/nolibc/nolibc-test-linkage.c index 5ff4c8a1db2a..a7ca8325863f 100644 --- a/tools/testing/selftests/nolibc/nolibc-test-linkage.c +++ b/tools/testing/selftests/nolibc/nolibc-test-linkage.c @@ -11,16 +11,16 @@ void *linkage_test_errno_addr(void) return &errno; } -int linkage_test_constructor_test_value; +int linkage_test_constructor_test_value = 0; __attribute__((constructor)) static void constructor1(void) { - linkage_test_constructor_test_value = 2; + linkage_test_constructor_test_value |= 1 << 0; } __attribute__((constructor)) static void constructor2(void) { - linkage_test_constructor_test_value *= 3; + linkage_test_constructor_test_value |= 1 << 1; } diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index 0e0e3b48a8c3..5884a891c491 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -43,6 +43,8 @@ #endif #endif +#pragma GCC diagnostic ignored "-Wmissing-prototypes" + #include "nolibc-test-linkage.h" /* for the type of int_fast16_t and int_fast32_t, musl differs from glibc and nolibc */ @@ -690,14 +692,14 @@ int expect_strtox(int llen, void *func, const char *input, int base, intmax_t ex __attribute__((constructor)) static void constructor1(void) { - constructor_test_value = 1; + constructor_test_value |= 1 << 0; } __attribute__((constructor)) static void constructor2(int argc, char **argv, char **envp) { if (argc && argv && envp) - constructor_test_value *= 2; + constructor_test_value |= 1 << 1; } int run_startup(int min, int max) @@ -736,9 +738,9 @@ int run_startup(int min, int max) CASE_TEST(environ_HOME); EXPECT_PTRNZ(1, getenv("HOME")); break; CASE_TEST(auxv_addr); EXPECT_PTRGT(test_auxv != (void *)-1, test_auxv, brk); break; CASE_TEST(auxv_AT_UID); EXPECT_EQ(1, getauxval(AT_UID), getuid()); break; - CASE_TEST(constructor); EXPECT_EQ(1, constructor_test_value, 2); break; + CASE_TEST(constructor); EXPECT_EQ(is_nolibc, constructor_test_value, 0x3); break; CASE_TEST(linkage_errno); EXPECT_PTREQ(1, linkage_test_errno_addr(), &errno); break; - CASE_TEST(linkage_constr); EXPECT_EQ(1, linkage_test_constructor_test_value, 6); break; + CASE_TEST(linkage_constr); EXPECT_EQ(1, linkage_test_constructor_test_value, 0x3); break; case __LINE__: return ret; /* must be last */ /* note: do not set any defaults so as to permit holes above */ @@ -767,6 +769,44 @@ int test_getdents64(const char *dir) return ret; } +static int test_dirent(void) +{ + int comm = 0, cmdline = 0; + struct dirent dirent, *result; + DIR *dir; + int ret; + + dir = opendir("/proc/self"); + if (!dir) + return 1; + + while (1) { + errno = 0; + ret = readdir_r(dir, &dirent, &result); + if (ret != 0) + return 1; + if (!result) + break; + + if (strcmp(dirent.d_name, "comm") == 0) + comm++; + else if (strcmp(dirent.d_name, "cmdline") == 0) + cmdline++; + } + + if (errno) + return 1; + + ret = closedir(dir); + if (ret) + return 1; + + if (comm != 1 || cmdline != 1) + return 1; + + return 0; +} + int test_getpagesize(void) { int x = getpagesize(); @@ -988,6 +1028,22 @@ int test_rlimit(void) return 0; } +int test_openat(void) +{ + int dev, null; + + dev = openat(AT_FDCWD, "/dev", O_DIRECTORY); + if (dev < 0) + return -1; + + null = openat(dev, "null", O_RDONLY); + close(dev); + if (null < 0) + return -1; + + close(null); + return 0; +} /* Run syscall tests between IDs <min> and <max>. * Return 0 on success, non-zero on failure. @@ -1059,6 +1115,7 @@ int run_syscall(int min, int max) CASE_TEST(fork); EXPECT_SYSZR(1, test_fork()); break; CASE_TEST(getdents64_root); EXPECT_SYSNE(1, test_getdents64("/"), -1); break; CASE_TEST(getdents64_null); EXPECT_SYSER(1, test_getdents64("/dev/null"), -1, ENOTDIR); break; + CASE_TEST(directories); EXPECT_SYSZR(proc, test_dirent()); break; CASE_TEST(gettimeofday_tv); EXPECT_SYSZR(1, gettimeofday(&tv, NULL)); break; CASE_TEST(gettimeofday_tv_tz);EXPECT_SYSZR(1, gettimeofday(&tv, &tz)); break; CASE_TEST(getpagesize); EXPECT_SYSZR(1, test_getpagesize()); break; @@ -1073,8 +1130,9 @@ int run_syscall(int min, int max) CASE_TEST(mmap_bad); EXPECT_PTRER(1, mmap(NULL, 0, PROT_READ, MAP_PRIVATE, 0, 0), MAP_FAILED, EINVAL); break; CASE_TEST(munmap_bad); EXPECT_SYSER(1, munmap(NULL, 0), -1, EINVAL); break; CASE_TEST(mmap_munmap_good); EXPECT_SYSZR(1, test_mmap_munmap()); break; - CASE_TEST(open_tty); EXPECT_SYSNE(1, tmp = open("/dev/null", 0), -1); if (tmp != -1) close(tmp); break; - CASE_TEST(open_blah); EXPECT_SYSER(1, tmp = open("/proc/self/blah", 0), -1, ENOENT); if (tmp != -1) close(tmp); break; + CASE_TEST(open_tty); EXPECT_SYSNE(1, tmp = open("/dev/null", O_RDONLY), -1); if (tmp != -1) close(tmp); break; + CASE_TEST(open_blah); EXPECT_SYSER(1, tmp = open("/proc/self/blah", O_RDONLY), -1, ENOENT); if (tmp != -1) close(tmp); break; + CASE_TEST(openat_dir); EXPECT_SYSZR(1, test_openat()); break; CASE_TEST(pipe); EXPECT_SYSZR(1, test_pipe()); break; CASE_TEST(poll_null); EXPECT_SYSZR(1, poll(NULL, 0, 0)); break; CASE_TEST(poll_stdout); EXPECT_SYSNE(1, ({ struct pollfd fds = { 1, POLLOUT, 0}; poll(&fds, 1, 0); }), -1); break; @@ -1284,6 +1342,73 @@ static int expect_vfprintf(int llen, int c, const char *expected, const char *fm return ret; } +static int test_scanf(void) +{ + unsigned long long ull; + unsigned long ul; + unsigned int u; + long long ll; + long l; + void *p; + int i; + + /* return __LINE__ to point to the specific failure */ + + /* test EOF */ + if (sscanf("", "foo") != EOF) + return __LINE__; + + /* test simple literal without placeholder */ + if (sscanf("foo", "foo") != 0) + return __LINE__; + + /* test single placeholder */ + if (sscanf("123", "%d", &i) != 1) + return __LINE__; + + if (i != 123) + return __LINE__; + + /* test multiple place holders and separators */ + if (sscanf("a123b456c0x90", "a%db%uc%p", &i, &u, &p) != 3) + return __LINE__; + + if (i != 123) + return __LINE__; + + if (u != 456) + return __LINE__; + + if (p != (void *)0x90) + return __LINE__; + + /* test space handling */ + if (sscanf("a b1", "a b%d", &i) != 1) + return __LINE__; + + if (i != 1) + return __LINE__; + + /* test literal percent */ + if (sscanf("a%1", "a%%%d", &i) != 1) + return __LINE__; + + if (i != 1) + return __LINE__; + + /* test stdint.h types */ + if (sscanf("1|2|3|4|5|6", + "%d|%ld|%lld|%u|%lu|%llu", + &i, &l, &ll, &u, &ul, &ull) != 6) + return __LINE__; + + if (i != 1 || l != 2 || ll != 3 || + u != 4 || ul != 5 || ull != 6) + return __LINE__; + + return 0; +} + static int run_vfprintf(int min, int max) { int test; @@ -1305,6 +1430,7 @@ static int run_vfprintf(int min, int max) CASE_TEST(char); EXPECT_VFPRINTF(1, "c", "%c", 'c'); break; CASE_TEST(hex); EXPECT_VFPRINTF(1, "f", "%x", 0xf); break; CASE_TEST(pointer); EXPECT_VFPRINTF(3, "0x1", "%p", (void *) 0x1); break; + CASE_TEST(scanf); EXPECT_ZR(1, test_scanf()); break; case __LINE__: return ret; /* must be last */ /* note: do not set any defaults so as to permit holes above */ diff --git a/tools/testing/selftests/nolibc/run-tests.sh b/tools/testing/selftests/nolibc/run-tests.sh index 9c5160c53881..0299a0912d40 100755 --- a/tools/testing/selftests/nolibc/run-tests.sh +++ b/tools/testing/selftests/nolibc/run-tests.sh @@ -17,7 +17,16 @@ perform_download=0 test_mode=system werror=1 llvm= -archs="i386 x86_64 arm64 arm mips32le mips32be ppc ppc64 ppc64le riscv32 riscv64 s390 loongarch" +all_archs=( + i386 x86_64 + arm64 arm armthumb + mips32le mips32be + ppc ppc64 ppc64le + riscv32 riscv64 + s390x s390 + loongarch +) +archs="${all_archs[@]}" TEMP=$(getopt -o 'j:d:c:b:a:m:pelh' -n "$0" -- "$@") @@ -94,19 +103,21 @@ fi crosstool_arch() { case "$1" in arm64) echo aarch64;; + armthumb) echo arm;; ppc) echo powerpc;; ppc64) echo powerpc64;; ppc64le) echo powerpc64;; riscv) echo riscv64;; loongarch) echo loongarch64;; mips*) echo mips;; + s390*) echo s390;; *) echo "$1";; esac } crosstool_abi() { case "$1" in - arm) echo linux-gnueabi;; + arm | armthumb) echo linux-gnueabi;; *) echo linux;; esac } @@ -157,10 +168,6 @@ test_arch() { fi MAKE=(make -j"${nproc}" XARCH="${arch}" CROSS_COMPILE="${cross_compile}" LLVM="${llvm}" O="${build_dir}") - mkdir -p "$build_dir" - if [ "$test_mode" = "system" ] && [ ! -f "${build_dir}/.config" ]; then - swallow_output "${MAKE[@]}" defconfig - fi case "$test_mode" in 'system') test_target=run @@ -173,6 +180,13 @@ test_arch() { exit 1 esac printf '%-15s' "$arch:" + if [ "$arch" = "s390" ] && ([ "$llvm" = "1" ] || [ "$test_mode" = "user" ]); then + echo "Unsupported configuration" + return + fi + + mkdir -p "$build_dir" + swallow_output "${MAKE[@]}" defconfig swallow_output "${MAKE[@]}" CFLAGS_EXTRA="$CFLAGS_EXTRA" "$test_target" V=1 cp run.out run.out."${arch}" "${MAKE[@]}" report | grep passed diff --git a/tools/testing/selftests/pci_endpoint/pci_endpoint_test.c b/tools/testing/selftests/pci_endpoint/pci_endpoint_test.c index c267b822c108..ac26481d29d9 100644 --- a/tools/testing/selftests/pci_endpoint/pci_endpoint_test.c +++ b/tools/testing/selftests/pci_endpoint/pci_endpoint_test.c @@ -25,7 +25,7 @@ #define pci_ep_ioctl(cmd, arg) \ ({ \ ret = ioctl(self->fd, cmd, arg); \ - ret = ret < 0 ? -errno : 0; \ + ret = ret < 0 ? -errno : ret; \ }) static const char *test_device = "/dev/pci-endpoint-test.0"; @@ -65,6 +65,8 @@ TEST_F(pci_ep_bar, BAR_TEST) int ret; pci_ep_ioctl(PCITEST_BAR, variant->barno); + if (ret == -ENODATA) + SKIP(return, "BAR is disabled"); EXPECT_FALSE(ret) TH_LOG("Test failed for BAR%d", variant->barno); } @@ -97,9 +99,12 @@ TEST_F(pci_ep_basic, LEGACY_IRQ_TEST) { int ret; - pci_ep_ioctl(PCITEST_SET_IRQTYPE, 0); + pci_ep_ioctl(PCITEST_SET_IRQTYPE, PCITEST_IRQ_TYPE_INTX); ASSERT_EQ(0, ret) TH_LOG("Can't set Legacy IRQ type"); + pci_ep_ioctl(PCITEST_GET_IRQTYPE, 0); + ASSERT_EQ(PCITEST_IRQ_TYPE_INTX, ret) TH_LOG("Can't get Legacy IRQ type"); + pci_ep_ioctl(PCITEST_LEGACY_IRQ, 0); EXPECT_FALSE(ret) TH_LOG("Test failed for Legacy IRQ"); } @@ -108,9 +113,12 @@ TEST_F(pci_ep_basic, MSI_TEST) { int ret, i; - pci_ep_ioctl(PCITEST_SET_IRQTYPE, 1); + pci_ep_ioctl(PCITEST_SET_IRQTYPE, PCITEST_IRQ_TYPE_MSI); ASSERT_EQ(0, ret) TH_LOG("Can't set MSI IRQ type"); + pci_ep_ioctl(PCITEST_GET_IRQTYPE, 0); + ASSERT_EQ(PCITEST_IRQ_TYPE_MSI, ret) TH_LOG("Can't get MSI IRQ type"); + for (i = 1; i <= 32; i++) { pci_ep_ioctl(PCITEST_MSI, i); EXPECT_FALSE(ret) TH_LOG("Test failed for MSI%d", i); @@ -121,9 +129,12 @@ TEST_F(pci_ep_basic, MSIX_TEST) { int ret, i; - pci_ep_ioctl(PCITEST_SET_IRQTYPE, 2); + pci_ep_ioctl(PCITEST_SET_IRQTYPE, PCITEST_IRQ_TYPE_MSIX); ASSERT_EQ(0, ret) TH_LOG("Can't set MSI-X IRQ type"); + pci_ep_ioctl(PCITEST_GET_IRQTYPE, 0); + ASSERT_EQ(PCITEST_IRQ_TYPE_MSIX, ret) TH_LOG("Can't get MSI-X IRQ type"); + for (i = 1; i <= 2048; i++) { pci_ep_ioctl(PCITEST_MSIX, i); EXPECT_FALSE(ret) TH_LOG("Test failed for MSI-X%d", i); @@ -170,8 +181,8 @@ TEST_F(pci_ep_data_transfer, READ_TEST) if (variant->use_dma) param.flags = PCITEST_FLAGS_USE_DMA; - pci_ep_ioctl(PCITEST_SET_IRQTYPE, 1); - ASSERT_EQ(0, ret) TH_LOG("Can't set MSI IRQ type"); + pci_ep_ioctl(PCITEST_SET_IRQTYPE, PCITEST_IRQ_TYPE_AUTO); + ASSERT_EQ(0, ret) TH_LOG("Can't set AUTO IRQ type"); for (i = 0; i < ARRAY_SIZE(test_size); i++) { param.size = test_size[i]; @@ -189,8 +200,8 @@ TEST_F(pci_ep_data_transfer, WRITE_TEST) if (variant->use_dma) param.flags = PCITEST_FLAGS_USE_DMA; - pci_ep_ioctl(PCITEST_SET_IRQTYPE, 1); - ASSERT_EQ(0, ret) TH_LOG("Can't set MSI IRQ type"); + pci_ep_ioctl(PCITEST_SET_IRQTYPE, PCITEST_IRQ_TYPE_AUTO); + ASSERT_EQ(0, ret) TH_LOG("Can't set AUTO IRQ type"); for (i = 0; i < ARRAY_SIZE(test_size); i++) { param.size = test_size[i]; @@ -208,8 +219,8 @@ TEST_F(pci_ep_data_transfer, COPY_TEST) if (variant->use_dma) param.flags = PCITEST_FLAGS_USE_DMA; - pci_ep_ioctl(PCITEST_SET_IRQTYPE, 1); - ASSERT_EQ(0, ret) TH_LOG("Can't set MSI IRQ type"); + pci_ep_ioctl(PCITEST_SET_IRQTYPE, PCITEST_IRQ_TYPE_AUTO); + ASSERT_EQ(0, ret) TH_LOG("Can't set AUTO IRQ type"); for (i = 0; i < ARRAY_SIZE(test_size); i++) { param.size = test_size[i]; diff --git a/tools/testing/selftests/pcie_bwctrl/Makefile b/tools/testing/selftests/pcie_bwctrl/Makefile index 3e84e26341d1..48ec048f47af 100644 --- a/tools/testing/selftests/pcie_bwctrl/Makefile +++ b/tools/testing/selftests/pcie_bwctrl/Makefile @@ -1,2 +1,2 @@ -TEST_PROGS = set_pcie_cooling_state.sh +TEST_PROGS = set_pcie_cooling_state.sh set_pcie_speed.sh include ../lib.mk diff --git a/tools/testing/selftests/powerpc/include/pkeys.h b/tools/testing/selftests/powerpc/include/pkeys.h index 3a0129467de6..d6deb6ffa1b9 100644 --- a/tools/testing/selftests/powerpc/include/pkeys.h +++ b/tools/testing/selftests/powerpc/include/pkeys.h @@ -24,6 +24,9 @@ #undef PKEY_DISABLE_EXECUTE #define PKEY_DISABLE_EXECUTE 0x4 +#undef PKEY_UNRESTRICTED +#define PKEY_UNRESTRICTED 0x0 + /* Older versions of libc do not define this */ #ifndef SEGV_PKUERR #define SEGV_PKUERR 4 @@ -93,7 +96,7 @@ int pkeys_unsupported(void) SKIP_IF(!hash_mmu); /* Check if the system call is supported */ - pkey = sys_pkey_alloc(0, 0); + pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED); SKIP_IF(pkey < 0); sys_pkey_free(pkey); diff --git a/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c b/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c index 0af4f02669a1..29b91b7456eb 100644 --- a/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c +++ b/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c @@ -72,7 +72,7 @@ static void segv_handler(int signum, siginfo_t *sinfo, void *ctx) switch (fault_type) { case PKEY_DISABLE_ACCESS: - pkey_set_rights(fault_pkey, 0); + pkey_set_rights(fault_pkey, PKEY_UNRESTRICTED); break; case PKEY_DISABLE_EXECUTE: /* diff --git a/tools/testing/selftests/powerpc/mm/pkey_siginfo.c b/tools/testing/selftests/powerpc/mm/pkey_siginfo.c index 2db76e56d4cb..e89a164c686b 100644 --- a/tools/testing/selftests/powerpc/mm/pkey_siginfo.c +++ b/tools/testing/selftests/powerpc/mm/pkey_siginfo.c @@ -83,7 +83,7 @@ static void segv_handler(int signum, siginfo_t *sinfo, void *ctx) mprotect(pgstart, pgsize, PROT_EXEC)) _exit(1); else - pkey_set_rights(pkey, 0); + pkey_set_rights(pkey, PKEY_UNRESTRICTED); fault_count++; } diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/event_alternatives_tests_p10.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/event_alternatives_tests_p10.c index 8be7aada6523..355f8bbe06c3 100644 --- a/tools/testing/selftests/powerpc/pmu/event_code_tests/event_alternatives_tests_p10.c +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/event_alternatives_tests_p10.c @@ -26,6 +26,7 @@ static int event_alternatives_tests_p10(void) { struct event *e, events[5]; int i; + int pvr = PVR_VER(mfspr(SPRN_PVR)); /* Check for platform support for the test */ SKIP_IF(platform_check_for_tests()); @@ -36,7 +37,7 @@ static int event_alternatives_tests_p10(void) * code and using PVR will work correctly for all cases * including generic compat mode. */ - SKIP_IF(PVR_VER(mfspr(SPRN_PVR)) != POWER10); + SKIP_IF((pvr != POWER10) && (pvr != POWER11)); SKIP_IF(check_for_generic_compat_pmu()); diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/generic_events_valid_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/generic_events_valid_test.c index 0d237c15d3f2..a378fa9a5a7b 100644 --- a/tools/testing/selftests/powerpc/pmu/event_code_tests/generic_events_valid_test.c +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/generic_events_valid_test.c @@ -17,6 +17,7 @@ static int generic_events_valid_test(void) { struct event event; + int pvr = mfspr(SPRN_PVR); /* Check for platform support for the test */ SKIP_IF(platform_check_for_tests()); @@ -31,7 +32,7 @@ static int generic_events_valid_test(void) * - PERF_COUNT_HW_STALLED_CYCLES_BACKEND * - PERF_COUNT_HW_REF_CPU_CYCLES */ - if (PVR_VER(mfspr(SPRN_PVR)) == POWER10) { + if ((pvr == POWER10) || (pvr == POWER11)) { event_init_opts(&event, PERF_COUNT_HW_CPU_CYCLES, PERF_TYPE_HARDWARE, "event"); FAIL_IF(event_open(&event)); event_close(&event); diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_l2l3_sel_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_l2l3_sel_test.c index 85a636886069..e3c7a0c071e2 100644 --- a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_l2l3_sel_test.c +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_l2l3_sel_test.c @@ -30,7 +30,7 @@ static int group_constraint_l2l3_sel(void) /* * Check for platform support for the test. - * This test is only aplicable on power10 + * This test is only aplicable on ISA v3.1 */ SKIP_IF(platform_check_for_tests()); SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_radix_scope_qual_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_radix_scope_qual_test.c index 9225618b846a..9233175787cc 100644 --- a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_radix_scope_qual_test.c +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_radix_scope_qual_test.c @@ -26,7 +26,7 @@ static int group_constraint_radix_scope_qual(void) /* * Check for platform support for the test. - * This test is aplicable on power10 only. + * This test is aplicable on ISA v3.1 only. */ SKIP_IF(platform_check_for_tests()); SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_cmp_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_cmp_test.c index 9f1197104e8c..4b69e7214c0b 100644 --- a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_cmp_test.c +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_cmp_test.c @@ -25,7 +25,7 @@ /* * Testcase for group constraint check of thresh_cmp bits which is * used to program thresh compare field in Monitor Mode Control Register A - * (MMCRA: 9-18 bits for power9 and MMCRA: 8-18 bits for power10). + * (MMCRA: 9-18 bits for power9 and MMCRA: 8-18 bits for power10/power11). * All events in the group should match thresh compare bits otherwise * event_open for the group will fail. */ diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/invalid_event_code_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/invalid_event_code_test.c index f51fcab837fc..88aa7fd64ec1 100644 --- a/tools/testing/selftests/powerpc/pmu/event_code_tests/invalid_event_code_test.c +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/invalid_event_code_test.c @@ -20,7 +20,7 @@ * Some of the bits in the event code is * reserved for specific platforms. * Event code bits 52-59 are reserved in power9, - * whereas in power10, these are used for programming + * whereas in ISA v3.1, these are used for programming * Monitor Mode Control Register 3 (MMCR3). * Bit 9 in event code is reserved in power9, * whereas it is used for programming "radix_scope_qual" @@ -39,7 +39,7 @@ static int invalid_event_code(void) /* * Events using MMCR3 bits and radix scope qual bits - * should fail in power9 and should succeed in power10. + * should fail in power9 and should succeed in power10 ( ISA v3.1 ) * Init the events and check for pass/fail in event open. */ if (have_hwcap2(PPC_FEATURE2_ARCH_3_1)) { diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/reserved_bits_mmcra_sample_elig_mode_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/reserved_bits_mmcra_sample_elig_mode_test.c index 4c119c821b99..757f454c0dc0 100644 --- a/tools/testing/selftests/powerpc/pmu/event_code_tests/reserved_bits_mmcra_sample_elig_mode_test.c +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/reserved_bits_mmcra_sample_elig_mode_test.c @@ -21,6 +21,7 @@ static int reserved_bits_mmcra_sample_elig_mode(void) { struct event event; + int pvr = PVR_VER(mfspr(SPRN_PVR)); /* Check for platform support for the test */ SKIP_IF(platform_check_for_tests()); @@ -56,10 +57,10 @@ static int reserved_bits_mmcra_sample_elig_mode(void) /* * MMCRA Random Sampling Mode (SM) value 0x10 - * is reserved in power10 and 0xC is reserved in + * is reserved in power10/power11 and 0xC is reserved in * power9. */ - if (PVR_VER(mfspr(SPRN_PVR)) == POWER10) { + if ((pvr == POWER10) || (pvr == POWER11)) { event_init(&event, 0x100401e0); FAIL_IF(!event_open(&event)); } else if (PVR_VER(mfspr(SPRN_PVR)) == POWER9) { diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile index 9f79bec5fce7..0c4ed299c3b8 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile @@ -5,7 +5,8 @@ TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test mmcr3_src_test mmcra_thresh_marked_sample_test mmcra_thresh_cmp_test \ mmcra_bhrb_ind_call_test mmcra_bhrb_any_test mmcra_bhrb_cond_test \ mmcra_bhrb_disable_test bhrb_no_crash_wo_pmu_test intr_regs_no_crash_wo_pmu_test \ - bhrb_filter_map_test mmcr1_sel_unit_cache_test mmcra_bhrb_disable_no_branch_test + bhrb_filter_map_test mmcr1_sel_unit_cache_test mmcra_bhrb_disable_no_branch_test \ + check_extended_reg_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/bhrb_filter_map_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/bhrb_filter_map_test.c index 3f43c315c666..3ad71d49ae65 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/bhrb_filter_map_test.c +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/bhrb_filter_map_test.c @@ -14,7 +14,7 @@ * A perf sampling test to check bhrb filter * map. All the branch filters are not supported * in powerpc. Supported filters in: - * power10: any, any_call, ind_call, cond + * power10/power11: any, any_call, ind_call, cond * power9: any, any_call * * Testcase checks event open for invalid bhrb filter @@ -24,13 +24,13 @@ */ /* Invalid types for powerpc */ -/* Valid bhrb filters in power9/power10 */ +/* Valid bhrb filters in power9/power10/power11 */ int bhrb_filter_map_valid_common[] = { PERF_SAMPLE_BRANCH_ANY, PERF_SAMPLE_BRANCH_ANY_CALL, }; -/* Valid bhrb filters in power10 */ +/* Valid bhrb filters in power10/power11 */ int bhrb_filter_map_valid_p10[] = { PERF_SAMPLE_BRANCH_IND_CALL, PERF_SAMPLE_BRANCH_COND, @@ -69,7 +69,7 @@ static int bhrb_filter_map_test(void) FAIL_IF(!event_open(&event)); } - /* valid filter maps for power9/power10 which are expected to pass in event_open */ + /* valid filter maps for power9/power10/power11 which are expected to pass in event_open */ for (i = 0; i < ARRAY_SIZE(bhrb_filter_map_valid_common); i++) { event.attr.branch_sample_type = bhrb_filter_map_valid_common[i]; FAIL_IF(event_open(&event)); @@ -77,19 +77,22 @@ static int bhrb_filter_map_test(void) } /* - * filter maps which are valid in power10 and invalid in power9. + * filter maps which are valid in power10/power11 and invalid in power9. * PVR check is used here since PMU specific data like bhrb filter * alternative tests is handled by respective PMU driver code and * using PVR will work correctly for all cases including generic * compat mode. */ - if (PVR_VER(mfspr(SPRN_PVR)) == POWER10) { + switch (PVR_VER(mfspr(SPRN_PVR))) { + case POWER11: + case POWER10: for (i = 0; i < ARRAY_SIZE(bhrb_filter_map_valid_p10); i++) { event.attr.branch_sample_type = bhrb_filter_map_valid_p10[i]; FAIL_IF(event_open(&event)); event_close(&event); } - } else { + break; + default: for (i = 0; i < ARRAY_SIZE(bhrb_filter_map_valid_p10); i++) { event.attr.branch_sample_type = bhrb_filter_map_valid_p10[i]; FAIL_IF(!event_open(&event)); diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/check_extended_reg_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/check_extended_reg_test.c new file mode 100644 index 000000000000..865bc69f920c --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/check_extended_reg_test.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2024, Kajol Jain, IBM Corp. + */ + +#include <stdio.h> +#include <stdlib.h> + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +/* + * A perf sampling test to check extended + * reg support. + */ +static int check_extended_reg_test(void) +{ + /* Check for platform support for the test */ + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00)); + + /* Skip for Generic compat PMU */ + SKIP_IF(check_for_generic_compat_pmu()); + + /* Check if platform supports extended regs */ + platform_extended_mask = perf_get_platform_reg_mask(); + FAIL_IF(check_extended_regs_support()); + + return 0; +} + +int main(void) +{ + return test_harness(check_extended_reg_test, "check_extended_reg_test"); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c index eac6420abdf1..8a538b6182a1 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c @@ -59,6 +59,7 @@ static void init_ev_encodes(void) ev_shift_thd_stop = 32; switch (pvr) { + case POWER11: case POWER10: ev_mask_thd_cmp = 0x3ffff; ev_shift_thd_cmp = 0; @@ -91,7 +92,7 @@ static void init_ev_encodes(void) } /* Return the extended regs mask value */ -static u64 perf_get_platform_reg_mask(void) +u64 perf_get_platform_reg_mask(void) { if (have_hwcap2(PPC_FEATURE2_ARCH_3_1)) return PERF_POWER10_MASK; @@ -129,8 +130,14 @@ int platform_check_for_tests(void) * Check for supported platforms * for sampling test */ - if ((pvr != POWER10) && (pvr != POWER9)) + switch (pvr) { + case POWER11: + case POWER10: + case POWER9: + break; + default: goto out; + } /* * Check PMU driver registered by looking for @@ -490,6 +497,13 @@ int get_thresh_cmp_val(struct event event) * Utility function to check for generic compat PMU * by comparing base_platform value from auxv and real * PVR value. + * auxv_base_platform() func gives information of "base platform" + * corresponding to PVR value. Incase, if the distro doesn't + * support platform PVR (missing cputable support), base platform + * in auxv will have a default value other than the real PVR's. + * In this case, ISAv3 PMU (generic compat PMU) will be registered + * in the system. auxv_generic_compat_pmu() makes use of the base + * platform value from auxv to do this check. */ static bool auxv_generic_compat_pmu(void) { @@ -499,6 +513,8 @@ static bool auxv_generic_compat_pmu(void) base_pvr = POWER9; else if (!strcmp(auxv_base_platform(), "power10")) base_pvr = POWER10; + else if (!strcmp(auxv_base_platform(), "power11")) + base_pvr = POWER11; return (!base_pvr); } diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h index 64e25cce1435..357e9f0fc0f7 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h @@ -8,10 +8,12 @@ #include <sys/stat.h> #include "../event.h" +#define POWER11 0x82 #define POWER10 0x80 #define POWER9 0x4e #define PERF_POWER9_MASK 0x7f8ffffffffffff #define PERF_POWER10_MASK 0x7ffffffffffffff +#define PERF_POWER11_MASK PERF_POWER10_MASK #define MMCR0_FC56 0x00000010UL /* freeze counters 5 and 6 */ #define MMCR0_PMCCEXT 0x00000200UL /* PMCCEXT control */ @@ -37,6 +39,8 @@ extern int pvr; extern u64 platform_extended_mask; extern int check_pvr_for_sampling_tests(void); extern int platform_check_for_tests(void); +extern int check_extended_regs_support(void); +extern u64 perf_get_platform_reg_mask(void); /* * Event code field extraction macro. @@ -165,21 +169,21 @@ static inline int get_mmcr2_fcta(u64 mmcr2, int pmc) static inline int get_mmcr2_l2l3(u64 mmcr2, int pmc) { - if (pvr == POWER10) + if (have_hwcap2(PPC_FEATURE2_ARCH_3_1)) return ((mmcr2 & 0xf8) >> 3); return 0; } static inline int get_mmcr3_src(u64 mmcr3, int pmc) { - if (pvr != POWER10) + if (!have_hwcap2(PPC_FEATURE2_ARCH_3_1)) return 0; return ((mmcr3 >> ((49 - (15 * ((pmc) - 1))))) & 0x7fff); } static inline int get_mmcra_thd_cmp(u64 mmcra, int pmc) { - if (pvr == POWER10) + if (have_hwcap2(PPC_FEATURE2_ARCH_3_1)) return ((mmcra >> 45) & 0x7ff); return ((mmcra >> 45) & 0x3ff); } @@ -191,7 +195,7 @@ static inline int get_mmcra_sm(u64 mmcra, int pmc) static inline u64 get_mmcra_bhrb_disable(u64 mmcra, int pmc) { - if (pvr == POWER10) + if (have_hwcap2(PPC_FEATURE2_ARCH_3_1)) return mmcra & BHRB_DISABLE; return 0; } diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_cond_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_cond_test.c index 3e08176eb7f8..809de8d58b3b 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_cond_test.c +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_cond_test.c @@ -29,7 +29,7 @@ static int mmcra_bhrb_cond_test(void) /* * Check for platform support for the test. - * This test is only aplicable on power10 + * This test is only aplicable on ISA v3.1 */ SKIP_IF(check_pvr_for_sampling_tests()); SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_no_branch_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_no_branch_test.c index 488c865387e4..fa0dc15f9123 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_no_branch_test.c +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_no_branch_test.c @@ -26,7 +26,7 @@ static int mmcra_bhrb_disable_no_branch_test(void) /* * Check for platform support for the test. - * This test is only aplicable on power10 + * This test is only aplicable on ISA v3.1 */ SKIP_IF(check_pvr_for_sampling_tests()); SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_test.c index 186a853c0f62..bc3161ab003d 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_test.c +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_test.c @@ -26,7 +26,7 @@ static int mmcra_bhrb_disable_test(void) /* * Check for platform support for the test. - * This test is only aplicable on power10 + * This test is only aplicable on ISA v3.1 */ SKIP_IF(check_pvr_for_sampling_tests()); SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_ind_call_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_ind_call_test.c index f0706730c099..fd6c9f12212c 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_ind_call_test.c +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_ind_call_test.c @@ -29,7 +29,7 @@ static int mmcra_bhrb_ind_call_test(void) /* * Check for platform support for the test. - * This test is only aplicable on power10 + * This test is only aplicable on ISA v3.1 */ SKIP_IF(check_pvr_for_sampling_tests()); SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c index f061434af452..7ff53caeb4aa 100644 --- a/tools/testing/selftests/powerpc/ptrace/core-pkey.c +++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c @@ -95,16 +95,16 @@ static int child(struct shared_info *info) /* Get some pkeys so that we can change their bits in the AMR. */ pkey1 = sys_pkey_alloc(0, PKEY_DISABLE_EXECUTE); if (pkey1 < 0) { - pkey1 = sys_pkey_alloc(0, 0); + pkey1 = sys_pkey_alloc(0, PKEY_UNRESTRICTED); FAIL_IF(pkey1 < 0); disable_execute = false; } - pkey2 = sys_pkey_alloc(0, 0); + pkey2 = sys_pkey_alloc(0, PKEY_UNRESTRICTED); FAIL_IF(pkey2 < 0); - pkey3 = sys_pkey_alloc(0, 0); + pkey3 = sys_pkey_alloc(0, PKEY_UNRESTRICTED); FAIL_IF(pkey3 < 0); info->amr |= 3ul << pkeyshift(pkey1) | 2ul << pkeyshift(pkey2); diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c index fc633014424f..10f63042cf91 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c @@ -57,16 +57,16 @@ static int child(struct shared_info *info) /* Get some pkeys so that we can change their bits in the AMR. */ pkey1 = sys_pkey_alloc(0, PKEY_DISABLE_EXECUTE); if (pkey1 < 0) { - pkey1 = sys_pkey_alloc(0, 0); + pkey1 = sys_pkey_alloc(0, PKEY_UNRESTRICTED); CHILD_FAIL_IF(pkey1 < 0, &info->child_sync); disable_execute = false; } - pkey2 = sys_pkey_alloc(0, 0); + pkey2 = sys_pkey_alloc(0, PKEY_UNRESTRICTED); CHILD_FAIL_IF(pkey2 < 0, &info->child_sync); - pkey3 = sys_pkey_alloc(0, 0); + pkey3 = sys_pkey_alloc(0, PKEY_UNRESTRICTED); CHILD_FAIL_IF(pkey3 < 0, &info->child_sync); info->amr1 |= 3ul << pkeyshift(pkey1); diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c index 58064151f2c8..edc08a4433fd 100644 --- a/tools/testing/selftests/ptp/testptp.c +++ b/tools/testing/selftests/ptp/testptp.c @@ -140,6 +140,7 @@ static void usage(char *progname) " -H val set output phase to 'val' nanoseconds (requires -p)\n" " -w val set output pulse width to 'val' nanoseconds (requires -p)\n" " -P val enable or disable (val=1|0) the system clock PPS\n" + " -r open the ptp clock in readonly mode\n" " -s set the ptp clock time from the system time\n" " -S set the system time from the ptp clock time\n" " -t val shift the ptp clock time by 'val' seconds\n" @@ -188,6 +189,7 @@ int main(int argc, char *argv[]) int pin_index = -1, pin_func; int pps = -1; int seconds = 0; + int readonly = 0; int settime = 0; int channel = -1; clockid_t ext_clockid = CLOCK_REALTIME; @@ -200,7 +202,7 @@ int main(int argc, char *argv[]) progname = strrchr(argv[0], '/'); progname = progname ? 1+progname : argv[0]; - while (EOF != (c = getopt(argc, argv, "cd:e:f:F:ghH:i:k:lL:n:o:p:P:sSt:T:w:x:Xy:z"))) { + while (EOF != (c = getopt(argc, argv, "cd:e:f:F:ghH:i:k:lL:n:o:p:P:rsSt:T:w:x:Xy:z"))) { switch (c) { case 'c': capabilities = 1; @@ -252,6 +254,9 @@ int main(int argc, char *argv[]) case 'P': pps = atoi(optarg); break; + case 'r': + readonly = 1; + break; case 's': settime = 1; break; @@ -308,7 +313,7 @@ int main(int argc, char *argv[]) } } - fd = open(device, O_RDWR); + fd = open(device, readonly ? O_RDONLY : O_RDWR); if (fd < 0) { fprintf(stderr, "opening %s: %s\n", device, strerror(errno)); return -1; @@ -436,14 +441,16 @@ int main(int argc, char *argv[]) } if (extts) { - memset(&extts_request, 0, sizeof(extts_request)); - extts_request.index = index; - extts_request.flags = PTP_ENABLE_FEATURE; - if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) { - perror("PTP_EXTTS_REQUEST"); - extts = 0; - } else { - puts("external time stamp request okay"); + if (!readonly) { + memset(&extts_request, 0, sizeof(extts_request)); + extts_request.index = index; + extts_request.flags = PTP_ENABLE_FEATURE; + if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) { + perror("PTP_EXTTS_REQUEST"); + extts = 0; + } else { + puts("external time stamp request okay"); + } } for (; extts; extts--) { cnt = read(fd, &event, sizeof(event)); @@ -455,10 +462,12 @@ int main(int argc, char *argv[]) event.t.sec, event.t.nsec); fflush(stdout); } - /* Disable the feature again. */ - extts_request.flags = 0; - if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) { - perror("PTP_EXTTS_REQUEST"); + if (!readonly) { + /* Disable the feature again. */ + extts_request.flags = 0; + if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) { + perror("PTP_EXTTS_REQUEST"); + } } } diff --git a/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh b/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh index 2e63ef009d59..2db12c5cad9c 100755 --- a/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh +++ b/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh @@ -49,7 +49,7 @@ do do err= val=$((d*1000+t*10+c)) - tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 5s --configs "SRCU-P" --bootargs "rcutorture.test_srcu_lockdep=$val" --trust-make --datestamp "$ds/$val" > "$T/kvm.sh.out" 2>&1 + tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 5s --configs "SRCU-P" --kconfig "CONFIG_FORCE_NEED_SRCU_NMI_SAFE=y" --bootargs "rcutorture.test_srcu_lockdep=$val rcutorture.reader_flavor=0x2" --trust-make --datestamp "$ds/$val" > "$T/kvm.sh.out" 2>&1 ret=$? mv "$T/kvm.sh.out" "$RCUTORTURE/res/$ds/$val" if test "$d" -ne 0 && test "$ret" -eq 0 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot index 2db39f298d18..fb61703690cb 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot @@ -2,3 +2,4 @@ rcutorture.torture_type=srcud rcupdate.rcu_self_test=1 rcutorture.fwd_progress=3 srcutree.big_cpu_lim=5 +rcutorture.reader_flavor=0x8 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot index c419cac233ee..54f5c9053474 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot @@ -2,3 +2,9 @@ rcutree.gp_preinit_delay=3 rcutree.gp_init_delay=3 rcutree.gp_cleanup_delay=3 rcupdate.rcu_self_test=1 + +# This part is for synchronize_rcu() testing +rcutorture.nfakewriters=-1 +rcutorture.gp_sync=1 +rcupdate.rcu_normal=1 +rcutree.rcu_normal_wake_from_gp=1 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 b/tools/testing/selftests/rcutorture/configs/rcu/TREE07 index d30922d8c883..352393bc5c56 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07 @@ -1,7 +1,8 @@ CONFIG_SMP=y CONFIG_NR_CPUS=16 -CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT_LAZY=y CONFIG_PREEMPT=n CONFIG_PREEMPT_DYNAMIC=n #CHECK#CONFIG_TREE_RCU=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE10 b/tools/testing/selftests/rcutorture/configs/rcu/TREE10 index 759ee51d3ddc..420632b030dc 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE10 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE10 @@ -1,6 +1,7 @@ CONFIG_SMP=y CONFIG_NR_CPUS=74 -CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_LAZY=y CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n CONFIG_PREEMPT_DYNAMIC=n diff --git a/tools/testing/selftests/rseq/.gitignore b/tools/testing/selftests/rseq/.gitignore index 16496de5f6ce..0fda241fa62b 100644 --- a/tools/testing/selftests/rseq/.gitignore +++ b/tools/testing/selftests/rseq/.gitignore @@ -9,3 +9,4 @@ param_test_compare_twice param_test_mm_cid param_test_mm_cid_benchmark param_test_mm_cid_compare_twice +syscall_errors_test diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile index 5a3432fceb58..0d0a5fae5954 100644 --- a/tools/testing/selftests/rseq/Makefile +++ b/tools/testing/selftests/rseq/Makefile @@ -16,11 +16,12 @@ OVERRIDE_TARGETS = 1 TEST_GEN_PROGS = basic_test basic_percpu_ops_test basic_percpu_ops_mm_cid_test param_test \ param_test_benchmark param_test_compare_twice param_test_mm_cid \ - param_test_mm_cid_benchmark param_test_mm_cid_compare_twice + param_test_mm_cid_benchmark param_test_mm_cid_compare_twice \ + syscall_errors_test TEST_GEN_PROGS_EXTENDED = librseq.so -TEST_PROGS = run_param_test.sh +TEST_PROGS = run_param_test.sh run_syscall_errors_test.sh TEST_FILES := settings @@ -54,3 +55,7 @@ $(OUTPUT)/param_test_mm_cid_benchmark: param_test.c $(TEST_GEN_PROGS_EXTENDED) \ $(OUTPUT)/param_test_mm_cid_compare_twice: param_test.c $(TEST_GEN_PROGS_EXTENDED) \ rseq.h rseq-*.h $(CC) $(CFLAGS) -DBUILDOPT_RSEQ_PERCPU_MM_CID -DRSEQ_COMPARE_TWICE $< $(LDLIBS) -lrseq -o $@ + +$(OUTPUT)/syscall_errors_test: syscall_errors_test.c $(TEST_GEN_PROGS_EXTENDED) \ + rseq.h rseq-*.h + $(CC) $(CFLAGS) $< $(LDLIBS) -lrseq -o $@ diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c index f6156790c3b4..663a9cef1952 100644 --- a/tools/testing/selftests/rseq/rseq.c +++ b/tools/testing/selftests/rseq/rseq.c @@ -71,9 +71,20 @@ static int rseq_ownership; /* Original struct rseq allocation size is 32 bytes. */ #define ORIG_RSEQ_ALLOC_SIZE 32 +/* + * Use a union to ensure we allocate a TLS area of 1024 bytes to accomodate an + * rseq registration that is larger than the current rseq ABI. + */ +union rseq_tls { + struct rseq_abi abi; + char dummy[RSEQ_THREAD_AREA_ALLOC_SIZE]; +}; + static -__thread struct rseq_abi __rseq_abi __attribute__((tls_model("initial-exec"), aligned(RSEQ_THREAD_AREA_ALLOC_SIZE))) = { - .cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED, +__thread union rseq_tls __rseq __attribute__((tls_model("initial-exec"))) = { + .abi = { + .cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED, + }, }; static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len, @@ -87,7 +98,7 @@ static int sys_getcpu(unsigned *cpu, unsigned *node) return syscall(__NR_getcpu, cpu, node, NULL); } -int rseq_available(void) +bool rseq_available(void) { int rc; @@ -96,9 +107,9 @@ int rseq_available(void) abort(); switch (errno) { case ENOSYS: - return 0; + return false; case EINVAL: - return 1; + return true; default: abort(); } @@ -149,7 +160,7 @@ int rseq_register_current_thread(void) /* Treat libc's ownership as a successful registration. */ return 0; } - rc = sys_rseq(&__rseq_abi, get_rseq_min_alloc_size(), 0, RSEQ_SIG); + rc = sys_rseq(&__rseq.abi, get_rseq_min_alloc_size(), 0, RSEQ_SIG); if (rc) { /* * After at least one thread has registered successfully @@ -183,7 +194,7 @@ int rseq_unregister_current_thread(void) /* Treat libc's ownership as a successful unregistration. */ return 0; } - rc = sys_rseq(&__rseq_abi, get_rseq_min_alloc_size(), RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); + rc = sys_rseq(&__rseq.abi, get_rseq_min_alloc_size(), RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); if (rc) return -1; return 0; @@ -249,7 +260,7 @@ void rseq_init(void) rseq_ownership = 1; /* Calculate the offset of the rseq area from the thread pointer. */ - rseq_offset = (void *)&__rseq_abi - rseq_thread_pointer(); + rseq_offset = (void *)&__rseq.abi - rseq_thread_pointer(); /* rseq flags are deprecated, always set to 0. */ rseq_flags = 0; diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h index ba424ce80a71..f51a5fdb0444 100644 --- a/tools/testing/selftests/rseq/rseq.h +++ b/tools/testing/selftests/rseq/rseq.h @@ -160,6 +160,11 @@ int32_t rseq_fallback_current_cpu(void); int32_t rseq_fallback_current_node(void); /* + * Returns true if rseq is supported. + */ +bool rseq_available(void); + +/* * Values returned can be either the current CPU number, -1 (rseq is * uninitialized), or -2 (rseq initialization has failed). */ diff --git a/tools/testing/selftests/rseq/run_syscall_errors_test.sh b/tools/testing/selftests/rseq/run_syscall_errors_test.sh new file mode 100755 index 000000000000..9272246b39f2 --- /dev/null +++ b/tools/testing/selftests/rseq/run_syscall_errors_test.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# SPDX-License-Identifier: MIT +# SPDX-FileCopyrightText: 2024 Michael Jeanson <mjeanson@efficios.com> + +GLIBC_TUNABLES="${GLIBC_TUNABLES:-}:glibc.pthread.rseq=0" ./syscall_errors_test diff --git a/tools/testing/selftests/rseq/syscall_errors_test.c b/tools/testing/selftests/rseq/syscall_errors_test.c new file mode 100644 index 000000000000..a5d9e1f8a2dc --- /dev/null +++ b/tools/testing/selftests/rseq/syscall_errors_test.c @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: MIT +// SPDX-FileCopyrightText: 2024 Michael Jeanson <mjeanson@efficios.com> + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include <assert.h> +#include <stdint.h> +#include <syscall.h> +#include <string.h> +#include <unistd.h> + +#include "rseq.h" + +static int sys_rseq(void *rseq_abi, uint32_t rseq_len, + int flags, uint32_t sig) +{ + return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig); +} + +/* + * Check the value of errno on some expected failures of the rseq syscall. + */ + +int main(void) +{ + struct rseq_abi *global_rseq = rseq_get_abi(); + int ret; + int errno_copy; + + if (!rseq_available()) { + fprintf(stderr, "rseq syscall unavailable"); + goto error; + } + + /* The current thread is NOT registered. */ + + /* EINVAL */ + errno = 0; + ret = sys_rseq(global_rseq, 32, -1, RSEQ_SIG); + errno_copy = errno; + fprintf(stderr, "Registration with invalid flag fails with errno set to EINVAL (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy)); + if (ret == 0 || errno_copy != EINVAL) + goto error; + + errno = 0; + ret = sys_rseq((char *) global_rseq + 1, 32, 0, RSEQ_SIG); + errno_copy = errno; + fprintf(stderr, "Registration with unaligned rseq_abi fails with errno set to EINVAL (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy)); + if (ret == 0 || errno_copy != EINVAL) + goto error; + + errno = 0; + ret = sys_rseq(global_rseq, 31, 0, RSEQ_SIG); + errno_copy = errno; + fprintf(stderr, "Registration with invalid size fails with errno set to EINVAL (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy)); + if (ret == 0 || errno_copy != EINVAL) + goto error; + + +#if defined(__LP64__) && (!defined(__s390__) && !defined(__s390x__)) + /* + * We haven't found a reliable way to find an invalid address when + * running a 32bit userspace on a 64bit kernel, so only run this test + * on 64bit builds for the moment. + * + * Also exclude architectures that select + * CONFIG_ALTERNATE_USER_ADDRESS_SPACE where the kernel and userspace + * have their own address space and this failure can't happen. + */ + + /* EFAULT */ + errno = 0; + ret = sys_rseq((void *) -4096UL, 32, 0, RSEQ_SIG); + errno_copy = errno; + fprintf(stderr, "Registration with invalid address fails with errno set to EFAULT (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy)); + if (ret == 0 || errno_copy != EFAULT) + goto error; +#endif + + errno = 0; + ret = sys_rseq(global_rseq, 32, 0, RSEQ_SIG); + errno_copy = errno; + fprintf(stderr, "Registration succeeds for the current thread (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy)); + if (ret != 0 && errno != 0) + goto error; + + /* The current thread is registered. */ + + /* EBUSY */ + errno = 0; + ret = sys_rseq(global_rseq, 32, 0, RSEQ_SIG); + errno_copy = errno; + fprintf(stderr, "Double registration fails with errno set to EBUSY (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy)); + if (ret == 0 || errno_copy != EBUSY) + goto error; + + /* EPERM */ + errno = 0; + ret = sys_rseq(global_rseq, 32, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG + 1); + errno_copy = errno; + fprintf(stderr, "Unregistration with wrong RSEQ_SIG fails with errno to EPERM (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy)); + if (ret == 0 || errno_copy != EPERM) + goto error; + + errno = 0; + ret = sys_rseq(global_rseq, 32, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); + errno_copy = errno; + fprintf(stderr, "Unregistration succeeds for the current thread (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy)); + if (ret != 0) + goto error; + + errno = 0; + ret = sys_rseq(global_rseq, 32, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); + errno_copy = errno; + fprintf(stderr, "Double unregistration fails with errno set to EINVAL (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy)); + if (ret == 0 || errno_copy != EINVAL) + goto error; + + return 0; +error: + return -1; +} diff --git a/tools/testing/selftests/sched/config b/tools/testing/selftests/sched/config index e8b09aa7c0c4..1bb8bf6d7fd4 100644 --- a/tools/testing/selftests/sched/config +++ b/tools/testing/selftests/sched/config @@ -1 +1 @@ -CONFIG_SCHED_DEBUG=y +# empty diff --git a/tools/testing/selftests/sched_ext/Makefile b/tools/testing/selftests/sched_ext/Makefile index 011762224600..f4531327b8e7 100644 --- a/tools/testing/selftests/sched_ext/Makefile +++ b/tools/testing/selftests/sched_ext/Makefile @@ -172,6 +172,7 @@ auto-test-targets := \ maximal \ maybe_null \ minimal \ + numa \ prog_run \ reload_loop \ select_cpu_dfl \ diff --git a/tools/testing/selftests/sched_ext/config b/tools/testing/selftests/sched_ext/config index 0de9b4ee249d..aa901b05c8ad 100644 --- a/tools/testing/selftests/sched_ext/config +++ b/tools/testing/selftests/sched_ext/config @@ -1,4 +1,3 @@ -CONFIG_SCHED_DEBUG=y CONFIG_SCHED_CLASS_EXT=y CONFIG_CGROUPS=y CONFIG_CGROUP_SCHED=y diff --git a/tools/testing/selftests/sched_ext/numa.bpf.c b/tools/testing/selftests/sched_ext/numa.bpf.c new file mode 100644 index 000000000000..a79d86ed54a1 --- /dev/null +++ b/tools/testing/selftests/sched_ext/numa.bpf.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * A scheduler that validates the behavior of the NUMA-aware + * functionalities. + * + * The scheduler creates a separate DSQ for each NUMA node, ensuring tasks + * are exclusively processed by CPUs within their respective nodes. Idle + * CPUs are selected only within the same node, so task migration can only + * occurs between CPUs belonging to the same node. + * + * Copyright (c) 2025 Andrea Righi <arighi@nvidia.com> + */ + +#include <scx/common.bpf.h> + +char _license[] SEC("license") = "GPL"; + +UEI_DEFINE(uei); + +const volatile unsigned int __COMPAT_SCX_PICK_IDLE_IN_NODE; + +static bool is_cpu_idle(s32 cpu, int node) +{ + const struct cpumask *idle_cpumask; + bool idle; + + idle_cpumask = __COMPAT_scx_bpf_get_idle_cpumask_node(node); + idle = bpf_cpumask_test_cpu(cpu, idle_cpumask); + scx_bpf_put_cpumask(idle_cpumask); + + return idle; +} + +s32 BPF_STRUCT_OPS(numa_select_cpu, + struct task_struct *p, s32 prev_cpu, u64 wake_flags) +{ + int node = __COMPAT_scx_bpf_cpu_node(scx_bpf_task_cpu(p)); + s32 cpu; + + /* + * We could just use __COMPAT_scx_bpf_pick_any_cpu_node() here, + * since it already tries to pick an idle CPU within the node + * first, but let's use both functions for better testing coverage. + */ + cpu = __COMPAT_scx_bpf_pick_idle_cpu_node(p->cpus_ptr, node, + __COMPAT_SCX_PICK_IDLE_IN_NODE); + if (cpu < 0) + cpu = __COMPAT_scx_bpf_pick_any_cpu_node(p->cpus_ptr, node, + __COMPAT_SCX_PICK_IDLE_IN_NODE); + + if (is_cpu_idle(cpu, node)) + scx_bpf_error("CPU %d should be marked as busy", cpu); + + if (__COMPAT_scx_bpf_cpu_node(cpu) != node) + scx_bpf_error("CPU %d should be in node %d", cpu, node); + + return cpu; +} + +void BPF_STRUCT_OPS(numa_enqueue, struct task_struct *p, u64 enq_flags) +{ + int node = __COMPAT_scx_bpf_cpu_node(scx_bpf_task_cpu(p)); + + scx_bpf_dsq_insert(p, node, SCX_SLICE_DFL, enq_flags); +} + +void BPF_STRUCT_OPS(numa_dispatch, s32 cpu, struct task_struct *prev) +{ + int node = __COMPAT_scx_bpf_cpu_node(cpu); + + scx_bpf_dsq_move_to_local(node); +} + +s32 BPF_STRUCT_OPS_SLEEPABLE(numa_init) +{ + int node, err; + + bpf_for(node, 0, __COMPAT_scx_bpf_nr_node_ids()) { + err = scx_bpf_create_dsq(node, node); + if (err) + return err; + } + + return 0; +} + +void BPF_STRUCT_OPS(numa_exit, struct scx_exit_info *ei) +{ + UEI_RECORD(uei, ei); +} + +SEC(".struct_ops.link") +struct sched_ext_ops numa_ops = { + .select_cpu = (void *)numa_select_cpu, + .enqueue = (void *)numa_enqueue, + .dispatch = (void *)numa_dispatch, + .init = (void *)numa_init, + .exit = (void *)numa_exit, + .name = "numa", +}; diff --git a/tools/testing/selftests/sched_ext/numa.c b/tools/testing/selftests/sched_ext/numa.c new file mode 100644 index 000000000000..b060c3b65c82 --- /dev/null +++ b/tools/testing/selftests/sched_ext/numa.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2025 Andrea Righi <arighi@nvidia.com> + */ +#include <bpf/bpf.h> +#include <scx/common.h> +#include <sys/wait.h> +#include <unistd.h> +#include "numa.bpf.skel.h" +#include "scx_test.h" + +static enum scx_test_status setup(void **ctx) +{ + struct numa *skel; + + skel = numa__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + skel->rodata->__COMPAT_SCX_PICK_IDLE_IN_NODE = SCX_PICK_IDLE_IN_NODE; + skel->struct_ops.numa_ops->flags = SCX_OPS_BUILTIN_IDLE_PER_NODE; + SCX_FAIL_IF(numa__load(skel), "Failed to load skel"); + + *ctx = skel; + + return SCX_TEST_PASS; +} + +static enum scx_test_status run(void *ctx) +{ + struct numa *skel = ctx; + struct bpf_link *link; + + link = bpf_map__attach_struct_ops(skel->maps.numa_ops); + SCX_FAIL_IF(!link, "Failed to attach scheduler"); + + /* Just sleeping is fine, plenty of scheduling events happening */ + sleep(1); + + SCX_EQ(skel->data->uei.kind, EXIT_KIND(SCX_EXIT_NONE)); + bpf_link__destroy(link); + + return SCX_TEST_PASS; +} + +static void cleanup(void *ctx) +{ + struct numa *skel = ctx; + + numa__destroy(skel); +} + +struct scx_test numa = { + .name = "numa", + .description = "Verify NUMA-aware functionalities", + .setup = setup, + .run = run, + .cleanup = cleanup, +}; +REGISTER_SCX_TEST(&numa) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 14ba51b52095..b2f76a52215a 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -155,6 +155,12 @@ struct seccomp_data { # endif #endif +#ifndef __NR_uretprobe +# if defined(__x86_64__) +# define __NR_uretprobe 335 +# endif +#endif + #ifndef SECCOMP_SET_MODE_STRICT #define SECCOMP_SET_MODE_STRICT 0 #endif diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh index 84472b436c07..db1616857d89 100755 --- a/tools/testing/selftests/sysctl/sysctl.sh +++ b/tools/testing/selftests/sysctl/sysctl.sh @@ -21,7 +21,7 @@ TEST_FILE=$(mktemp) # ENABLED: 1 if enabled, 0 otherwise # TARGET: test target file required on the test_sysctl module # SKIP_NO_TARGET: 1 skip if TARGET not there -# 0 run eventhough TARGET not there +# 0 run even though TARGET not there # # Once these are enabled please leave them as-is. Write your own test, # we have tons of space. @@ -764,7 +764,7 @@ sysctl_test_0007() fi if [ ! -f /proc/cmdline ]; then - echo -e "SKIPPING\nThere is no /proc/cmdline to check for paramter" + echo -e "SKIPPING\nThere is no /proc/cmdline to check for parameter" return $ksft_skip fi @@ -857,7 +857,7 @@ list_tests() echo echo "TEST_ID x NUM_TEST" echo "TEST_ID: Test ID" - echo "NUM_TESTS: Number of recommended times to run the test" + echo "NUM_TESTS: Recommended number of times to run the test" echo echo "0001 x $(get_test_count 0001) - tests proc_dointvec_minmax()" echo "0002 x $(get_test_count 0002) - tests proc_dostring()" @@ -884,7 +884,7 @@ usage() echo "Valid tests: 0001-$MAX_TEST" echo "" echo " all Runs all tests (default)" - echo " -t Run test ID the number amount of times is recommended" + echo " -t Run test ID the recommended number of times" echo " -w Watch test ID run until it runs into an error" echo " -c Run test ID once" echo " -s Run test ID x test-count number of times" @@ -898,7 +898,7 @@ usage() echo Example uses: echo echo "$TEST_NAME.sh -- executes all tests" - echo "$TEST_NAME.sh -t 0002 -- Executes test ID 0002 number of times is recomended" + echo "$TEST_NAME.sh -t 0002 -- Executes test ID 0002 the recommended number of times" echo "$TEST_NAME.sh -w 0002 -- Watch test ID 0002 run until an error occurs" echo "$TEST_NAME.sh -s 0002 -- Run test ID 0002 once" echo "$TEST_NAME.sh -c 0002 3 -- Run test ID 0002 three times" diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json index dd8109768f8f..5596f4df0e9f 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json @@ -689,7 +689,7 @@ "cmdUnderTest": "$TC actions add action police rate 7mbit burst 1m continue index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action police index 1", - "matchPattern": "action order [0-9]*: police 0x1 rate 7Mbit burst 1024Kb mtu 2Kb action continue", + "matchPattern": "action order [0-9]*: police 0x1 rate 7Mbit burst (1024Kb|1Mb) mtu 2Kb action continue", "matchCount": "1", "teardown": [ "$TC actions flush action police" @@ -716,7 +716,7 @@ "cmdUnderTest": "$TC actions add action police rate 7mbit burst 1m drop index 1", "expExitCode": "0", "verifyCmd": "$TC actions ls action police", - "matchPattern": "action order [0-9]*: police 0x1 rate 7Mbit burst 1024Kb mtu 2Kb action drop", + "matchPattern": "action order [0-9]*: police 0x1 rate 7Mbit burst (1024Kb|1Mb) mtu 2Kb action drop", "matchCount": "1", "teardown": [ "$TC actions flush action police" @@ -743,7 +743,7 @@ "cmdUnderTest": "$TC actions add action police rate 7mbit burst 1m ok index 1", "expExitCode": "0", "verifyCmd": "$TC actions ls action police", - "matchPattern": "action order [0-9]*: police 0x1 rate 7Mbit burst 1024Kb mtu 2Kb action pass", + "matchPattern": "action order [0-9]*: police 0x1 rate 7Mbit burst (1024Kb|1Mb) mtu 2Kb action pass", "matchCount": "1", "teardown": [ "$TC actions flush action police" @@ -770,7 +770,7 @@ "cmdUnderTest": "$TC actions add action police rate 7mbit burst 1m reclassify index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action police index 1", - "matchPattern": "action order [0-9]*: police 0x1 rate 7Mbit burst 1024Kb mtu 2Kb action reclassify", + "matchPattern": "action order [0-9]*: police 0x1 rate 7Mbit burst (1024Kb|1Mb) mtu 2Kb action reclassify", "matchCount": "1", "teardown": [ "$TC actions flush action police" @@ -797,7 +797,7 @@ "cmdUnderTest": "$TC actions add action police rate 7mbit burst 1m pipe index 1", "expExitCode": "0", "verifyCmd": "$TC actions ls action police", - "matchPattern": "action order [0-9]*: police 0x1 rate 7Mbit burst 1024Kb mtu 2Kb action pipe", + "matchPattern": "action order [0-9]*: police 0x1 rate 7Mbit burst (1024Kb|1Mb) mtu 2Kb action pipe", "matchCount": "1", "teardown": [ "$TC actions flush action police" diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c index 9814b3a1c77d..f0eceb0faf34 100644 --- a/tools/testing/selftests/timers/posix_timers.c +++ b/tools/testing/selftests/timers/posix_timers.c @@ -7,6 +7,7 @@ * Kernel loop code stolen from Steven Rostedt <srostedt@redhat.com> */ #define _GNU_SOURCE +#include <sys/prctl.h> #include <sys/time.h> #include <sys/types.h> #include <stdio.h> @@ -599,14 +600,84 @@ static void check_overrun(int which, const char *name) "check_overrun %s\n", name); } +#include <sys/syscall.h> + +static int do_timer_create(int *id) +{ + return syscall(__NR_timer_create, CLOCK_MONOTONIC, NULL, id); +} + +static int do_timer_delete(int id) +{ + return syscall(__NR_timer_delete, id); +} + +#ifndef PR_TIMER_CREATE_RESTORE_IDS +# define PR_TIMER_CREATE_RESTORE_IDS 77 +# define PR_TIMER_CREATE_RESTORE_IDS_OFF 0 +# define PR_TIMER_CREATE_RESTORE_IDS_ON 1 +# define PR_TIMER_CREATE_RESTORE_IDS_GET 2 +#endif + +static void check_timer_create_exact(void) +{ + int id; + + if (prctl(PR_TIMER_CREATE_RESTORE_IDS, PR_TIMER_CREATE_RESTORE_IDS_ON, 0, 0, 0)) { + switch (errno) { + case EINVAL: + ksft_test_result_skip("check timer create exact, not supported\n"); + return; + default: + ksft_test_result_skip("check timer create exact, errno = %d\n", errno); + return; + } + } + + if (prctl(PR_TIMER_CREATE_RESTORE_IDS, PR_TIMER_CREATE_RESTORE_IDS_GET, 0, 0, 0) != 1) + fatal_error(NULL, "prctl(GET) failed\n"); + + id = 8; + if (do_timer_create(&id) < 0) + fatal_error(NULL, "timer_create()"); + + if (do_timer_delete(id)) + fatal_error(NULL, "timer_delete()"); + + if (prctl(PR_TIMER_CREATE_RESTORE_IDS, PR_TIMER_CREATE_RESTORE_IDS_OFF, 0, 0, 0)) + fatal_error(NULL, "prctl(OFF)"); + + if (prctl(PR_TIMER_CREATE_RESTORE_IDS, PR_TIMER_CREATE_RESTORE_IDS_GET, 0, 0, 0) != 0) + fatal_error(NULL, "prctl(GET) failed\n"); + + if (id != 8) { + ksft_test_result_fail("check timer create exact %d != 8\n", id); + return; + } + + /* Validate that it went back to normal mode and allocates ID 9 */ + if (do_timer_create(&id) < 0) + fatal_error(NULL, "timer_create()"); + + if (do_timer_delete(id)) + fatal_error(NULL, "timer_delete()"); + + if (id == 9) + ksft_test_result_pass("check timer create exact\n"); + else + ksft_test_result_fail("check timer create exact. Disabling failed.\n"); +} + int main(int argc, char **argv) { ksft_print_header(); - ksft_set_plan(18); + ksft_set_plan(19); ksft_print_msg("Testing posix timers. False negative may happen on CPU execution \n"); ksft_print_msg("based timers if other threads run on the CPU...\n"); + check_timer_create_exact(); + check_itimer(ITIMER_VIRTUAL, "ITIMER_VIRTUAL"); check_itimer(ITIMER_PROF, "ITIMER_PROF"); check_itimer(ITIMER_REAL, "ITIMER_REAL"); diff --git a/tools/testing/selftests/timers/skew_consistency.c b/tools/testing/selftests/timers/skew_consistency.c index 83450145fe65..46c391d7f45d 100644 --- a/tools/testing/selftests/timers/skew_consistency.c +++ b/tools/testing/selftests/timers/skew_consistency.c @@ -47,7 +47,7 @@ int main(int argc, char **argv) pid = fork(); if (!pid) - return system("./inconsistency-check -c 1 -t 600"); + return system("./inconsistency-check -t 60"); ppm = 500; ret = 0; diff --git a/tools/testing/selftests/ublk/.gitignore b/tools/testing/selftests/ublk/.gitignore new file mode 100644 index 000000000000..8b2871ea7751 --- /dev/null +++ b/tools/testing/selftests/ublk/.gitignore @@ -0,0 +1,3 @@ +kublk +/tools +*-verify.state diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile new file mode 100644 index 000000000000..7817afe29005 --- /dev/null +++ b/tools/testing/selftests/ublk/Makefile @@ -0,0 +1,27 @@ +# SPDX-License-Identifier: GPL-2.0 + +CFLAGS += -O3 -Wl,-no-as-needed -Wall -I $(top_srcdir) +LDLIBS += -lpthread -lm -luring + +TEST_PROGS := test_generic_01.sh + +TEST_PROGS += test_null_01.sh +TEST_PROGS += test_null_02.sh +TEST_PROGS += test_loop_01.sh +TEST_PROGS += test_loop_02.sh +TEST_PROGS += test_loop_03.sh +TEST_PROGS += test_loop_04.sh +TEST_PROGS += test_stripe_01.sh +TEST_PROGS += test_stripe_02.sh + +TEST_PROGS += test_stress_01.sh +TEST_PROGS += test_stress_02.sh + +TEST_GEN_PROGS_EXTENDED = kublk + +include ../lib.mk + +$(TEST_GEN_PROGS_EXTENDED): kublk.c null.c file_backed.c common.c stripe.c + +check: + shellcheck -x -f gcc *.sh diff --git a/tools/testing/selftests/ublk/common.c b/tools/testing/selftests/ublk/common.c new file mode 100644 index 000000000000..01580a6f8519 --- /dev/null +++ b/tools/testing/selftests/ublk/common.c @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "kublk.h" + +void backing_file_tgt_deinit(struct ublk_dev *dev) +{ + int i; + + for (i = 1; i < dev->nr_fds; i++) { + fsync(dev->fds[i]); + close(dev->fds[i]); + } +} + +int backing_file_tgt_init(struct ublk_dev *dev) +{ + int fd, i; + + assert(dev->nr_fds == 1); + + for (i = 0; i < dev->tgt.nr_backing_files; i++) { + char *file = dev->tgt.backing_file[i]; + unsigned long bytes; + struct stat st; + + ublk_dbg(UBLK_DBG_DEV, "%s: file %d: %s\n", __func__, i, file); + + fd = open(file, O_RDWR | O_DIRECT); + if (fd < 0) { + ublk_err("%s: backing file %s can't be opened: %s\n", + __func__, file, strerror(errno)); + return -EBADF; + } + + if (fstat(fd, &st) < 0) { + close(fd); + return -EBADF; + } + + if (S_ISREG(st.st_mode)) + bytes = st.st_size; + else if (S_ISBLK(st.st_mode)) { + if (ioctl(fd, BLKGETSIZE64, &bytes) != 0) + return -1; + } else { + return -EINVAL; + } + + dev->tgt.backing_file_size[i] = bytes; + dev->fds[dev->nr_fds] = fd; + dev->nr_fds += 1; + } + + return 0; +} diff --git a/tools/testing/selftests/ublk/config b/tools/testing/selftests/ublk/config new file mode 100644 index 000000000000..592b0ba4d661 --- /dev/null +++ b/tools/testing/selftests/ublk/config @@ -0,0 +1 @@ +CONFIG_BLK_DEV_UBLK=m diff --git a/tools/testing/selftests/ublk/file_backed.c b/tools/testing/selftests/ublk/file_backed.c new file mode 100644 index 000000000000..6f34eabfae97 --- /dev/null +++ b/tools/testing/selftests/ublk/file_backed.c @@ -0,0 +1,169 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "kublk.h" + +static enum io_uring_op ublk_to_uring_op(const struct ublksrv_io_desc *iod, int zc) +{ + unsigned ublk_op = ublksrv_get_op(iod); + + if (ublk_op == UBLK_IO_OP_READ) + return zc ? IORING_OP_READ_FIXED : IORING_OP_READ; + else if (ublk_op == UBLK_IO_OP_WRITE) + return zc ? IORING_OP_WRITE_FIXED : IORING_OP_WRITE; + assert(0); +} + +static int loop_queue_flush_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag) +{ + unsigned ublk_op = ublksrv_get_op(iod); + struct io_uring_sqe *sqe[1]; + + ublk_queue_alloc_sqes(q, sqe, 1); + io_uring_prep_fsync(sqe[0], 1 /*fds[1]*/, IORING_FSYNC_DATASYNC); + io_uring_sqe_set_flags(sqe[0], IOSQE_FIXED_FILE); + /* bit63 marks us as tgt io */ + sqe[0]->user_data = build_user_data(tag, ublk_op, 0, 1); + return 1; +} + +static int loop_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag) +{ + unsigned ublk_op = ublksrv_get_op(iod); + int zc = ublk_queue_use_zc(q); + enum io_uring_op op = ublk_to_uring_op(iod, zc); + struct io_uring_sqe *sqe[3]; + + if (!zc) { + ublk_queue_alloc_sqes(q, sqe, 1); + if (!sqe[0]) + return -ENOMEM; + + io_uring_prep_rw(op, sqe[0], 1 /*fds[1]*/, + (void *)iod->addr, + iod->nr_sectors << 9, + iod->start_sector << 9); + io_uring_sqe_set_flags(sqe[0], IOSQE_FIXED_FILE); + /* bit63 marks us as tgt io */ + sqe[0]->user_data = build_user_data(tag, ublk_op, 0, 1); + return 1; + } + + ublk_queue_alloc_sqes(q, sqe, 3); + + io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, tag); + sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK; + sqe[0]->user_data = build_user_data(tag, + ublk_cmd_op_nr(sqe[0]->cmd_op), 0, 1); + + io_uring_prep_rw(op, sqe[1], 1 /*fds[1]*/, 0, + iod->nr_sectors << 9, + iod->start_sector << 9); + sqe[1]->buf_index = tag; + sqe[1]->flags |= IOSQE_FIXED_FILE | IOSQE_IO_HARDLINK; + sqe[1]->user_data = build_user_data(tag, ublk_op, 0, 1); + + io_uring_prep_buf_unregister(sqe[2], 0, tag, q->q_id, tag); + sqe[2]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[2]->cmd_op), 0, 1); + + return 2; +} + +static int loop_queue_tgt_io(struct ublk_queue *q, int tag) +{ + const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); + unsigned ublk_op = ublksrv_get_op(iod); + int ret; + + switch (ublk_op) { + case UBLK_IO_OP_FLUSH: + ret = loop_queue_flush_io(q, iod, tag); + break; + case UBLK_IO_OP_WRITE_ZEROES: + case UBLK_IO_OP_DISCARD: + ret = -ENOTSUP; + break; + case UBLK_IO_OP_READ: + case UBLK_IO_OP_WRITE: + ret = loop_queue_tgt_rw_io(q, iod, tag); + break; + default: + ret = -EINVAL; + break; + } + + ublk_dbg(UBLK_DBG_IO, "%s: tag %d ublk io %x %llx %u\n", __func__, tag, + iod->op_flags, iod->start_sector, iod->nr_sectors << 9); + return ret; +} + +static int ublk_loop_queue_io(struct ublk_queue *q, int tag) +{ + int queued = loop_queue_tgt_io(q, tag); + + ublk_queued_tgt_io(q, tag, queued); + return 0; +} + +static void ublk_loop_io_done(struct ublk_queue *q, int tag, + const struct io_uring_cqe *cqe) +{ + unsigned op = user_data_to_op(cqe->user_data); + struct ublk_io *io = ublk_get_io(q, tag); + + if (cqe->res < 0 || op != ublk_cmd_op_nr(UBLK_U_IO_UNREGISTER_IO_BUF)) { + if (!io->result) + io->result = cqe->res; + if (cqe->res < 0) + ublk_err("%s: io failed op %x user_data %lx\n", + __func__, op, cqe->user_data); + } + + /* buffer register op is IOSQE_CQE_SKIP_SUCCESS */ + if (op == ublk_cmd_op_nr(UBLK_U_IO_REGISTER_IO_BUF)) + io->tgt_ios += 1; + + if (ublk_completed_tgt_io(q, tag)) + ublk_complete_io(q, tag, io->result); +} + +static int ublk_loop_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev) +{ + unsigned long long bytes; + int ret; + struct ublk_params p = { + .types = UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DMA_ALIGN, + .basic = { + .attrs = UBLK_ATTR_VOLATILE_CACHE, + .logical_bs_shift = 9, + .physical_bs_shift = 12, + .io_opt_shift = 12, + .io_min_shift = 9, + .max_sectors = dev->dev_info.max_io_buf_bytes >> 9, + }, + .dma = { + .alignment = 511, + }, + }; + + ret = backing_file_tgt_init(dev); + if (ret) + return ret; + + if (dev->tgt.nr_backing_files != 1) + return -EINVAL; + + bytes = dev->tgt.backing_file_size[0]; + dev->tgt.dev_size = bytes; + p.basic.dev_sectors = bytes >> 9; + dev->tgt.params = p; + + return 0; +} + +const struct ublk_tgt_ops loop_tgt_ops = { + .name = "loop", + .init_tgt = ublk_loop_tgt_init, + .deinit_tgt = backing_file_tgt_deinit, + .queue_io = ublk_loop_queue_io, + .tgt_io_done = ublk_loop_io_done, +}; diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c new file mode 100644 index 000000000000..05147b53c361 --- /dev/null +++ b/tools/testing/selftests/ublk/kublk.c @@ -0,0 +1,1138 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: uring_cmd based ublk + */ + +#include "kublk.h" + +unsigned int ublk_dbg_mask = UBLK_LOG; +static const struct ublk_tgt_ops *tgt_ops_list[] = { + &null_tgt_ops, + &loop_tgt_ops, + &stripe_tgt_ops, +}; + +static const struct ublk_tgt_ops *ublk_find_tgt(const char *name) +{ + const struct ublk_tgt_ops *ops; + int i; + + if (name == NULL) + return NULL; + + for (i = 0; sizeof(tgt_ops_list) / sizeof(ops); i++) + if (strcmp(tgt_ops_list[i]->name, name) == 0) + return tgt_ops_list[i]; + return NULL; +} + +static inline int ublk_setup_ring(struct io_uring *r, int depth, + int cq_depth, unsigned flags) +{ + struct io_uring_params p; + + memset(&p, 0, sizeof(p)); + p.flags = flags | IORING_SETUP_CQSIZE; + p.cq_entries = cq_depth; + + return io_uring_queue_init_params(depth, r, &p); +} + +static void ublk_ctrl_init_cmd(struct ublk_dev *dev, + struct io_uring_sqe *sqe, + struct ublk_ctrl_cmd_data *data) +{ + struct ublksrv_ctrl_dev_info *info = &dev->dev_info; + struct ublksrv_ctrl_cmd *cmd = (struct ublksrv_ctrl_cmd *)ublk_get_sqe_cmd(sqe); + + sqe->fd = dev->ctrl_fd; + sqe->opcode = IORING_OP_URING_CMD; + sqe->ioprio = 0; + + if (data->flags & CTRL_CMD_HAS_BUF) { + cmd->addr = data->addr; + cmd->len = data->len; + } + + if (data->flags & CTRL_CMD_HAS_DATA) + cmd->data[0] = data->data[0]; + + cmd->dev_id = info->dev_id; + cmd->queue_id = -1; + + ublk_set_sqe_cmd_op(sqe, data->cmd_op); + + io_uring_sqe_set_data(sqe, cmd); +} + +static int __ublk_ctrl_cmd(struct ublk_dev *dev, + struct ublk_ctrl_cmd_data *data) +{ + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + int ret = -EINVAL; + + sqe = io_uring_get_sqe(&dev->ring); + if (!sqe) { + ublk_err("%s: can't get sqe ret %d\n", __func__, ret); + return ret; + } + + ublk_ctrl_init_cmd(dev, sqe, data); + + ret = io_uring_submit(&dev->ring); + if (ret < 0) { + ublk_err("uring submit ret %d\n", ret); + return ret; + } + + ret = io_uring_wait_cqe(&dev->ring, &cqe); + if (ret < 0) { + ublk_err("wait cqe: %s\n", strerror(-ret)); + return ret; + } + io_uring_cqe_seen(&dev->ring, cqe); + + return cqe->res; +} + +static int ublk_ctrl_stop_dev(struct ublk_dev *dev) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_CMD_STOP_DEV, + }; + + return __ublk_ctrl_cmd(dev, &data); +} + +static int ublk_ctrl_start_dev(struct ublk_dev *dev, + int daemon_pid) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_U_CMD_START_DEV, + .flags = CTRL_CMD_HAS_DATA, + }; + + dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid; + + return __ublk_ctrl_cmd(dev, &data); +} + +static int ublk_ctrl_add_dev(struct ublk_dev *dev) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_U_CMD_ADD_DEV, + .flags = CTRL_CMD_HAS_BUF, + .addr = (__u64) (uintptr_t) &dev->dev_info, + .len = sizeof(struct ublksrv_ctrl_dev_info), + }; + + return __ublk_ctrl_cmd(dev, &data); +} + +static int ublk_ctrl_del_dev(struct ublk_dev *dev) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_U_CMD_DEL_DEV, + .flags = 0, + }; + + return __ublk_ctrl_cmd(dev, &data); +} + +static int ublk_ctrl_get_info(struct ublk_dev *dev) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_U_CMD_GET_DEV_INFO, + .flags = CTRL_CMD_HAS_BUF, + .addr = (__u64) (uintptr_t) &dev->dev_info, + .len = sizeof(struct ublksrv_ctrl_dev_info), + }; + + return __ublk_ctrl_cmd(dev, &data); +} + +static int ublk_ctrl_set_params(struct ublk_dev *dev, + struct ublk_params *params) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_U_CMD_SET_PARAMS, + .flags = CTRL_CMD_HAS_BUF, + .addr = (__u64) (uintptr_t) params, + .len = sizeof(*params), + }; + params->len = sizeof(*params); + return __ublk_ctrl_cmd(dev, &data); +} + +static int ublk_ctrl_get_params(struct ublk_dev *dev, + struct ublk_params *params) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_CMD_GET_PARAMS, + .flags = CTRL_CMD_HAS_BUF, + .addr = (__u64)params, + .len = sizeof(*params), + }; + + params->len = sizeof(*params); + + return __ublk_ctrl_cmd(dev, &data); +} + +static int ublk_ctrl_get_features(struct ublk_dev *dev, + __u64 *features) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_U_CMD_GET_FEATURES, + .flags = CTRL_CMD_HAS_BUF, + .addr = (__u64) (uintptr_t) features, + .len = sizeof(*features), + }; + + return __ublk_ctrl_cmd(dev, &data); +} + +static const char *ublk_dev_state_desc(struct ublk_dev *dev) +{ + switch (dev->dev_info.state) { + case UBLK_S_DEV_DEAD: + return "DEAD"; + case UBLK_S_DEV_LIVE: + return "LIVE"; + case UBLK_S_DEV_QUIESCED: + return "QUIESCED"; + default: + return "UNKNOWN"; + }; +} + +static void ublk_ctrl_dump(struct ublk_dev *dev) +{ + struct ublksrv_ctrl_dev_info *info = &dev->dev_info; + struct ublk_params p; + int ret; + + ret = ublk_ctrl_get_params(dev, &p); + if (ret < 0) { + ublk_err("failed to get params %m\n"); + return; + } + + ublk_log("dev id %d: nr_hw_queues %d queue_depth %d block size %d dev_capacity %lld\n", + info->dev_id, info->nr_hw_queues, info->queue_depth, + 1 << p.basic.logical_bs_shift, p.basic.dev_sectors); + ublk_log("\tmax rq size %d daemon pid %d flags 0x%llx state %s\n", + info->max_io_buf_bytes, info->ublksrv_pid, info->flags, + ublk_dev_state_desc(dev)); + fflush(stdout); +} + +static void ublk_ctrl_deinit(struct ublk_dev *dev) +{ + close(dev->ctrl_fd); + free(dev); +} + +static struct ublk_dev *ublk_ctrl_init(void) +{ + struct ublk_dev *dev = (struct ublk_dev *)calloc(1, sizeof(*dev)); + struct ublksrv_ctrl_dev_info *info = &dev->dev_info; + int ret; + + dev->ctrl_fd = open(CTRL_DEV, O_RDWR); + if (dev->ctrl_fd < 0) { + free(dev); + return NULL; + } + + info->max_io_buf_bytes = UBLK_IO_MAX_BYTES; + + ret = ublk_setup_ring(&dev->ring, UBLK_CTRL_RING_DEPTH, + UBLK_CTRL_RING_DEPTH, IORING_SETUP_SQE128); + if (ret < 0) { + ublk_err("queue_init: %s\n", strerror(-ret)); + free(dev); + return NULL; + } + dev->nr_fds = 1; + + return dev; +} + +static int __ublk_queue_cmd_buf_sz(unsigned depth) +{ + int size = depth * sizeof(struct ublksrv_io_desc); + unsigned int page_sz = getpagesize(); + + return round_up(size, page_sz); +} + +static int ublk_queue_max_cmd_buf_sz(void) +{ + return __ublk_queue_cmd_buf_sz(UBLK_MAX_QUEUE_DEPTH); +} + +static int ublk_queue_cmd_buf_sz(struct ublk_queue *q) +{ + return __ublk_queue_cmd_buf_sz(q->q_depth); +} + +static void ublk_queue_deinit(struct ublk_queue *q) +{ + int i; + int nr_ios = q->q_depth; + + io_uring_unregister_buffers(&q->ring); + + io_uring_unregister_ring_fd(&q->ring); + + if (q->ring.ring_fd > 0) { + io_uring_unregister_files(&q->ring); + close(q->ring.ring_fd); + q->ring.ring_fd = -1; + } + + if (q->io_cmd_buf) + munmap(q->io_cmd_buf, ublk_queue_cmd_buf_sz(q)); + + for (i = 0; i < nr_ios; i++) + free(q->ios[i].buf_addr); +} + +static int ublk_queue_init(struct ublk_queue *q) +{ + struct ublk_dev *dev = q->dev; + int depth = dev->dev_info.queue_depth; + int i, ret = -1; + int cmd_buf_size, io_buf_size; + unsigned long off; + int ring_depth = dev->tgt.sq_depth, cq_depth = dev->tgt.cq_depth; + + q->tgt_ops = dev->tgt.ops; + q->state = 0; + q->q_depth = depth; + q->cmd_inflight = 0; + q->tid = gettid(); + + if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY) { + q->state |= UBLKSRV_NO_BUF; + q->state |= UBLKSRV_ZC; + } + + cmd_buf_size = ublk_queue_cmd_buf_sz(q); + off = UBLKSRV_CMD_BUF_OFFSET + q->q_id * ublk_queue_max_cmd_buf_sz(); + q->io_cmd_buf = (char *)mmap(0, cmd_buf_size, PROT_READ, + MAP_SHARED | MAP_POPULATE, dev->fds[0], off); + if (q->io_cmd_buf == MAP_FAILED) { + ublk_err("ublk dev %d queue %d map io_cmd_buf failed %m\n", + q->dev->dev_info.dev_id, q->q_id); + goto fail; + } + + io_buf_size = dev->dev_info.max_io_buf_bytes; + for (i = 0; i < q->q_depth; i++) { + q->ios[i].buf_addr = NULL; + q->ios[i].flags = UBLKSRV_NEED_FETCH_RQ | UBLKSRV_IO_FREE; + + if (q->state & UBLKSRV_NO_BUF) + continue; + + if (posix_memalign((void **)&q->ios[i].buf_addr, + getpagesize(), io_buf_size)) { + ublk_err("ublk dev %d queue %d io %d posix_memalign failed %m\n", + dev->dev_info.dev_id, q->q_id, i); + goto fail; + } + } + + ret = ublk_setup_ring(&q->ring, ring_depth, cq_depth, + IORING_SETUP_COOP_TASKRUN); + if (ret < 0) { + ublk_err("ublk dev %d queue %d setup io_uring failed %d\n", + q->dev->dev_info.dev_id, q->q_id, ret); + goto fail; + } + + if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY) { + ret = io_uring_register_buffers_sparse(&q->ring, q->q_depth); + if (ret) { + ublk_err("ublk dev %d queue %d register spare buffers failed %d", + dev->dev_info.dev_id, q->q_id, ret); + goto fail; + } + } + + io_uring_register_ring_fd(&q->ring); + + ret = io_uring_register_files(&q->ring, dev->fds, dev->nr_fds); + if (ret) { + ublk_err("ublk dev %d queue %d register files failed %d\n", + q->dev->dev_info.dev_id, q->q_id, ret); + goto fail; + } + + return 0; + fail: + ublk_queue_deinit(q); + ublk_err("ublk dev %d queue %d failed\n", + dev->dev_info.dev_id, q->q_id); + return -ENOMEM; +} + +#define WAIT_USEC 100000 +#define MAX_WAIT_USEC (3 * 1000000) +static int ublk_dev_prep(const struct dev_ctx *ctx, struct ublk_dev *dev) +{ + int dev_id = dev->dev_info.dev_id; + unsigned int wait_usec = 0; + int ret = 0, fd = -1; + char buf[64]; + + snprintf(buf, 64, "%s%d", UBLKC_DEV, dev_id); + + while (wait_usec < MAX_WAIT_USEC) { + fd = open(buf, O_RDWR); + if (fd >= 0) + break; + usleep(WAIT_USEC); + wait_usec += WAIT_USEC; + } + if (fd < 0) { + ublk_err("can't open %s %s\n", buf, strerror(errno)); + return -1; + } + + dev->fds[0] = fd; + if (dev->tgt.ops->init_tgt) + ret = dev->tgt.ops->init_tgt(ctx, dev); + if (ret) + close(dev->fds[0]); + return ret; +} + +static void ublk_dev_unprep(struct ublk_dev *dev) +{ + if (dev->tgt.ops->deinit_tgt) + dev->tgt.ops->deinit_tgt(dev); + close(dev->fds[0]); +} + +int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag) +{ + struct ublksrv_io_cmd *cmd; + struct io_uring_sqe *sqe[1]; + unsigned int cmd_op = 0; + __u64 user_data; + + /* only freed io can be issued */ + if (!(io->flags & UBLKSRV_IO_FREE)) + return 0; + + /* we issue because we need either fetching or committing */ + if (!(io->flags & + (UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP))) + return 0; + + if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP) + cmd_op = UBLK_U_IO_COMMIT_AND_FETCH_REQ; + else if (io->flags & UBLKSRV_NEED_FETCH_RQ) + cmd_op = UBLK_U_IO_FETCH_REQ; + + if (io_uring_sq_space_left(&q->ring) < 1) + io_uring_submit(&q->ring); + + ublk_queue_alloc_sqes(q, sqe, 1); + if (!sqe[0]) { + ublk_err("%s: run out of sqe %d, tag %d\n", + __func__, q->q_id, tag); + return -1; + } + + cmd = (struct ublksrv_io_cmd *)ublk_get_sqe_cmd(sqe[0]); + + if (cmd_op == UBLK_U_IO_COMMIT_AND_FETCH_REQ) + cmd->result = io->result; + + /* These fields should be written once, never change */ + ublk_set_sqe_cmd_op(sqe[0], cmd_op); + sqe[0]->fd = 0; /* dev->fds[0] */ + sqe[0]->opcode = IORING_OP_URING_CMD; + sqe[0]->flags = IOSQE_FIXED_FILE; + sqe[0]->rw_flags = 0; + cmd->tag = tag; + cmd->q_id = q->q_id; + if (!(q->state & UBLKSRV_NO_BUF)) + cmd->addr = (__u64) (uintptr_t) io->buf_addr; + else + cmd->addr = 0; + + user_data = build_user_data(tag, _IOC_NR(cmd_op), 0, 0); + io_uring_sqe_set_data64(sqe[0], user_data); + + io->flags = 0; + + q->cmd_inflight += 1; + + ublk_dbg(UBLK_DBG_IO_CMD, "%s: (qid %d tag %u cmd_op %u) iof %x stopping %d\n", + __func__, q->q_id, tag, cmd_op, + io->flags, !!(q->state & UBLKSRV_QUEUE_STOPPING)); + return 1; +} + +static void ublk_submit_fetch_commands(struct ublk_queue *q) +{ + int i = 0; + + for (i = 0; i < q->q_depth; i++) + ublk_queue_io_cmd(q, &q->ios[i], i); +} + +static int ublk_queue_is_idle(struct ublk_queue *q) +{ + return !io_uring_sq_ready(&q->ring) && !q->io_inflight; +} + +static int ublk_queue_is_done(struct ublk_queue *q) +{ + return (q->state & UBLKSRV_QUEUE_STOPPING) && ublk_queue_is_idle(q); +} + +static inline void ublksrv_handle_tgt_cqe(struct ublk_queue *q, + struct io_uring_cqe *cqe) +{ + unsigned tag = user_data_to_tag(cqe->user_data); + + if (cqe->res < 0 && cqe->res != -EAGAIN) + ublk_err("%s: failed tgt io: res %d qid %u tag %u, cmd_op %u\n", + __func__, cqe->res, q->q_id, + user_data_to_tag(cqe->user_data), + user_data_to_op(cqe->user_data)); + + if (q->tgt_ops->tgt_io_done) + q->tgt_ops->tgt_io_done(q, tag, cqe); +} + +static void ublk_handle_cqe(struct io_uring *r, + struct io_uring_cqe *cqe, void *data) +{ + struct ublk_queue *q = container_of(r, struct ublk_queue, ring); + unsigned tag = user_data_to_tag(cqe->user_data); + unsigned cmd_op = user_data_to_op(cqe->user_data); + int fetch = (cqe->res != UBLK_IO_RES_ABORT) && + !(q->state & UBLKSRV_QUEUE_STOPPING); + struct ublk_io *io; + + if (cqe->res < 0 && cqe->res != -ENODEV) + ublk_err("%s: res %d userdata %llx queue state %x\n", __func__, + cqe->res, cqe->user_data, q->state); + + ublk_dbg(UBLK_DBG_IO_CMD, "%s: res %d (qid %d tag %u cmd_op %u target %d/%d) stopping %d\n", + __func__, cqe->res, q->q_id, tag, cmd_op, + is_target_io(cqe->user_data), + user_data_to_tgt_data(cqe->user_data), + (q->state & UBLKSRV_QUEUE_STOPPING)); + + /* Don't retrieve io in case of target io */ + if (is_target_io(cqe->user_data)) { + ublksrv_handle_tgt_cqe(q, cqe); + return; + } + + io = &q->ios[tag]; + q->cmd_inflight--; + + if (!fetch) { + q->state |= UBLKSRV_QUEUE_STOPPING; + io->flags &= ~UBLKSRV_NEED_FETCH_RQ; + } + + if (cqe->res == UBLK_IO_RES_OK) { + assert(tag < q->q_depth); + if (q->tgt_ops->queue_io) + q->tgt_ops->queue_io(q, tag); + } else { + /* + * COMMIT_REQ will be completed immediately since no fetching + * piggyback is required. + * + * Marking IO_FREE only, then this io won't be issued since + * we only issue io with (UBLKSRV_IO_FREE | UBLKSRV_NEED_*) + * + * */ + io->flags = UBLKSRV_IO_FREE; + } +} + +static int ublk_reap_events_uring(struct io_uring *r) +{ + struct io_uring_cqe *cqe; + unsigned head; + int count = 0; + + io_uring_for_each_cqe(r, head, cqe) { + ublk_handle_cqe(r, cqe, NULL); + count += 1; + } + io_uring_cq_advance(r, count); + + return count; +} + +static int ublk_process_io(struct ublk_queue *q) +{ + int ret, reapped; + + ublk_dbg(UBLK_DBG_QUEUE, "dev%d-q%d: to_submit %d inflight cmd %u stopping %d\n", + q->dev->dev_info.dev_id, + q->q_id, io_uring_sq_ready(&q->ring), + q->cmd_inflight, + (q->state & UBLKSRV_QUEUE_STOPPING)); + + if (ublk_queue_is_done(q)) + return -ENODEV; + + ret = io_uring_submit_and_wait(&q->ring, 1); + reapped = ublk_reap_events_uring(&q->ring); + + ublk_dbg(UBLK_DBG_QUEUE, "submit result %d, reapped %d stop %d idle %d\n", + ret, reapped, (q->state & UBLKSRV_QUEUE_STOPPING), + (q->state & UBLKSRV_QUEUE_IDLE)); + + return reapped; +} + +static void *ublk_io_handler_fn(void *data) +{ + struct ublk_queue *q = data; + int dev_id = q->dev->dev_info.dev_id; + int ret; + + ret = ublk_queue_init(q); + if (ret) { + ublk_err("ublk dev %d queue %d init queue failed\n", + dev_id, q->q_id); + return NULL; + } + ublk_dbg(UBLK_DBG_QUEUE, "tid %d: ublk dev %d queue %d started\n", + q->tid, dev_id, q->q_id); + + /* submit all io commands to ublk driver */ + ublk_submit_fetch_commands(q); + do { + if (ublk_process_io(q) < 0) + break; + } while (1); + + ublk_dbg(UBLK_DBG_QUEUE, "ublk dev %d queue %d exited\n", dev_id, q->q_id); + ublk_queue_deinit(q); + return NULL; +} + +static void ublk_set_parameters(struct ublk_dev *dev) +{ + int ret; + + ret = ublk_ctrl_set_params(dev, &dev->tgt.params); + if (ret) + ublk_err("dev %d set basic parameter failed %d\n", + dev->dev_info.dev_id, ret); +} + +static int ublk_send_dev_event(const struct dev_ctx *ctx, int dev_id) +{ + uint64_t id; + int evtfd = ctx->_evtfd; + + if (evtfd < 0) + return -EBADF; + + if (dev_id >= 0) + id = dev_id + 1; + else + id = ERROR_EVTFD_DEVID; + + if (write(evtfd, &id, sizeof(id)) != sizeof(id)) + return -EINVAL; + + return 0; +} + + +static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev) +{ + int ret, i; + void *thread_ret; + const struct ublksrv_ctrl_dev_info *dinfo = &dev->dev_info; + + ublk_dbg(UBLK_DBG_DEV, "%s enter\n", __func__); + + ret = ublk_dev_prep(ctx, dev); + if (ret) + return ret; + + for (i = 0; i < dinfo->nr_hw_queues; i++) { + dev->q[i].dev = dev; + dev->q[i].q_id = i; + pthread_create(&dev->q[i].thread, NULL, + ublk_io_handler_fn, + &dev->q[i]); + } + + /* everything is fine now, start us */ + ublk_set_parameters(dev); + ret = ublk_ctrl_start_dev(dev, getpid()); + if (ret < 0) { + ublk_err("%s: ublk_ctrl_start_dev failed: %d\n", __func__, ret); + goto fail; + } + + ublk_ctrl_get_info(dev); + if (ctx->fg) + ublk_ctrl_dump(dev); + else + ublk_send_dev_event(ctx, dev->dev_info.dev_id); + + /* wait until we are terminated */ + for (i = 0; i < dinfo->nr_hw_queues; i++) + pthread_join(dev->q[i].thread, &thread_ret); + fail: + ublk_dev_unprep(dev); + ublk_dbg(UBLK_DBG_DEV, "%s exit\n", __func__); + + return ret; +} + +static int wait_ublk_dev(const char *path, int evt_mask, unsigned timeout) +{ +#define EV_SIZE (sizeof(struct inotify_event)) +#define EV_BUF_LEN (128 * (EV_SIZE + 16)) + struct pollfd pfd; + int fd, wd; + int ret = -EINVAL; + const char *dev_name = basename(path); + + fd = inotify_init(); + if (fd < 0) { + ublk_dbg(UBLK_DBG_DEV, "%s: inotify init failed\n", __func__); + return fd; + } + + wd = inotify_add_watch(fd, "/dev", evt_mask); + if (wd == -1) { + ublk_dbg(UBLK_DBG_DEV, "%s: add watch for /dev failed\n", __func__); + goto fail; + } + + pfd.fd = fd; + pfd.events = POLL_IN; + while (1) { + int i = 0; + char buffer[EV_BUF_LEN]; + ret = poll(&pfd, 1, 1000 * timeout); + + if (ret == -1) { + ublk_err("%s: poll inotify failed: %d\n", __func__, ret); + goto rm_watch; + } else if (ret == 0) { + ublk_err("%s: poll inotify timeout\n", __func__); + ret = -ETIMEDOUT; + goto rm_watch; + } + + ret = read(fd, buffer, EV_BUF_LEN); + if (ret < 0) { + ublk_err("%s: read inotify fd failed\n", __func__); + goto rm_watch; + } + + while (i < ret) { + struct inotify_event *event = (struct inotify_event *)&buffer[i]; + + ublk_dbg(UBLK_DBG_DEV, "%s: inotify event %x %s\n", + __func__, event->mask, event->name); + if (event->mask & evt_mask) { + if (!strcmp(event->name, dev_name)) { + ret = 0; + goto rm_watch; + } + } + i += EV_SIZE + event->len; + } + } +rm_watch: + inotify_rm_watch(fd, wd); +fail: + close(fd); + return ret; +} + +static int ublk_stop_io_daemon(const struct ublk_dev *dev) +{ + int daemon_pid = dev->dev_info.ublksrv_pid; + int dev_id = dev->dev_info.dev_id; + char ublkc[64]; + int ret = 0; + + if (daemon_pid < 0) + return 0; + + /* daemon may be dead already */ + if (kill(daemon_pid, 0) < 0) + goto wait; + + snprintf(ublkc, sizeof(ublkc), "/dev/%s%d", "ublkc", dev_id); + + /* ublk char device may be gone already */ + if (access(ublkc, F_OK) != 0) + goto wait; + + /* Wait until ublk char device is closed, when the daemon is shutdown */ + ret = wait_ublk_dev(ublkc, IN_CLOSE, 10); + /* double check and since it may be closed before starting inotify */ + if (ret == -ETIMEDOUT) + ret = kill(daemon_pid, 0) < 0; +wait: + waitpid(daemon_pid, NULL, 0); + ublk_dbg(UBLK_DBG_DEV, "%s: pid %d dev_id %d ret %d\n", + __func__, daemon_pid, dev_id, ret); + + return ret; +} + +static int __cmd_dev_add(const struct dev_ctx *ctx) +{ + unsigned nr_queues = ctx->nr_hw_queues; + const char *tgt_type = ctx->tgt_type; + unsigned depth = ctx->queue_depth; + __u64 features; + const struct ublk_tgt_ops *ops; + struct ublksrv_ctrl_dev_info *info; + struct ublk_dev *dev; + int dev_id = ctx->dev_id; + int ret, i; + + ops = ublk_find_tgt(tgt_type); + if (!ops) { + ublk_err("%s: no such tgt type, type %s\n", + __func__, tgt_type); + return -ENODEV; + } + + if (nr_queues > UBLK_MAX_QUEUES || depth > UBLK_QUEUE_DEPTH) { + ublk_err("%s: invalid nr_queues or depth queues %u depth %u\n", + __func__, nr_queues, depth); + return -EINVAL; + } + + dev = ublk_ctrl_init(); + if (!dev) { + ublk_err("%s: can't alloc dev id %d, type %s\n", + __func__, dev_id, tgt_type); + return -ENOMEM; + } + + /* kernel doesn't support get_features */ + ret = ublk_ctrl_get_features(dev, &features); + if (ret < 0) + return -EINVAL; + + if (!(features & UBLK_F_CMD_IOCTL_ENCODE)) + return -ENOTSUP; + + info = &dev->dev_info; + info->dev_id = ctx->dev_id; + info->nr_hw_queues = nr_queues; + info->queue_depth = depth; + info->flags = ctx->flags; + dev->tgt.ops = ops; + dev->tgt.sq_depth = depth; + dev->tgt.cq_depth = depth; + + for (i = 0; i < MAX_BACK_FILES; i++) { + if (ctx->files[i]) { + strcpy(dev->tgt.backing_file[i], ctx->files[i]); + dev->tgt.nr_backing_files++; + } + } + + ret = ublk_ctrl_add_dev(dev); + if (ret < 0) { + ublk_err("%s: can't add dev id %d, type %s ret %d\n", + __func__, dev_id, tgt_type, ret); + goto fail; + } + + ret = ublk_start_daemon(ctx, dev); + ublk_dbg(UBLK_DBG_DEV, "%s: daemon exit %d\b", ret); + if (ret < 0) + ublk_ctrl_del_dev(dev); + +fail: + if (ret < 0) + ublk_send_dev_event(ctx, -1); + ublk_ctrl_deinit(dev); + return ret; +} + +static int __cmd_dev_list(struct dev_ctx *ctx); + +static int cmd_dev_add(struct dev_ctx *ctx) +{ + int res; + + if (ctx->fg) + goto run; + + ctx->_evtfd = eventfd(0, 0); + if (ctx->_evtfd < 0) { + ublk_err("%s: failed to create eventfd %s\n", __func__, strerror(errno)); + exit(-1); + } + + setsid(); + res = fork(); + if (res == 0) { +run: + res = __cmd_dev_add(ctx); + return res; + } else if (res > 0) { + uint64_t id; + + res = read(ctx->_evtfd, &id, sizeof(id)); + close(ctx->_evtfd); + if (res == sizeof(id) && id != ERROR_EVTFD_DEVID) { + ctx->dev_id = id - 1; + return __cmd_dev_list(ctx); + } + exit(EXIT_FAILURE); + } else { + return res; + } +} + +static int __cmd_dev_del(struct dev_ctx *ctx) +{ + int number = ctx->dev_id; + struct ublk_dev *dev; + int ret; + + dev = ublk_ctrl_init(); + dev->dev_info.dev_id = number; + + ret = ublk_ctrl_get_info(dev); + if (ret < 0) + goto fail; + + ret = ublk_ctrl_stop_dev(dev); + if (ret < 0) + ublk_err("%s: stop dev %d failed ret %d\n", __func__, number, ret); + + ret = ublk_stop_io_daemon(dev); + if (ret < 0) + ublk_err("%s: stop daemon id %d dev %d, ret %d\n", + __func__, dev->dev_info.ublksrv_pid, number, ret); + ublk_ctrl_del_dev(dev); +fail: + ublk_ctrl_deinit(dev); + + return (ret >= 0) ? 0 : ret; +} + +static int cmd_dev_del(struct dev_ctx *ctx) +{ + int i; + + if (ctx->dev_id >= 0 || !ctx->all) + return __cmd_dev_del(ctx); + + for (i = 0; i < 255; i++) { + ctx->dev_id = i; + __cmd_dev_del(ctx); + } + return 0; +} + +static int __cmd_dev_list(struct dev_ctx *ctx) +{ + struct ublk_dev *dev = ublk_ctrl_init(); + int ret; + + if (!dev) + return -ENODEV; + + dev->dev_info.dev_id = ctx->dev_id; + + ret = ublk_ctrl_get_info(dev); + if (ret < 0) { + if (ctx->logging) + ublk_err("%s: can't get dev info from %d: %d\n", + __func__, ctx->dev_id, ret); + } else { + ublk_ctrl_dump(dev); + } + + ublk_ctrl_deinit(dev); + + return ret; +} + +static int cmd_dev_list(struct dev_ctx *ctx) +{ + int i; + + if (ctx->dev_id >= 0 || !ctx->all) + return __cmd_dev_list(ctx); + + ctx->logging = false; + for (i = 0; i < 255; i++) { + ctx->dev_id = i; + __cmd_dev_list(ctx); + } + return 0; +} + +static int cmd_dev_get_features(void) +{ +#define const_ilog2(x) (63 - __builtin_clzll(x)) + static const char *feat_map[] = { + [const_ilog2(UBLK_F_SUPPORT_ZERO_COPY)] = "ZERO_COPY", + [const_ilog2(UBLK_F_URING_CMD_COMP_IN_TASK)] = "COMP_IN_TASK", + [const_ilog2(UBLK_F_NEED_GET_DATA)] = "GET_DATA", + [const_ilog2(UBLK_F_USER_RECOVERY)] = "USER_RECOVERY", + [const_ilog2(UBLK_F_USER_RECOVERY_REISSUE)] = "RECOVERY_REISSUE", + [const_ilog2(UBLK_F_UNPRIVILEGED_DEV)] = "UNPRIVILEGED_DEV", + [const_ilog2(UBLK_F_CMD_IOCTL_ENCODE)] = "CMD_IOCTL_ENCODE", + [const_ilog2(UBLK_F_USER_COPY)] = "USER_COPY", + [const_ilog2(UBLK_F_ZONED)] = "ZONED", + [const_ilog2(UBLK_F_USER_RECOVERY_FAIL_IO)] = "RECOVERY_FAIL_IO", + }; + struct ublk_dev *dev; + __u64 features = 0; + int ret; + + dev = ublk_ctrl_init(); + if (!dev) { + fprintf(stderr, "ublksrv_ctrl_init failed id\n"); + return -EOPNOTSUPP; + } + + ret = ublk_ctrl_get_features(dev, &features); + if (!ret) { + int i; + + printf("ublk_drv features: 0x%llx\n", features); + + for (i = 0; i < sizeof(features) * 8; i++) { + const char *feat; + + if (!((1ULL << i) & features)) + continue; + if (i < sizeof(feat_map) / sizeof(feat_map[0])) + feat = feat_map[i]; + else + feat = "unknown"; + printf("\t%-20s: 0x%llx\n", feat, 1ULL << i); + } + } + + return ret; +} + +static int cmd_dev_help(char *exe) +{ + printf("%s add -t [null|loop] [-q nr_queues] [-d depth] [-n dev_id] [backfile1] [backfile2] ...\n", exe); + printf("\t default: nr_queues=2(max 4), depth=128(max 128), dev_id=-1(auto allocation)\n"); + printf("%s del [-n dev_id] -a \n", exe); + printf("\t -a delete all devices -n delete specified device\n"); + printf("%s list [-n dev_id] -a \n", exe); + printf("\t -a list all devices, -n list specified device, default -a \n"); + printf("%s features\n", exe); + return 0; +} + +int main(int argc, char *argv[]) +{ + static const struct option longopts[] = { + { "all", 0, NULL, 'a' }, + { "type", 1, NULL, 't' }, + { "number", 1, NULL, 'n' }, + { "queues", 1, NULL, 'q' }, + { "depth", 1, NULL, 'd' }, + { "debug_mask", 1, NULL, 0 }, + { "quiet", 0, NULL, 0 }, + { "zero_copy", 0, NULL, 'z' }, + { "foreground", 0, NULL, 0 }, + { "chunk_size", 1, NULL, 0 }, + { 0, 0, 0, 0 } + }; + int option_idx, opt; + const char *cmd = argv[1]; + struct dev_ctx ctx = { + .queue_depth = 128, + .nr_hw_queues = 2, + .dev_id = -1, + .tgt_type = "unknown", + .chunk_size = 65536, /* def chunk size is 64K */ + }; + int ret = -EINVAL, i; + + if (argc == 1) + return ret; + + optind = 2; + while ((opt = getopt_long(argc, argv, "t:n:d:q:az", + longopts, &option_idx)) != -1) { + switch (opt) { + case 'a': + ctx.all = 1; + break; + case 'n': + ctx.dev_id = strtol(optarg, NULL, 10); + break; + case 't': + if (strlen(optarg) < sizeof(ctx.tgt_type)) + strcpy(ctx.tgt_type, optarg); + break; + case 'q': + ctx.nr_hw_queues = strtol(optarg, NULL, 10); + break; + case 'd': + ctx.queue_depth = strtol(optarg, NULL, 10); + break; + case 'z': + ctx.flags |= UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_USER_COPY; + break; + case 0: + if (!strcmp(longopts[option_idx].name, "debug_mask")) + ublk_dbg_mask = strtol(optarg, NULL, 16); + if (!strcmp(longopts[option_idx].name, "quiet")) + ublk_dbg_mask = 0; + if (!strcmp(longopts[option_idx].name, "foreground")) + ctx.fg = 1; + if (!strcmp(longopts[option_idx].name, "chunk_size")) + ctx.chunk_size = strtol(optarg, NULL, 10); + } + } + + i = optind; + while (i < argc && ctx.nr_files < MAX_BACK_FILES) { + ctx.files[ctx.nr_files++] = argv[i++]; + } + + if (!strcmp(cmd, "add")) + ret = cmd_dev_add(&ctx); + else if (!strcmp(cmd, "del")) + ret = cmd_dev_del(&ctx); + else if (!strcmp(cmd, "list")) { + ctx.all = 1; + ret = cmd_dev_list(&ctx); + } else if (!strcmp(cmd, "help")) + ret = cmd_dev_help(argv[0]); + else if (!strcmp(cmd, "features")) + ret = cmd_dev_get_features(); + else + cmd_dev_help(argv[0]); + + return ret; +} diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h new file mode 100644 index 000000000000..f31a5c4d4143 --- /dev/null +++ b/tools/testing/selftests/ublk/kublk.h @@ -0,0 +1,370 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef KUBLK_INTERNAL_H +#define KUBLK_INTERNAL_H + +#include <unistd.h> +#include <stdlib.h> +#include <assert.h> +#include <stdio.h> +#include <stdarg.h> +#include <string.h> +#include <pthread.h> +#include <getopt.h> +#include <limits.h> +#include <poll.h> +#include <fcntl.h> +#include <sys/syscall.h> +#include <sys/mman.h> +#include <sys/ioctl.h> +#include <sys/inotify.h> +#include <sys/wait.h> +#include <sys/eventfd.h> +#include <sys/uio.h> +#include <liburing.h> +#include <linux/ublk_cmd.h> +#include "ublk_dep.h" + +#define __maybe_unused __attribute__((unused)) +#define MAX_BACK_FILES 4 +#ifndef min +#define min(a, b) ((a) < (b) ? (a) : (b)) +#endif + +/****************** part 1: libublk ********************/ + +#define CTRL_DEV "/dev/ublk-control" +#define UBLKC_DEV "/dev/ublkc" +#define UBLKB_DEV "/dev/ublkb" +#define UBLK_CTRL_RING_DEPTH 32 +#define ERROR_EVTFD_DEVID -2 + +/* queue idle timeout */ +#define UBLKSRV_IO_IDLE_SECS 20 + +#define UBLK_IO_MAX_BYTES (1 << 20) +#define UBLK_MAX_QUEUES 4 +#define UBLK_QUEUE_DEPTH 128 + +#define UBLK_DBG_DEV (1U << 0) +#define UBLK_DBG_QUEUE (1U << 1) +#define UBLK_DBG_IO_CMD (1U << 2) +#define UBLK_DBG_IO (1U << 3) +#define UBLK_DBG_CTRL_CMD (1U << 4) +#define UBLK_LOG (1U << 5) + +struct ublk_dev; +struct ublk_queue; + +struct dev_ctx { + char tgt_type[16]; + unsigned long flags; + unsigned nr_hw_queues; + unsigned queue_depth; + int dev_id; + int nr_files; + char *files[MAX_BACK_FILES]; + unsigned int logging:1; + unsigned int all:1; + unsigned int fg:1; + + /* stripe */ + unsigned int chunk_size; + + int _evtfd; +}; + +struct ublk_ctrl_cmd_data { + __u32 cmd_op; +#define CTRL_CMD_HAS_DATA 1 +#define CTRL_CMD_HAS_BUF 2 + __u32 flags; + + __u64 data[2]; + __u64 addr; + __u32 len; +}; + +struct ublk_io { + char *buf_addr; + +#define UBLKSRV_NEED_FETCH_RQ (1UL << 0) +#define UBLKSRV_NEED_COMMIT_RQ_COMP (1UL << 1) +#define UBLKSRV_IO_FREE (1UL << 2) + unsigned short flags; + unsigned short refs; /* used by target code only */ + + int result; + + unsigned short tgt_ios; + void *private_data; +}; + +struct ublk_tgt_ops { + const char *name; + int (*init_tgt)(const struct dev_ctx *ctx, struct ublk_dev *); + void (*deinit_tgt)(struct ublk_dev *); + + int (*queue_io)(struct ublk_queue *, int tag); + void (*tgt_io_done)(struct ublk_queue *, + int tag, const struct io_uring_cqe *); +}; + +struct ublk_tgt { + unsigned long dev_size; + unsigned int sq_depth; + unsigned int cq_depth; + const struct ublk_tgt_ops *ops; + struct ublk_params params; + + int nr_backing_files; + unsigned long backing_file_size[MAX_BACK_FILES]; + char backing_file[MAX_BACK_FILES][PATH_MAX]; +}; + +struct ublk_queue { + int q_id; + int q_depth; + unsigned int cmd_inflight; + unsigned int io_inflight; + struct ublk_dev *dev; + const struct ublk_tgt_ops *tgt_ops; + char *io_cmd_buf; + struct io_uring ring; + struct ublk_io ios[UBLK_QUEUE_DEPTH]; +#define UBLKSRV_QUEUE_STOPPING (1U << 0) +#define UBLKSRV_QUEUE_IDLE (1U << 1) +#define UBLKSRV_NO_BUF (1U << 2) +#define UBLKSRV_ZC (1U << 3) + unsigned state; + pid_t tid; + pthread_t thread; +}; + +struct ublk_dev { + struct ublk_tgt tgt; + struct ublksrv_ctrl_dev_info dev_info; + struct ublk_queue q[UBLK_MAX_QUEUES]; + + int fds[MAX_BACK_FILES + 1]; /* fds[0] points to /dev/ublkcN */ + int nr_fds; + int ctrl_fd; + struct io_uring ring; + + void *private_data; +}; + +#ifndef offsetof +#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER) +#endif + +#ifndef container_of +#define container_of(ptr, type, member) ({ \ + unsigned long __mptr = (unsigned long)(ptr); \ + ((type *)(__mptr - offsetof(type, member))); }) +#endif + +#define round_up(val, rnd) \ + (((val) + ((rnd) - 1)) & ~((rnd) - 1)) + + +extern unsigned int ublk_dbg_mask; +extern int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag); + +static inline int is_target_io(__u64 user_data) +{ + return (user_data & (1ULL << 63)) != 0; +} + +static inline __u64 build_user_data(unsigned tag, unsigned op, + unsigned tgt_data, unsigned is_target_io) +{ + assert(!(tag >> 16) && !(op >> 8) && !(tgt_data >> 16)); + + return tag | (op << 16) | (tgt_data << 24) | (__u64)is_target_io << 63; +} + +static inline unsigned int user_data_to_tag(__u64 user_data) +{ + return user_data & 0xffff; +} + +static inline unsigned int user_data_to_op(__u64 user_data) +{ + return (user_data >> 16) & 0xff; +} + +static inline unsigned int user_data_to_tgt_data(__u64 user_data) +{ + return (user_data >> 24) & 0xffff; +} + +static inline unsigned short ublk_cmd_op_nr(unsigned int op) +{ + return _IOC_NR(op); +} + +static inline void ublk_err(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); +} + +static inline void ublk_log(const char *fmt, ...) +{ + if (ublk_dbg_mask & UBLK_LOG) { + va_list ap; + + va_start(ap, fmt); + vfprintf(stdout, fmt, ap); + } +} + +static inline void ublk_dbg(int level, const char *fmt, ...) +{ + if (level & ublk_dbg_mask) { + va_list ap; + + va_start(ap, fmt); + vfprintf(stdout, fmt, ap); + } +} + +static inline int ublk_queue_alloc_sqes(struct ublk_queue *q, + struct io_uring_sqe *sqes[], int nr_sqes) +{ + unsigned left = io_uring_sq_space_left(&q->ring); + int i; + + if (left < nr_sqes) + io_uring_submit(&q->ring); + + for (i = 0; i < nr_sqes; i++) { + sqes[i] = io_uring_get_sqe(&q->ring); + if (!sqes[i]) + return i; + } + + return nr_sqes; +} + +static inline void io_uring_prep_buf_register(struct io_uring_sqe *sqe, + int dev_fd, int tag, int q_id, __u64 index) +{ + struct ublksrv_io_cmd *cmd = (struct ublksrv_io_cmd *)sqe->cmd; + + io_uring_prep_read(sqe, dev_fd, 0, 0, 0); + sqe->opcode = IORING_OP_URING_CMD; + sqe->flags |= IOSQE_FIXED_FILE; + sqe->cmd_op = UBLK_U_IO_REGISTER_IO_BUF; + + cmd->tag = tag; + cmd->addr = index; + cmd->q_id = q_id; +} + +static inline void io_uring_prep_buf_unregister(struct io_uring_sqe *sqe, + int dev_fd, int tag, int q_id, __u64 index) +{ + struct ublksrv_io_cmd *cmd = (struct ublksrv_io_cmd *)sqe->cmd; + + io_uring_prep_read(sqe, dev_fd, 0, 0, 0); + sqe->opcode = IORING_OP_URING_CMD; + sqe->flags |= IOSQE_FIXED_FILE; + sqe->cmd_op = UBLK_U_IO_UNREGISTER_IO_BUF; + + cmd->tag = tag; + cmd->addr = index; + cmd->q_id = q_id; +} + +static inline void *ublk_get_sqe_cmd(const struct io_uring_sqe *sqe) +{ + return (void *)&sqe->cmd; +} + +static inline void ublk_set_io_res(struct ublk_queue *q, int tag, int res) +{ + q->ios[tag].result = res; +} + +static inline int ublk_get_io_res(const struct ublk_queue *q, unsigned tag) +{ + return q->ios[tag].result; +} + +static inline void ublk_mark_io_done(struct ublk_io *io, int res) +{ + io->flags |= (UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_IO_FREE); + io->result = res; +} + +static inline const struct ublksrv_io_desc *ublk_get_iod(const struct ublk_queue *q, int tag) +{ + return (struct ublksrv_io_desc *)&(q->io_cmd_buf[tag * sizeof(struct ublksrv_io_desc)]); +} + +static inline void ublk_set_sqe_cmd_op(struct io_uring_sqe *sqe, __u32 cmd_op) +{ + __u32 *addr = (__u32 *)&sqe->off; + + addr[0] = cmd_op; + addr[1] = 0; +} + +static inline struct ublk_io *ublk_get_io(struct ublk_queue *q, unsigned tag) +{ + return &q->ios[tag]; +} + +static inline int ublk_complete_io(struct ublk_queue *q, unsigned tag, int res) +{ + struct ublk_io *io = &q->ios[tag]; + + ublk_mark_io_done(io, res); + + return ublk_queue_io_cmd(q, io, tag); +} + +static inline void ublk_queued_tgt_io(struct ublk_queue *q, unsigned tag, int queued) +{ + if (queued < 0) + ublk_complete_io(q, tag, queued); + else { + struct ublk_io *io = ublk_get_io(q, tag); + + q->io_inflight += queued; + io->tgt_ios = queued; + io->result = 0; + } +} + +static inline int ublk_completed_tgt_io(struct ublk_queue *q, unsigned tag) +{ + struct ublk_io *io = ublk_get_io(q, tag); + + q->io_inflight--; + + return --io->tgt_ios == 0; +} + +static inline int ublk_queue_use_zc(const struct ublk_queue *q) +{ + return q->state & UBLKSRV_ZC; +} + +extern const struct ublk_tgt_ops null_tgt_ops; +extern const struct ublk_tgt_ops loop_tgt_ops; +extern const struct ublk_tgt_ops stripe_tgt_ops; + +void backing_file_tgt_deinit(struct ublk_dev *dev); +int backing_file_tgt_init(struct ublk_dev *dev); + +static inline unsigned int ilog2(unsigned int x) +{ + if (x == 0) + return 0; + return (sizeof(x) * 8 - 1) - __builtin_clz(x); +} +#endif diff --git a/tools/testing/selftests/ublk/null.c b/tools/testing/selftests/ublk/null.c new file mode 100644 index 000000000000..899875ff50fe --- /dev/null +++ b/tools/testing/selftests/ublk/null.c @@ -0,0 +1,106 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include "kublk.h" + +#ifndef IORING_NOP_INJECT_RESULT +#define IORING_NOP_INJECT_RESULT (1U << 0) +#endif + +#ifndef IORING_NOP_FIXED_BUFFER +#define IORING_NOP_FIXED_BUFFER (1U << 3) +#endif + +static int ublk_null_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev) +{ + const struct ublksrv_ctrl_dev_info *info = &dev->dev_info; + unsigned long dev_size = 250UL << 30; + + dev->tgt.dev_size = dev_size; + dev->tgt.params = (struct ublk_params) { + .types = UBLK_PARAM_TYPE_BASIC, + .basic = { + .logical_bs_shift = 9, + .physical_bs_shift = 12, + .io_opt_shift = 12, + .io_min_shift = 9, + .max_sectors = info->max_io_buf_bytes >> 9, + .dev_sectors = dev_size >> 9, + }, + }; + + if (info->flags & UBLK_F_SUPPORT_ZERO_COPY) + dev->tgt.sq_depth = dev->tgt.cq_depth = 2 * info->queue_depth; + return 0; +} + +static int null_queue_zc_io(struct ublk_queue *q, int tag) +{ + const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); + unsigned ublk_op = ublksrv_get_op(iod); + struct io_uring_sqe *sqe[3]; + + ublk_queue_alloc_sqes(q, sqe, 3); + + io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, tag); + sqe[0]->user_data = build_user_data(tag, + ublk_cmd_op_nr(sqe[0]->cmd_op), 0, 1); + sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK; + + io_uring_prep_nop(sqe[1]); + sqe[1]->buf_index = tag; + sqe[1]->flags |= IOSQE_FIXED_FILE | IOSQE_IO_HARDLINK; + sqe[1]->rw_flags = IORING_NOP_FIXED_BUFFER | IORING_NOP_INJECT_RESULT; + sqe[1]->len = iod->nr_sectors << 9; /* injected result */ + sqe[1]->user_data = build_user_data(tag, ublk_op, 0, 1); + + io_uring_prep_buf_unregister(sqe[2], 0, tag, q->q_id, tag); + sqe[2]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[2]->cmd_op), 0, 1); + + // buf register is marked as IOSQE_CQE_SKIP_SUCCESS + return 2; +} + +static void ublk_null_io_done(struct ublk_queue *q, int tag, + const struct io_uring_cqe *cqe) +{ + unsigned op = user_data_to_op(cqe->user_data); + struct ublk_io *io = ublk_get_io(q, tag); + + if (cqe->res < 0 || op != ublk_cmd_op_nr(UBLK_U_IO_UNREGISTER_IO_BUF)) { + if (!io->result) + io->result = cqe->res; + if (cqe->res < 0) + ublk_err("%s: io failed op %x user_data %lx\n", + __func__, op, cqe->user_data); + } + + /* buffer register op is IOSQE_CQE_SKIP_SUCCESS */ + if (op == ublk_cmd_op_nr(UBLK_U_IO_REGISTER_IO_BUF)) + io->tgt_ios += 1; + + if (ublk_completed_tgt_io(q, tag)) + ublk_complete_io(q, tag, io->result); +} + +static int ublk_null_queue_io(struct ublk_queue *q, int tag) +{ + const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); + int zc = ublk_queue_use_zc(q); + int queued; + + if (!zc) { + ublk_complete_io(q, tag, iod->nr_sectors << 9); + return 0; + } + + queued = null_queue_zc_io(q, tag); + ublk_queued_tgt_io(q, tag, queued); + return 0; +} + +const struct ublk_tgt_ops null_tgt_ops = { + .name = "null", + .init_tgt = ublk_null_tgt_init, + .queue_io = ublk_null_queue_io, + .tgt_io_done = ublk_null_io_done, +}; diff --git a/tools/testing/selftests/ublk/stripe.c b/tools/testing/selftests/ublk/stripe.c new file mode 100644 index 000000000000..98c564b12f3c --- /dev/null +++ b/tools/testing/selftests/ublk/stripe.c @@ -0,0 +1,318 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "kublk.h" + +#define NR_STRIPE MAX_BACK_FILES + +struct stripe_conf { + unsigned nr_files; + unsigned shift; +}; + +struct stripe { + loff_t start; + unsigned nr_sects; + int seq; + + struct iovec *vec; + unsigned nr_vec; + unsigned cap; +}; + +struct stripe_array { + struct stripe s[NR_STRIPE]; + unsigned nr; + struct iovec _vec[]; +}; + +static inline const struct stripe_conf *get_chunk_shift(const struct ublk_queue *q) +{ + return (struct stripe_conf *)q->dev->private_data; +} + +static inline unsigned calculate_nr_vec(const struct stripe_conf *conf, + const struct ublksrv_io_desc *iod) +{ + const unsigned shift = conf->shift - 9; + const unsigned unit_sects = conf->nr_files << shift; + loff_t start = iod->start_sector; + loff_t end = start + iod->nr_sectors; + + return (end / unit_sects) - (start / unit_sects) + 1; +} + +static struct stripe_array *alloc_stripe_array(const struct stripe_conf *conf, + const struct ublksrv_io_desc *iod) +{ + unsigned nr_vecs = calculate_nr_vec(conf, iod); + unsigned total = nr_vecs * conf->nr_files; + struct stripe_array *s; + int i; + + s = malloc(sizeof(*s) + total * sizeof(struct iovec)); + + s->nr = 0; + for (i = 0; i < conf->nr_files; i++) { + struct stripe *t = &s->s[i]; + + t->nr_vec = 0; + t->vec = &s->_vec[i * nr_vecs]; + t->nr_sects = 0; + t->cap = nr_vecs; + } + + return s; +} + +static void free_stripe_array(struct stripe_array *s) +{ + free(s); +} + +static void calculate_stripe_array(const struct stripe_conf *conf, + const struct ublksrv_io_desc *iod, struct stripe_array *s) +{ + const unsigned shift = conf->shift - 9; + const unsigned chunk_sects = 1 << shift; + const unsigned unit_sects = conf->nr_files << shift; + off64_t start = iod->start_sector; + off64_t end = start + iod->nr_sectors; + unsigned long done = 0; + unsigned idx = 0; + + while (start < end) { + unsigned nr_sects = chunk_sects - (start & (chunk_sects - 1)); + loff_t unit_off = (start / unit_sects) * unit_sects; + unsigned seq = (start - unit_off) >> shift; + struct stripe *this = &s->s[idx]; + loff_t stripe_off = (unit_off / conf->nr_files) + + (start & (chunk_sects - 1)); + + if (nr_sects > end - start) + nr_sects = end - start; + if (this->nr_sects == 0) { + this->nr_sects = nr_sects; + this->start = stripe_off; + this->seq = seq; + s->nr += 1; + } else { + assert(seq == this->seq); + assert(this->start + this->nr_sects == stripe_off); + this->nr_sects += nr_sects; + } + + assert(this->nr_vec < this->cap); + this->vec[this->nr_vec].iov_base = (void *)(iod->addr + done); + this->vec[this->nr_vec++].iov_len = nr_sects << 9; + + start += nr_sects; + done += nr_sects << 9; + idx = (idx + 1) % conf->nr_files; + } +} + +static inline enum io_uring_op stripe_to_uring_op(const struct ublksrv_io_desc *iod) +{ + unsigned ublk_op = ublksrv_get_op(iod); + + if (ublk_op == UBLK_IO_OP_READ) + return IORING_OP_READV; + else if (ublk_op == UBLK_IO_OP_WRITE) + return IORING_OP_WRITEV; + assert(0); +} + +static int stripe_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag) +{ + const struct stripe_conf *conf = get_chunk_shift(q); + enum io_uring_op op = stripe_to_uring_op(iod); + struct io_uring_sqe *sqe[NR_STRIPE]; + struct stripe_array *s = alloc_stripe_array(conf, iod); + struct ublk_io *io = ublk_get_io(q, tag); + int i; + + io->private_data = s; + calculate_stripe_array(conf, iod, s); + + ublk_queue_alloc_sqes(q, sqe, s->nr); + for (i = 0; i < s->nr; i++) { + struct stripe *t = &s->s[i]; + + io_uring_prep_rw(op, sqe[i], + t->seq + 1, + (void *)t->vec, + t->nr_vec, + t->start << 9); + io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE); + /* bit63 marks us as tgt io */ + sqe[i]->user_data = build_user_data(tag, ublksrv_get_op(iod), i, 1); + } + return s->nr; +} + +static int handle_flush(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag) +{ + const struct stripe_conf *conf = get_chunk_shift(q); + struct io_uring_sqe *sqe[NR_STRIPE]; + int i; + + ublk_queue_alloc_sqes(q, sqe, conf->nr_files); + for (i = 0; i < conf->nr_files; i++) { + io_uring_prep_fsync(sqe[i], i + 1, IORING_FSYNC_DATASYNC); + io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE); + sqe[i]->user_data = build_user_data(tag, UBLK_IO_OP_FLUSH, 0, 1); + } + return conf->nr_files; +} + +static int stripe_queue_tgt_io(struct ublk_queue *q, int tag) +{ + const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); + unsigned ublk_op = ublksrv_get_op(iod); + int ret = 0; + + switch (ublk_op) { + case UBLK_IO_OP_FLUSH: + ret = handle_flush(q, iod, tag); + break; + case UBLK_IO_OP_WRITE_ZEROES: + case UBLK_IO_OP_DISCARD: + ret = -ENOTSUP; + break; + case UBLK_IO_OP_READ: + case UBLK_IO_OP_WRITE: + ret = stripe_queue_tgt_rw_io(q, iod, tag); + break; + default: + ret = -EINVAL; + break; + } + ublk_dbg(UBLK_DBG_IO, "%s: tag %d ublk io %x %llx %u ret %d\n", __func__, tag, + iod->op_flags, iod->start_sector, iod->nr_sectors << 9, ret); + return ret; +} + +static int ublk_stripe_queue_io(struct ublk_queue *q, int tag) +{ + int queued = stripe_queue_tgt_io(q, tag); + + ublk_queued_tgt_io(q, tag, queued); + return 0; +} + +static void ublk_stripe_io_done(struct ublk_queue *q, int tag, + const struct io_uring_cqe *cqe) +{ + const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); + unsigned op = user_data_to_op(cqe->user_data); + struct ublk_io *io = ublk_get_io(q, tag); + int res = cqe->res; + + if (res < 0) { + if (!io->result) + io->result = res; + ublk_err("%s: io failure %d tag %u\n", __func__, res, tag); + } + + /* fail short READ/WRITE simply */ + if (op == UBLK_IO_OP_READ || op == UBLK_IO_OP_WRITE) { + unsigned seq = user_data_to_tgt_data(cqe->user_data); + struct stripe_array *s = io->private_data; + + if (res < s->s[seq].vec->iov_len) + io->result = -EIO; + } + + if (ublk_completed_tgt_io(q, tag)) { + int res = io->result; + + if (!res) + res = iod->nr_sectors << 9; + + ublk_complete_io(q, tag, res); + + free_stripe_array(io->private_data); + io->private_data = NULL; + } +} + +static int ublk_stripe_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev) +{ + struct ublk_params p = { + .types = UBLK_PARAM_TYPE_BASIC, + .basic = { + .attrs = UBLK_ATTR_VOLATILE_CACHE, + .logical_bs_shift = 9, + .physical_bs_shift = 12, + .io_opt_shift = 12, + .io_min_shift = 9, + .max_sectors = dev->dev_info.max_io_buf_bytes >> 9, + }, + }; + unsigned chunk_size = ctx->chunk_size; + struct stripe_conf *conf; + unsigned chunk_shift; + loff_t bytes = 0; + int ret, i; + + if ((chunk_size & (chunk_size - 1)) || !chunk_size) { + ublk_err("invalid chunk size %u\n", chunk_size); + return -EINVAL; + } + + if (chunk_size < 4096 || chunk_size > 512 * 1024) { + ublk_err("invalid chunk size %u\n", chunk_size); + return -EINVAL; + } + + chunk_shift = ilog2(chunk_size); + + ret = backing_file_tgt_init(dev); + if (ret) + return ret; + + if (!dev->tgt.nr_backing_files || dev->tgt.nr_backing_files > NR_STRIPE) + return -EINVAL; + + assert(dev->nr_fds == dev->tgt.nr_backing_files + 1); + + for (i = 0; i < dev->tgt.nr_backing_files; i++) + dev->tgt.backing_file_size[i] &= ~((1 << chunk_shift) - 1); + + for (i = 0; i < dev->tgt.nr_backing_files; i++) { + unsigned long size = dev->tgt.backing_file_size[i]; + + if (size != dev->tgt.backing_file_size[0]) + return -EINVAL; + bytes += size; + } + + conf = malloc(sizeof(*conf)); + conf->shift = chunk_shift; + conf->nr_files = dev->tgt.nr_backing_files; + + dev->private_data = conf; + dev->tgt.dev_size = bytes; + p.basic.dev_sectors = bytes >> 9; + dev->tgt.params = p; + dev->tgt.sq_depth = dev->dev_info.queue_depth * conf->nr_files; + dev->tgt.cq_depth = dev->dev_info.queue_depth * conf->nr_files; + + printf("%s: shift %u files %u\n", __func__, conf->shift, conf->nr_files); + + return 0; +} + +static void ublk_stripe_tgt_deinit(struct ublk_dev *dev) +{ + free(dev->private_data); + backing_file_tgt_deinit(dev); +} + +const struct ublk_tgt_ops stripe_tgt_ops = { + .name = "stripe", + .init_tgt = ublk_stripe_tgt_init, + .deinit_tgt = ublk_stripe_tgt_deinit, + .queue_io = ublk_stripe_queue_io, + .tgt_io_done = ublk_stripe_io_done, +}; diff --git a/tools/testing/selftests/ublk/test_common.sh b/tools/testing/selftests/ublk/test_common.sh new file mode 100755 index 000000000000..75f54ac6b1c4 --- /dev/null +++ b/tools/testing/selftests/ublk/test_common.sh @@ -0,0 +1,246 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +UBLK_SKIP_CODE=4 + +_have_program() { + if command -v "$1" >/dev/null 2>&1; then + return 0 + fi + return 1 +} + +_get_disk_dev_t() { + local dev_id=$1 + local dev + local major + local minor + + dev=/dev/ublkb"${dev_id}" + major=$(stat -c '%Hr' "$dev") + minor=$(stat -c '%Lr' "$dev") + + echo $(( (major & 0xfff) << 20 | (minor & 0xfffff) )) +} + +_create_backfile() { + local my_size=$1 + local my_file + + my_file=$(mktemp ublk_file_"${my_size}"_XXXXX) + truncate -s "${my_size}" "${my_file}" + echo "$my_file" +} + +_remove_backfile() { + local file=$1 + + [ -f "$file" ] && rm -f "$file" +} + +_create_tmp_dir() { + local my_file; + + my_file=$(mktemp -d ublk_dir_XXXXX) + echo "$my_file" +} + +_remove_tmp_dir() { + local dir=$1 + + [ -d "$dir" ] && rmdir "$dir" +} + +_mkfs_mount_test() +{ + local dev=$1 + local err_code=0 + local mnt_dir; + + mnt_dir=$(_create_tmp_dir) + mkfs.ext4 -F "$dev" > /dev/null 2>&1 + err_code=$? + if [ $err_code -ne 0 ]; then + return $err_code + fi + + mount -t ext4 "$dev" "$mnt_dir" > /dev/null 2>&1 + umount "$dev" + err_code=$? + _remove_tmp_dir "$mnt_dir" + if [ $err_code -ne 0 ]; then + return $err_code + fi +} + +_check_root() { + local ksft_skip=4 + + if [ $UID != 0 ]; then + echo please run this as root >&2 + exit $ksft_skip + fi +} + +_remove_ublk_devices() { + ${UBLK_PROG} del -a + modprobe -r ublk_drv > /dev/null 2>&1 +} + +_get_ublk_dev_state() { + ${UBLK_PROG} list -n "$1" | grep "state" | awk '{print $11}' +} + +_get_ublk_daemon_pid() { + ${UBLK_PROG} list -n "$1" | grep "pid" | awk '{print $7}' +} + +_prep_test() { + _check_root + local type=$1 + shift 1 + modprobe ublk_drv > /dev/null 2>&1 + [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "ublk $type: $*" +} + +_remove_test_files() +{ + local files=$* + + for file in ${files}; do + [ -f "${file}" ] && rm -f "${file}" + done +} + +_show_result() +{ + if [ "$UBLK_TEST_SHOW_RESULT" -ne 0 ]; then + if [ "$2" -eq 0 ]; then + echo "$1 : [PASS]" + elif [ "$2" -eq 4 ]; then + echo "$1 : [SKIP]" + else + echo "$1 : [FAIL]" + fi + fi + [ "$2" -ne 0 ] && exit "$2" + return 0 +} + +# don't call from sub-shell, otherwise can't exit +_check_add_dev() +{ + local tid=$1 + local code=$2 + shift 2 + if [ "${code}" -ne 0 ]; then + _remove_test_files "$@" + _show_result "${tid}" "${code}" + fi +} + +_cleanup_test() { + "${UBLK_PROG}" del -a + rm -f "$UBLK_TMP" +} + +_have_feature() +{ + if $UBLK_PROG "features" | grep "$1" > /dev/null 2>&1; then + return 0 + fi + return 1 +} + +_add_ublk_dev() { + local kublk_temp; + local dev_id; + + if [ ! -c /dev/ublk-control ]; then + return ${UBLK_SKIP_CODE} + fi + if echo "$@" | grep -q "\-z"; then + if ! _have_feature "ZERO_COPY"; then + return ${UBLK_SKIP_CODE} + fi + fi + + kublk_temp=$(mktemp /tmp/kublk-XXXXXX) + if ! "${UBLK_PROG}" add "$@" > "${kublk_temp}" 2>&1; then + echo "fail to add ublk dev $*" + rm -f "${kublk_temp}" + return 255 + fi + + dev_id=$(grep "dev id" "${kublk_temp}" | awk -F '[ :]' '{print $3}') + udevadm settle + rm -f "${kublk_temp}" + echo "${dev_id}" +} + +# kill the ublk daemon and return ublk device state +__ublk_kill_daemon() +{ + local dev_id=$1 + local exp_state=$2 + local daemon_pid + local state + + daemon_pid=$(_get_ublk_daemon_pid "${dev_id}") + state=$(_get_ublk_dev_state "${dev_id}") + + for ((j=0;j<50;j++)); do + [ "$state" == "$exp_state" ] && break + kill -9 "$daemon_pid" > /dev/null 2>&1 + sleep 1 + state=$(_get_ublk_dev_state "${dev_id}") + done + echo "$state" +} + +__remove_ublk_dev_return() { + local dev_id=$1 + + ${UBLK_PROG} del -n "${dev_id}" + local res=$? + udevadm settle + return ${res} +} + +__run_io_and_remove() +{ + local dev_id=$1 + local size=$2 + local kill_server=$3 + + fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \ + --rw=readwrite --iodepth=64 --size="${size}" --numjobs=4 \ + --runtime=20 --time_based > /dev/null 2>&1 & + sleep 2 + if [ "${kill_server}" = "yes" ]; then + local state + state=$(__ublk_kill_daemon "${dev_id}" "DEAD") + if [ "$state" != "DEAD" ]; then + echo "device isn't dead($state) after killing daemon" + return 255 + fi + fi + if ! __remove_ublk_dev_return "${dev_id}"; then + echo "delete dev ${dev_id} failed" + return 255 + fi + wait +} + +_ublk_test_top_dir() +{ + cd "$(dirname "$0")" && pwd +} + +UBLK_TMP=$(mktemp ublk_test_XXXXX) +UBLK_PROG=$(_ublk_test_top_dir)/kublk +UBLK_TEST_QUIET=1 +UBLK_TEST_SHOW_RESULT=1 +export UBLK_PROG +export UBLK_TEST_QUIET +export UBLK_TEST_SHOW_RESULT diff --git a/tools/testing/selftests/ublk/test_generic_01.sh b/tools/testing/selftests/ublk/test_generic_01.sh new file mode 100755 index 000000000000..9227a208ba53 --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_01.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_01" +ERR_CODE=0 + +if ! _have_program bpftrace; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "null" "sequential io order" + +dev_id=$(_add_ublk_dev -t null) +_check_add_dev $TID $? + +dev_t=$(_get_disk_dev_t "$dev_id") +bpftrace trace/seq_io.bt "$dev_t" "W" 1 > "$UBLK_TMP" 2>&1 & +btrace_pid=$! +sleep 2 + +if ! kill -0 "$btrace_pid" > /dev/null 2>&1; then + _cleanup_test "null" + exit "$UBLK_SKIP_CODE" +fi + +# run fio over this ublk disk +fio --name=write_seq \ + --filename=/dev/ublkb"${dev_id}" \ + --ioengine=libaio --iodepth=16 \ + --rw=write \ + --size=512M \ + --direct=1 \ + --bs=4k > /dev/null 2>&1 +ERR_CODE=$? +kill "$btrace_pid" +wait +if grep -q "io_out_of_order" "$UBLK_TMP"; then + cat "$UBLK_TMP" + ERR_CODE=255 +fi +_cleanup_test "null" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_loop_01.sh b/tools/testing/selftests/ublk/test_loop_01.sh new file mode 100755 index 000000000000..c882d2a08e13 --- /dev/null +++ b/tools/testing/selftests/ublk/test_loop_01.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="loop_01" +ERR_CODE=0 + +_prep_test "loop" "write and verify test" + +backfile_0=$(_create_backfile 256M) + +dev_id=$(_add_ublk_dev -t loop "$backfile_0") +_check_add_dev $TID $? "${backfile_0}" + +# run fio over the ublk disk +fio --name=write_and_verify \ + --filename=/dev/ublkb"${dev_id}" \ + --ioengine=libaio --iodepth=16 \ + --rw=write \ + --size=256M \ + --direct=1 \ + --verify=crc32c \ + --do_verify=1 \ + --bs=4k > /dev/null 2>&1 +ERR_CODE=$? + +_cleanup_test "loop" + +_remove_backfile "$backfile_0" + +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_loop_02.sh b/tools/testing/selftests/ublk/test_loop_02.sh new file mode 100755 index 000000000000..03863d825e07 --- /dev/null +++ b/tools/testing/selftests/ublk/test_loop_02.sh @@ -0,0 +1,22 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="loop_02" +ERR_CODE=0 + +_prep_test "loop" "mkfs & mount & umount" + +backfile_0=$(_create_backfile 256M) +dev_id=$(_add_ublk_dev -t loop "$backfile_0") +_check_add_dev $TID $? "$backfile_0" + +_mkfs_mount_test /dev/ublkb"${dev_id}" +ERR_CODE=$? + +_cleanup_test "loop" + +_remove_backfile "$backfile_0" + +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_loop_03.sh b/tools/testing/selftests/ublk/test_loop_03.sh new file mode 100755 index 000000000000..269c96787d7d --- /dev/null +++ b/tools/testing/selftests/ublk/test_loop_03.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="loop_03" +ERR_CODE=0 + +_prep_test "loop" "write and verify over zero copy" + +backfile_0=$(_create_backfile 256M) +dev_id=$(_add_ublk_dev -t loop -z "$backfile_0") +_check_add_dev $TID $? "$backfile_0" + +# run fio over the ublk disk +fio --name=write_and_verify \ + --filename=/dev/ublkb"${dev_id}" \ + --ioengine=libaio --iodepth=64 \ + --rw=write \ + --size=256M \ + --direct=1 \ + --verify=crc32c \ + --do_verify=1 \ + --bs=4k > /dev/null 2>&1 +ERR_CODE=$? + +_cleanup_test "loop" + +_remove_backfile "$backfile_0" + +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_loop_04.sh b/tools/testing/selftests/ublk/test_loop_04.sh new file mode 100755 index 000000000000..1435422c38ec --- /dev/null +++ b/tools/testing/selftests/ublk/test_loop_04.sh @@ -0,0 +1,22 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="loop_04" +ERR_CODE=0 + +_prep_test "loop" "mkfs & mount & umount with zero copy" + +backfile_0=$(_create_backfile 256M) +dev_id=$(_add_ublk_dev -t loop -z "$backfile_0") +_check_add_dev $TID $? "$backfile_0" + +_mkfs_mount_test /dev/ublkb"${dev_id}" +ERR_CODE=$? + +_cleanup_test "loop" + +_remove_backfile "$backfile_0" + +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_null_01.sh b/tools/testing/selftests/ublk/test_null_01.sh new file mode 100755 index 000000000000..a34203f72668 --- /dev/null +++ b/tools/testing/selftests/ublk/test_null_01.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="null_01" +ERR_CODE=0 + +_prep_test "null" "basic IO test" + +dev_id=$(_add_ublk_dev -t null) +_check_add_dev $TID $? + +# run fio over the two disks +fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio --rw=readwrite --iodepth=32 --size=256M > /dev/null 2>&1 +ERR_CODE=$? + +_cleanup_test "null" + +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_null_02.sh b/tools/testing/selftests/ublk/test_null_02.sh new file mode 100755 index 000000000000..5633ca876655 --- /dev/null +++ b/tools/testing/selftests/ublk/test_null_02.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="null_02" +ERR_CODE=0 + +_prep_test "null" "basic IO test with zero copy" + +dev_id=$(_add_ublk_dev -t null -z) +_check_add_dev $TID $? + +# run fio over the two disks +fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio --rw=readwrite --iodepth=32 --size=256M > /dev/null 2>&1 +ERR_CODE=$? + +_cleanup_test "null" + +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_01.sh b/tools/testing/selftests/ublk/test_stress_01.sh new file mode 100755 index 000000000000..7177f6c57bc5 --- /dev/null +++ b/tools/testing/selftests/ublk/test_stress_01.sh @@ -0,0 +1,47 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh +TID="stress_01" +ERR_CODE=0 +DEV_ID=-1 + +ublk_io_and_remove() +{ + local size=$1 + shift 1 + local backfile="" + if echo "$@" | grep -q "loop"; then + backfile=${*: -1} + fi + DEV_ID=$(_add_ublk_dev "$@") + _check_add_dev $TID $? "${backfile}" + + [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs. remove device(ublk add $*)" + if ! __run_io_and_remove "${DEV_ID}" "${size}" "no"; then + echo "/dev/ublkc${DEV_ID} isn't removed" + _remove_backfile "${backfile}" + exit 255 + fi +} + +_prep_test "stress" "run IO and remove device" + +ublk_io_and_remove 8G -t null +ERR_CODE=$? +if [ ${ERR_CODE} -ne 0 ]; then + _show_result $TID $ERR_CODE +fi + +BACK_FILE=$(_create_backfile 256M) +ublk_io_and_remove 256M -t loop "${BACK_FILE}" +ERR_CODE=$? +if [ ${ERR_CODE} -ne 0 ]; then + _show_result $TID $ERR_CODE +fi + +ublk_io_and_remove 256M -t loop -z "${BACK_FILE}" +ERR_CODE=$? +_cleanup_test "stress" +_remove_backfile "${BACK_FILE}" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_02.sh b/tools/testing/selftests/ublk/test_stress_02.sh new file mode 100755 index 000000000000..2a8e60579a06 --- /dev/null +++ b/tools/testing/selftests/ublk/test_stress_02.sh @@ -0,0 +1,47 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh +TID="stress_02" +ERR_CODE=0 +DEV_ID=-1 + +ublk_io_and_kill_daemon() +{ + local size=$1 + shift 1 + local backfile="" + if echo "$@" | grep -q "loop"; then + backfile=${*: -1} + fi + DEV_ID=$(_add_ublk_dev "$@") + _check_add_dev $TID $? "${backfile}" + + [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs kill ublk server(ublk add $*)" + if ! __run_io_and_remove "${DEV_ID}" "${size}" "yes"; then + echo "/dev/ublkc${DEV_ID} isn't removed res ${res}" + _remove_backfile "${backfile}" + exit 255 + fi +} + +_prep_test "stress" "run IO and kill ublk server" + +ublk_io_and_kill_daemon 8G -t null +ERR_CODE=$? +if [ ${ERR_CODE} -ne 0 ]; then + _show_result $TID $ERR_CODE +fi + +BACK_FILE=$(_create_backfile 256M) +ublk_io_and_kill_daemon 256M -t loop "${BACK_FILE}" +ERR_CODE=$? +if [ ${ERR_CODE} -ne 0 ]; then + _show_result $TID $ERR_CODE +fi + +ublk_io_and_kill_daemon 256M -t loop -z "${BACK_FILE}" +ERR_CODE=$? +_cleanup_test "stress" +_remove_backfile "${BACK_FILE}" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stripe_01.sh b/tools/testing/selftests/ublk/test_stripe_01.sh new file mode 100755 index 000000000000..c01f3dc325ab --- /dev/null +++ b/tools/testing/selftests/ublk/test_stripe_01.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="stripe_01" +ERR_CODE=0 + +_prep_test "stripe" "write and verify test" + +backfile_0=$(_create_backfile 256M) +backfile_1=$(_create_backfile 256M) + +dev_id=$(_add_ublk_dev -t stripe "$backfile_0" "$backfile_1") +_check_add_dev $TID $? "${backfile_0}" + +# run fio over the ublk disk +fio --name=write_and_verify \ + --filename=/dev/ublkb"${dev_id}" \ + --ioengine=libaio --iodepth=32 \ + --rw=write \ + --size=512M \ + --direct=1 \ + --verify=crc32c \ + --do_verify=1 \ + --bs=4k > /dev/null 2>&1 +ERR_CODE=$? + +_cleanup_test "stripe" + +_remove_backfile "$backfile_0" +_remove_backfile "$backfile_1" + +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stripe_02.sh b/tools/testing/selftests/ublk/test_stripe_02.sh new file mode 100755 index 000000000000..e8a45fa82dde --- /dev/null +++ b/tools/testing/selftests/ublk/test_stripe_02.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="stripe_02" +ERR_CODE=0 + +_prep_test "stripe" "mkfs & mount & umount" + +backfile_0=$(_create_backfile 256M) +backfile_1=$(_create_backfile 256M) +dev_id=$(_add_ublk_dev -t stripe "$backfile_0" "$backfile_1") +_check_add_dev $TID $? "$backfile_0" "$backfile_1" + +_mkfs_mount_test /dev/ublkb"${dev_id}" +ERR_CODE=$? + +_cleanup_test "stripe" + +_remove_backfile "$backfile_0" +_remove_backfile "$backfile_1" + +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/trace/seq_io.bt b/tools/testing/selftests/ublk/trace/seq_io.bt new file mode 100644 index 000000000000..272ac54c9d5f --- /dev/null +++ b/tools/testing/selftests/ublk/trace/seq_io.bt @@ -0,0 +1,25 @@ +/* + $1: dev_t + $2: RWBS + $3: strlen($2) +*/ +BEGIN { + @last_rw[$1, str($2)] = 0; +} +tracepoint:block:block_rq_complete +{ + $dev = $1; + if ((int64)args.dev == $1 && !strncmp(args.rwbs, str($2), $3)) { + $last = @last_rw[$dev, str($2)]; + if ((uint64)args.sector != $last) { + printf("io_out_of_order: exp %llu actual %llu\n", + args.sector, $last); + } + @last_rw[$dev, str($2)] = (args.sector + args.nr_sector); + } + @ios = count(); +} + +END { + clear(@last_rw); +} diff --git a/tools/testing/selftests/ublk/ublk_dep.h b/tools/testing/selftests/ublk/ublk_dep.h new file mode 100644 index 000000000000..f68fa7eac939 --- /dev/null +++ b/tools/testing/selftests/ublk/ublk_dep.h @@ -0,0 +1,18 @@ +#ifndef UBLK_DEP_H +#define UBLK_DEP_H + +#ifndef UBLK_U_IO_REGISTER_IO_BUF +#define UBLK_U_IO_REGISTER_IO_BUF \ + _IOWR('u', 0x23, struct ublksrv_io_cmd) +#define UBLK_U_IO_UNREGISTER_IO_BUF \ + _IOWR('u', 0x24, struct ublksrv_io_cmd) +#endif + +#ifndef UBLK_F_USER_RECOVERY_FAIL_IO +#define UBLK_F_USER_RECOVERY_FAIL_IO (1ULL << 9) +#endif + +#ifndef UBLK_F_ZONED +#define UBLK_F_ZONED (1ULL << 8) +#endif +#endif diff --git a/tools/testing/selftests/user_events/dyn_test.c b/tools/testing/selftests/user_events/dyn_test.c index bdf9ab127488..54c9412f8dee 100644 --- a/tools/testing/selftests/user_events/dyn_test.c +++ b/tools/testing/selftests/user_events/dyn_test.c @@ -127,6 +127,8 @@ static int parse_abi(int *check, const char *value) close(fd); + wait_for_delete(); + return ret; } diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile index 1cf14a8da438..12a0614b9fd4 100644 --- a/tools/testing/selftests/vDSO/Makefile +++ b/tools/testing/selftests/vDSO/Makefile @@ -19,13 +19,20 @@ LDLIBS += -lgcc_s endif include ../lib.mk + +CFLAGS += $(TOOLS_INCLUDES) + +CFLAGS_NOLIBC := -nostdlib -nostdinc -ffreestanding -fno-asynchronous-unwind-tables \ + -fno-stack-protector -include $(top_srcdir)/tools/include/nolibc/nolibc.h \ + -I$(top_srcdir)/tools/include/nolibc/ $(KHDR_INCLUDES) + $(OUTPUT)/vdso_test_gettimeofday: parse_vdso.c vdso_test_gettimeofday.c $(OUTPUT)/vdso_test_getcpu: parse_vdso.c vdso_test_getcpu.c $(OUTPUT)/vdso_test_abi: parse_vdso.c vdso_test_abi.c $(OUTPUT)/vdso_test_clock_getres: vdso_test_clock_getres.c -$(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c -$(OUTPUT)/vdso_standalone_test_x86: CFLAGS +=-nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector +$(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c | headers +$(OUTPUT)/vdso_standalone_test_x86: CFLAGS:=$(CFLAGS_NOLIBC) $(CFLAGS) $(OUTPUT)/vdso_test_correctness: vdso_test_correctness.c $(OUTPUT)/vdso_test_correctness: LDFLAGS += -ldl diff --git a/tools/testing/selftests/vDSO/parse_vdso.c b/tools/testing/selftests/vDSO/parse_vdso.c index f89d052c730e..3ff00fb624a4 100644 --- a/tools/testing/selftests/vDSO/parse_vdso.c +++ b/tools/testing/selftests/vDSO/parse_vdso.c @@ -19,13 +19,14 @@ #include <stdint.h> #include <string.h> #include <limits.h> -#include <elf.h> +#include <linux/auxvec.h> +#include <linux/elf.h> #include "parse_vdso.h" /* And here's the code. */ #ifndef ELF_BITS -# if ULONG_MAX > 0xffffffffUL +# if __SIZEOF_LONG__ >= 8 # define ELF_BITS 64 # else # define ELF_BITS 32 @@ -297,17 +298,3 @@ void *vdso_sym(const char *version, const char *name) return 0; } - -void vdso_init_from_auxv(void *auxv) -{ - ELF(auxv_t) *elf_auxv = auxv; - for (int i = 0; elf_auxv[i].a_type != AT_NULL; i++) - { - if (elf_auxv[i].a_type == AT_SYSINFO_EHDR) { - vdso_init_from_sysinfo_ehdr(elf_auxv[i].a_un.a_val); - return; - } - } - - vdso_info.valid = false; -} diff --git a/tools/testing/selftests/vDSO/parse_vdso.h b/tools/testing/selftests/vDSO/parse_vdso.h index de0453067d7c..09d068ed11f9 100644 --- a/tools/testing/selftests/vDSO/parse_vdso.h +++ b/tools/testing/selftests/vDSO/parse_vdso.h @@ -26,6 +26,5 @@ */ void *vdso_sym(const char *version, const char *name); void vdso_init_from_sysinfo_ehdr(uintptr_t base); -void vdso_init_from_auxv(void *auxv); #endif diff --git a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c index 644915862af8..9ce795b806f0 100644 --- a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c +++ b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c @@ -1,142 +1,58 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * vdso_test.c: Sample code to test parse_vdso.c on x86 - * Copyright (c) 2011-2014 Andy Lutomirski + * vdso_test_gettimeofday.c: Sample code to test parse_vdso.c and + * vDSO gettimeofday() + * Copyright (c) 2014 Andy Lutomirski * - * You can amuse yourself by compiling with: - * gcc -std=gnu99 -nostdlib - * -Os -fno-asynchronous-unwind-tables -flto -lgcc_s - * vdso_standalone_test_x86.c parse_vdso.c - * to generate a small binary. On x86_64, you can omit -lgcc_s - * if you want the binary to be completely standalone. + * Compile with: + * gcc -std=gnu99 vdso_test_gettimeofday.c parse_vdso_gettimeofday.c + * + * Tested on x86, 32-bit and 64-bit. It may work on other architectures, too. */ -#include <sys/syscall.h> +#include <stdio.h> +#ifndef NOLIBC +#include <sys/auxv.h> #include <sys/time.h> -#include <unistd.h> -#include <stdint.h> - -#include "parse_vdso.h" - -/* We need some libc functions... */ -int strcmp(const char *a, const char *b) -{ - /* This implementation is buggy: it never returns -1. */ - while (*a || *b) { - if (*a != *b) - return 1; - if (*a == 0 || *b == 0) - return 1; - a++; - b++; - } - - return 0; -} - -/* - * The clang build needs this, although gcc does not. - * Stolen from lib/string.c. - */ -void *memcpy(void *dest, const void *src, size_t count) -{ - char *tmp = dest; - const char *s = src; - - while (count--) - *tmp++ = *s++; - return dest; -} - -/* ...and two syscalls. This is x86-specific. */ -static inline long x86_syscall3(long nr, long a0, long a1, long a2) -{ - long ret; -#ifdef __x86_64__ - asm volatile ("syscall" : "=a" (ret) : "a" (nr), - "D" (a0), "S" (a1), "d" (a2) : - "cc", "memory", "rcx", - "r8", "r9", "r10", "r11" ); -#else - asm volatile ("int $0x80" : "=a" (ret) : "a" (nr), - "b" (a0), "c" (a1), "d" (a2) : - "cc", "memory" ); #endif - return ret; -} -static inline long linux_write(int fd, const void *data, size_t len) -{ - return x86_syscall3(__NR_write, fd, (long)data, (long)len); -} +#include "../kselftest.h" +#include "parse_vdso.h" +#include "vdso_config.h" +#include "vdso_call.h" -static inline void linux_exit(int code) +int main(int argc, char **argv) { - x86_syscall3(__NR_exit, code, 0, 0); -} + const char *version = versions[VDSO_VERSION]; + const char **name = (const char **)&names[VDSO_NAMES]; -void to_base10(char *lastdig, time_t n) -{ - while (n) { - *lastdig = (n % 10) + '0'; - n /= 10; - lastdig--; + unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR); + if (!sysinfo_ehdr) { + printf("AT_SYSINFO_EHDR is not present!\n"); + return KSFT_SKIP; } -} - -void c_main(void **stack) -{ - /* Parse the stack */ - long argc = (long)*stack; - stack += argc + 2; - - /* Now we're pointing at the environment. Skip it. */ - while(*stack) - stack++; - stack++; - /* Now we're pointing at auxv. Initialize the vDSO parser. */ - vdso_init_from_auxv((void *)stack); + vdso_init_from_sysinfo_ehdr(getauxval(AT_SYSINFO_EHDR)); /* Find gettimeofday. */ typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz); - gtod_t gtod = (gtod_t)vdso_sym("LINUX_2.6", "__vdso_gettimeofday"); + gtod_t gtod = (gtod_t)vdso_sym(version, name[0]); - if (!gtod) - linux_exit(1); + if (!gtod) { + printf("Could not find %s\n", name[0]); + return KSFT_SKIP; + } struct timeval tv; - long ret = gtod(&tv, 0); + long ret = VDSO_CALL(gtod, 2, &tv, 0); if (ret == 0) { - char buf[] = "The time is .000000\n"; - to_base10(buf + 31, tv.tv_sec); - to_base10(buf + 38, tv.tv_usec); - linux_write(1, buf, sizeof(buf) - 1); + printf("The time is %lld.%06lld\n", + (long long)tv.tv_sec, (long long)tv.tv_usec); } else { - linux_exit(ret); + printf("%s failed\n", name[0]); + return KSFT_FAIL; } - linux_exit(0); + return 0; } - -/* - * This is the real entry point. It passes the initial stack into - * the C entry point. - */ -asm ( - ".text\n" - ".global _start\n" - ".type _start,@function\n" - "_start:\n\t" -#ifdef __x86_64__ - "mov %rsp,%rdi\n\t" - "and $-16,%rsp\n\t" - "sub $8,%rsp\n\t" - "jmp c_main" -#else - "push %esp\n\t" - "call c_main\n\t" - "int $3" -#endif - ); diff --git a/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c b/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c index e31b18ffae33..9ce795b806f0 100644 --- a/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c +++ b/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c @@ -10,11 +10,11 @@ * Tested on x86, 32-bit and 64-bit. It may work on other architectures, too. */ -#include <stdint.h> -#include <elf.h> #include <stdio.h> +#ifndef NOLIBC #include <sys/auxv.h> #include <sys/time.h> +#endif #include "../kselftest.h" #include "parse_vdso.h" diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config index 139fd9aa8b12..c305d2f613f0 100644 --- a/tools/testing/selftests/wireguard/qemu/debug.config +++ b/tools/testing/selftests/wireguard/qemu/debug.config @@ -27,7 +27,6 @@ CONFIG_DEBUG_KMEMLEAK=y CONFIG_DEBUG_STACK_USAGE=y CONFIG_DEBUG_SHIRQ=y CONFIG_WQ_WATCHDOG=y -CONFIG_SCHED_DEBUG=y CONFIG_SCHED_INFO=y CONFIG_SCHEDSTATS=y CONFIG_SCHED_STACK_END_CHECK=y diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index d51249f14e2f..28422c32cc8f 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -19,7 +19,7 @@ TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering \ - corrupt_xstate_header amx lam test_shadow_stack + corrupt_xstate_header amx lam test_shadow_stack avx # Some selftests require 32bit support enabled also on 64bit systems TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall @@ -132,3 +132,7 @@ $(OUTPUT)/check_initial_reg_state_64: CFLAGS += -Wl,-ereal_start -static $(OUTPUT)/nx_stack_32: CFLAGS += -Wl,-z,noexecstack $(OUTPUT)/nx_stack_64: CFLAGS += -Wl,-z,noexecstack + +$(OUTPUT)/avx_64: CFLAGS += -mno-avx -mno-avx512f +$(OUTPUT)/amx_64: EXTRA_FILES += xstate.c +$(OUTPUT)/avx_64: EXTRA_FILES += xstate.c diff --git a/tools/testing/selftests/x86/amx.c b/tools/testing/selftests/x86/amx.c index 1fdf35a4d7f6..40769c16de1b 100644 --- a/tools/testing/selftests/x86/amx.c +++ b/tools/testing/selftests/x86/amx.c @@ -3,7 +3,6 @@ #define _GNU_SOURCE #include <err.h> #include <errno.h> -#include <pthread.h> #include <setjmp.h> #include <stdio.h> #include <string.h> @@ -14,169 +13,27 @@ #include <sys/auxv.h> #include <sys/mman.h> #include <sys/shm.h> -#include <sys/ptrace.h> #include <sys/syscall.h> #include <sys/wait.h> -#include <sys/uio.h> -#include "../kselftest.h" /* For __cpuid_count() */ +#include "helpers.h" +#include "xstate.h" #ifndef __x86_64__ # error This test is 64-bit only #endif -#define XSAVE_HDR_OFFSET 512 -#define XSAVE_HDR_SIZE 64 - -struct xsave_buffer { - union { - struct { - char legacy[XSAVE_HDR_OFFSET]; - char header[XSAVE_HDR_SIZE]; - char extended[0]; - }; - char bytes[0]; - }; -}; - -static inline void xsave(struct xsave_buffer *xbuf, uint64_t rfbm) -{ - uint32_t rfbm_lo = rfbm; - uint32_t rfbm_hi = rfbm >> 32; - - asm volatile("xsave (%%rdi)" - : : "D" (xbuf), "a" (rfbm_lo), "d" (rfbm_hi) - : "memory"); -} - -static inline void xrstor(struct xsave_buffer *xbuf, uint64_t rfbm) -{ - uint32_t rfbm_lo = rfbm; - uint32_t rfbm_hi = rfbm >> 32; - - asm volatile("xrstor (%%rdi)" - : : "D" (xbuf), "a" (rfbm_lo), "d" (rfbm_hi)); -} - /* err() exits and will not return */ #define fatal_error(msg, ...) err(1, "[FAIL]\t" msg, ##__VA_ARGS__) -static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), - int flags) -{ - struct sigaction sa; - - memset(&sa, 0, sizeof(sa)); - sa.sa_sigaction = handler; - sa.sa_flags = SA_SIGINFO | flags; - sigemptyset(&sa.sa_mask); - if (sigaction(sig, &sa, 0)) - fatal_error("sigaction"); -} - -static void clearhandler(int sig) -{ - struct sigaction sa; - - memset(&sa, 0, sizeof(sa)); - sa.sa_handler = SIG_DFL; - sigemptyset(&sa.sa_mask); - if (sigaction(sig, &sa, 0)) - fatal_error("sigaction"); -} - -#define XFEATURE_XTILECFG 17 -#define XFEATURE_XTILEDATA 18 #define XFEATURE_MASK_XTILECFG (1 << XFEATURE_XTILECFG) #define XFEATURE_MASK_XTILEDATA (1 << XFEATURE_XTILEDATA) #define XFEATURE_MASK_XTILE (XFEATURE_MASK_XTILECFG | XFEATURE_MASK_XTILEDATA) -#define CPUID_LEAF1_ECX_XSAVE_MASK (1 << 26) -#define CPUID_LEAF1_ECX_OSXSAVE_MASK (1 << 27) - -static uint32_t xbuf_size; - -static struct { - uint32_t xbuf_offset; - uint32_t size; -} xtiledata; - -#define CPUID_LEAF_XSTATE 0xd -#define CPUID_SUBLEAF_XSTATE_USER 0x0 -#define TILE_CPUID 0x1d -#define TILE_PALETTE_ID 0x1 - -static void check_cpuid_xtiledata(void) -{ - uint32_t eax, ebx, ecx, edx; - - __cpuid_count(CPUID_LEAF_XSTATE, CPUID_SUBLEAF_XSTATE_USER, - eax, ebx, ecx, edx); - - /* - * EBX enumerates the size (in bytes) required by the XSAVE - * instruction for an XSAVE area containing all the user state - * components corresponding to bits currently set in XCR0. - * - * Stash that off so it can be used to allocate buffers later. - */ - xbuf_size = ebx; - - __cpuid_count(CPUID_LEAF_XSTATE, XFEATURE_XTILEDATA, - eax, ebx, ecx, edx); - /* - * eax: XTILEDATA state component size - * ebx: XTILEDATA state component offset in user buffer - */ - if (!eax || !ebx) - fatal_error("xstate cpuid: invalid tile data size/offset: %d/%d", - eax, ebx); - - xtiledata.size = eax; - xtiledata.xbuf_offset = ebx; -} +struct xstate_info xtiledata; /* The helpers for managing XSAVE buffer and tile states: */ -struct xsave_buffer *alloc_xbuf(void) -{ - struct xsave_buffer *xbuf; - - /* XSAVE buffer should be 64B-aligned. */ - xbuf = aligned_alloc(64, xbuf_size); - if (!xbuf) - fatal_error("aligned_alloc()"); - return xbuf; -} - -static inline void clear_xstate_header(struct xsave_buffer *buffer) -{ - memset(&buffer->header, 0, sizeof(buffer->header)); -} - -static inline void set_xstatebv(struct xsave_buffer *buffer, uint64_t bv) -{ - /* XSTATE_BV is at the beginning of the header: */ - *(uint64_t *)(&buffer->header) = bv; -} - -static void set_rand_tiledata(struct xsave_buffer *xbuf) -{ - int *ptr = (int *)&xbuf->bytes[xtiledata.xbuf_offset]; - int data; - int i; - - /* - * Ensure that 'data' is never 0. This ensures that - * the registers are never in their initial configuration - * and thus never tracked as being in the init state. - */ - data = rand() | 1; - - for (i = 0; i < xtiledata.size / sizeof(int); i++, ptr++) - *ptr = data; -} - struct xsave_buffer *stashed_xsave; static void init_stashed_xsave(void) @@ -192,21 +49,6 @@ static void free_stashed_xsave(void) free(stashed_xsave); } -/* See 'struct _fpx_sw_bytes' at sigcontext.h */ -#define SW_BYTES_OFFSET 464 -/* N.B. The struct's field name varies so read from the offset. */ -#define SW_BYTES_BV_OFFSET (SW_BYTES_OFFSET + 8) - -static inline struct _fpx_sw_bytes *get_fpx_sw_bytes(void *buffer) -{ - return (struct _fpx_sw_bytes *)(buffer + SW_BYTES_OFFSET); -} - -static inline uint64_t get_fpx_sw_bytes_features(void *buffer) -{ - return *(uint64_t *)(buffer + SW_BYTES_BV_OFFSET); -} - /* Work around printf() being unsafe in signals: */ #define SIGNAL_BUF_LEN 1000 char signal_message_buffer[SIGNAL_BUF_LEN]; @@ -304,17 +146,10 @@ static inline bool load_rand_tiledata(struct xsave_buffer *xbuf) { clear_xstate_header(xbuf); set_xstatebv(xbuf, XFEATURE_MASK_XTILEDATA); - set_rand_tiledata(xbuf); + set_rand_data(&xtiledata, xbuf); return xrstor_safe(xbuf, XFEATURE_MASK_XTILEDATA); } -/* Return XTILEDATA to its initial configuration. */ -static inline void init_xtiledata(void) -{ - clear_xstate_header(stashed_xsave); - xrstor_safe(stashed_xsave, XFEATURE_MASK_XTILEDATA); -} - enum expected_result { FAIL_EXPECTED, SUCCESS_EXPECTED }; /* arch_prctl() and sigaltstack() test */ @@ -587,14 +422,6 @@ static inline bool __validate_tiledata_regs(struct xsave_buffer *xbuf1) return true; } -static inline void validate_tiledata_regs_same(struct xsave_buffer *xbuf) -{ - int ret = __validate_tiledata_regs(xbuf); - - if (ret != 0) - fatal_error("TILEDATA registers changed"); -} - static inline void validate_tiledata_regs_changed(struct xsave_buffer *xbuf) { int ret = __validate_tiledata_regs(xbuf); @@ -651,251 +478,6 @@ static void test_fork(void) _exit(0); } -/* Context switching test */ - -static struct _ctxtswtest_cfg { - unsigned int iterations; - unsigned int num_threads; -} ctxtswtest_config; - -struct futex_info { - pthread_t thread; - int nr; - pthread_mutex_t mutex; - struct futex_info *next; -}; - -static void *check_tiledata(void *info) -{ - struct futex_info *finfo = (struct futex_info *)info; - struct xsave_buffer *xbuf; - int i; - - xbuf = alloc_xbuf(); - if (!xbuf) - fatal_error("unable to allocate XSAVE buffer"); - - /* - * Load random data into 'xbuf' and then restore - * it to the tile registers themselves. - */ - load_rand_tiledata(xbuf); - for (i = 0; i < ctxtswtest_config.iterations; i++) { - pthread_mutex_lock(&finfo->mutex); - - /* - * Ensure the register values have not - * diverged from those recorded in 'xbuf'. - */ - validate_tiledata_regs_same(xbuf); - - /* Load new, random values into xbuf and registers */ - load_rand_tiledata(xbuf); - - /* - * The last thread's last unlock will be for - * thread 0's mutex. However, thread 0 will - * have already exited the loop and the mutex - * will already be unlocked. - * - * Because this is not an ERRORCHECK mutex, - * that inconsistency will be silently ignored. - */ - pthread_mutex_unlock(&finfo->next->mutex); - } - - free(xbuf); - /* - * Return this thread's finfo, which is - * a unique value for this thread. - */ - return finfo; -} - -static int create_threads(int num, struct futex_info *finfo) -{ - int i; - - for (i = 0; i < num; i++) { - int next_nr; - - finfo[i].nr = i; - /* - * Thread 'i' will wait on this mutex to - * be unlocked. Lock it immediately after - * initialization: - */ - pthread_mutex_init(&finfo[i].mutex, NULL); - pthread_mutex_lock(&finfo[i].mutex); - - next_nr = (i + 1) % num; - finfo[i].next = &finfo[next_nr]; - - if (pthread_create(&finfo[i].thread, NULL, check_tiledata, &finfo[i])) - fatal_error("pthread_create()"); - } - return 0; -} - -static void affinitize_cpu0(void) -{ - cpu_set_t cpuset; - - CPU_ZERO(&cpuset); - CPU_SET(0, &cpuset); - - if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) - fatal_error("sched_setaffinity to CPU 0"); -} - -static void test_context_switch(void) -{ - struct futex_info *finfo; - int i; - - /* Affinitize to one CPU to force context switches */ - affinitize_cpu0(); - - req_xtiledata_perm(); - - printf("[RUN]\tCheck tiledata context switches, %d iterations, %d threads.\n", - ctxtswtest_config.iterations, - ctxtswtest_config.num_threads); - - - finfo = malloc(sizeof(*finfo) * ctxtswtest_config.num_threads); - if (!finfo) - fatal_error("malloc()"); - - create_threads(ctxtswtest_config.num_threads, finfo); - - /* - * This thread wakes up thread 0 - * Thread 0 will wake up 1 - * Thread 1 will wake up 2 - * ... - * the last thread will wake up 0 - * - * ... this will repeat for the configured - * number of iterations. - */ - pthread_mutex_unlock(&finfo[0].mutex); - - /* Wait for all the threads to finish: */ - for (i = 0; i < ctxtswtest_config.num_threads; i++) { - void *thread_retval; - int rc; - - rc = pthread_join(finfo[i].thread, &thread_retval); - - if (rc) - fatal_error("pthread_join() failed for thread %d err: %d\n", - i, rc); - - if (thread_retval != &finfo[i]) - fatal_error("unexpected thread retval for thread %d: %p\n", - i, thread_retval); - - } - - printf("[OK]\tNo incorrect case was found.\n"); - - free(finfo); -} - -/* Ptrace test */ - -/* - * Make sure the ptracee has the expanded kernel buffer on the first - * use. Then, initialize the state before performing the state - * injection from the ptracer. - */ -static inline void ptracee_firstuse_tiledata(void) -{ - load_rand_tiledata(stashed_xsave); - init_xtiledata(); -} - -/* - * Ptracer injects the randomized tile data state. It also reads - * before and after that, which will execute the kernel's state copy - * functions. So, the tester is advised to double-check any emitted - * kernel messages. - */ -static void ptracer_inject_tiledata(pid_t target) -{ - struct xsave_buffer *xbuf; - struct iovec iov; - - xbuf = alloc_xbuf(); - if (!xbuf) - fatal_error("unable to allocate XSAVE buffer"); - - printf("\tRead the init'ed tiledata via ptrace().\n"); - - iov.iov_base = xbuf; - iov.iov_len = xbuf_size; - - memset(stashed_xsave, 0, xbuf_size); - - if (ptrace(PTRACE_GETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov)) - fatal_error("PTRACE_GETREGSET"); - - if (!__compare_tiledata_state(stashed_xsave, xbuf)) - printf("[OK]\tThe init'ed tiledata was read from ptracee.\n"); - else - printf("[FAIL]\tThe init'ed tiledata was not read from ptracee.\n"); - - printf("\tInject tiledata via ptrace().\n"); - - load_rand_tiledata(xbuf); - - memcpy(&stashed_xsave->bytes[xtiledata.xbuf_offset], - &xbuf->bytes[xtiledata.xbuf_offset], - xtiledata.size); - - if (ptrace(PTRACE_SETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov)) - fatal_error("PTRACE_SETREGSET"); - - if (ptrace(PTRACE_GETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov)) - fatal_error("PTRACE_GETREGSET"); - - if (!__compare_tiledata_state(stashed_xsave, xbuf)) - printf("[OK]\tTiledata was correctly written to ptracee.\n"); - else - printf("[FAIL]\tTiledata was not correctly written to ptracee.\n"); -} - -static void test_ptrace(void) -{ - pid_t child; - int status; - - child = fork(); - if (child < 0) { - err(1, "fork"); - } else if (!child) { - if (ptrace(PTRACE_TRACEME, 0, NULL, NULL)) - err(1, "PTRACE_TRACEME"); - - ptracee_firstuse_tiledata(); - - raise(SIGTRAP); - _exit(0); - } - - do { - wait(&status); - } while (WSTOPSIG(status) != SIGTRAP); - - ptracer_inject_tiledata(child); - - ptrace(PTRACE_DETACH, child, NULL, NULL); - wait(&status); - if (!WIFEXITED(status) || WEXITSTATUS(status)) - err(1, "ptrace test"); -} - int main(void) { unsigned long features; @@ -907,7 +489,11 @@ int main(void) return KSFT_SKIP; } - check_cpuid_xtiledata(); + xtiledata = get_xstate_info(XFEATURE_XTILEDATA); + if (!xtiledata.size || !xtiledata.xbuf_offset) { + fatal_error("xstate cpuid: invalid tile data size/offset: %d/%d", + xtiledata.size, xtiledata.xbuf_offset); + } init_stashed_xsave(); sethandler(SIGILL, handle_noperm, 0); @@ -919,11 +505,11 @@ int main(void) test_fork(); - ctxtswtest_config.iterations = 10; - ctxtswtest_config.num_threads = 5; - test_context_switch(); - - test_ptrace(); + /* + * Perform generic xstate tests for context switching, ptrace, + * and signal. + */ + test_xstate(XFEATURE_XTILEDATA); clearhandler(SIGILL); free_stashed_xsave(); diff --git a/tools/testing/selftests/x86/avx.c b/tools/testing/selftests/x86/avx.c new file mode 100644 index 000000000000..11d5367c235f --- /dev/null +++ b/tools/testing/selftests/x86/avx.c @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE /* Required for inline xstate helpers */ +#include "xstate.h" + +int main(void) +{ + test_xstate(XFEATURE_YMM); + test_xstate(XFEATURE_OPMASK); + test_xstate(XFEATURE_ZMM_Hi256); + test_xstate(XFEATURE_Hi16_ZMM); +} diff --git a/tools/testing/selftests/x86/corrupt_xstate_header.c b/tools/testing/selftests/x86/corrupt_xstate_header.c index cf9ce8fbb656..93a89a5997ca 100644 --- a/tools/testing/selftests/x86/corrupt_xstate_header.c +++ b/tools/testing/selftests/x86/corrupt_xstate_header.c @@ -18,6 +18,7 @@ #include <sys/wait.h> #include "../kselftest.h" /* For __cpuid_count() */ +#include "helpers.h" static inline int xsave_enabled(void) { @@ -29,19 +30,6 @@ static inline int xsave_enabled(void) return ecx & (1U << 27); } -static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), - int flags) -{ - struct sigaction sa; - - memset(&sa, 0, sizeof(sa)); - sa.sa_sigaction = handler; - sa.sa_flags = SA_SIGINFO | flags; - sigemptyset(&sa.sa_mask); - if (sigaction(sig, &sa, 0)) - err(1, "sigaction"); -} - static void sigusr1(int sig, siginfo_t *info, void *uc_void) { ucontext_t *uc = uc_void; diff --git a/tools/testing/selftests/x86/entry_from_vm86.c b/tools/testing/selftests/x86/entry_from_vm86.c index d1e919b0c1dc..5cb8393737d0 100644 --- a/tools/testing/selftests/x86/entry_from_vm86.c +++ b/tools/testing/selftests/x86/entry_from_vm86.c @@ -24,31 +24,11 @@ #include <errno.h> #include <sys/vm86.h> +#include "helpers.h" + static unsigned long load_addr = 0x10000; static int nerrs = 0; -static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), - int flags) -{ - struct sigaction sa; - memset(&sa, 0, sizeof(sa)); - sa.sa_sigaction = handler; - sa.sa_flags = SA_SIGINFO | flags; - sigemptyset(&sa.sa_mask); - if (sigaction(sig, &sa, 0)) - err(1, "sigaction"); -} - -static void clearhandler(int sig) -{ - struct sigaction sa; - memset(&sa, 0, sizeof(sa)); - sa.sa_handler = SIG_DFL; - sigemptyset(&sa.sa_mask); - if (sigaction(sig, &sa, 0)) - err(1, "sigaction"); -} - static sig_atomic_t got_signal; static void sighandler(int sig, siginfo_t *info, void *ctx_void) diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c index 50cf32de6313..0a75252d31b6 100644 --- a/tools/testing/selftests/x86/fsgsbase.c +++ b/tools/testing/selftests/x86/fsgsbase.c @@ -28,6 +28,8 @@ #include <sys/wait.h> #include <setjmp.h> +#include "helpers.h" + #ifndef __x86_64__ # error This test is 64-bit only #endif @@ -39,28 +41,6 @@ static unsigned short *shared_scratch; static int nerrs; -static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), - int flags) -{ - struct sigaction sa; - memset(&sa, 0, sizeof(sa)); - sa.sa_sigaction = handler; - sa.sa_flags = SA_SIGINFO | flags; - sigemptyset(&sa.sa_mask); - if (sigaction(sig, &sa, 0)) - err(1, "sigaction"); -} - -static void clearhandler(int sig) -{ - struct sigaction sa; - memset(&sa, 0, sizeof(sa)); - sa.sa_handler = SIG_DFL; - sigemptyset(&sa.sa_mask); - if (sigaction(sig, &sa, 0)) - err(1, "sigaction"); -} - static void sigsegv(int sig, siginfo_t *si, void *ctx_void) { ucontext_t *ctx = (ucontext_t*)ctx_void; diff --git a/tools/testing/selftests/x86/helpers.h b/tools/testing/selftests/x86/helpers.h index 4ef42c4559a9..6deaad035161 100644 --- a/tools/testing/selftests/x86/helpers.h +++ b/tools/testing/selftests/x86/helpers.h @@ -2,8 +2,13 @@ #ifndef __SELFTESTS_X86_HELPERS_H #define __SELFTESTS_X86_HELPERS_H +#include <signal.h> +#include <string.h> + #include <asm/processor-flags.h> +#include "../kselftest.h" + static inline unsigned long get_eflags(void) { #ifdef __x86_64__ @@ -22,4 +27,27 @@ static inline void set_eflags(unsigned long eflags) #endif } +static inline void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), int flags) +{ + struct sigaction sa; + + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + ksft_exit_fail_msg("sigaction failed"); +} + +static inline void clearhandler(int sig) +{ + struct sigaction sa; + + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = SIG_DFL; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + ksft_exit_fail_msg("sigaction failed"); +} + #endif /* __SELFTESTS_X86_HELPERS_H */ diff --git a/tools/testing/selftests/x86/ioperm.c b/tools/testing/selftests/x86/ioperm.c index 57ec5e99edb9..69d5fb7050c2 100644 --- a/tools/testing/selftests/x86/ioperm.c +++ b/tools/testing/selftests/x86/ioperm.c @@ -20,30 +20,9 @@ #include <sched.h> #include <sys/io.h> -static int nerrs = 0; - -static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), - int flags) -{ - struct sigaction sa; - memset(&sa, 0, sizeof(sa)); - sa.sa_sigaction = handler; - sa.sa_flags = SA_SIGINFO | flags; - sigemptyset(&sa.sa_mask); - if (sigaction(sig, &sa, 0)) - err(1, "sigaction"); - -} +#include "helpers.h" -static void clearhandler(int sig) -{ - struct sigaction sa; - memset(&sa, 0, sizeof(sa)); - sa.sa_handler = SIG_DFL; - sigemptyset(&sa.sa_mask); - if (sigaction(sig, &sa, 0)) - err(1, "sigaction"); -} +static int nerrs = 0; static jmp_buf jmpbuf; diff --git a/tools/testing/selftests/x86/iopl.c b/tools/testing/selftests/x86/iopl.c index 7e3e09c1abac..457b6715542b 100644 --- a/tools/testing/selftests/x86/iopl.c +++ b/tools/testing/selftests/x86/iopl.c @@ -20,30 +20,9 @@ #include <sched.h> #include <sys/io.h> -static int nerrs = 0; - -static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), - int flags) -{ - struct sigaction sa; - memset(&sa, 0, sizeof(sa)); - sa.sa_sigaction = handler; - sa.sa_flags = SA_SIGINFO | flags; - sigemptyset(&sa.sa_mask); - if (sigaction(sig, &sa, 0)) - err(1, "sigaction"); - -} +#include "helpers.h" -static void clearhandler(int sig) -{ - struct sigaction sa; - memset(&sa, 0, sizeof(sa)); - sa.sa_handler = SIG_DFL; - sigemptyset(&sa.sa_mask); - if (sigaction(sig, &sa, 0)) - err(1, "sigaction"); -} +static int nerrs = 0; static jmp_buf jmpbuf; diff --git a/tools/testing/selftests/x86/lam.c b/tools/testing/selftests/x86/lam.c index 4d4a76532dc9..18d736640ece 100644 --- a/tools/testing/selftests/x86/lam.c +++ b/tools/testing/selftests/x86/lam.c @@ -4,6 +4,7 @@ #include <stdlib.h> #include <string.h> #include <sys/syscall.h> +#include <sys/ioctl.h> #include <time.h> #include <signal.h> #include <setjmp.h> @@ -43,7 +44,15 @@ #define FUNC_INHERITE 0x20 #define FUNC_PASID 0x40 +/* get_user() pointer test cases */ +#define GET_USER_USER 0 +#define GET_USER_KERNEL_TOP 1 +#define GET_USER_KERNEL_BOT 2 +#define GET_USER_KERNEL 3 + #define TEST_MASK 0x7f +#define L5_SIGN_EXT_MASK (0xFFUL << 56) +#define L4_SIGN_EXT_MASK (0x1FFFFUL << 47) #define LOW_ADDR (0x1UL << 30) #define HIGH_ADDR (0x3UL << 48) @@ -115,23 +124,42 @@ static void segv_handler(int sig) siglongjmp(segv_env, 1); } -static inline int cpu_has_lam(void) +static inline int lam_is_available(void) { unsigned int cpuinfo[4]; + unsigned long bits = 0; + int ret; __cpuid_count(0x7, 1, cpuinfo[0], cpuinfo[1], cpuinfo[2], cpuinfo[3]); - return (cpuinfo[0] & (1 << 26)); + /* Check if cpu supports LAM */ + if (!(cpuinfo[0] & (1 << 26))) { + ksft_print_msg("LAM is not supported!\n"); + return 0; + } + + /* Return 0 if CONFIG_ADDRESS_MASKING is not set */ + ret = syscall(SYS_arch_prctl, ARCH_GET_MAX_TAG_BITS, &bits); + if (ret) { + ksft_print_msg("LAM is disabled in the kernel!\n"); + return 0; + } + + return 1; } -/* Check 5-level page table feature in CPUID.(EAX=07H, ECX=00H):ECX.[bit 16] */ -static inline int cpu_has_la57(void) +static inline int la57_enabled(void) { - unsigned int cpuinfo[4]; + int ret; + void *p; + + p = mmap((void *)HIGH_ADDR, PAGE_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); - __cpuid_count(0x7, 0, cpuinfo[0], cpuinfo[1], cpuinfo[2], cpuinfo[3]); + ret = p == MAP_FAILED ? 0 : 1; - return (cpuinfo[2] & (1 << 16)); + munmap(p, PAGE_SIZE); + return ret; } /* @@ -322,7 +350,7 @@ static int handle_mmap(struct testcases *test) flags, -1, 0); if (ptr == MAP_FAILED) { if (test->addr == HIGH_ADDR) - if (!cpu_has_la57()) + if (!la57_enabled()) return 3; /* unsupport LA57 */ return 1; } @@ -370,6 +398,78 @@ static int handle_syscall(struct testcases *test) return ret; } +static int get_user_syscall(struct testcases *test) +{ + uint64_t ptr_address, bitmask; + int fd, ret = 0; + void *ptr; + + if (la57_enabled()) { + bitmask = L5_SIGN_EXT_MASK; + ptr_address = HIGH_ADDR; + } else { + bitmask = L4_SIGN_EXT_MASK; + ptr_address = LOW_ADDR; + } + + ptr = mmap((void *)ptr_address, PAGE_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + + if (ptr == MAP_FAILED) { + perror("failed to map byte to pass into get_user"); + return 1; + } + + if (set_lam(test->lam) != 0) { + ret = 2; + goto error; + } + + fd = memfd_create("lam_ioctl", 0); + if (fd == -1) { + munmap(ptr, PAGE_SIZE); + exit(EXIT_FAILURE); + } + + switch (test->later) { + case GET_USER_USER: + /* Control group - properly tagged user pointer */ + ptr = (void *)set_metadata((uint64_t)ptr, test->lam); + break; + case GET_USER_KERNEL_TOP: + /* Kernel address with top bit cleared */ + bitmask &= (bitmask >> 1); + ptr = (void *)((uint64_t)ptr | bitmask); + break; + case GET_USER_KERNEL_BOT: + /* Kernel address with bottom sign-extension bit cleared */ + bitmask &= (bitmask << 1); + ptr = (void *)((uint64_t)ptr | bitmask); + break; + case GET_USER_KERNEL: + /* Try to pass a kernel address */ + ptr = (void *)((uint64_t)ptr | bitmask); + break; + default: + printf("Invalid test case value passed!\n"); + break; + } + + /* + * Use FIOASYNC ioctl because it utilizes get_user() internally and is + * very non-invasive to the system. Pass differently tagged pointers to + * get_user() in order to verify that valid user pointers are going + * through and invalid kernel/non-canonical pointers are not. + */ + if (ioctl(fd, FIOASYNC, ptr) != 0) + ret = 1; + + close(fd); +error: + munmap(ptr, PAGE_SIZE); + return ret; +} + int sys_uring_setup(unsigned int entries, struct io_uring_params *p) { return (int)syscall(__NR_io_uring_setup, entries, p); @@ -596,8 +696,10 @@ int do_uring(unsigned long lam) fi->file_fd = file_fd; ring = malloc(sizeof(*ring)); - if (!ring) + if (!ring) { + free(fi); return 1; + } memset(ring, 0, sizeof(struct io_ring)); @@ -883,6 +985,33 @@ static struct testcases syscall_cases[] = { .test_func = handle_syscall, .msg = "SYSCALL:[Negative] Disable LAM. Dereferencing pointer with metadata.\n", }, + { + .later = GET_USER_USER, + .lam = LAM_U57_BITS, + .test_func = get_user_syscall, + .msg = "GET_USER: get_user() and pass a properly tagged user pointer.\n", + }, + { + .later = GET_USER_KERNEL_TOP, + .expected = 1, + .lam = LAM_U57_BITS, + .test_func = get_user_syscall, + .msg = "GET_USER:[Negative] get_user() with a kernel pointer and the top bit cleared.\n", + }, + { + .later = GET_USER_KERNEL_BOT, + .expected = 1, + .lam = LAM_U57_BITS, + .test_func = get_user_syscall, + .msg = "GET_USER:[Negative] get_user() with a kernel pointer and the bottom sign-extension bit cleared.\n", + }, + { + .later = GET_USER_KERNEL, + .expected = 1, + .lam = LAM_U57_BITS, + .test_func = get_user_syscall, + .msg = "GET_USER:[Negative] get_user() and pass a kernel pointer.\n", + }, }; static struct testcases mmap_cases[] = { @@ -1181,10 +1310,8 @@ int main(int argc, char **argv) tests_cnt = 0; - if (!cpu_has_lam()) { - ksft_print_msg("Unsupported LAM feature!\n"); + if (!lam_is_available()) return KSFT_SKIP; - } while ((c = getopt(argc, argv, "ht:")) != -1) { switch (c) { diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c index 3a29346e1452..bb99a71380a5 100644 --- a/tools/testing/selftests/x86/ldt_gdt.c +++ b/tools/testing/selftests/x86/ldt_gdt.c @@ -26,6 +26,8 @@ #include <asm/prctl.h> #include <sys/prctl.h> +#include "helpers.h" + #define AR_ACCESSED (1<<8) #define AR_TYPE_RODATA (0 * (1<<9)) @@ -506,20 +508,6 @@ static void fix_sa_restorer(int sig) } #endif -static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), - int flags) -{ - struct sigaction sa; - memset(&sa, 0, sizeof(sa)); - sa.sa_sigaction = handler; - sa.sa_flags = SA_SIGINFO | flags; - sigemptyset(&sa.sa_mask); - if (sigaction(sig, &sa, 0)) - err(1, "sigaction"); - - fix_sa_restorer(sig); -} - static jmp_buf jmpbuf; static void sigsegv(int sig, siginfo_t *info, void *ctx_void) @@ -549,9 +537,11 @@ static void do_multicpu_tests(void) } sethandler(SIGSEGV, sigsegv, 0); + fix_sa_restorer(SIGSEGV); #ifdef __i386__ /* True 32-bit kernels send SIGILL instead of SIGSEGV on IRET faults. */ sethandler(SIGILL, sigsegv, 0); + fix_sa_restorer(SIGILL); #endif printf("[RUN]\tCross-CPU LDT invalidation\n"); diff --git a/tools/testing/selftests/x86/mov_ss_trap.c b/tools/testing/selftests/x86/mov_ss_trap.c index cc3de6ff9fba..f22cb6b382f9 100644 --- a/tools/testing/selftests/x86/mov_ss_trap.c +++ b/tools/testing/selftests/x86/mov_ss_trap.c @@ -36,7 +36,7 @@ #include <setjmp.h> #include <sys/prctl.h> -#define X86_EFLAGS_RF (1UL << 16) +#include "helpers.h" #if __x86_64__ # define REG_IP REG_RIP @@ -94,18 +94,6 @@ static void enable_watchpoint(void) } } -static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), - int flags) -{ - struct sigaction sa; - memset(&sa, 0, sizeof(sa)); - sa.sa_sigaction = handler; - sa.sa_flags = SA_SIGINFO | flags; - sigemptyset(&sa.sa_mask); - if (sigaction(sig, &sa, 0)) - err(1, "sigaction"); -} - static char const * const signames[] = { [SIGSEGV] = "SIGSEGV", [SIGBUS] = "SIBGUS", diff --git a/tools/testing/selftests/x86/ptrace_syscall.c b/tools/testing/selftests/x86/ptrace_syscall.c index 12aaa063196e..360ec88d5432 100644 --- a/tools/testing/selftests/x86/ptrace_syscall.c +++ b/tools/testing/selftests/x86/ptrace_syscall.c @@ -15,6 +15,8 @@ #include <asm/ptrace-abi.h> #include <sys/auxv.h> +#include "helpers.h" + /* Bitness-agnostic defines for user_regs_struct fields. */ #ifdef __x86_64__ # define user_syscall_nr orig_rax @@ -93,18 +95,6 @@ static siginfo_t wait_trap(pid_t chld) return si; } -static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), - int flags) -{ - struct sigaction sa; - memset(&sa, 0, sizeof(sa)); - sa.sa_sigaction = handler; - sa.sa_flags = SA_SIGINFO | flags; - sigemptyset(&sa.sa_mask); - if (sigaction(sig, &sa, 0)) - err(1, "sigaction"); -} - static void setsigign(int sig, int flags) { struct sigaction sa; @@ -116,16 +106,6 @@ static void setsigign(int sig, int flags) err(1, "sigaction"); } -static void clearhandler(int sig) -{ - struct sigaction sa; - memset(&sa, 0, sizeof(sa)); - sa.sa_handler = SIG_DFL; - sigemptyset(&sa.sa_mask); - if (sigaction(sig, &sa, 0)) - err(1, "sigaction"); -} - #ifdef __x86_64__ # define REG_BP REG_RBP #else diff --git a/tools/testing/selftests/x86/sigaltstack.c b/tools/testing/selftests/x86/sigaltstack.c index f689af75e979..0ae1b784498c 100644 --- a/tools/testing/selftests/x86/sigaltstack.c +++ b/tools/testing/selftests/x86/sigaltstack.c @@ -14,6 +14,8 @@ #include <sys/resource.h> #include <setjmp.h> +#include "helpers.h" + /* sigaltstack()-enforced minimum stack */ #define ENFORCED_MINSIGSTKSZ 2048 @@ -27,30 +29,6 @@ static bool sigalrm_expected; static unsigned long at_minstack_size; -static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), - int flags) -{ - struct sigaction sa; - - memset(&sa, 0, sizeof(sa)); - sa.sa_sigaction = handler; - sa.sa_flags = SA_SIGINFO | flags; - sigemptyset(&sa.sa_mask); - if (sigaction(sig, &sa, 0)) - err(1, "sigaction"); -} - -static void clearhandler(int sig) -{ - struct sigaction sa; - - memset(&sa, 0, sizeof(sa)); - sa.sa_handler = SIG_DFL; - sigemptyset(&sa.sa_mask); - if (sigaction(sig, &sa, 0)) - err(1, "sigaction"); -} - static int setup_altstack(void *start, unsigned long size) { stack_t ss; diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c index 0b75b29f794b..26ef562f4232 100644 --- a/tools/testing/selftests/x86/sigreturn.c +++ b/tools/testing/selftests/x86/sigreturn.c @@ -46,6 +46,8 @@ #include <sys/ptrace.h> #include <sys/user.h> +#include "helpers.h" + /* Pull in AR_xyz defines. */ typedef unsigned int u32; typedef unsigned short u16; @@ -138,28 +140,6 @@ static unsigned short LDT3(int idx) return (idx << 3) | 7; } -static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), - int flags) -{ - struct sigaction sa; - memset(&sa, 0, sizeof(sa)); - sa.sa_sigaction = handler; - sa.sa_flags = SA_SIGINFO | flags; - sigemptyset(&sa.sa_mask); - if (sigaction(sig, &sa, 0)) - err(1, "sigaction"); -} - -static void clearhandler(int sig) -{ - struct sigaction sa; - memset(&sa, 0, sizeof(sa)); - sa.sa_handler = SIG_DFL; - sigemptyset(&sa.sa_mask); - if (sigaction(sig, &sa, 0)) - err(1, "sigaction"); -} - static void add_ldt(const struct user_desc *desc, unsigned short *var, const char *name) { diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c index 9a30f443e928..280d7a22b9c9 100644 --- a/tools/testing/selftests/x86/single_step_syscall.c +++ b/tools/testing/selftests/x86/single_step_syscall.c @@ -33,28 +33,6 @@ #include "helpers.h" -static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), - int flags) -{ - struct sigaction sa; - memset(&sa, 0, sizeof(sa)); - sa.sa_sigaction = handler; - sa.sa_flags = SA_SIGINFO | flags; - sigemptyset(&sa.sa_mask); - if (sigaction(sig, &sa, 0)) - err(1, "sigaction"); -} - -static void clearhandler(int sig) -{ - struct sigaction sa; - memset(&sa, 0, sizeof(sa)); - sa.sa_handler = SIG_DFL; - sigemptyset(&sa.sa_mask); - if (sigaction(sig, &sa, 0)) - err(1, "sigaction"); -} - static volatile sig_atomic_t sig_traps, sig_eflags; sigjmp_buf jmpbuf; diff --git a/tools/testing/selftests/x86/syscall_arg_fault.c b/tools/testing/selftests/x86/syscall_arg_fault.c index 48ab065a76f9..f67a2df335ba 100644 --- a/tools/testing/selftests/x86/syscall_arg_fault.c +++ b/tools/testing/selftests/x86/syscall_arg_fault.c @@ -17,18 +17,6 @@ #include "helpers.h" -static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), - int flags) -{ - struct sigaction sa; - memset(&sa, 0, sizeof(sa)); - sa.sa_sigaction = handler; - sa.sa_flags = SA_SIGINFO | flags; - sigemptyset(&sa.sa_mask); - if (sigaction(sig, &sa, 0)) - err(1, "sigaction"); -} - static sigjmp_buf jmpbuf; static volatile sig_atomic_t n_errs; diff --git a/tools/testing/selftests/x86/syscall_nt.c b/tools/testing/selftests/x86/syscall_nt.c index a108b80dd082..f9c9814160f0 100644 --- a/tools/testing/selftests/x86/syscall_nt.c +++ b/tools/testing/selftests/x86/syscall_nt.c @@ -18,18 +18,6 @@ static unsigned int nerrs; -static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), - int flags) -{ - struct sigaction sa; - memset(&sa, 0, sizeof(sa)); - sa.sa_sigaction = handler; - sa.sa_flags = SA_SIGINFO | flags; - sigemptyset(&sa.sa_mask); - if (sigaction(sig, &sa, 0)) - err(1, "sigaction"); -} - static void sigtrap(int sig, siginfo_t *si, void *ctx_void) { } diff --git a/tools/testing/selftests/x86/syscall_numbering.c b/tools/testing/selftests/x86/syscall_numbering.c index 991591718bb0..41c42b7b54a6 100644 --- a/tools/testing/selftests/x86/syscall_numbering.c +++ b/tools/testing/selftests/x86/syscall_numbering.c @@ -25,6 +25,7 @@ #include <sys/mman.h> #include <linux/ptrace.h> +#include "../kselftest.h" /* Common system call numbers */ #define SYS_READ 0 @@ -313,7 +314,7 @@ static void test_syscall_numbering(void) * The MSB is supposed to be ignored, so we loop over a few * to test that out. */ - for (size_t i = 0; i < sizeof(msbs)/sizeof(msbs[0]); i++) { + for (size_t i = 0; i < ARRAY_SIZE(msbs); i++) { int msb = msbs[i]; run("Checking system calls with msb = %d (0x%x)\n", msb, msb); diff --git a/tools/testing/selftests/x86/sysret_rip.c b/tools/testing/selftests/x86/sysret_rip.c index b30de9aaa6d4..5fb531e3ad7c 100644 --- a/tools/testing/selftests/x86/sysret_rip.c +++ b/tools/testing/selftests/x86/sysret_rip.c @@ -22,6 +22,8 @@ #include <sys/mman.h> #include <assert.h> +#include "helpers.h" + /* * These items are in clang_helpers_64.S, in order to avoid clang inline asm * limitations: @@ -31,28 +33,6 @@ extern const char test_page[]; static void const *current_test_page_addr = test_page; -static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), - int flags) -{ - struct sigaction sa; - memset(&sa, 0, sizeof(sa)); - sa.sa_sigaction = handler; - sa.sa_flags = SA_SIGINFO | flags; - sigemptyset(&sa.sa_mask); - if (sigaction(sig, &sa, 0)) - err(1, "sigaction"); -} - -static void clearhandler(int sig) -{ - struct sigaction sa; - memset(&sa, 0, sizeof(sa)); - sa.sa_handler = SIG_DFL; - sigemptyset(&sa.sa_mask); - if (sigaction(sig, &sa, 0)) - err(1, "sigaction"); -} - /* State used by our signal handlers. */ static gregset_t initial_regs; diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c index 6de11b4df458..05e1e6774fba 100644 --- a/tools/testing/selftests/x86/test_vsyscall.c +++ b/tools/testing/selftests/x86/test_vsyscall.c @@ -310,19 +310,6 @@ static void test_getcpu(int cpu) static jmp_buf jmpbuf; static volatile unsigned long segv_err; -static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), - int flags) -{ - struct sigaction sa; - - memset(&sa, 0, sizeof(sa)); - sa.sa_sigaction = handler; - sa.sa_flags = SA_SIGINFO | flags; - sigemptyset(&sa.sa_mask); - if (sigaction(sig, &sa, 0)) - ksft_exit_fail_msg("sigaction failed\n"); -} - static void sigsegv(int sig, siginfo_t *info, void *ctx_void) { ucontext_t *ctx = (ucontext_t *)ctx_void; diff --git a/tools/testing/selftests/x86/unwind_vdso.c b/tools/testing/selftests/x86/unwind_vdso.c index 4c311e1af4c7..9cc17588d818 100644 --- a/tools/testing/selftests/x86/unwind_vdso.c +++ b/tools/testing/selftests/x86/unwind_vdso.c @@ -43,18 +43,6 @@ int main() #include <dlfcn.h> #include <unwind.h> -static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), - int flags) -{ - struct sigaction sa; - memset(&sa, 0, sizeof(sa)); - sa.sa_sigaction = handler; - sa.sa_flags = SA_SIGINFO | flags; - sigemptyset(&sa.sa_mask); - if (sigaction(sig, &sa, 0)) - err(1, "sigaction"); -} - static volatile sig_atomic_t nerrs; static unsigned long sysinfo; static bool got_sysinfo = false; diff --git a/tools/testing/selftests/x86/xstate.c b/tools/testing/selftests/x86/xstate.c new file mode 100644 index 000000000000..23c1d6c964ea --- /dev/null +++ b/tools/testing/selftests/x86/xstate.c @@ -0,0 +1,477 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +#include <elf.h> +#include <pthread.h> +#include <stdbool.h> + +#include <asm/prctl.h> +#include <sys/ptrace.h> +#include <sys/syscall.h> +#include <sys/uio.h> +#include <sys/wait.h> + +#include "helpers.h" +#include "xstate.h" + +/* + * The userspace xstate test suite is designed to be generic and operates + * with randomized xstate data. However, some states require special handling: + * + * - PKRU and XTILECFG need specific adjustments, such as modifying + * randomization behavior or using fixed values. + * - But, PKRU already has a dedicated test suite in /tools/selftests/mm. + * - Legacy states (FP and SSE) are excluded, as they are not considered + * part of extended states (xstates) and their usage is already deeply + * integrated into user-space libraries. + */ +#define XFEATURE_MASK_TEST_SUPPORTED \ + ((1 << XFEATURE_YMM) | \ + (1 << XFEATURE_OPMASK) | \ + (1 << XFEATURE_ZMM_Hi256) | \ + (1 << XFEATURE_Hi16_ZMM) | \ + (1 << XFEATURE_XTILEDATA)) + +static inline uint64_t xgetbv(uint32_t index) +{ + uint32_t eax, edx; + + asm volatile("xgetbv" : "=a" (eax), "=d" (edx) : "c" (index)); + return eax + ((uint64_t)edx << 32); +} + +static inline uint64_t get_xstatebv(struct xsave_buffer *xbuf) +{ + return *(uint64_t *)(&xbuf->header); +} + +static struct xstate_info xstate; + +struct futex_info { + unsigned int iterations; + struct futex_info *next; + pthread_mutex_t mutex; + pthread_t thread; + bool valid; + int nr; +}; + +static inline void load_rand_xstate(struct xstate_info *xstate, struct xsave_buffer *xbuf) +{ + clear_xstate_header(xbuf); + set_xstatebv(xbuf, xstate->mask); + set_rand_data(xstate, xbuf); + xrstor(xbuf, xstate->mask); +} + +static inline void load_init_xstate(struct xstate_info *xstate, struct xsave_buffer *xbuf) +{ + clear_xstate_header(xbuf); + xrstor(xbuf, xstate->mask); +} + +static inline void copy_xstate(struct xsave_buffer *xbuf_dst, struct xsave_buffer *xbuf_src) +{ + memcpy(&xbuf_dst->bytes[xstate.xbuf_offset], + &xbuf_src->bytes[xstate.xbuf_offset], + xstate.size); +} + +static inline bool validate_xstate_same(struct xsave_buffer *xbuf1, struct xsave_buffer *xbuf2) +{ + int ret; + + ret = memcmp(&xbuf1->bytes[xstate.xbuf_offset], + &xbuf2->bytes[xstate.xbuf_offset], + xstate.size); + return ret == 0; +} + +static inline bool validate_xregs_same(struct xsave_buffer *xbuf1) +{ + struct xsave_buffer *xbuf2; + bool ret; + + xbuf2 = alloc_xbuf(); + if (!xbuf2) + ksft_exit_fail_msg("failed to allocate XSAVE buffer\n"); + + xsave(xbuf2, xstate.mask); + ret = validate_xstate_same(xbuf1, xbuf2); + + free(xbuf2); + return ret; +} + +/* Context switching test */ + +static void *check_xstate(void *info) +{ + struct futex_info *finfo = (struct futex_info *)info; + struct xsave_buffer *xbuf; + int i; + + xbuf = alloc_xbuf(); + if (!xbuf) + ksft_exit_fail_msg("unable to allocate XSAVE buffer\n"); + + /* + * Load random data into 'xbuf' and then restore it to the xstate + * registers. + */ + load_rand_xstate(&xstate, xbuf); + finfo->valid = true; + + for (i = 0; i < finfo->iterations; i++) { + pthread_mutex_lock(&finfo->mutex); + + /* + * Ensure the register values have not diverged from the + * record. Then reload a new random value. If it failed + * ever before, skip it. + */ + if (finfo->valid) { + finfo->valid = validate_xregs_same(xbuf); + load_rand_xstate(&xstate, xbuf); + } + + /* + * The last thread's last unlock will be for thread 0's + * mutex. However, thread 0 will have already exited the + * loop and the mutex will already be unlocked. + * + * Because this is not an ERRORCHECK mutex, that + * inconsistency will be silently ignored. + */ + pthread_mutex_unlock(&finfo->next->mutex); + } + + free(xbuf); + return finfo; +} + +static void create_threads(uint32_t num_threads, uint32_t iterations, struct futex_info *finfo) +{ + int i; + + for (i = 0; i < num_threads; i++) { + int next_nr; + + finfo[i].nr = i; + finfo[i].iterations = iterations; + + /* + * Thread 'i' will wait on this mutex to be unlocked. + * Lock it immediately after initialization: + */ + pthread_mutex_init(&finfo[i].mutex, NULL); + pthread_mutex_lock(&finfo[i].mutex); + + next_nr = (i + 1) % num_threads; + finfo[i].next = &finfo[next_nr]; + + if (pthread_create(&finfo[i].thread, NULL, check_xstate, &finfo[i])) + ksft_exit_fail_msg("pthread_create() failed\n"); + } +} + +static bool checkout_threads(uint32_t num_threads, struct futex_info *finfo) +{ + void *thread_retval; + bool valid = true; + int err, i; + + for (i = 0; i < num_threads; i++) { + err = pthread_join(finfo[i].thread, &thread_retval); + if (err) + ksft_exit_fail_msg("pthread_join() failed for thread %d err: %d\n", i, err); + + if (thread_retval != &finfo[i]) { + ksft_exit_fail_msg("unexpected thread retval for thread %d: %p\n", + i, thread_retval); + } + + valid &= finfo[i].valid; + } + + return valid; +} + +static void affinitize_cpu0(void) +{ + cpu_set_t cpuset; + + CPU_ZERO(&cpuset); + CPU_SET(0, &cpuset); + + if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) + ksft_exit_fail_msg("sched_setaffinity to CPU 0 failed\n"); +} + +static void test_context_switch(uint32_t num_threads, uint32_t iterations) +{ + struct futex_info *finfo; + + /* Affinitize to one CPU to force context switches */ + affinitize_cpu0(); + + printf("[RUN]\t%s: check context switches, %d iterations, %d threads.\n", + xstate.name, iterations, num_threads); + + finfo = malloc(sizeof(*finfo) * num_threads); + if (!finfo) + ksft_exit_fail_msg("unable allocate memory\n"); + + create_threads(num_threads, iterations, finfo); + + /* + * This thread wakes up thread 0 + * Thread 0 will wake up 1 + * Thread 1 will wake up 2 + * ... + * The last thread will wake up 0 + * + * This will repeat for the configured + * number of iterations. + */ + pthread_mutex_unlock(&finfo[0].mutex); + + /* Wait for all the threads to finish: */ + if (checkout_threads(num_threads, finfo)) + printf("[OK]\tNo incorrect case was found.\n"); + else + printf("[FAIL]\tFailed with context switching test.\n"); + + free(finfo); +} + +/* + * Ptrace test for the ABI format as described in arch/x86/include/asm/user.h + */ + +/* + * Make sure the ptracee has the expanded kernel buffer on the first use. + * Then, initialize the state before performing the state injection from + * the ptracer. For non-dynamic states, this is benign. + */ +static inline void ptracee_touch_xstate(void) +{ + struct xsave_buffer *xbuf; + + xbuf = alloc_xbuf(); + + load_rand_xstate(&xstate, xbuf); + load_init_xstate(&xstate, xbuf); + + free(xbuf); +} + +/* + * Ptracer injects the randomized xstate data. It also reads before and + * after that, which will execute the kernel's state copy functions. + */ +static void ptracer_inject_xstate(pid_t target) +{ + uint32_t xbuf_size = get_xbuf_size(); + struct xsave_buffer *xbuf1, *xbuf2; + struct iovec iov; + + /* + * Allocate buffers to keep data while ptracer can write the + * other buffer + */ + xbuf1 = alloc_xbuf(); + xbuf2 = alloc_xbuf(); + if (!xbuf1 || !xbuf2) + ksft_exit_fail_msg("unable to allocate XSAVE buffer\n"); + + iov.iov_base = xbuf1; + iov.iov_len = xbuf_size; + + if (ptrace(PTRACE_GETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov)) + ksft_exit_fail_msg("PTRACE_GETREGSET failed\n"); + + printf("[RUN]\t%s: inject xstate via ptrace().\n", xstate.name); + + load_rand_xstate(&xstate, xbuf1); + copy_xstate(xbuf2, xbuf1); + + if (ptrace(PTRACE_SETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov)) + ksft_exit_fail_msg("PTRACE_SETREGSET failed\n"); + + if (ptrace(PTRACE_GETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov)) + ksft_exit_fail_msg("PTRACE_GETREGSET failed\n"); + + if (*(uint64_t *)get_fpx_sw_bytes(xbuf1) == xgetbv(0)) + printf("[OK]\t'xfeatures' in SW reserved area was correctly written\n"); + else + printf("[FAIL]\t'xfeatures' in SW reserved area was not correctly written\n"); + + if (validate_xstate_same(xbuf2, xbuf1)) + printf("[OK]\txstate was correctly updated.\n"); + else + printf("[FAIL]\txstate was not correctly updated.\n"); + + free(xbuf1); + free(xbuf2); +} + +static void test_ptrace(void) +{ + pid_t child; + int status; + + child = fork(); + if (child < 0) { + ksft_exit_fail_msg("fork() failed\n"); + } else if (!child) { + if (ptrace(PTRACE_TRACEME, 0, NULL, NULL)) + ksft_exit_fail_msg("PTRACE_TRACEME failed\n"); + + ptracee_touch_xstate(); + + raise(SIGTRAP); + _exit(0); + } + + do { + wait(&status); + } while (WSTOPSIG(status) != SIGTRAP); + + ptracer_inject_xstate(child); + + ptrace(PTRACE_DETACH, child, NULL, NULL); + wait(&status); + if (!WIFEXITED(status) || WEXITSTATUS(status)) + ksft_exit_fail_msg("ptracee exit error\n"); +} + +/* + * Test signal delivery for the ABI compatibility. + * See the ABI format: arch/x86/include/uapi/asm/sigcontext.h + */ + +/* + * Avoid using printf() in signal handlers as it is not + * async-signal-safe. + */ +#define SIGNAL_BUF_LEN 1000 +static char signal_message_buffer[SIGNAL_BUF_LEN]; +static void sig_print(char *msg) +{ + int left = SIGNAL_BUF_LEN - strlen(signal_message_buffer) - 1; + + strncat(signal_message_buffer, msg, left); +} + +static struct xsave_buffer *stashed_xbuf; + +static void validate_sigfpstate(int sig, siginfo_t *si, void *ctx_void) +{ + ucontext_t *ctx = (ucontext_t *)ctx_void; + void *xbuf = ctx->uc_mcontext.fpregs; + struct _fpx_sw_bytes *sw_bytes; + uint32_t magic2; + + /* Reset the signal message buffer: */ + signal_message_buffer[0] = '\0'; + + sw_bytes = get_fpx_sw_bytes(xbuf); + if (sw_bytes->magic1 == FP_XSTATE_MAGIC1) + sig_print("[OK]\t'magic1' is valid\n"); + else + sig_print("[FAIL]\t'magic1' is not valid\n"); + + if (get_fpx_sw_bytes_features(xbuf) & xstate.mask) + sig_print("[OK]\t'xfeatures' in SW reserved area is valid\n"); + else + sig_print("[FAIL]\t'xfeatures' in SW reserved area is not valid\n"); + + if (get_xstatebv(xbuf) & xstate.mask) + sig_print("[OK]\t'xfeatures' in XSAVE header is valid\n"); + else + sig_print("[FAIL]\t'xfeatures' in XSAVE header is not valid\n"); + + if (validate_xstate_same(stashed_xbuf, xbuf)) + sig_print("[OK]\txstate delivery was successful\n"); + else + sig_print("[FAIL]\txstate delivery was not successful\n"); + + magic2 = *(uint32_t *)(xbuf + sw_bytes->xstate_size); + if (magic2 == FP_XSTATE_MAGIC2) + sig_print("[OK]\t'magic2' is valid\n"); + else + sig_print("[FAIL]\t'magic2' is not valid\n"); + + set_rand_data(&xstate, xbuf); + copy_xstate(stashed_xbuf, xbuf); +} + +static void test_signal(void) +{ + bool valid_xstate; + + /* + * The signal handler will access this to verify xstate context + * preservation. + */ + stashed_xbuf = alloc_xbuf(); + if (!stashed_xbuf) + ksft_exit_fail_msg("unable to allocate XSAVE buffer\n"); + + printf("[RUN]\t%s: load xstate and raise SIGUSR1\n", xstate.name); + + sethandler(SIGUSR1, validate_sigfpstate, 0); + + load_rand_xstate(&xstate, stashed_xbuf); + + raise(SIGUSR1); + + /* + * Immediately record the test result, deferring printf() to + * prevent unintended state contamination by that. + */ + valid_xstate = validate_xregs_same(stashed_xbuf); + printf("%s", signal_message_buffer); + + printf("[RUN]\t%s: load new xstate from sighandler and check it after sigreturn\n", + xstate.name); + + if (valid_xstate) + printf("[OK]\txstate was restored correctly\n"); + else + printf("[FAIL]\txstate restoration failed\n"); + + clearhandler(SIGUSR1); + free(stashed_xbuf); +} + +void test_xstate(uint32_t feature_num) +{ + const unsigned int ctxtsw_num_threads = 5, ctxtsw_iterations = 10; + unsigned long features; + long rc; + + if (!(XFEATURE_MASK_TEST_SUPPORTED & (1 << feature_num))) { + ksft_print_msg("The xstate test does not fully support the component %u, yet.\n", + feature_num); + return; + } + + rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_SUPP, &features); + if (rc || !(features & (1 << feature_num))) { + ksft_print_msg("The kernel does not support feature number: %u\n", feature_num); + return; + } + + xstate = get_xstate_info(feature_num); + if (!xstate.size || !xstate.xbuf_offset) { + ksft_exit_fail_msg("invalid state size/offset (%d/%d)\n", + xstate.size, xstate.xbuf_offset); + } + + test_context_switch(ctxtsw_num_threads, ctxtsw_iterations); + test_ptrace(); + test_signal(); +} diff --git a/tools/testing/selftests/x86/xstate.h b/tools/testing/selftests/x86/xstate.h new file mode 100644 index 000000000000..42af36ec852f --- /dev/null +++ b/tools/testing/selftests/x86/xstate.h @@ -0,0 +1,195 @@ +// SPDX-License-Identifier: GPL-2.0-only +#ifndef __SELFTESTS_X86_XSTATE_H +#define __SELFTESTS_X86_XSTATE_H + +#include <stdint.h> + +#include "../kselftest.h" + +#define XSAVE_HDR_OFFSET 512 +#define XSAVE_HDR_SIZE 64 + +/* + * List of XSAVE features Linux knows about. Copied from + * arch/x86/include/asm/fpu/types.h + */ +enum xfeature { + XFEATURE_FP, + XFEATURE_SSE, + XFEATURE_YMM, + XFEATURE_BNDREGS, + XFEATURE_BNDCSR, + XFEATURE_OPMASK, + XFEATURE_ZMM_Hi256, + XFEATURE_Hi16_ZMM, + XFEATURE_PT_UNIMPLEMENTED_SO_FAR, + XFEATURE_PKRU, + XFEATURE_PASID, + XFEATURE_CET_USER, + XFEATURE_CET_KERNEL_UNUSED, + XFEATURE_RSRVD_COMP_13, + XFEATURE_RSRVD_COMP_14, + XFEATURE_LBR, + XFEATURE_RSRVD_COMP_16, + XFEATURE_XTILECFG, + XFEATURE_XTILEDATA, + + XFEATURE_MAX, +}; + +/* Copied from arch/x86/kernel/fpu/xstate.c */ +static const char *xfeature_names[] = +{ + "x87 floating point registers", + "SSE registers", + "AVX registers", + "MPX bounds registers", + "MPX CSR", + "AVX-512 opmask", + "AVX-512 Hi256", + "AVX-512 ZMM_Hi256", + "Processor Trace (unused)", + "Protection Keys User registers", + "PASID state", + "Control-flow User registers", + "Control-flow Kernel registers (unused)", + "unknown xstate feature", + "unknown xstate feature", + "unknown xstate feature", + "unknown xstate feature", + "AMX Tile config", + "AMX Tile data", + "unknown xstate feature", +}; + +struct xsave_buffer { + union { + struct { + char legacy[XSAVE_HDR_OFFSET]; + char header[XSAVE_HDR_SIZE]; + char extended[0]; + }; + char bytes[0]; + }; +}; + +static inline void xsave(struct xsave_buffer *xbuf, uint64_t rfbm) +{ + uint32_t rfbm_hi = rfbm >> 32; + uint32_t rfbm_lo = rfbm; + + asm volatile("xsave (%%rdi)" + : : "D" (xbuf), "a" (rfbm_lo), "d" (rfbm_hi) + : "memory"); +} + +static inline void xrstor(struct xsave_buffer *xbuf, uint64_t rfbm) +{ + uint32_t rfbm_hi = rfbm >> 32; + uint32_t rfbm_lo = rfbm; + + asm volatile("xrstor (%%rdi)" + : : "D" (xbuf), "a" (rfbm_lo), "d" (rfbm_hi)); +} + +#define CPUID_LEAF_XSTATE 0xd +#define CPUID_SUBLEAF_XSTATE_USER 0x0 + +static inline uint32_t get_xbuf_size(void) +{ + uint32_t eax, ebx, ecx, edx; + + __cpuid_count(CPUID_LEAF_XSTATE, CPUID_SUBLEAF_XSTATE_USER, + eax, ebx, ecx, edx); + + /* + * EBX enumerates the size (in bytes) required by the XSAVE + * instruction for an XSAVE area containing all the user state + * components corresponding to bits currently set in XCR0. + */ + return ebx; +} + +struct xstate_info { + const char *name; + uint32_t num; + uint32_t mask; + uint32_t xbuf_offset; + uint32_t size; +}; + +static inline struct xstate_info get_xstate_info(uint32_t xfeature_num) +{ + struct xstate_info xstate = { }; + uint32_t eax, ebx, ecx, edx; + + if (xfeature_num >= XFEATURE_MAX) { + ksft_print_msg("unknown state\n"); + return xstate; + } + + xstate.name = xfeature_names[xfeature_num]; + xstate.num = xfeature_num; + xstate.mask = 1 << xfeature_num; + + __cpuid_count(CPUID_LEAF_XSTATE, xfeature_num, + eax, ebx, ecx, edx); + xstate.size = eax; + xstate.xbuf_offset = ebx; + return xstate; +} + +static inline struct xsave_buffer *alloc_xbuf(void) +{ + uint32_t xbuf_size = get_xbuf_size(); + + /* XSAVE buffer should be 64B-aligned. */ + return aligned_alloc(64, xbuf_size); +} + +static inline void clear_xstate_header(struct xsave_buffer *xbuf) +{ + memset(&xbuf->header, 0, sizeof(xbuf->header)); +} + +static inline void set_xstatebv(struct xsave_buffer *xbuf, uint64_t bv) +{ + /* XSTATE_BV is at the beginning of the header: */ + *(uint64_t *)(&xbuf->header) = bv; +} + +/* See 'struct _fpx_sw_bytes' at sigcontext.h */ +#define SW_BYTES_OFFSET 464 +/* N.B. The struct's field name varies so read from the offset. */ +#define SW_BYTES_BV_OFFSET (SW_BYTES_OFFSET + 8) + +static inline struct _fpx_sw_bytes *get_fpx_sw_bytes(void *xbuf) +{ + return xbuf + SW_BYTES_OFFSET; +} + +static inline uint64_t get_fpx_sw_bytes_features(void *buffer) +{ + return *(uint64_t *)(buffer + SW_BYTES_BV_OFFSET); +} + +static inline void set_rand_data(struct xstate_info *xstate, struct xsave_buffer *xbuf) +{ + int *ptr = (int *)&xbuf->bytes[xstate->xbuf_offset]; + int data, i; + + /* + * Ensure that 'data' is never 0. This ensures that + * the registers are never in their initial configuration + * and thus never tracked as being in the init state. + */ + data = rand() | 1; + + for (i = 0; i < xstate->size / sizeof(int); i++, ptr++) + *ptr = data; +} + +/* Testing kernel's context switching and ABI support for the xstate. */ +void test_xstate(uint32_t feature_num); + +#endif /* __SELFTESTS_X86_XSTATE_H */ |