diff options
Diffstat (limited to 'tools/testing')
108 files changed, 3381 insertions, 951 deletions
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 0a6572ab6f37..387f3df8b988 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -61,8 +61,11 @@ cxl_core-y += $(CXL_CORE_SRC)/pci.o cxl_core-y += $(CXL_CORE_SRC)/hdm.o cxl_core-y += $(CXL_CORE_SRC)/pmu.o cxl_core-y += $(CXL_CORE_SRC)/cdat.o +cxl_core-y += $(CXL_CORE_SRC)/ras.o +cxl_core-y += $(CXL_CORE_SRC)/acpi.o cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o +cxl_core-$(CONFIG_CXL_MCE) += $(CXL_CORE_SRC)/mce.o cxl_core-$(CONFIG_CXL_FEATURES) += $(CXL_CORE_SRC)/features.o cxl_core-y += config_check.o cxl_core-y += cxl_core_test.o diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index cc8948f49117..1c3336095923 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -155,7 +155,7 @@ static struct { } cfmws7; struct { struct acpi_cedt_cfmws cfmws; - u32 target[4]; + u32 target[3]; } cfmws8; struct { struct acpi_cedt_cxims cxims; @@ -331,14 +331,14 @@ static struct { .length = sizeof(mock_cedt.cfmws8), }, .interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR, - .interleave_ways = 2, - .granularity = 0, + .interleave_ways = 8, + .granularity = 1, .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | ACPI_CEDT_CFMWS_RESTRICT_PMEM, .qtg_id = FAKE_QTG_ID, - .window_size = SZ_256M * 16UL, + .window_size = SZ_512M * 6UL, }, - .target = { 0, 1, 0, 1, }, + .target = { 0, 1, 2, }, }, .cxims0 = { .cxims = { @@ -1000,25 +1000,21 @@ static void mock_cxl_endpoint_parse_cdat(struct cxl_port *port) find_cxl_root(port); struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev); struct cxl_dev_state *cxlds = cxlmd->cxlds; - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); struct access_coordinate ep_c[ACCESS_COORDINATE_MAX]; - struct range pmem_range = { - .start = cxlds->pmem_res.start, - .end = cxlds->pmem_res.end, - }; - struct range ram_range = { - .start = cxlds->ram_res.start, - .end = cxlds->ram_res.end, - }; if (!cxl_root) return; - if (range_len(&ram_range)) - dpa_perf_setup(port, &ram_range, &mds->ram_perf); + for (int i = 0; i < cxlds->nr_partitions; i++) { + struct resource *res = &cxlds->part[i].res; + struct cxl_dpa_perf *perf = &cxlds->part[i].perf; + struct range range = { + .start = res->start, + .end = res->end, + }; - if (range_len(&pmem_range)) - dpa_perf_setup(port, &pmem_range, &mds->pmem_perf); + dpa_perf_setup(port, &range, perf); + } cxl_memdev_update_perf(cxlmd); diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index 9495dbcc03a7..f2957a3e36fe 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -78,6 +78,10 @@ static struct cxl_cel_entry mock_cel[] = { .effect = CXL_CMD_EFFECT_NONE, }, { + .opcode = cpu_to_le16(CXL_MBOX_OP_SET_SHUTDOWN_STATE), + .effect = POLICY_CHANGE_IMMEDIATE, + }, + { .opcode = cpu_to_le16(CXL_MBOX_OP_GET_POISON), .effect = CXL_CMD_EFFECT_NONE, }, @@ -178,6 +182,7 @@ struct cxl_mockmem_data { u64 timestamp; unsigned long sanitize_timeout; struct vendor_test_feat test_feat; + u8 shutdown_state; }; static struct mock_event_log *event_find_log(struct device *dev, int log_type) @@ -1105,6 +1110,21 @@ static int mock_health_info(struct cxl_mbox_cmd *cmd) return 0; } +static int mock_set_shutdown_state(struct cxl_mockmem_data *mdata, + struct cxl_mbox_cmd *cmd) +{ + struct cxl_mbox_set_shutdown_state_in *ss = cmd->payload_in; + + if (cmd->size_in != sizeof(*ss)) + return -EINVAL; + + if (cmd->size_out != 0) + return -EINVAL; + + mdata->shutdown_state = ss->state; + return 0; +} + static struct mock_poison { struct cxl_dev_state *cxlds; u64 dpa; @@ -1583,6 +1603,9 @@ static int cxl_mock_mbox_send(struct cxl_mailbox *cxl_mbox, case CXL_MBOX_OP_PASSPHRASE_SECURE_ERASE: rc = mock_passphrase_secure_erase(mdata, cmd); break; + case CXL_MBOX_OP_SET_SHUTDOWN_STATE: + rc = mock_set_shutdown_state(mdata, cmd); + break; case CXL_MBOX_OP_GET_POISON: rc = mock_get_poison(cxlds, cmd); break; @@ -1670,6 +1693,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) struct cxl_dev_state *cxlds; struct cxl_mockmem_data *mdata; struct cxl_mailbox *cxl_mbox; + struct cxl_dpa_info range_info = { 0 }; int rc; mdata = devm_kzalloc(dev, sizeof(*mdata), GFP_KERNEL); @@ -1709,7 +1733,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) mds->event.buf = (struct cxl_get_event_payload *) mdata->event_buf; INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mockmem_sanitize_work); - cxlds->serial = pdev->id; + cxlds->serial = pdev->id + 1; if (is_rcd(pdev)) cxlds->rcd = true; @@ -1730,7 +1754,11 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) if (rc) return rc; - rc = cxl_mem_create_range_info(mds); + rc = cxl_mem_dpa_fetch(mds, &range_info); + if (rc) + return rc; + + rc = cxl_dpa_setup(cxlds, &range_info); if (rc) return rc; diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index da53a709773a..c176487356e6 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -809,6 +809,10 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: test.log.extend(parse_diagnostic(lines)) if test.name != "" and not peek_test_name_match(lines, test): test.add_error(printer, 'missing subtest result line!') + elif not lines: + print_log(test.log, printer) + test.status = TestStatus.NO_TESTS + test.add_error(printer, 'No more test results!') else: parse_test_result(lines, test, expected_num, printer) diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index 5ff4f6ffd873..bbba921e0eac 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -371,8 +371,8 @@ class KUnitParserTest(unittest.TestCase): """ result = kunit_parser.parse_run_tests(output.splitlines(), stdout) # Missing test results after test plan should alert a suspected test crash. - self.assertEqual(kunit_parser.TestStatus.TEST_CRASHED, result.status) - self.assertEqual(result.counts, kunit_parser.TestCounts(passed=1, crashed=1, errors=1)) + self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) + self.assertEqual(result.counts, kunit_parser.TestCounts(passed=1, errors=2)) def line_stream_from_strs(strs: Iterable[str]) -> kunit_parser.LineStream: return kunit_parser.LineStream(enumerate(strs, start=1)) diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index 8b3591a51e1f..b2a6660bbd92 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -14,6 +14,7 @@ include ../shared/shared.mk main: $(OFILES) +xarray.o: ../../../lib/test_xarray.c idr-test.o: ../../../lib/test_ida.c idr-test: idr-test.o $(CORE_OFILES) diff --git a/tools/testing/rbtree/Makefile b/tools/testing/rbtree/Makefile new file mode 100644 index 000000000000..d7bbae2af4c7 --- /dev/null +++ b/tools/testing/rbtree/Makefile @@ -0,0 +1,33 @@ +# SPDX-License-Identifier: GPL-2.0 + +.PHONY: clean + +TARGETS = rbtree_test interval_tree_test +OFILES = $(SHARED_OFILES) rbtree-shim.o interval_tree-shim.o maple-shim.o +DEPS = ../../../include/linux/rbtree.h \ + ../../../include/linux/rbtree_types.h \ + ../../../include/linux/rbtree_augmented.h \ + ../../../include/linux/interval_tree.h \ + ../../../include/linux/interval_tree_generic.h \ + ../../../lib/rbtree.c \ + ../../../lib/interval_tree.c + +targets: $(TARGETS) + +include ../shared/shared.mk + +ifeq ($(DEBUG), 1) + CFLAGS += -g +endif + +$(TARGETS): $(OFILES) + +rbtree-shim.o: $(DEPS) +rbtree_test.o: ../../../lib/rbtree_test.c +interval_tree-shim.o: $(DEPS) +interval_tree-shim.o: CFLAGS += -DCONFIG_INTERVAL_TREE_SPAN_ITER +interval_tree_test.o: ../../../lib/interval_tree_test.c +interval_tree_test.o: CFLAGS += -DCONFIG_INTERVAL_TREE_SPAN_ITER + +clean: + $(RM) $(TARGETS) *.o radix-tree.c idr.c generated/* diff --git a/tools/testing/rbtree/interval_tree_test.c b/tools/testing/rbtree/interval_tree_test.c new file mode 100644 index 000000000000..49bc5b534330 --- /dev/null +++ b/tools/testing/rbtree/interval_tree_test.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * interval_tree.c: Userspace Interval Tree test-suite + * Copyright (c) 2025 Wei Yang <richard.weiyang@gmail.com> + */ +#include <linux/math64.h> +#include <linux/kern_levels.h> +#include "shared.h" +#include "maple-shared.h" + +#include "../../../lib/interval_tree_test.c" + +int usage(void) +{ + fprintf(stderr, "Userland interval tree test cases\n"); + fprintf(stderr, " -n: Number of nodes in the interval tree\n"); + fprintf(stderr, " -p: Number of iterations modifying the tree\n"); + fprintf(stderr, " -q: Number of searches to the interval tree\n"); + fprintf(stderr, " -s: Number of iterations searching the tree\n"); + fprintf(stderr, " -a: Searches will iterate all nodes in the tree\n"); + fprintf(stderr, " -m: Largest value for the interval's endpoint\n"); + fprintf(stderr, " -r: Random seed\n"); + exit(-1); +} + +void interval_tree_tests(void) +{ + interval_tree_test_init(); + interval_tree_test_exit(); +} + +int main(int argc, char **argv) +{ + int opt; + + while ((opt = getopt(argc, argv, "n:p:q:s:am:r:")) != -1) { + if (opt == 'n') + nnodes = strtoul(optarg, NULL, 0); + else if (opt == 'p') + perf_loops = strtoul(optarg, NULL, 0); + else if (opt == 'q') + nsearches = strtoul(optarg, NULL, 0); + else if (opt == 's') + search_loops = strtoul(optarg, NULL, 0); + else if (opt == 'a') + search_all = true; + else if (opt == 'm') + max_endpoint = strtoul(optarg, NULL, 0); + else if (opt == 'r') + seed = strtoul(optarg, NULL, 0); + else + usage(); + } + + maple_tree_init(); + interval_tree_tests(); + return 0; +} diff --git a/tools/testing/rbtree/rbtree_test.c b/tools/testing/rbtree/rbtree_test.c new file mode 100644 index 000000000000..585c970f679e --- /dev/null +++ b/tools/testing/rbtree/rbtree_test.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * rbtree_test.c: Userspace Red Black Tree test-suite + * Copyright (c) 2025 Wei Yang <richard.weiyang@gmail.com> + */ +#include <linux/init.h> +#include <linux/math64.h> +#include <linux/kern_levels.h> +#include "shared.h" + +#include "../../../lib/rbtree_test.c" + +int usage(void) +{ + fprintf(stderr, "Userland rbtree test cases\n"); + fprintf(stderr, " -n: Number of nodes in the rb-tree\n"); + fprintf(stderr, " -p: Number of iterations modifying the rb-tree\n"); + fprintf(stderr, " -c: Number of iterations modifying and verifying the rb-tree\n"); + fprintf(stderr, " -r: Random seed\n"); + exit(-1); +} + +void rbtree_tests(void) +{ + rbtree_test_init(); + rbtree_test_exit(); +} + +int main(int argc, char **argv) +{ + int opt; + + while ((opt = getopt(argc, argv, "n:p:c:r:")) != -1) { + if (opt == 'n') + nnodes = strtoul(optarg, NULL, 0); + else if (opt == 'p') + perf_loops = strtoul(optarg, NULL, 0); + else if (opt == 'c') + check_loops = strtoul(optarg, NULL, 0); + else if (opt == 'r') + seed = strtoul(optarg, NULL, 0); + else + usage(); + } + + rbtree_tests(); + return 0; +} diff --git a/tools/testing/rbtree/test.h b/tools/testing/rbtree/test.h new file mode 100644 index 000000000000..f1f1b545b55a --- /dev/null +++ b/tools/testing/rbtree/test.h @@ -0,0 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +void rbtree_tests(void); +void interval_tree_tests(void); diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 2694344274bf..c77c8c8e3d9b 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -62,6 +62,7 @@ TARGETS += mount TARGETS += mount_setattr TARGETS += move_mount_set_group TARGETS += mqueue +TARGETS += mseal_system_mappings TARGETS += nci TARGETS += net TARGETS += net/af_unix diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64 index 5680befae8c6..5e713ef7caa3 100644 --- a/tools/testing/selftests/bpf/config.x86_64 +++ b/tools/testing/selftests/bpf/config.x86_64 @@ -39,7 +39,6 @@ CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_CPU_FREQ_STAT=y CONFIG_CPU_IDLE_GOV_LADDER=y CONFIG_CPUSETS=y -CONFIG_CRC_T10DIF=y CONFIG_CRYPTO_BLAKE2B=y CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_XXHASH=y diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h index 8bd1ebd7d6af..813143b4985d 100644 --- a/tools/testing/selftests/bpf/progs/profiler.inc.h +++ b/tools/testing/selftests/bpf/progs/profiler.inc.h @@ -223,7 +223,7 @@ static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node, if (bpf_cmp_likely(filepart_length, <=, MAX_PATH)) { payload += filepart_length; } - cgroup_node = BPF_CORE_READ(cgroup_node, parent); + cgroup_node = BPF_CORE_READ(cgroup_node, __parent); } return payload; } diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c index fb07f5773888..7f3c233943b3 100644 --- a/tools/testing/selftests/bpf/progs/test_module_attach.c +++ b/tools/testing/selftests/bpf/progs/test_module_attach.c @@ -117,7 +117,7 @@ int BPF_PROG(handle_fexit_ret, int arg, struct file *ret) bpf_probe_read_kernel(&buf, 8, ret); bpf_probe_read_kernel(&buf, 8, (char *)ret + 256); - *(volatile long long *)ret; + *(volatile int *)ret; *(volatile int *)&ret->f_mode; return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_subprogs_extable.c b/tools/testing/selftests/bpf/progs/test_subprogs_extable.c index e2a21fbd4e44..dcac69f5928a 100644 --- a/tools/testing/selftests/bpf/progs/test_subprogs_extable.c +++ b/tools/testing/selftests/bpf/progs/test_subprogs_extable.c @@ -21,7 +21,7 @@ static __u64 test_cb(struct bpf_map *map, __u32 *key, __u64 *val, void *data) SEC("fexit/bpf_testmod_return_ptr") int BPF_PROG(handle_fexit_ret_subprogs, int arg, struct file *ret) { - *(volatile long *)ret; + *(volatile int *)ret; *(volatile int *)&ret->f_mode; bpf_for_each_map_elem(&test_array, test_cb, NULL, 0); triggered++; @@ -31,7 +31,7 @@ int BPF_PROG(handle_fexit_ret_subprogs, int arg, struct file *ret) SEC("fexit/bpf_testmod_return_ptr") int BPF_PROG(handle_fexit_ret_subprogs2, int arg, struct file *ret) { - *(volatile long *)ret; + *(volatile int *)ret; *(volatile int *)&ret->f_mode; bpf_for_each_map_elem(&test_array, test_cb, NULL, 0); triggered++; @@ -41,7 +41,7 @@ int BPF_PROG(handle_fexit_ret_subprogs2, int arg, struct file *ret) SEC("fexit/bpf_testmod_return_ptr") int BPF_PROG(handle_fexit_ret_subprogs3, int arg, struct file *ret) { - *(volatile long *)ret; + *(volatile int *)ret; *(volatile int *)&ret->f_mode; bpf_for_each_map_elem(&test_array, test_cb, NULL, 0); triggered++; diff --git a/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c b/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c index a9be6ae49454..c258b0722e04 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c +++ b/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c @@ -12,7 +12,7 @@ SEC("raw_tp") __arch_x86_64 __log_level(4) __msg("stack depth 8") __xlated("4: r5 = 5") -__xlated("5: w0 = ") +__xlated("5: r0 = ") __xlated("6: r0 = &(void __percpu *)(r0)") __xlated("7: r0 = *(u32 *)(r0 +0)") __xlated("8: exit") @@ -704,7 +704,7 @@ SEC("raw_tp") __arch_x86_64 __log_level(4) __msg("stack depth 32+0") __xlated("2: r1 = 1") -__xlated("3: w0 =") +__xlated("3: r0 =") __xlated("4: r0 = &(void __percpu *)(r0)") __xlated("5: r0 = *(u32 *)(r0 +0)") /* bpf_loop params setup */ @@ -753,7 +753,7 @@ __arch_x86_64 __log_level(4) __msg("stack depth 40+0") /* call bpf_get_smp_processor_id */ __xlated("2: r1 = 42") -__xlated("3: w0 =") +__xlated("3: r0 =") __xlated("4: r0 = &(void __percpu *)(r0)") __xlated("5: r0 = *(u32 *)(r0 +0)") /* call bpf_get_prandom_u32 */ diff --git a/tools/testing/selftests/bpf/progs/verifier_private_stack.c b/tools/testing/selftests/bpf/progs/verifier_private_stack.c index b1fbdf119553..fc91b414364e 100644 --- a/tools/testing/selftests/bpf/progs/verifier_private_stack.c +++ b/tools/testing/selftests/bpf/progs/verifier_private_stack.c @@ -27,7 +27,7 @@ __description("Private stack, single prog") __success __arch_x86_64 __jited(" movabsq $0x{{.*}}, %r9") -__jited(" addq %gs:0x{{.*}}, %r9") +__jited(" addq %gs:{{.*}}, %r9") __jited(" movl $0x2a, %edi") __jited(" movq %rdi, -0x100(%r9)") __naked void private_stack_single_prog(void) @@ -74,7 +74,7 @@ __success __arch_x86_64 /* private stack fp for the main prog */ __jited(" movabsq $0x{{.*}}, %r9") -__jited(" addq %gs:0x{{.*}}, %r9") +__jited(" addq %gs:{{.*}}, %r9") __jited(" movl $0x2a, %edi") __jited(" movq %rdi, -0x200(%r9)") __jited(" pushq %r9") @@ -122,7 +122,7 @@ __jited(" pushq %rbp") __jited(" movq %rsp, %rbp") __jited(" endbr64") __jited(" movabsq $0x{{.*}}, %r9") -__jited(" addq %gs:0x{{.*}}, %r9") +__jited(" addq %gs:{{.*}}, %r9") __jited(" pushq %r9") __jited(" callq") __jited(" popq %r9") diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh index 400a696a0d21..a17256d9f88a 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh @@ -88,22 +88,32 @@ echo "" > test/cpuset.cpus # If isolated CPUs have been reserved at boot time (as shown in # cpuset.cpus.isolated), these isolated CPUs should be outside of CPUs 0-8 # that will be used by this script for testing purpose. If not, some of -# the tests may fail incorrectly. These pre-isolated CPUs should stay in -# an isolated state throughout the testing process for now. +# the tests may fail incorrectly. Wait a bit and retry again just in case +# these isolated CPUs are leftover from previous run and have just been +# cleaned up earlier in this script. +# +# These pre-isolated CPUs should stay in an isolated state throughout the +# testing process for now. # BOOT_ISOLCPUS=$(cat $CGROUP2/cpuset.cpus.isolated) +[[ -n "$BOOT_ISOLCPUS" ]] && { + sleep 0.5 + BOOT_ISOLCPUS=$(cat $CGROUP2/cpuset.cpus.isolated) +} if [[ -n "$BOOT_ISOLCPUS" ]] then [[ $(echo $BOOT_ISOLCPUS | sed -e "s/[,-].*//") -le 8 ]] && skip_test "Pre-isolated CPUs ($BOOT_ISOLCPUS) overlap CPUs to be tested" echo "Pre-isolated CPUs: $BOOT_ISOLCPUS" fi + cleanup() { online_cpus cd $CGROUP2 - rmdir A1/A2/A3 A1/A2 A1 B1 > /dev/null 2>&1 - rmdir test > /dev/null 2>&1 + rmdir A1/A2/A3 A1/A2 A1 B1 test/A1 test/B1 test > /dev/null 2>&1 + rmdir rtest/p1/c11 rtest/p1/c12 rtest/p2/c21 \ + rtest/p2/c22 rtest/p1 rtest/p2 rtest > /dev/null 2>&1 [[ -n "$SCHED_DEBUG" ]] && echo "$SCHED_DEBUG" > /sys/kernel/debug/sched/verbose } @@ -173,14 +183,22 @@ test_add_proc() # # Cgroup test hierarchy # -# root -- A1 -- A2 -- A3 -# +- B1 +# root +# | +# +------+------+ +# | | +# A1 B1 +# | +# A2 +# | +# A3 # # P<v> = set cpus.partition (0:member, 1:root, 2:isolated) # C<l> = add cpu-list to cpuset.cpus # X<l> = add cpu-list to cpuset.cpus.exclusive # S<p> = use prefix in subtree_control # T = put a task into cgroup +# CX<l> = add cpu-list to both cpuset.cpus and cpuset.cpus.exclusive # O<c>=<v> = Write <v> to CPU online file of <c> # # ECPUs - effective CPUs of cpusets @@ -207,130 +225,129 @@ TEST_MATRIX=( " C0-1:P1 . . C2-3 S+:C4-5 . . . 0 A1:4-5" " C0-1 . . C2-3:P1 . . . C2 0 " " C0-1 . . C2-3:P1 . . . C4-5 0 B1:4-5" - "C0-3:P1:S+ C2-3:P1 . . . . . . 0 A1:0-1,A2:2-3" - "C0-3:P1:S+ C2-3:P1 . . C1-3 . . . 0 A1:1,A2:2-3" - "C2-3:P1:S+ C3:P1 . . C3 . . . 0 A1:,A2:3 A1:P1,A2:P1" - "C2-3:P1:S+ C3:P1 . . C3 P0 . . 0 A1:3,A2:3 A1:P1,A2:P0" - "C2-3:P1:S+ C2:P1 . . C2-4 . . . 0 A1:3-4,A2:2" - "C2-3:P1:S+ C3:P1 . . C3 . . C0-2 0 A1:,B1:0-2 A1:P1,A2:P1" - "$SETUP_A123_PARTITIONS . C2-3 . . . 0 A1:,A2:2,A3:3 A1:P1,A2:P1,A3:P1" + "C0-3:P1:S+ C2-3:P1 . . . . . . 0 A1:0-1|A2:2-3|XA2:2-3" + "C0-3:P1:S+ C2-3:P1 . . C1-3 . . . 0 A1:1|A2:2-3|XA2:2-3" + "C2-3:P1:S+ C3:P1 . . C3 . . . 0 A1:|A2:3|XA2:3 A1:P1|A2:P1" + "C2-3:P1:S+ C3:P1 . . C3 P0 . . 0 A1:3|A2:3 A1:P1|A2:P0" + "C2-3:P1:S+ C2:P1 . . C2-4 . . . 0 A1:3-4|A2:2" + "C2-3:P1:S+ C3:P1 . . C3 . . C0-2 0 A1:|B1:0-2 A1:P1|A2:P1" + "$SETUP_A123_PARTITIONS . C2-3 . . . 0 A1:|A2:2|A3:3 A1:P1|A2:P1|A3:P1" # CPU offlining cases: - " C0-1 . . C2-3 S+ C4-5 . O2=0 0 A1:0-1,B1:3" - "C0-3:P1:S+ C2-3:P1 . . O2=0 . . . 0 A1:0-1,A2:3" - "C0-3:P1:S+ C2-3:P1 . . O2=0 O2=1 . . 0 A1:0-1,A2:2-3" - "C0-3:P1:S+ C2-3:P1 . . O1=0 . . . 0 A1:0,A2:2-3" - "C0-3:P1:S+ C2-3:P1 . . O1=0 O1=1 . . 0 A1:0-1,A2:2-3" - "C2-3:P1:S+ C3:P1 . . O3=0 O3=1 . . 0 A1:2,A2:3 A1:P1,A2:P1" - "C2-3:P1:S+ C3:P2 . . O3=0 O3=1 . . 0 A1:2,A2:3 A1:P1,A2:P2" - "C2-3:P1:S+ C3:P1 . . O2=0 O2=1 . . 0 A1:2,A2:3 A1:P1,A2:P1" - "C2-3:P1:S+ C3:P2 . . O2=0 O2=1 . . 0 A1:2,A2:3 A1:P1,A2:P2" - "C2-3:P1:S+ C3:P1 . . O2=0 . . . 0 A1:,A2:3 A1:P1,A2:P1" - "C2-3:P1:S+ C3:P1 . . O3=0 . . . 0 A1:2,A2: A1:P1,A2:P1" - "C2-3:P1:S+ C3:P1 . . T:O2=0 . . . 0 A1:3,A2:3 A1:P1,A2:P-1" - "C2-3:P1:S+ C3:P1 . . . T:O3=0 . . 0 A1:2,A2:2 A1:P1,A2:P-1" - "$SETUP_A123_PARTITIONS . O1=0 . . . 0 A1:,A2:2,A3:3 A1:P1,A2:P1,A3:P1" - "$SETUP_A123_PARTITIONS . O2=0 . . . 0 A1:1,A2:,A3:3 A1:P1,A2:P1,A3:P1" - "$SETUP_A123_PARTITIONS . O3=0 . . . 0 A1:1,A2:2,A3: A1:P1,A2:P1,A3:P1" - "$SETUP_A123_PARTITIONS . T:O1=0 . . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1" - "$SETUP_A123_PARTITIONS . . T:O2=0 . . 0 A1:1,A2:3,A3:3 A1:P1,A2:P1,A3:P-1" - "$SETUP_A123_PARTITIONS . . . T:O3=0 . 0 A1:1,A2:2,A3:2 A1:P1,A2:P1,A3:P-1" - "$SETUP_A123_PARTITIONS . T:O1=0 O1=1 . . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1" - "$SETUP_A123_PARTITIONS . . T:O2=0 O2=1 . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1" - "$SETUP_A123_PARTITIONS . . . T:O3=0 O3=1 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1" - "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O1=1 . 0 A1:1,A2:,A3:3 A1:P1,A2:P1,A3:P1" - "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O2=1 . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1" + " C0-1 . . C2-3 S+ C4-5 . O2=0 0 A1:0-1|B1:3" + "C0-3:P1:S+ C2-3:P1 . . O2=0 . . . 0 A1:0-1|A2:3" + "C0-3:P1:S+ C2-3:P1 . . O2=0 O2=1 . . 0 A1:0-1|A2:2-3" + "C0-3:P1:S+ C2-3:P1 . . O1=0 . . . 0 A1:0|A2:2-3" + "C0-3:P1:S+ C2-3:P1 . . O1=0 O1=1 . . 0 A1:0-1|A2:2-3" + "C2-3:P1:S+ C3:P1 . . O3=0 O3=1 . . 0 A1:2|A2:3 A1:P1|A2:P1" + "C2-3:P1:S+ C3:P2 . . O3=0 O3=1 . . 0 A1:2|A2:3 A1:P1|A2:P2" + "C2-3:P1:S+ C3:P1 . . O2=0 O2=1 . . 0 A1:2|A2:3 A1:P1|A2:P1" + "C2-3:P1:S+ C3:P2 . . O2=0 O2=1 . . 0 A1:2|A2:3 A1:P1|A2:P2" + "C2-3:P1:S+ C3:P1 . . O2=0 . . . 0 A1:|A2:3 A1:P1|A2:P1" + "C2-3:P1:S+ C3:P1 . . O3=0 . . . 0 A1:2|A2: A1:P1|A2:P1" + "C2-3:P1:S+ C3:P1 . . T:O2=0 . . . 0 A1:3|A2:3 A1:P1|A2:P-1" + "C2-3:P1:S+ C3:P1 . . . T:O3=0 . . 0 A1:2|A2:2 A1:P1|A2:P-1" + "$SETUP_A123_PARTITIONS . O1=0 . . . 0 A1:|A2:2|A3:3 A1:P1|A2:P1|A3:P1" + "$SETUP_A123_PARTITIONS . O2=0 . . . 0 A1:1|A2:|A3:3 A1:P1|A2:P1|A3:P1" + "$SETUP_A123_PARTITIONS . O3=0 . . . 0 A1:1|A2:2|A3: A1:P1|A2:P1|A3:P1" + "$SETUP_A123_PARTITIONS . T:O1=0 . . . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P-1|A3:P-1" + "$SETUP_A123_PARTITIONS . . T:O2=0 . . 0 A1:1|A2:3|A3:3 A1:P1|A2:P1|A3:P-1" + "$SETUP_A123_PARTITIONS . . . T:O3=0 . 0 A1:1|A2:2|A3:2 A1:P1|A2:P1|A3:P-1" + "$SETUP_A123_PARTITIONS . T:O1=0 O1=1 . . 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1" + "$SETUP_A123_PARTITIONS . . T:O2=0 O2=1 . 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1" + "$SETUP_A123_PARTITIONS . . . T:O3=0 O3=1 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1" + "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O1=1 . 0 A1:1|A2:|A3:3 A1:P1|A2:P1|A3:P1" + "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O2=1 . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P-1|A3:P-1" # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ -------- # # Remote partition and cpuset.cpus.exclusive tests # - " C0-3:S+ C1-3:S+ C2-3 . X2-3 . . . 0 A1:0-3,A2:1-3,A3:2-3,XA1:2-3" - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3:P2 . . 0 A1:0-1,A2:2-3,A3:2-3 A1:P0,A2:P2 2-3" - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X3:P2 . . 0 A1:0-2,A2:3,A3:3 A1:P0,A2:P2 3" - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2 . 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3" - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:C3 . 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3" - " C0-3:S+ C1-3:S+ C2-3 C2-3 . . . P2 0 A1:0-3,A2:1-3,A3:2-3,B1:2-3 A1:P0,A3:P0,B1:P-2" + " C0-3:S+ C1-3:S+ C2-3 . X2-3 . . . 0 A1:0-3|A2:1-3|A3:2-3|XA1:2-3" + " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3:P2 . . 0 A1:0-1|A2:2-3|A3:2-3 A1:P0|A2:P2 2-3" + " C0-3:S+ C1-3:S+ C2-3 . X2-3 X3:P2 . . 0 A1:0-2|A2:3|A3:3 A1:P0|A2:P2 3" + " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2 . 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3" + " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:C3 . 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3" + " C0-3:S+ C1-3:S+ C2-3 C2-3 . . . P2 0 A1:0-3|A2:1-3|A3:2-3|B1:2-3 A1:P0|A3:P0|B1:P-2" " C0-3:S+ C1-3:S+ C2-3 C4-5 . . . P2 0 B1:4-5 B1:P2 4-5" - " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2 0 A3:2-3,B1:4 A3:P2,B1:P2 2-4" - " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2:C1-3 P2 0 A3:2-3,B1:4 A3:P2,B1:P2 2-4" - " C0-3:S+ C1-3:S+ C2-3 C4 X1-3 X1-3:P2 P2 . 0 A2:1,A3:2-3 A2:P2,A3:P2 1-3" - " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2:C4-5 0 A3:2-3,B1:4-5 A3:P2,B1:P2 2-5" - " C4:X0-3:S+ X1-3:S+ X2-3 . . P2 . . 0 A1:4,A2:1-3,A3:1-3 A2:P2 1-3" - " C4:X0-3:S+ X1-3:S+ X2-3 . . . P2 . 0 A1:4,A2:4,A3:2-3 A3:P2 2-3" + " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2 0 A3:2-3|B1:4 A3:P2|B1:P2 2-4" + " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2:C1-3 P2 0 A3:2-3|B1:4 A3:P2|B1:P2 2-4" + " C0-3:S+ C1-3:S+ C2-3 C4 X1-3 X1-3:P2 P2 . 0 A2:1|A3:2-3 A2:P2|A3:P2 1-3" + " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2:C4-5 0 A3:2-3|B1:4-5 A3:P2|B1:P2 2-5" + " C4:X0-3:S+ X1-3:S+ X2-3 . . P2 . . 0 A1:4|A2:1-3|A3:1-3 A2:P2 1-3" + " C4:X0-3:S+ X1-3:S+ X2-3 . . . P2 . 0 A1:4|A2:4|A3:2-3 A3:P2 2-3" # Nested remote/local partition tests - " C0-3:S+ C1-3:S+ C2-3 C4-5 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:,A3:2-3,B1:4-5 \ - A1:P0,A2:P1,A3:P2,B1:P1 2-3" - " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:,A3:2-3,B1:4 \ - A1:P0,A2:P1,A3:P2,B1:P1 2-4,2-3" - " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 . P1 0 A1:0-1,A2:2-3,A3:2-3,B1:4 \ - A1:P0,A2:P1,A3:P0,B1:P1" - " C0-3:S+ C1-3:S+ C3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:2,A3:3,B1:4 \ - A1:P0,A2:P1,A3:P2,B1:P1 2-4,3" - " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X4:P1 . 0 A1:0-1,A2:2-3,A3:4 \ - A1:P0,A2:P2,A3:P1 2-4,2-3" - " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X3-4:P1 . 0 A1:0-1,A2:2,A3:3-4 \ - A1:P0,A2:P2,A3:P1 2" + " C0-3:S+ C1-3:S+ C2-3 C4-5 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:|A3:2-3|B1:4-5 \ + A1:P0|A2:P1|A3:P2|B1:P1 2-3" + " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:|A3:2-3|B1:4 \ + A1:P0|A2:P1|A3:P2|B1:P1 2-4|2-3" + " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 . P1 0 A1:0-1|A2:2-3|A3:2-3|B1:4 \ + A1:P0|A2:P1|A3:P0|B1:P1" + " C0-3:S+ C1-3:S+ C3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:2|A3:3|B1:4 \ + A1:P0|A2:P1|A3:P2|B1:P1 2-4|3" + " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X4:P1 . 0 A1:0-1|A2:2-3|A3:4 \ + A1:P0|A2:P2|A3:P1 2-4|2-3" + " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X3-4:P1 . 0 A1:0-1|A2:2|A3:3-4 \ + A1:P0|A2:P2|A3:P1 2" " C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \ - . . X5 . . 0 A1:0-4,A2:1-4,A3:2-4 \ - A1:P0,A2:P-2,A3:P-1" + . . X5 . . 0 A1:0-4|A2:1-4|A3:2-4 \ + A1:P0|A2:P-2|A3:P-1 ." " C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \ - . . . X1 . 0 A1:0-1,A2:2-4,A3:2-4 \ - A1:P0,A2:P2,A3:P-1 2-4" + . . . X1 . 0 A1:0-1|A2:2-4|A3:2-4 \ + A1:P0|A2:P2|A3:P-1 2-4" # Remote partition offline tests - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 . 0 A1:0-1,A2:1,A3:3 A1:P0,A3:P2 2-3" - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 O2=1 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3" - " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 P2:O3=0 . 0 A1:0-2,A2:1-2,A3: A1:P0,A3:P2 3" - " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 T:P2:O3=0 . 0 A1:0-2,A2:1-2,A3:1-2 A1:P0,A3:P-2 3," + " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 . 0 A1:0-1|A2:1|A3:3 A1:P0|A3:P2 2-3" + " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 O2=1 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3" + " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 P2:O3=0 . 0 A1:0-2|A2:1-2|A3: A1:P0|A3:P2 3" + " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 T:P2:O3=0 . 0 A1:0-2|A2:1-2|A3:1-2 A1:P0|A3:P-2 3|" # An invalidated remote partition cannot self-recover from hotplug - " C0-3:S+ C1-3:S+ C2 . X2-3 X2-3 T:P2:O2=0 O2=1 0 A1:0-3,A2:1-3,A3:2 A1:P0,A3:P-2" + " C0-3:S+ C1-3:S+ C2 . X2-3 X2-3 T:P2:O2=0 O2=1 0 A1:0-3|A2:1-3|A3:2 A1:P0|A3:P-2 ." # cpus.exclusive.effective clearing test - " C0-3:S+ C1-3:S+ C2 . X2-3:X . . . 0 A1:0-3,A2:1-3,A3:2,XA1:" + " C0-3:S+ C1-3:S+ C2 . X2-3:X . . . 0 A1:0-3|A2:1-3|A3:2|XA1:" # Invalid to valid remote partition transition test - " C0-3:S+ C1-3 . . . X3:P2 . . 0 A1:0-3,A2:1-3,XA2: A2:P-2" + " C0-3:S+ C1-3 . . . X3:P2 . . 0 A1:0-3|A2:1-3|XA2: A2:P-2 ." " C0-3:S+ C1-3:X3:P2 - . . X2-3 P2 . . 0 A1:0-2,A2:3,XA2:3 A2:P2 3" + . . X2-3 P2 . . 0 A1:0-2|A2:3|XA2:3 A2:P2 3" # Invalid to valid local partition direct transition tests - " C1-3:S+:P2 X4:P2 . . . . . . 0 A1:1-3,XA1:1-3,A2:1-3:XA2: A1:P2,A2:P-2 1-3" - " C1-3:S+:P2 X4:P2 . . . X3:P2 . . 0 A1:1-2,XA1:1-3,A2:3:XA2:3 A1:P2,A2:P2 1-3" - " C0-3:P2 . . C4-6 C0-4 . . . 0 A1:0-4,B1:4-6 A1:P-2,B1:P0" - " C0-3:P2 . . C4-6 C0-4:C0-3 . . . 0 A1:0-3,B1:4-6 A1:P2,B1:P0 0-3" - " C0-3:P2 . . C3-5:C4-5 . . . . 0 A1:0-3,B1:4-5 A1:P2,B1:P0 0-3" + " C1-3:S+:P2 X4:P2 . . . . . . 0 A1:1-3|XA1:1-3|A2:1-3:XA2: A1:P2|A2:P-2 1-3" + " C1-3:S+:P2 X4:P2 . . . X3:P2 . . 0 A1:1-2|XA1:1-3|A2:3:XA2:3 A1:P2|A2:P2 1-3" + " C0-3:P2 . . C4-6 C0-4 . . . 0 A1:0-4|B1:4-6 A1:P-2|B1:P0" + " C0-3:P2 . . C4-6 C0-4:C0-3 . . . 0 A1:0-3|B1:4-6 A1:P2|B1:P0 0-3" # Local partition invalidation tests " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \ - . . . . . 0 A1:1,A2:2,A3:3 A1:P2,A2:P2,A3:P2 1-3" + . . . . . 0 A1:1|A2:2|A3:3 A1:P2|A2:P2|A3:P2 1-3" " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \ - . . X4 . . 0 A1:1-3,A2:1-3,A3:2-3,XA2:,XA3: A1:P2,A2:P-2,A3:P-2 1-3" + . . X4 . . 0 A1:1-3|A2:1-3|A3:2-3|XA2:|XA3: A1:P2|A2:P-2|A3:P-2 1-3" " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \ - . . C4:X . . 0 A1:1-3,A2:1-3,A3:2-3,XA2:,XA3: A1:P2,A2:P-2,A3:P-2 1-3" + . . C4:X . . 0 A1:1-3|A2:1-3|A3:2-3|XA2:|XA3: A1:P2|A2:P-2|A3:P-2 1-3" # Local partition CPU change tests - " C0-5:S+:P2 C4-5:S+:P1 . . . C3-5 . . 0 A1:0-2,A2:3-5 A1:P2,A2:P1 0-2" - " C0-5:S+:P2 C4-5:S+:P1 . . C1-5 . . . 0 A1:1-3,A2:4-5 A1:P2,A2:P1 1-3" + " C0-5:S+:P2 C4-5:S+:P1 . . . C3-5 . . 0 A1:0-2|A2:3-5 A1:P2|A2:P1 0-2" + " C0-5:S+:P2 C4-5:S+:P1 . . C1-5 . . . 0 A1:1-3|A2:4-5 A1:P2|A2:P1 1-3" # cpus_allowed/exclusive_cpus update tests " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ - . X:C4 . P2 . 0 A1:4,A2:4,XA2:,XA3:,A3:4 \ - A1:P0,A3:P-2" + . X:C4 . P2 . 0 A1:4|A2:4|XA2:|XA3:|A3:4 \ + A1:P0|A3:P-2 ." " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ - . X1 . P2 . 0 A1:0-3,A2:1-3,XA1:1,XA2:,XA3:,A3:2-3 \ - A1:P0,A3:P-2" + . X1 . P2 . 0 A1:0-3|A2:1-3|XA1:1|XA2:|XA3:|A3:2-3 \ + A1:P0|A3:P-2 ." " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ - . . X3 P2 . 0 A1:0-2,A2:1-2,XA2:3,XA3:3,A3:3 \ - A1:P0,A3:P2 3" + . . X3 P2 . 0 A1:0-2|A2:1-2|XA2:3|XA3:3|A3:3 \ + A1:P0|A3:P2 3" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \ - . . X3 . . 0 A1:0-3,A2:1-3,XA2:3,XA3:3,A3:2-3 \ - A1:P0,A3:P-2" + . . X3 . . 0 A1:0-2|A2:1-2|XA2:3|XA3:3|A3:3|XA3:3 \ + A1:P0|A3:P2 3" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \ - . X4 . . . 0 A1:0-3,A2:1-3,A3:2-3,XA1:4,XA2:,XA3 \ - A1:P0,A3:P-2" + . X4 . . . 0 A1:0-3|A2:1-3|A3:2-3|XA1:4|XA2:|XA3 \ + A1:P0|A3:P-2" # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ -------- @@ -339,68 +356,127 @@ TEST_MATRIX=( # # Adding CPUs to partition root that are not in parent's # cpuset.cpus is allowed, but those extra CPUs are ignored. - "C2-3:P1:S+ C3:P1 . . . C2-4 . . 0 A1:,A2:2-3 A1:P1,A2:P1" + "C2-3:P1:S+ C3:P1 . . . C2-4 . . 0 A1:|A2:2-3 A1:P1|A2:P1" # Taking away all CPUs from parent or itself if there are tasks # will make the partition invalid. - "C2-3:P1:S+ C3:P1 . . T C2-3 . . 0 A1:2-3,A2:2-3 A1:P1,A2:P-1" - " C3:P1:S+ C3 . . T P1 . . 0 A1:3,A2:3 A1:P1,A2:P-1" - "$SETUP_A123_PARTITIONS . T:C2-3 . . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1" - "$SETUP_A123_PARTITIONS . T:C2-3:C1-3 . . . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1" + "C2-3:P1:S+ C3:P1 . . T C2-3 . . 0 A1:2-3|A2:2-3 A1:P1|A2:P-1" + " C3:P1:S+ C3 . . T P1 . . 0 A1:3|A2:3 A1:P1|A2:P-1" + "$SETUP_A123_PARTITIONS . T:C2-3 . . . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P-1|A3:P-1" + "$SETUP_A123_PARTITIONS . T:C2-3:C1-3 . . . 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1" # Changing a partition root to member makes child partitions invalid - "C2-3:P1:S+ C3:P1 . . P0 . . . 0 A1:2-3,A2:3 A1:P0,A2:P-1" - "$SETUP_A123_PARTITIONS . C2-3 P0 . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P0,A3:P-1" + "C2-3:P1:S+ C3:P1 . . P0 . . . 0 A1:2-3|A2:3 A1:P0|A2:P-1" + "$SETUP_A123_PARTITIONS . C2-3 P0 . . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P0|A3:P-1" # cpuset.cpus can contains cpus not in parent's cpuset.cpus as long # as they overlap. - "C2-3:P1:S+ . . . . C3-4:P1 . . 0 A1:2,A2:3 A1:P1,A2:P1" + "C2-3:P1:S+ . . . . C3-4:P1 . . 0 A1:2|A2:3 A1:P1|A2:P1" # Deletion of CPUs distributed to child cgroup is allowed. - "C0-1:P1:S+ C1 . C2-3 C4-5 . . . 0 A1:4-5,A2:4-5" + "C0-1:P1:S+ C1 . C2-3 C4-5 . . . 0 A1:4-5|A2:4-5" # To become a valid partition root, cpuset.cpus must overlap parent's # cpuset.cpus. - " C0-1:P1 . . C2-3 S+ C4-5:P1 . . 0 A1:0-1,A2:0-1 A1:P1,A2:P-1" + " C0-1:P1 . . C2-3 S+ C4-5:P1 . . 0 A1:0-1|A2:0-1 A1:P1|A2:P-1" # Enabling partition with child cpusets is allowed - " C0-1:S+ C1 . C2-3 P1 . . . 0 A1:0-1,A2:1 A1:P1" + " C0-1:S+ C1 . C2-3 P1 . . . 0 A1:0-1|A2:1 A1:P1" - # A partition root with non-partition root parent is invalid, but it + # A partition root with non-partition root parent is invalid| but it # can be made valid if its parent becomes a partition root too. - " C0-1:S+ C1 . C2-3 . P2 . . 0 A1:0-1,A2:1 A1:P0,A2:P-2" - " C0-1:S+ C1:P2 . C2-3 P1 . . . 0 A1:0,A2:1 A1:P1,A2:P2" + " C0-1:S+ C1 . C2-3 . P2 . . 0 A1:0-1|A2:1 A1:P0|A2:P-2" + " C0-1:S+ C1:P2 . C2-3 P1 . . . 0 A1:0|A2:1 A1:P1|A2:P2 0-1|1" # A non-exclusive cpuset.cpus change will invalidate partition and its siblings - " C0-1:P1 . . C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P-1,B1:P0" - " C0-1:P1 . . P1:C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P-1,B1:P-1" - " C0-1 . . P1:C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P0,B1:P-1" + " C0-1:P1 . . C2-3 C0-2 . . . 0 A1:0-2|B1:2-3 A1:P-1|B1:P0" + " C0-1:P1 . . P1:C2-3 C0-2 . . . 0 A1:0-2|B1:2-3 A1:P-1|B1:P-1" + " C0-1 . . P1:C2-3 C0-2 . . . 0 A1:0-2|B1:2-3 A1:P0|B1:P-1" # cpuset.cpus can overlap with sibling cpuset.cpus.exclusive but not subsumed by it - " C0-3 . . C4-5 X5 . . . 0 A1:0-3,B1:4-5" + " C0-3 . . C4-5 X5 . . . 0 A1:0-3|B1:4-5" # Child partition root that try to take all CPUs from parent partition # with tasks will remain invalid. - " C1-4:P1:S+ P1 . . . . . . 0 A1:1-4,A2:1-4 A1:P1,A2:P-1" - " C1-4:P1:S+ P1 . . . C1-4 . . 0 A1,A2:1-4 A1:P1,A2:P1" - " C1-4:P1:S+ P1 . . T C1-4 . . 0 A1:1-4,A2:1-4 A1:P1,A2:P-1" + " C1-4:P1:S+ P1 . . . . . . 0 A1:1-4|A2:1-4 A1:P1|A2:P-1" + " C1-4:P1:S+ P1 . . . C1-4 . . 0 A1|A2:1-4 A1:P1|A2:P1" + " C1-4:P1:S+ P1 . . T C1-4 . . 0 A1:1-4|A2:1-4 A1:P1|A2:P-1" # Clearing of cpuset.cpus with a preset cpuset.cpus.exclusive shouldn't # affect cpuset.cpus.exclusive.effective. - " C1-4:X3:S+ C1:X3 . . . C . . 0 A2:1-4,XA2:3" + " C1-4:X3:S+ C1:X3 . . . C . . 0 A2:1-4|XA2:3" + + # cpuset.cpus can contain CPUs that overlap a sibling cpuset with cpus.exclusive + # but creating a local partition out of it is not allowed. Similarly and change + # in cpuset.cpus of a local partition that overlaps sibling exclusive CPUs will + # invalidate it. + " CX1-4:S+ CX2-4:P2 . C5-6 . . . P1 0 A1:1|A2:2-4|B1:5-6|XB1:5-6 \ + A1:P0|A2:P2:B1:P1 2-4" + " CX1-4:S+ CX2-4:P2 . C3-6 . . . P1 0 A1:1|A2:2-4|B1:5-6 \ + A1:P0|A2:P2:B1:P-1 2-4" + " CX1-4:S+ CX2-4:P2 . C5-6 . . . P1:C3-6 0 A1:1|A2:2-4|B1:5-6 \ + A1:P0|A2:P2:B1:P-1 2-4" # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ -------- # Failure cases: # A task cannot be added to a partition with no cpu - "C2-3:P1:S+ C3:P1 . . O2=0:T . . . 1 A1:,A2:3 A1:P1,A2:P1" + "C2-3:P1:S+ C3:P1 . . O2=0:T . . . 1 A1:|A2:3 A1:P1|A2:P1" # Changes to cpuset.cpus.exclusive that violate exclusivity rule is rejected - " C0-3 . . C4-5 X0-3 . . X3-5 1 A1:0-3,B1:4-5" + " C0-3 . . C4-5 X0-3 . . X3-5 1 A1:0-3|B1:4-5" # cpuset.cpus cannot be a subset of sibling cpuset.cpus.exclusive - " C0-3 . . C4-5 X3-5 . . . 1 A1:0-3,B1:4-5" + " C0-3 . . C4-5 X3-5 . . . 1 A1:0-3|B1:4-5" +) + +# +# Cpuset controller remote partition test matrix. +# +# Cgroup test hierarchy +# +# root +# | +# rtest (cpuset.cpus.exclusive=1-7) +# | +# +------+------+ +# | | +# p1 p2 +# +--+--+ +--+--+ +# | | | | +# c11 c12 c21 c22 +# +# REMOTE_TEST_MATRIX uses the same notational convention as TEST_MATRIX. +# Only CPUs 1-7 should be used. +# +REMOTE_TEST_MATRIX=( + # old-p1 old-p2 old-c11 old-c12 old-c21 old-c22 + # new-p1 new-p2 new-c11 new-c12 new-c21 new-c22 ECPUs Pstate ISOLCPUS + # ------ ------ ------- ------- ------- ------- ----- ------ -------- + " X1-3:S+ X4-6:S+ X1-2 X3 X4-5 X6 \ + . . P2 P2 P2 P2 c11:1-2|c12:3|c21:4-5|c22:6 \ + c11:P2|c12:P2|c21:P2|c22:P2 1-6" + " CX1-4:S+ . X1-2:P2 C3 . . \ + . . . C3-4 . . p1:3-4|c11:1-2|c12:3-4 \ + p1:P0|c11:P2|c12:P0 1-2" + " CX1-4:S+ . X1-2:P2 . . . \ + X2-4 . . . . . p1:1,3-4|c11:2 \ + p1:P0|c11:P2 2" + " CX1-5:S+ . X1-2:P2 X3-5:P1 . . \ + X2-4 . . . . . p1:1,5|c11:2|c12:3-4 \ + p1:P0|c11:P2|c12:P1 2" + " CX1-4:S+ . X1-2:P2 X3-4:P1 . . \ + . . X2 . . . p1:1|c11:2|c12:3-4 \ + p1:P0|c11:P2|c12:P1 2" + # p1 as member, will get its effective CPUs from its parent rtest + " CX1-4:S+ . X1-2:P2 X3-4:P1 . . \ + . . X1 CX2-4 . . p1:5-7|c11:1|c12:2-4 \ + p1:P0|c11:P2|c12:P1 1" + " CX1-4:S+ X5-6:P1:S+ . . . . \ + . . X1-2:P2 X4-5:P1 . X1-7:P2 p1:3|c11:1-2|c12:4:c22:5-6 \ + p1:P0|p2:P1|c11:P2|c12:P1|c22:P2 \ + 1-2,4-6|1-2,5-6" ) # @@ -453,25 +529,26 @@ set_ctrl_state() PFILE=$CGRP/cpuset.cpus.partition CFILE=$CGRP/cpuset.cpus XFILE=$CGRP/cpuset.cpus.exclusive - S=$(expr substr $CMD 1 1) - if [[ $S = S ]] - then - PREFIX=${CMD#?} + case $CMD in + S*) PREFIX=${CMD#?} COMM="echo ${PREFIX}${CTRL} > $SFILE" eval $COMM $REDIRECT - elif [[ $S = X ]] - then + ;; + X*) CPUS=${CMD#?} COMM="echo $CPUS > $XFILE" eval $COMM $REDIRECT - elif [[ $S = C ]] - then - CPUS=${CMD#?} + ;; + CX*) + CPUS=${CMD#??} + COMM="echo $CPUS > $CFILE; echo $CPUS > $XFILE" + eval $COMM $REDIRECT + ;; + C*) CPUS=${CMD#?} COMM="echo $CPUS > $CFILE" eval $COMM $REDIRECT - elif [[ $S = P ]] - then - VAL=${CMD#?} + ;; + P*) VAL=${CMD#?} case $VAL in 0) VAL=member ;; @@ -486,15 +563,17 @@ set_ctrl_state() esac COMM="echo $VAL > $PFILE" eval $COMM $REDIRECT - elif [[ $S = O ]] - then - VAL=${CMD#?} + ;; + O*) VAL=${CMD#?} write_cpu_online $VAL - elif [[ $S = T ]] - then - COMM="echo 0 > $TFILE" + ;; + T*) COMM="echo 0 > $TFILE" eval $COMM $REDIRECT - fi + ;; + *) echo "Unknown command: $CMD" + exit 1 + ;; + esac RET=$? [[ $RET -ne 0 ]] && { [[ -n "$SHOWERR" ]] && { @@ -532,21 +611,18 @@ online_cpus() } # -# Return 1 if the list of effective cpus isn't the same as the initial list. +# Remove all the test cgroup directories # reset_cgroup_states() { echo 0 > $CGROUP2/cgroup.procs online_cpus - rmdir A1/A2/A3 A1/A2 A1 B1 > /dev/null 2>&1 - pause 0.02 - set_ctrl_state . R- - pause 0.01 + rmdir $RESET_LIST > /dev/null 2>&1 } dump_states() { - for DIR in . A1 A1/A2 A1/A2/A3 B1 + for DIR in $CGROUP_LIST do CPUS=$DIR/cpuset.cpus ECPUS=$DIR/cpuset.cpus.effective @@ -566,17 +642,33 @@ dump_states() } # +# Set the actual cgroup directory into $CGRP_DIR +# $1 - cgroup name +# +set_cgroup_dir() +{ + CGRP_DIR=$1 + [[ $CGRP_DIR = A2 ]] && CGRP_DIR=A1/A2 + [[ $CGRP_DIR = A3 ]] && CGRP_DIR=A1/A2/A3 + [[ $CGRP_DIR = c11 ]] && CGRP_DIR=p1/c11 + [[ $CGRP_DIR = c12 ]] && CGRP_DIR=p1/c12 + [[ $CGRP_DIR = c21 ]] && CGRP_DIR=p2/c21 + [[ $CGRP_DIR = c22 ]] && CGRP_DIR=p2/c22 +} + +# # Check effective cpus -# $1 - check string, format: <cgroup>:<cpu-list>[,<cgroup>:<cpu-list>]* +# $1 - check string, format: <cgroup>:<cpu-list>[|<cgroup>:<cpu-list>]* # check_effective_cpus() { CHK_STR=$1 - for CHK in $(echo $CHK_STR | sed -e "s/,/ /g") + for CHK in $(echo $CHK_STR | sed -e "s/|/ /g") do set -- $(echo $CHK | sed -e "s/:/ /g") CGRP=$1 - CPUS=$2 + EXPECTED_CPUS=$2 + ACTUAL_CPUS= if [[ $CGRP = X* ]] then CGRP=${CGRP#X} @@ -584,41 +676,39 @@ check_effective_cpus() else FILE=cpuset.cpus.effective fi - [[ $CGRP = A2 ]] && CGRP=A1/A2 - [[ $CGRP = A3 ]] && CGRP=A1/A2/A3 - [[ -e $CGRP/$FILE ]] || return 1 - [[ $CPUS = $(cat $CGRP/$FILE) ]] || return 1 + set_cgroup_dir $CGRP + [[ -e $CGRP_DIR/$FILE ]] || return 1 + ACTUAL_CPUS=$(cat $CGRP_DIR/$FILE) + [[ $EXPECTED_CPUS = $ACTUAL_CPUS ]] || return 1 done } # # Check cgroup states -# $1 - check string, format: <cgroup>:<state>[,<cgroup>:<state>]* +# $1 - check string, format: <cgroup>:<state>[|<cgroup>:<state>]* # check_cgroup_states() { CHK_STR=$1 - for CHK in $(echo $CHK_STR | sed -e "s/,/ /g") + for CHK in $(echo $CHK_STR | sed -e "s/|/ /g") do set -- $(echo $CHK | sed -e "s/:/ /g") CGRP=$1 - CGRP_DIR=$CGRP - STATE=$2 + EXPECTED_STATE=$2 FILE= - EVAL=$(expr substr $STATE 2 2) - [[ $CGRP = A2 ]] && CGRP_DIR=A1/A2 - [[ $CGRP = A3 ]] && CGRP_DIR=A1/A2/A3 + EVAL=$(expr substr $EXPECTED_STATE 2 2) - case $STATE in + set_cgroup_dir $CGRP + case $EXPECTED_STATE in P*) FILE=$CGRP_DIR/cpuset.cpus.partition ;; - *) echo "Unknown state: $STATE!" + *) echo "Unknown state: $EXPECTED_STATE!" exit 1 ;; esac - VAL=$(cat $FILE) + ACTUAL_STATE=$(cat $FILE) - case "$VAL" in + case "$ACTUAL_STATE" in member) VAL=0 ;; root) VAL=1 @@ -642,7 +732,7 @@ check_cgroup_states() [[ $VAL -eq 1 && $VERBOSE -gt 0 ]] && { DOMS=$(cat $CGRP_DIR/cpuset.cpus.effective) [[ -n "$DOMS" ]] && - echo " [$CGRP] sched-domain: $DOMS" > $CONSOLE + echo " [$CGRP_DIR] sched-domain: $DOMS" > $CONSOLE } done return 0 @@ -665,22 +755,22 @@ check_cgroup_states() # check_isolcpus() { - EXPECT_VAL=$1 - ISOLCPUS= + EXPECTED_ISOLCPUS=$1 + ISCPUS=${CGROUP2}/cpuset.cpus.isolated + ISOLCPUS=$(cat $ISCPUS) LASTISOLCPU= SCHED_DOMAINS=/sys/kernel/debug/sched/domains - ISCPUS=${CGROUP2}/cpuset.cpus.isolated - if [[ $EXPECT_VAL = . ]] + if [[ $EXPECTED_ISOLCPUS = . ]] then - EXPECT_VAL= - EXPECT_VAL2= - elif [[ $(expr $EXPECT_VAL : ".*,.*") > 0 ]] + EXPECTED_ISOLCPUS= + EXPECTED_SDOMAIN= + elif [[ $(expr $EXPECTED_ISOLCPUS : ".*|.*") > 0 ]] then - set -- $(echo $EXPECT_VAL | sed -e "s/,/ /g") - EXPECT_VAL=$1 - EXPECT_VAL2=$2 + set -- $(echo $EXPECTED_ISOLCPUS | sed -e "s/|/ /g") + EXPECTED_ISOLCPUS=$2 + EXPECTED_SDOMAIN=$1 else - EXPECT_VAL2=$EXPECT_VAL + EXPECTED_SDOMAIN=$EXPECTED_ISOLCPUS fi # @@ -689,20 +779,21 @@ check_isolcpus() # to make appending those CPUs easier. # [[ -n "$BOOT_ISOLCPUS" ]] && { - EXPECT_VAL=${EXPECT_VAL:+${EXPECT_VAL},}${BOOT_ISOLCPUS} - EXPECT_VAL2=${EXPECT_VAL2:+${EXPECT_VAL2},}${BOOT_ISOLCPUS} + EXPECTED_ISOLCPUS=${EXPECTED_ISOLCPUS:+${EXPECTED_ISOLCPUS},}${BOOT_ISOLCPUS} + EXPECTED_SDOMAIN=${EXPECTED_SDOMAIN:+${EXPECTED_SDOMAIN},}${BOOT_ISOLCPUS} } # # Check cpuset.cpus.isolated cpumask # - [[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && { + [[ "$EXPECTED_ISOLCPUS" != "$ISOLCPUS" ]] && { # Take a 50ms pause and try again pause 0.05 ISOLCPUS=$(cat $ISCPUS) } - [[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && return 1 + [[ "$EXPECTED_ISOLCPUS" != "$ISOLCPUS" ]] && return 1 ISOLCPUS= + EXPECTED_ISOLCPUS=$EXPECTED_SDOMAIN # # Use the sched domain in debugfs to check isolated CPUs, if available @@ -736,7 +827,7 @@ check_isolcpus() done [[ "$ISOLCPUS" = *- ]] && ISOLCPUS=${ISOLCPUS}$LASTISOLCPU - [[ "$EXPECT_VAL" = "$ISOLCPUS" ]] + [[ "$EXPECTED_SDOMAIN" = "$ISOLCPUS" ]] } test_fail() @@ -774,6 +865,63 @@ null_isolcpus_check() } # +# Check state transition test result +# $1 - Test number +# $2 - Expected effective CPU values +# $3 - Expected partition states +# $4 - Expected isolated CPUs +# +check_test_results() +{ + _NR=$1 + _ECPUS="$2" + _PSTATES="$3" + _ISOLCPUS="$4" + + [[ -n "$_ECPUS" && "$_ECPUS" != . ]] && { + check_effective_cpus $_ECPUS + [[ $? -ne 0 ]] && test_fail $_NR "effective CPU" \ + "Cgroup $CGRP: expected $EXPECTED_CPUS, got $ACTUAL_CPUS" + } + + [[ -n "$_PSTATES" && "$_PSTATES" != . ]] && { + check_cgroup_states $_PSTATES + [[ $? -ne 0 ]] && test_fail $_NR states \ + "Cgroup $CGRP: expected $EXPECTED_STATE, got $ACTUAL_STATE" + } + + # Compare the expected isolated CPUs with the actual ones, + # if available + [[ -n "$_ISOLCPUS" ]] && { + check_isolcpus $_ISOLCPUS + [[ $? -ne 0 ]] && { + [[ -n "$BOOT_ISOLCPUS" ]] && _ISOLCPUS=${_ISOLCPUS},${BOOT_ISOLCPUS} + test_fail $_NR "isolated CPU" \ + "Expect $_ISOLCPUS, get $ISOLCPUS instead" + } + } + reset_cgroup_states + # + # Check to see if effective cpu list changes + # + _NEWLIST=$(cat $CGROUP2/cpuset.cpus.effective) + RETRY=0 + while [[ $_NEWLIST != $CPULIST && $RETRY -lt 8 ]] + do + # Wait a bit longer & recheck a few times + pause 0.02 + ((RETRY++)) + _NEWLIST=$(cat $CGROUP2/cpuset.cpus.effective) + done + [[ $_NEWLIST != $CPULIST ]] && { + echo "Effective cpus changed to $_NEWLIST after test $_NR!" + exit 1 + } + null_isolcpus_check + [[ $VERBOSE -gt 0 ]] && echo "Test $I done." +} + +# # Run cpuset state transition test # $1 - test matrix name # @@ -785,6 +933,8 @@ run_state_test() { TEST=$1 CONTROLLER=cpuset + CGROUP_LIST=". A1 A1/A2 A1/A2/A3 B1" + RESET_LIST="A1/A2/A3 A1/A2 A1 B1" I=0 eval CNT="\${#$TEST[@]}" @@ -812,10 +962,11 @@ run_state_test() STATES=${11} ICPUS=${12} - set_ctrl_state_noerr B1 $OLD_B1 set_ctrl_state_noerr A1 $OLD_A1 set_ctrl_state_noerr A1/A2 $OLD_A2 set_ctrl_state_noerr A1/A2/A3 $OLD_A3 + set_ctrl_state_noerr B1 $OLD_B1 + RETVAL=0 set_ctrl_state A1 $NEW_A1; ((RETVAL += $?)) set_ctrl_state A1/A2 $NEW_A2; ((RETVAL += $?)) @@ -824,47 +975,79 @@ run_state_test() [[ $RETVAL -ne $RESULT ]] && test_fail $I result - [[ -n "$ECPUS" && "$ECPUS" != . ]] && { - check_effective_cpus $ECPUS - [[ $? -ne 0 ]] && test_fail $I "effective CPU" - } + check_test_results $I "$ECPUS" "$STATES" "$ICPUS" + ((I++)) + done + echo "All $I tests of $TEST PASSED." +} - [[ -n "$STATES" && "$STATES" != . ]] && { - check_cgroup_states $STATES - [[ $? -ne 0 ]] && test_fail $I states - } +# +# Run cpuset remote partition state transition test +# $1 - test matrix name +# +run_remote_state_test() +{ + TEST=$1 + CONTROLLER=cpuset + [[ -d rtest ]] || mkdir rtest + cd rtest + echo +cpuset > cgroup.subtree_control + echo "1-7" > cpuset.cpus + echo "1-7" > cpuset.cpus.exclusive + CGROUP_LIST=".. . p1 p2 p1/c11 p1/c12 p2/c21 p2/c22" + RESET_LIST="p1/c11 p1/c12 p2/c21 p2/c22 p1 p2" + I=0 + eval CNT="\${#$TEST[@]}" - # Compare the expected isolated CPUs with the actual ones, - # if available - [[ -n "$ICPUS" ]] && { - check_isolcpus $ICPUS - [[ $? -ne 0 ]] && { - [[ -n "$BOOT_ISOLCPUS" ]] && ICPUS=${ICPUS},${BOOT_ISOLCPUS} - test_fail $I "isolated CPU" \ - "Expect $ICPUS, get $ISOLCPUS instead" - } - } - reset_cgroup_states - # - # Check to see if effective cpu list changes - # - NEWLIST=$(cat cpuset.cpus.effective) - RETRY=0 - while [[ $NEWLIST != $CPULIST && $RETRY -lt 8 ]] - do - # Wait a bit longer & recheck a few times - pause 0.02 - ((RETRY++)) - NEWLIST=$(cat cpuset.cpus.effective) - done - [[ $NEWLIST != $CPULIST ]] && { - echo "Effective cpus changed to $NEWLIST after test $I!" - exit 1 + reset_cgroup_states + console_msg "Running remote partition state transition test ..." + + while [[ $I -lt $CNT ]] + do + echo "Running test $I ..." > $CONSOLE + [[ $VERBOSE -gt 1 ]] && { + echo "" + eval echo \${$TEST[$I]} } - null_isolcpus_check - [[ $VERBOSE -gt 0 ]] && echo "Test $I done." + eval set -- "\${$TEST[$I]}" + OLD_p1=$1 + OLD_p2=$2 + OLD_c11=$3 + OLD_c12=$4 + OLD_c21=$5 + OLD_c22=$6 + NEW_p1=$7 + NEW_p2=$8 + NEW_c11=$9 + NEW_c12=${10} + NEW_c21=${11} + NEW_c22=${12} + ECPUS=${13} + STATES=${14} + ICPUS=${15} + + set_ctrl_state_noerr p1 $OLD_p1 + set_ctrl_state_noerr p2 $OLD_p2 + set_ctrl_state_noerr p1/c11 $OLD_c11 + set_ctrl_state_noerr p1/c12 $OLD_c12 + set_ctrl_state_noerr p2/c21 $OLD_c21 + set_ctrl_state_noerr p2/c22 $OLD_c22 + + RETVAL=0 + set_ctrl_state p1 $NEW_p1 ; ((RETVAL += $?)) + set_ctrl_state p2 $NEW_p2 ; ((RETVAL += $?)) + set_ctrl_state p1/c11 $NEW_c11; ((RETVAL += $?)) + set_ctrl_state p1/c12 $NEW_c12; ((RETVAL += $?)) + set_ctrl_state p2/c21 $NEW_c21; ((RETVAL += $?)) + set_ctrl_state p2/c22 $NEW_c22; ((RETVAL += $?)) + + [[ $RETVAL -ne 0 ]] && test_fail $I result + + check_test_results $I "$ECPUS" "$STATES" "$ICPUS" ((I++)) done + cd .. + rmdir rtest echo "All $I tests of $TEST PASSED." } @@ -932,6 +1115,7 @@ test_isolated() echo $$ > $CGROUP2/cgroup.procs [[ -d A1 ]] && rmdir A1 null_isolcpus_check + pause 0.05 } # @@ -997,10 +1181,13 @@ test_inotify() else echo "Inotify test PASSED" fi + echo member > cpuset.cpus.partition + echo "" > cpuset.cpus } trap cleanup 0 2 3 6 run_state_test TEST_MATRIX +run_remote_state_test REMOTE_TEST_MATRIX test_isolated test_inotify echo "All tests PASSED." diff --git a/tools/testing/selftests/clone3/clone3_selftests.h b/tools/testing/selftests/clone3/clone3_selftests.h index 3d2663fe50ba..eeca8005723f 100644 --- a/tools/testing/selftests/clone3/clone3_selftests.h +++ b/tools/testing/selftests/clone3/clone3_selftests.h @@ -16,7 +16,7 @@ #define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr))) #ifndef __NR_clone3 -#define __NR_clone3 -1 +#define __NR_clone3 435 #endif struct __clone_args { diff --git a/tools/testing/selftests/drivers/net/hds.py b/tools/testing/selftests/drivers/net/hds.py index 7cc74faed743..8b7f6acad15f 100755 --- a/tools/testing/selftests/drivers/net/hds.py +++ b/tools/testing/selftests/drivers/net/hds.py @@ -20,7 +20,7 @@ def _get_hds_mode(cfg, netnl) -> str: def _xdp_onoff(cfg): - prog = cfg.rpath("../../net/lib/xdp_dummy.bpf.o") + prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" ip("link set dev %s xdp obj %s sec xdp" % (cfg.ifname, prog)) ip("link set dev %s xdp off" % cfg.ifname) diff --git a/tools/testing/selftests/drivers/net/hw/csum.py b/tools/testing/selftests/drivers/net/hw/csum.py index 701aca1361e0..cd23af875317 100755 --- a/tools/testing/selftests/drivers/net/hw/csum.py +++ b/tools/testing/selftests/drivers/net/hw/csum.py @@ -88,7 +88,7 @@ def main() -> None: with NetDrvEpEnv(__file__, nsim_test=False) as cfg: check_nic_features(cfg) - cfg.bin_local = cfg.rpath("../../../net/lib/csum") + cfg.bin_local = cfg.net_lib_dir / "csum" cfg.bin_remote = cfg.remote.deploy(cfg.bin_local) cases = [] diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py index d301d9b356f7..9f271ab6ec04 100755 --- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py +++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py @@ -27,7 +27,7 @@ def _set_flow_rule(cfg, chan): def test_zcrx(cfg) -> None: - cfg.require_v6() + cfg.require_ipver('6') combined_chans = _get_combined_channels(cfg) if combined_chans < 2: @@ -40,7 +40,7 @@ def test_zcrx(cfg) -> None: flow_rule_id = _set_flow_rule(cfg, combined_chans - 1) rx_cmd = f"{cfg.bin_remote} -s -p 9999 -i {cfg.ifname} -q {combined_chans - 1}" - tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_v6} -p 9999 -l 12840" + tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p 9999 -l 12840" with bkg(rx_cmd, host=cfg.remote, exit_wait=True): wait_port_listen(9999, proto="tcp", host=cfg.remote) cmd(tx_cmd) @@ -51,7 +51,7 @@ def test_zcrx(cfg) -> None: def test_zcrx_oneshot(cfg) -> None: - cfg.require_v6() + cfg.require_ipver('6') combined_chans = _get_combined_channels(cfg) if combined_chans < 2: @@ -64,7 +64,7 @@ def test_zcrx_oneshot(cfg) -> None: flow_rule_id = _set_flow_rule(cfg, combined_chans - 1) rx_cmd = f"{cfg.bin_remote} -s -p 9999 -i {cfg.ifname} -q {combined_chans - 1} -o 4" - tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_v6} -p 9999 -l 4096 -z 16384" + tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p 9999 -l 4096 -z 16384" with bkg(rx_cmd, host=cfg.remote, exit_wait=True): wait_port_listen(9999, proto="tcp", host=cfg.remote) cmd(tx_cmd) diff --git a/tools/testing/selftests/drivers/net/hw/irq.py b/tools/testing/selftests/drivers/net/hw/irq.py index 42ab98370245..0699d6a8b4e2 100755 --- a/tools/testing/selftests/drivers/net/hw/irq.py +++ b/tools/testing/selftests/drivers/net/hw/irq.py @@ -69,7 +69,7 @@ def check_reconfig_queues(cfg) -> None: def check_reconfig_xdp(cfg) -> None: def reconfig(cfg) -> None: ip(f"link set dev %s xdp obj %s sec xdp" % - (cfg.ifname, cfg.rpath("xdp_dummy.bpf.o"))) + (cfg.ifname, cfg.net_lib_dir / "xdp_dummy.bpf.o")) ip(f"link set dev %s xdp off" % cfg.ifname) _check_reconfig(cfg, reconfig) diff --git a/tools/testing/selftests/drivers/net/hw/xdp_dummy.bpf.c b/tools/testing/selftests/drivers/net/hw/xdp_dummy.bpf.c deleted file mode 100644 index d988b2e0cee8..000000000000 --- a/tools/testing/selftests/drivers/net/hw/xdp_dummy.bpf.c +++ /dev/null @@ -1,13 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#define KBUILD_MODNAME "xdp_dummy" -#include <linux/bpf.h> -#include <bpf/bpf_helpers.h> - -SEC("xdp") -int xdp_dummy_prog(struct xdp_md *ctx) -{ - return XDP_PASS; -} - -char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py index fd4d674e6c72..ad5ff645183a 100644 --- a/tools/testing/selftests/drivers/net/lib/py/env.py +++ b/tools/testing/selftests/drivers/net/lib/py/env.py @@ -13,22 +13,17 @@ from .remote import Remote class NetDrvEnvBase: """ Base class for a NIC / host envirnoments + + Attributes: + test_dir: Path to the source directory of the test + net_lib_dir: Path to the net/lib directory """ def __init__(self, src_path): - self.src_path = src_path - self.env = self._load_env_file() - - def rpath(self, path): - """ - Get an absolute path to a file based on a path relative to the directory - containing the test which constructed env. + self.src_path = Path(src_path) + self.test_dir = self.src_path.parent.resolve() + self.net_lib_dir = (Path(__file__).parent / "../../../../net/lib").resolve() - For example, if the test.py is in the same directory as - a binary (built from helper.c), the test can use env.rpath("helper") - to get the absolute path to the binary - """ - src_dir = Path(self.src_path).parent.resolve() - return (src_dir / path).as_posix() + self.env = self._load_env_file() def _load_env_file(self): env = os.environ.copy() diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py index 93120e86e102..4b6822866066 100755 --- a/tools/testing/selftests/drivers/net/ping.py +++ b/tools/testing/selftests/drivers/net/ping.py @@ -56,8 +56,7 @@ def _set_offload_checksum(cfg, netnl, on) -> None: return def _set_xdp_generic_sb_on(cfg) -> None: - test_dir = os.path.dirname(os.path.realpath(__file__)) - prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" + prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) cmd(f"ip link set dev {cfg.ifname} mtu 1500 xdpgeneric obj {prog} sec xdp", shell=True) defer(cmd, f"ip link set dev {cfg.ifname} xdpgeneric off") @@ -66,8 +65,7 @@ def _set_xdp_generic_sb_on(cfg) -> None: time.sleep(10) def _set_xdp_generic_mb_on(cfg) -> None: - test_dir = os.path.dirname(os.path.realpath(__file__)) - prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" + prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" cmd(f"ip link set dev {remote_ifname} mtu 9000", shell=True, host=cfg.remote) defer(ip, f"link set dev {remote_ifname} mtu 1500", host=cfg.remote) ip("link set dev %s mtu 9000 xdpgeneric obj %s sec xdp.frags" % (cfg.ifname, prog)) @@ -77,8 +75,7 @@ def _set_xdp_generic_mb_on(cfg) -> None: time.sleep(10) def _set_xdp_native_sb_on(cfg) -> None: - test_dir = os.path.dirname(os.path.realpath(__file__)) - prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" + prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) cmd(f"ip -j link set dev {cfg.ifname} mtu 1500 xdp obj {prog} sec xdp", shell=True) defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off") @@ -95,8 +92,7 @@ def _set_xdp_native_sb_on(cfg) -> None: time.sleep(10) def _set_xdp_native_mb_on(cfg) -> None: - test_dir = os.path.dirname(os.path.realpath(__file__)) - prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" + prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" cmd(f"ip link set dev {remote_ifname} mtu 9000", shell=True, host=cfg.remote) defer(ip, f"link set dev {remote_ifname} mtu 1500", host=cfg.remote) try: @@ -109,8 +105,7 @@ def _set_xdp_native_mb_on(cfg) -> None: time.sleep(10) def _set_xdp_offload_on(cfg) -> None: - test_dir = os.path.dirname(os.path.realpath(__file__)) - prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" + prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" cmd(f"ip link set dev {cfg.ifname} mtu 1500", shell=True) try: cmd(f"ip link set dev {cfg.ifname} xdpoffload obj {prog} sec xdp", shell=True) diff --git a/tools/testing/selftests/drivers/net/queues.py b/tools/testing/selftests/drivers/net/queues.py index cae923f84f69..06abd3f233e1 100755 --- a/tools/testing/selftests/drivers/net/queues.py +++ b/tools/testing/selftests/drivers/net/queues.py @@ -26,13 +26,13 @@ def nl_get_queues(cfg, nl, qtype='rx'): def check_xsk(cfg, nl, xdp_queue_id=0) -> None: # Probe for support - xdp = cmd(cfg.rpath("xdp_helper") + ' - -', fail=False) + xdp = cmd(f'{cfg.test_dir / "xdp_helper"} - -', fail=False) if xdp.ret == 255: raise KsftSkipEx('AF_XDP unsupported') elif xdp.ret > 0: raise KsftFailEx('unable to create AF_XDP socket') - with bkg(f'{cfg.rpath("xdp_helper")} {cfg.ifindex} {xdp_queue_id}', + with bkg(f'{cfg.test_dir / "xdp_helper"} {cfg.ifindex} {xdp_queue_id}', ksft_wait=3): rx = tx = False diff --git a/tools/testing/selftests/hid/config.common b/tools/testing/selftests/hid/config.common index 45b5570441ce..b1f40857307d 100644 --- a/tools/testing/selftests/hid/config.common +++ b/tools/testing/selftests/hid/config.common @@ -39,7 +39,6 @@ CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_CPU_FREQ_STAT=y CONFIG_CPU_IDLE_GOV_LADDER=y CONFIG_CPUSETS=y -CONFIG_CRC_T10DIF=y CONFIG_CRYPTO_BLAKE2B=y CONFIG_CRYPTO_DEV_VIRTIO=y CONFIG_CRYPTO_SEQIV=y diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index a1b2b657999d..1a8e85afe9aa 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -342,12 +342,14 @@ FIXTURE(iommufd_ioas) uint32_t hwpt_id; uint32_t device_id; uint64_t base_iova; + uint32_t device_pasid_id; }; FIXTURE_VARIANT(iommufd_ioas) { unsigned int mock_domains; unsigned int memory_limit; + bool pasid_capable; }; FIXTURE_SETUP(iommufd_ioas) @@ -372,6 +374,12 @@ FIXTURE_SETUP(iommufd_ioas) IOMMU_TEST_DEV_CACHE_DEFAULT); self->base_iova = MOCK_APERTURE_START; } + + if (variant->pasid_capable) + test_cmd_mock_domain_flags(self->ioas_id, + MOCK_FLAGS_DEVICE_PASID, + NULL, NULL, + &self->device_pasid_id); } FIXTURE_TEARDOWN(iommufd_ioas) @@ -387,6 +395,7 @@ FIXTURE_VARIANT_ADD(iommufd_ioas, no_domain) FIXTURE_VARIANT_ADD(iommufd_ioas, mock_domain) { .mock_domains = 1, + .pasid_capable = true, }; FIXTURE_VARIANT_ADD(iommufd_ioas, two_mock_domain) @@ -439,6 +448,10 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested) &test_hwpt_id); test_err_hwpt_alloc(EINVAL, self->device_id, self->device_id, 0, &test_hwpt_id); + test_err_hwpt_alloc(EOPNOTSUPP, self->device_id, self->ioas_id, + IOMMU_HWPT_ALLOC_NEST_PARENT | + IOMMU_HWPT_FAULT_ID_VALID, + &test_hwpt_id); test_cmd_hwpt_alloc(self->device_id, self->ioas_id, IOMMU_HWPT_ALLOC_NEST_PARENT, @@ -748,6 +761,8 @@ TEST_F(iommufd_ioas, get_hw_info) } buffer_smaller; if (self->device_id) { + uint8_t max_pasid = 0; + /* Provide a zero-size user_buffer */ test_cmd_get_hw_info(self->device_id, NULL, 0); /* Provide a user_buffer with exact size */ @@ -762,6 +777,13 @@ TEST_F(iommufd_ioas, get_hw_info) * the fields within the size range still gets updated. */ test_cmd_get_hw_info(self->device_id, &buffer_smaller, sizeof(buffer_smaller)); + test_cmd_get_hw_info_pasid(self->device_id, &max_pasid); + ASSERT_EQ(0, max_pasid); + if (variant->pasid_capable) { + test_cmd_get_hw_info_pasid(self->device_pasid_id, + &max_pasid); + ASSERT_EQ(MOCK_PASID_WIDTH, max_pasid); + } } else { test_err_get_hw_info(ENOENT, self->device_id, &buffer_exact, sizeof(buffer_exact)); @@ -2736,6 +2758,7 @@ TEST_F(iommufd_viommu, viommu_alloc_nested_iopf) uint32_t iopf_hwpt_id; uint32_t fault_id; uint32_t fault_fd; + uint32_t vdev_id; if (self->device_id) { test_ioctl_fault_alloc(&fault_id, &fault_fd); @@ -2752,6 +2775,10 @@ TEST_F(iommufd_viommu, viommu_alloc_nested_iopf) &iopf_hwpt_id, IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data)); + /* Must allocate vdevice before attaching to a nested hwpt */ + test_err_mock_domain_replace(ENOENT, self->stdev_id, + iopf_hwpt_id); + test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id); test_cmd_mock_domain_replace(self->stdev_id, iopf_hwpt_id); EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, iopf_hwpt_id)); @@ -2769,15 +2796,46 @@ TEST_F(iommufd_viommu, vdevice_alloc) uint32_t viommu_id = self->viommu_id; uint32_t dev_id = self->device_id; uint32_t vdev_id = 0; + uint32_t veventq_id; + uint32_t veventq_fd; + int prev_seq = -1; if (dev_id) { + /* Must allocate vdevice before attaching to a nested hwpt */ + test_err_mock_domain_replace(ENOENT, self->stdev_id, + self->nested_hwpt_id); + + /* Allocate a vEVENTQ with veventq_depth=2 */ + test_cmd_veventq_alloc(viommu_id, IOMMU_VEVENTQ_TYPE_SELFTEST, + &veventq_id, &veventq_fd); + test_err_veventq_alloc(EEXIST, viommu_id, + IOMMU_VEVENTQ_TYPE_SELFTEST, NULL, NULL); /* Set vdev_id to 0x99, unset it, and set to 0x88 */ test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id); + test_cmd_mock_domain_replace(self->stdev_id, + self->nested_hwpt_id); + test_cmd_trigger_vevents(dev_id, 1); + test_cmd_read_vevents(veventq_fd, 1, 0x99, &prev_seq); test_err_vdevice_alloc(EEXIST, viommu_id, dev_id, 0x99, &vdev_id); + test_cmd_mock_domain_replace(self->stdev_id, self->ioas_id); test_ioctl_destroy(vdev_id); + + /* Try again with 0x88 */ test_cmd_vdevice_alloc(viommu_id, dev_id, 0x88, &vdev_id); + test_cmd_mock_domain_replace(self->stdev_id, + self->nested_hwpt_id); + /* Trigger an overflow with three events */ + test_cmd_trigger_vevents(dev_id, 3); + test_err_read_vevents(EOVERFLOW, veventq_fd, 3, 0x88, + &prev_seq); + /* Overflow must be gone after the previous reads */ + test_cmd_trigger_vevents(dev_id, 1); + test_cmd_read_vevents(veventq_fd, 1, 0x88, &prev_seq); + close(veventq_fd); + test_cmd_mock_domain_replace(self->stdev_id, self->ioas_id); test_ioctl_destroy(vdev_id); + test_ioctl_destroy(veventq_id); } else { test_err_vdevice_alloc(ENOENT, viommu_id, dev_id, 0x99, NULL); } @@ -2956,4 +3014,311 @@ TEST_F(iommufd_viommu, vdevice_cache) } } +FIXTURE(iommufd_device_pasid) +{ + int fd; + uint32_t ioas_id; + uint32_t hwpt_id; + uint32_t stdev_id; + uint32_t device_id; + uint32_t no_pasid_stdev_id; + uint32_t no_pasid_device_id; +}; + +FIXTURE_VARIANT(iommufd_device_pasid) +{ + bool pasid_capable; +}; + +FIXTURE_SETUP(iommufd_device_pasid) +{ + self->fd = open("/dev/iommu", O_RDWR); + ASSERT_NE(-1, self->fd); + test_ioctl_ioas_alloc(&self->ioas_id); + + test_cmd_mock_domain_flags(self->ioas_id, + MOCK_FLAGS_DEVICE_PASID, + &self->stdev_id, &self->hwpt_id, + &self->device_id); + if (!variant->pasid_capable) + test_cmd_mock_domain_flags(self->ioas_id, 0, + &self->no_pasid_stdev_id, NULL, + &self->no_pasid_device_id); +} + +FIXTURE_TEARDOWN(iommufd_device_pasid) +{ + teardown_iommufd(self->fd, _metadata); +} + +FIXTURE_VARIANT_ADD(iommufd_device_pasid, no_pasid) +{ + .pasid_capable = false, +}; + +FIXTURE_VARIANT_ADD(iommufd_device_pasid, has_pasid) +{ + .pasid_capable = true, +}; + +TEST_F(iommufd_device_pasid, pasid_attach) +{ + struct iommu_hwpt_selftest data = { + .iotlb = IOMMU_TEST_IOTLB_DEFAULT, + }; + uint32_t nested_hwpt_id[3] = {}; + uint32_t parent_hwpt_id = 0; + uint32_t fault_id, fault_fd; + uint32_t s2_hwpt_id = 0; + uint32_t iopf_hwpt_id; + uint32_t pasid = 100; + uint32_t viommu_id; + + /* + * Negative, detach pasid without attaching, this is not expected. + * But it should not result in failure anyway. + */ + test_cmd_pasid_detach(pasid); + + /* Allocate two nested hwpts sharing one common parent hwpt */ + test_cmd_hwpt_alloc(self->device_id, self->ioas_id, + IOMMU_HWPT_ALLOC_NEST_PARENT, + &parent_hwpt_id); + test_cmd_hwpt_alloc_nested(self->device_id, parent_hwpt_id, + IOMMU_HWPT_ALLOC_PASID, + &nested_hwpt_id[0], + IOMMU_HWPT_DATA_SELFTEST, + &data, sizeof(data)); + test_cmd_hwpt_alloc_nested(self->device_id, parent_hwpt_id, + IOMMU_HWPT_ALLOC_PASID, + &nested_hwpt_id[1], + IOMMU_HWPT_DATA_SELFTEST, + &data, sizeof(data)); + + /* Fault related preparation */ + test_ioctl_fault_alloc(&fault_id, &fault_fd); + test_cmd_hwpt_alloc_iopf(self->device_id, parent_hwpt_id, fault_id, + IOMMU_HWPT_FAULT_ID_VALID | IOMMU_HWPT_ALLOC_PASID, + &iopf_hwpt_id, + IOMMU_HWPT_DATA_SELFTEST, &data, + sizeof(data)); + + /* Allocate a regular nested hwpt based on viommu */ + test_cmd_viommu_alloc(self->device_id, parent_hwpt_id, + IOMMU_VIOMMU_TYPE_SELFTEST, + &viommu_id); + test_cmd_hwpt_alloc_nested(self->device_id, viommu_id, + IOMMU_HWPT_ALLOC_PASID, + &nested_hwpt_id[2], + IOMMU_HWPT_DATA_SELFTEST, &data, + sizeof(data)); + + test_cmd_hwpt_alloc(self->device_id, self->ioas_id, + IOMMU_HWPT_ALLOC_PASID, + &s2_hwpt_id); + + /* Attach RID to non-pasid compat domain, */ + test_cmd_mock_domain_replace(self->stdev_id, parent_hwpt_id); + /* then attach to pasid should fail */ + test_err_pasid_attach(EINVAL, pasid, s2_hwpt_id); + + /* Attach RID to pasid compat domain, */ + test_cmd_mock_domain_replace(self->stdev_id, s2_hwpt_id); + /* then attach to pasid should succeed, */ + test_cmd_pasid_attach(pasid, nested_hwpt_id[0]); + /* but attach RID to non-pasid compat domain should fail now. */ + test_err_mock_domain_replace(EINVAL, self->stdev_id, parent_hwpt_id); + /* + * Detach hwpt from pasid 100, and check if the pasid 100 + * has null domain. + */ + test_cmd_pasid_detach(pasid); + ASSERT_EQ(0, + test_cmd_pasid_check_hwpt(self->fd, self->stdev_id, + pasid, 0)); + /* RID is attached to pasid-comapt domain, pasid path is not used */ + + if (!variant->pasid_capable) { + /* + * PASID-compatible domain can be used by non-PASID-capable + * device. + */ + test_cmd_mock_domain_replace(self->no_pasid_stdev_id, nested_hwpt_id[0]); + test_cmd_mock_domain_replace(self->no_pasid_stdev_id, self->ioas_id); + /* + * Attach hwpt to pasid 100 of non-PASID-capable device, + * should fail, no matter domain is pasid-comapt or not. + */ + EXPECT_ERRNO(EINVAL, + _test_cmd_pasid_attach(self->fd, self->no_pasid_stdev_id, + pasid, parent_hwpt_id)); + EXPECT_ERRNO(EINVAL, + _test_cmd_pasid_attach(self->fd, self->no_pasid_stdev_id, + pasid, s2_hwpt_id)); + } + + /* + * Attach non pasid compat hwpt to pasid-capable device, should + * fail, and have null domain. + */ + test_err_pasid_attach(EINVAL, pasid, parent_hwpt_id); + ASSERT_EQ(0, + test_cmd_pasid_check_hwpt(self->fd, self->stdev_id, + pasid, 0)); + + /* + * Attach ioas to pasid 100, should fail, domain should + * be null. + */ + test_err_pasid_attach(EINVAL, pasid, self->ioas_id); + ASSERT_EQ(0, + test_cmd_pasid_check_hwpt(self->fd, self->stdev_id, + pasid, 0)); + + /* + * Attach the s2_hwpt to pasid 100, should succeed, domain should + * be valid. + */ + test_cmd_pasid_attach(pasid, s2_hwpt_id); + ASSERT_EQ(0, + test_cmd_pasid_check_hwpt(self->fd, self->stdev_id, + pasid, s2_hwpt_id)); + + /* + * Try attach pasid 100 with another hwpt, should FAIL + * as attach does not allow overwrite, use REPLACE instead. + */ + test_err_pasid_attach(EBUSY, pasid, nested_hwpt_id[0]); + + /* + * Detach hwpt from pasid 100 for next test, should succeed, + * and have null domain. + */ + test_cmd_pasid_detach(pasid); + ASSERT_EQ(0, + test_cmd_pasid_check_hwpt(self->fd, self->stdev_id, + pasid, 0)); + + /* + * Attach nested hwpt to pasid 100, should succeed, domain + * should be valid. + */ + test_cmd_pasid_attach(pasid, nested_hwpt_id[0]); + ASSERT_EQ(0, + test_cmd_pasid_check_hwpt(self->fd, self->stdev_id, + pasid, nested_hwpt_id[0])); + + /* Attach to pasid 100 which has been attached, should fail. */ + test_err_pasid_attach(EBUSY, pasid, nested_hwpt_id[0]); + + /* cleanup pasid 100 */ + test_cmd_pasid_detach(pasid); + + /* Replace tests */ + + pasid = 200; + /* + * Replace pasid 200 without attaching it, should fail + * with -EINVAL. + */ + test_err_pasid_replace(EINVAL, pasid, s2_hwpt_id); + + /* + * Attach the s2 hwpt to pasid 200, should succeed, domain should + * be valid. + */ + test_cmd_pasid_attach(pasid, s2_hwpt_id); + ASSERT_EQ(0, + test_cmd_pasid_check_hwpt(self->fd, self->stdev_id, + pasid, s2_hwpt_id)); + + /* + * Replace pasid 200 with self->ioas_id, should fail + * and domain should be the prior s2 hwpt. + */ + test_err_pasid_replace(EINVAL, pasid, self->ioas_id); + ASSERT_EQ(0, + test_cmd_pasid_check_hwpt(self->fd, self->stdev_id, + pasid, s2_hwpt_id)); + + /* + * Replace a nested hwpt for pasid 200, should succeed, + * and have valid domain. + */ + test_cmd_pasid_replace(pasid, nested_hwpt_id[0]); + ASSERT_EQ(0, + test_cmd_pasid_check_hwpt(self->fd, self->stdev_id, + pasid, nested_hwpt_id[0])); + + /* + * Replace with another nested hwpt for pasid 200, should + * succeed, and have valid domain. + */ + test_cmd_pasid_replace(pasid, nested_hwpt_id[1]); + ASSERT_EQ(0, + test_cmd_pasid_check_hwpt(self->fd, self->stdev_id, + pasid, nested_hwpt_id[1])); + + /* cleanup pasid 200 */ + test_cmd_pasid_detach(pasid); + + /* Negative Tests for pasid replace, use pasid 1024 */ + + /* + * Attach the s2 hwpt to pasid 1024, should succeed, domain should + * be valid. + */ + pasid = 1024; + test_cmd_pasid_attach(pasid, s2_hwpt_id); + ASSERT_EQ(0, + test_cmd_pasid_check_hwpt(self->fd, self->stdev_id, + pasid, s2_hwpt_id)); + + /* + * Replace pasid 1024 with nested_hwpt_id[0], should fail, + * but have the old valid domain. This is a designed + * negative case. Normally, this shall succeed. + */ + test_err_pasid_replace(ENOMEM, pasid, nested_hwpt_id[0]); + ASSERT_EQ(0, + test_cmd_pasid_check_hwpt(self->fd, self->stdev_id, + pasid, s2_hwpt_id)); + + /* cleanup pasid 1024 */ + test_cmd_pasid_detach(pasid); + + /* Attach to iopf-capable hwpt */ + + /* + * Attach an iopf hwpt to pasid 2048, should succeed, domain should + * be valid. + */ + pasid = 2048; + test_cmd_pasid_attach(pasid, iopf_hwpt_id); + ASSERT_EQ(0, + test_cmd_pasid_check_hwpt(self->fd, self->stdev_id, + pasid, iopf_hwpt_id)); + + test_cmd_trigger_iopf_pasid(self->device_id, pasid, fault_fd); + + /* + * Replace with s2_hwpt_id for pasid 2048, should + * succeed, and have valid domain. + */ + test_cmd_pasid_replace(pasid, s2_hwpt_id); + ASSERT_EQ(0, + test_cmd_pasid_check_hwpt(self->fd, self->stdev_id, + pasid, s2_hwpt_id)); + + /* cleanup pasid 2048 */ + test_cmd_pasid_detach(pasid); + + test_ioctl_destroy(iopf_hwpt_id); + close(fault_fd); + test_ioctl_destroy(fault_id); + + /* Detach the s2_hwpt_id from RID */ + test_cmd_mock_domain_replace(self->stdev_id, self->ioas_id); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/iommu/iommufd_fail_nth.c b/tools/testing/selftests/iommu/iommufd_fail_nth.c index 64b1f8e1b0cf..e11ec4b121fc 100644 --- a/tools/testing/selftests/iommu/iommufd_fail_nth.c +++ b/tools/testing/selftests/iommu/iommufd_fail_nth.c @@ -209,12 +209,16 @@ FIXTURE(basic_fail_nth) { int fd; uint32_t access_id; + uint32_t stdev_id; + uint32_t pasid; }; FIXTURE_SETUP(basic_fail_nth) { self->fd = -1; self->access_id = 0; + self->stdev_id = 0; + self->pasid = 0; //test should use a non-zero value } FIXTURE_TEARDOWN(basic_fail_nth) @@ -226,6 +230,8 @@ FIXTURE_TEARDOWN(basic_fail_nth) rc = _test_cmd_destroy_access(self->access_id); assert(rc == 0); } + if (self->pasid && self->stdev_id) + _test_cmd_pasid_detach(self->fd, self->stdev_id, self->pasid); teardown_iommufd(self->fd, _metadata); } @@ -620,10 +626,11 @@ TEST_FAIL_NTH(basic_fail_nth, device) }; struct iommu_test_hw_info info; uint32_t fault_id, fault_fd; + uint32_t veventq_id, veventq_fd; uint32_t fault_hwpt_id; + uint32_t test_hwpt_id; uint32_t ioas_id; uint32_t ioas_id2; - uint32_t stdev_id; uint32_t idev_id; uint32_t hwpt_id; uint32_t viommu_id; @@ -654,25 +661,30 @@ TEST_FAIL_NTH(basic_fail_nth, device) fail_nth_enable(); - if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, NULL, - &idev_id)) + if (_test_cmd_mock_domain_flags(self->fd, ioas_id, + MOCK_FLAGS_DEVICE_PASID, + &self->stdev_id, NULL, &idev_id)) return -1; - if (_test_cmd_get_hw_info(self->fd, idev_id, &info, sizeof(info), NULL)) + if (_test_cmd_get_hw_info(self->fd, idev_id, &info, + sizeof(info), NULL, NULL)) return -1; - if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0, 0, &hwpt_id, + if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0, + IOMMU_HWPT_ALLOC_PASID, &hwpt_id, IOMMU_HWPT_DATA_NONE, 0, 0)) return -1; - if (_test_cmd_mock_domain_replace(self->fd, stdev_id, ioas_id2, NULL)) + if (_test_cmd_mock_domain_replace(self->fd, self->stdev_id, ioas_id2, NULL)) return -1; - if (_test_cmd_mock_domain_replace(self->fd, stdev_id, hwpt_id, NULL)) + if (_test_cmd_mock_domain_replace(self->fd, self->stdev_id, hwpt_id, NULL)) return -1; if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0, - IOMMU_HWPT_ALLOC_NEST_PARENT, &hwpt_id, + IOMMU_HWPT_ALLOC_NEST_PARENT | + IOMMU_HWPT_ALLOC_PASID, + &hwpt_id, IOMMU_HWPT_DATA_NONE, 0, 0)) return -1; @@ -692,6 +704,37 @@ TEST_FAIL_NTH(basic_fail_nth, device) IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data))) return -1; + if (_test_cmd_veventq_alloc(self->fd, viommu_id, + IOMMU_VEVENTQ_TYPE_SELFTEST, &veventq_id, + &veventq_fd)) + return -1; + close(veventq_fd); + + if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0, + IOMMU_HWPT_ALLOC_PASID, + &test_hwpt_id, + IOMMU_HWPT_DATA_NONE, 0, 0)) + return -1; + + /* Tests for pasid attach/replace/detach */ + + self->pasid = 200; + + if (_test_cmd_pasid_attach(self->fd, self->stdev_id, + self->pasid, hwpt_id)) { + self->pasid = 0; + return -1; + } + + if (_test_cmd_pasid_replace(self->fd, self->stdev_id, + self->pasid, test_hwpt_id)) + return -1; + + if (_test_cmd_pasid_detach(self->fd, self->stdev_id, self->pasid)) + return -1; + + self->pasid = 0; + return 0; } diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h index d979f5b0efe8..72f6636e5d90 100644 --- a/tools/testing/selftests/iommu/iommufd_utils.h +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -9,6 +9,7 @@ #include <sys/ioctl.h> #include <stdint.h> #include <assert.h> +#include <poll.h> #include "../kselftest_harness.h" #include "../../../../drivers/iommu/iommufd/iommufd_test.h" @@ -757,7 +758,8 @@ static void teardown_iommufd(int fd, struct __test_metadata *_metadata) /* @data can be NULL */ static int _test_cmd_get_hw_info(int fd, __u32 device_id, void *data, - size_t data_len, uint32_t *capabilities) + size_t data_len, uint32_t *capabilities, + uint8_t *max_pasid) { struct iommu_test_hw_info *info = (struct iommu_test_hw_info *)data; struct iommu_hw_info cmd = { @@ -802,6 +804,9 @@ static int _test_cmd_get_hw_info(int fd, __u32 device_id, void *data, assert(!info->flags); } + if (max_pasid) + *max_pasid = cmd.out_max_pasid_log2; + if (capabilities) *capabilities = cmd.out_capabilities; @@ -810,14 +815,19 @@ static int _test_cmd_get_hw_info(int fd, __u32 device_id, void *data, #define test_cmd_get_hw_info(device_id, data, data_len) \ ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, data, \ - data_len, NULL)) + data_len, NULL, NULL)) #define test_err_get_hw_info(_errno, device_id, data, data_len) \ EXPECT_ERRNO(_errno, _test_cmd_get_hw_info(self->fd, device_id, data, \ - data_len, NULL)) + data_len, NULL, NULL)) #define test_cmd_get_hw_capabilities(device_id, caps, mask) \ - ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, NULL, 0, &caps)) + ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, NULL, \ + 0, &caps, NULL)) + +#define test_cmd_get_hw_info_pasid(device_id, max_pasid) \ + ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, NULL, \ + 0, NULL, max_pasid)) static int _test_ioctl_fault_alloc(int fd, __u32 *fault_id, __u32 *fault_fd) { @@ -842,14 +852,15 @@ static int _test_ioctl_fault_alloc(int fd, __u32 *fault_id, __u32 *fault_fd) ASSERT_NE(0, *(fault_fd)); \ }) -static int _test_cmd_trigger_iopf(int fd, __u32 device_id, __u32 fault_fd) +static int _test_cmd_trigger_iopf(int fd, __u32 device_id, __u32 pasid, + __u32 fault_fd) { struct iommu_test_cmd trigger_iopf_cmd = { .size = sizeof(trigger_iopf_cmd), .op = IOMMU_TEST_OP_TRIGGER_IOPF, .trigger_iopf = { .dev_id = device_id, - .pasid = 0x1, + .pasid = pasid, .grpid = 0x2, .perm = IOMMU_PGFAULT_PERM_READ | IOMMU_PGFAULT_PERM_WRITE, .addr = 0xdeadbeaf, @@ -880,7 +891,10 @@ static int _test_cmd_trigger_iopf(int fd, __u32 device_id, __u32 fault_fd) } #define test_cmd_trigger_iopf(device_id, fault_fd) \ - ASSERT_EQ(0, _test_cmd_trigger_iopf(self->fd, device_id, fault_fd)) + ASSERT_EQ(0, _test_cmd_trigger_iopf(self->fd, device_id, 0x1, fault_fd)) +#define test_cmd_trigger_iopf_pasid(device_id, pasid, fault_fd) \ + ASSERT_EQ(0, _test_cmd_trigger_iopf(self->fd, device_id, \ + pasid, fault_fd)) static int _test_cmd_viommu_alloc(int fd, __u32 device_id, __u32 hwpt_id, __u32 type, __u32 flags, __u32 *viommu_id) @@ -936,3 +950,204 @@ static int _test_cmd_vdevice_alloc(int fd, __u32 viommu_id, __u32 idev_id, EXPECT_ERRNO(_errno, \ _test_cmd_vdevice_alloc(self->fd, viommu_id, idev_id, \ virt_id, vdev_id)) + +static int _test_cmd_veventq_alloc(int fd, __u32 viommu_id, __u32 type, + __u32 *veventq_id, __u32 *veventq_fd) +{ + struct iommu_veventq_alloc cmd = { + .size = sizeof(cmd), + .type = type, + .veventq_depth = 2, + .viommu_id = viommu_id, + }; + int ret; + + ret = ioctl(fd, IOMMU_VEVENTQ_ALLOC, &cmd); + if (ret) + return ret; + if (veventq_id) + *veventq_id = cmd.out_veventq_id; + if (veventq_fd) + *veventq_fd = cmd.out_veventq_fd; + return 0; +} + +#define test_cmd_veventq_alloc(viommu_id, type, veventq_id, veventq_fd) \ + ASSERT_EQ(0, _test_cmd_veventq_alloc(self->fd, viommu_id, type, \ + veventq_id, veventq_fd)) +#define test_err_veventq_alloc(_errno, viommu_id, type, veventq_id, \ + veventq_fd) \ + EXPECT_ERRNO(_errno, \ + _test_cmd_veventq_alloc(self->fd, viommu_id, type, \ + veventq_id, veventq_fd)) + +static int _test_cmd_trigger_vevents(int fd, __u32 dev_id, __u32 nvevents) +{ + struct iommu_test_cmd trigger_vevent_cmd = { + .size = sizeof(trigger_vevent_cmd), + .op = IOMMU_TEST_OP_TRIGGER_VEVENT, + .trigger_vevent = { + .dev_id = dev_id, + }, + }; + int ret; + + while (nvevents--) { + ret = ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_TRIGGER_VEVENT), + &trigger_vevent_cmd); + if (ret < 0) + return -1; + } + return ret; +} + +#define test_cmd_trigger_vevents(dev_id, nvevents) \ + ASSERT_EQ(0, _test_cmd_trigger_vevents(self->fd, dev_id, nvevents)) + +static int _test_cmd_read_vevents(int fd, __u32 event_fd, __u32 nvevents, + __u32 virt_id, int *prev_seq) +{ + struct pollfd pollfd = { .fd = event_fd, .events = POLLIN }; + struct iommu_viommu_event_selftest *event; + struct iommufd_vevent_header *hdr; + ssize_t bytes; + void *data; + int ret, i; + + ret = poll(&pollfd, 1, 1000); + if (ret < 0) + return -1; + + data = calloc(nvevents, sizeof(*hdr) + sizeof(*event)); + if (!data) { + errno = ENOMEM; + return -1; + } + + bytes = read(event_fd, data, + nvevents * (sizeof(*hdr) + sizeof(*event))); + if (bytes <= 0) { + errno = EFAULT; + ret = -1; + goto out_free; + } + + for (i = 0; i < nvevents; i++) { + hdr = data + i * (sizeof(*hdr) + sizeof(*event)); + + if (hdr->flags & IOMMU_VEVENTQ_FLAG_LOST_EVENTS || + hdr->sequence - *prev_seq > 1) { + *prev_seq = hdr->sequence; + errno = EOVERFLOW; + ret = -1; + goto out_free; + } + *prev_seq = hdr->sequence; + event = data + sizeof(*hdr); + if (event->virt_id != virt_id) { + errno = EINVAL; + ret = -1; + goto out_free; + } + } + + ret = 0; +out_free: + free(data); + return ret; +} + +#define test_cmd_read_vevents(event_fd, nvevents, virt_id, prev_seq) \ + ASSERT_EQ(0, _test_cmd_read_vevents(self->fd, event_fd, nvevents, \ + virt_id, prev_seq)) +#define test_err_read_vevents(_errno, event_fd, nvevents, virt_id, prev_seq) \ + EXPECT_ERRNO(_errno, \ + _test_cmd_read_vevents(self->fd, event_fd, nvevents, \ + virt_id, prev_seq)) + +static int _test_cmd_pasid_attach(int fd, __u32 stdev_id, __u32 pasid, + __u32 pt_id) +{ + struct iommu_test_cmd test_attach = { + .size = sizeof(test_attach), + .op = IOMMU_TEST_OP_PASID_ATTACH, + .id = stdev_id, + .pasid_attach = { + .pasid = pasid, + .pt_id = pt_id, + }, + }; + + return ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_PASID_ATTACH), + &test_attach); +} + +#define test_cmd_pasid_attach(pasid, hwpt_id) \ + ASSERT_EQ(0, _test_cmd_pasid_attach(self->fd, self->stdev_id, \ + pasid, hwpt_id)) + +#define test_err_pasid_attach(_errno, pasid, hwpt_id) \ + EXPECT_ERRNO(_errno, \ + _test_cmd_pasid_attach(self->fd, self->stdev_id, \ + pasid, hwpt_id)) + +static int _test_cmd_pasid_replace(int fd, __u32 stdev_id, __u32 pasid, + __u32 pt_id) +{ + struct iommu_test_cmd test_replace = { + .size = sizeof(test_replace), + .op = IOMMU_TEST_OP_PASID_REPLACE, + .id = stdev_id, + .pasid_replace = { + .pasid = pasid, + .pt_id = pt_id, + }, + }; + + return ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_PASID_REPLACE), + &test_replace); +} + +#define test_cmd_pasid_replace(pasid, hwpt_id) \ + ASSERT_EQ(0, _test_cmd_pasid_replace(self->fd, self->stdev_id, \ + pasid, hwpt_id)) + +#define test_err_pasid_replace(_errno, pasid, hwpt_id) \ + EXPECT_ERRNO(_errno, \ + _test_cmd_pasid_replace(self->fd, self->stdev_id, \ + pasid, hwpt_id)) + +static int _test_cmd_pasid_detach(int fd, __u32 stdev_id, __u32 pasid) +{ + struct iommu_test_cmd test_detach = { + .size = sizeof(test_detach), + .op = IOMMU_TEST_OP_PASID_DETACH, + .id = stdev_id, + .pasid_detach = { + .pasid = pasid, + }, + }; + + return ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_PASID_DETACH), + &test_detach); +} + +#define test_cmd_pasid_detach(pasid) \ + ASSERT_EQ(0, _test_cmd_pasid_detach(self->fd, self->stdev_id, pasid)) + +static int test_cmd_pasid_check_hwpt(int fd, __u32 stdev_id, __u32 pasid, + __u32 hwpt_id) +{ + struct iommu_test_cmd test_pasid_check = { + .size = sizeof(test_pasid_check), + .op = IOMMU_TEST_OP_PASID_CHECK_HWPT, + .id = stdev_id, + .pasid_check = { + .pasid = pasid, + .hwpt_id = hwpt_id, + }, + }; + + return ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_PASID_CHECK_HWPT), + &test_pasid_check); +} diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index f773f8f99249..f62b0a5aba35 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -50,8 +50,18 @@ LIBKVM_riscv += lib/riscv/ucall.c # Non-compiled test targets TEST_PROGS_x86 += x86/nx_huge_pages_test.sh +# Compiled test targets valid on all architectures with libkvm support +TEST_GEN_PROGS_COMMON = demand_paging_test +TEST_GEN_PROGS_COMMON += dirty_log_test +TEST_GEN_PROGS_COMMON += guest_print_test +TEST_GEN_PROGS_COMMON += kvm_binary_stats_test +TEST_GEN_PROGS_COMMON += kvm_create_max_vcpus +TEST_GEN_PROGS_COMMON += kvm_page_table_test +TEST_GEN_PROGS_COMMON += set_memory_region_test + # Compiled test targets -TEST_GEN_PROGS_x86 = x86/cpuid_test +TEST_GEN_PROGS_x86 = $(TEST_GEN_PROGS_COMMON) +TEST_GEN_PROGS_x86 += x86/cpuid_test TEST_GEN_PROGS_x86 += x86/cr4_cpuid_sync_test TEST_GEN_PROGS_x86 += x86/dirty_log_page_splitting_test TEST_GEN_PROGS_x86 += x86/feature_msrs_test @@ -119,27 +129,21 @@ TEST_GEN_PROGS_x86 += x86/triple_fault_event_test TEST_GEN_PROGS_x86 += x86/recalc_apic_map_test TEST_GEN_PROGS_x86 += access_tracking_perf_test TEST_GEN_PROGS_x86 += coalesced_io_test -TEST_GEN_PROGS_x86 += demand_paging_test -TEST_GEN_PROGS_x86 += dirty_log_test TEST_GEN_PROGS_x86 += dirty_log_perf_test TEST_GEN_PROGS_x86 += guest_memfd_test -TEST_GEN_PROGS_x86 += guest_print_test TEST_GEN_PROGS_x86 += hardware_disable_test -TEST_GEN_PROGS_x86 += kvm_create_max_vcpus -TEST_GEN_PROGS_x86 += kvm_page_table_test TEST_GEN_PROGS_x86 += memslot_modification_stress_test TEST_GEN_PROGS_x86 += memslot_perf_test TEST_GEN_PROGS_x86 += mmu_stress_test TEST_GEN_PROGS_x86 += rseq_test -TEST_GEN_PROGS_x86 += set_memory_region_test TEST_GEN_PROGS_x86 += steal_time -TEST_GEN_PROGS_x86 += kvm_binary_stats_test TEST_GEN_PROGS_x86 += system_counter_offset_test TEST_GEN_PROGS_x86 += pre_fault_memory_test # Compiled outputs used by test targets TEST_GEN_PROGS_EXTENDED_x86 += x86/nx_huge_pages_test +TEST_GEN_PROGS_arm64 = $(TEST_GEN_PROGS_COMMON) TEST_GEN_PROGS_arm64 += arm64/aarch32_id_regs TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases TEST_GEN_PROGS_arm64 += arm64/debug-exceptions @@ -158,22 +162,16 @@ TEST_GEN_PROGS_arm64 += arm64/no-vgic-v3 TEST_GEN_PROGS_arm64 += access_tracking_perf_test TEST_GEN_PROGS_arm64 += arch_timer TEST_GEN_PROGS_arm64 += coalesced_io_test -TEST_GEN_PROGS_arm64 += demand_paging_test -TEST_GEN_PROGS_arm64 += dirty_log_test TEST_GEN_PROGS_arm64 += dirty_log_perf_test -TEST_GEN_PROGS_arm64 += guest_print_test TEST_GEN_PROGS_arm64 += get-reg-list -TEST_GEN_PROGS_arm64 += kvm_create_max_vcpus -TEST_GEN_PROGS_arm64 += kvm_page_table_test TEST_GEN_PROGS_arm64 += memslot_modification_stress_test TEST_GEN_PROGS_arm64 += memslot_perf_test TEST_GEN_PROGS_arm64 += mmu_stress_test TEST_GEN_PROGS_arm64 += rseq_test -TEST_GEN_PROGS_arm64 += set_memory_region_test TEST_GEN_PROGS_arm64 += steal_time -TEST_GEN_PROGS_arm64 += kvm_binary_stats_test -TEST_GEN_PROGS_s390 = s390/memop +TEST_GEN_PROGS_s390 = $(TEST_GEN_PROGS_COMMON) +TEST_GEN_PROGS_s390 += s390/memop TEST_GEN_PROGS_s390 += s390/resets TEST_GEN_PROGS_s390 += s390/sync_regs_test TEST_GEN_PROGS_s390 += s390/tprot @@ -182,27 +180,14 @@ TEST_GEN_PROGS_s390 += s390/debug_test TEST_GEN_PROGS_s390 += s390/cpumodel_subfuncs_test TEST_GEN_PROGS_s390 += s390/shared_zeropage_test TEST_GEN_PROGS_s390 += s390/ucontrol_test -TEST_GEN_PROGS_s390 += demand_paging_test -TEST_GEN_PROGS_s390 += dirty_log_test -TEST_GEN_PROGS_s390 += guest_print_test -TEST_GEN_PROGS_s390 += kvm_create_max_vcpus -TEST_GEN_PROGS_s390 += kvm_page_table_test TEST_GEN_PROGS_s390 += rseq_test -TEST_GEN_PROGS_s390 += set_memory_region_test -TEST_GEN_PROGS_s390 += kvm_binary_stats_test +TEST_GEN_PROGS_riscv = $(TEST_GEN_PROGS_COMMON) TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test TEST_GEN_PROGS_riscv += riscv/ebreak_test TEST_GEN_PROGS_riscv += arch_timer TEST_GEN_PROGS_riscv += coalesced_io_test -TEST_GEN_PROGS_riscv += demand_paging_test -TEST_GEN_PROGS_riscv += dirty_log_test TEST_GEN_PROGS_riscv += get-reg-list -TEST_GEN_PROGS_riscv += guest_print_test -TEST_GEN_PROGS_riscv += kvm_binary_stats_test -TEST_GEN_PROGS_riscv += kvm_create_max_vcpus -TEST_GEN_PROGS_riscv += kvm_page_table_test -TEST_GEN_PROGS_riscv += set_memory_region_test TEST_GEN_PROGS_riscv += steal_time SPLIT_TESTS += arch_timer diff --git a/tools/testing/selftests/kvm/arm64/page_fault_test.c b/tools/testing/selftests/kvm/arm64/page_fault_test.c index ec33a8f9c908..dc6559dad9d8 100644 --- a/tools/testing/selftests/kvm/arm64/page_fault_test.c +++ b/tools/testing/selftests/kvm/arm64/page_fault_test.c @@ -199,7 +199,7 @@ static bool guest_set_ha(void) if (hadbs == 0) return false; - tcr = read_sysreg(tcr_el1) | TCR_EL1_HA; + tcr = read_sysreg(tcr_el1) | TCR_HA; write_sysreg(tcr, tcr_el1); isb(); diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h index 1e8d0d531fbd..b0fc0f945766 100644 --- a/tools/testing/selftests/kvm/include/arm64/processor.h +++ b/tools/testing/selftests/kvm/include/arm64/processor.h @@ -62,6 +62,67 @@ MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) | \ MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT)) +/* TCR_EL1 specific flags */ +#define TCR_T0SZ_OFFSET 0 +#define TCR_T0SZ(x) ((UL(64) - (x)) << TCR_T0SZ_OFFSET) + +#define TCR_IRGN0_SHIFT 8 +#define TCR_IRGN0_MASK (UL(3) << TCR_IRGN0_SHIFT) +#define TCR_IRGN0_NC (UL(0) << TCR_IRGN0_SHIFT) +#define TCR_IRGN0_WBWA (UL(1) << TCR_IRGN0_SHIFT) +#define TCR_IRGN0_WT (UL(2) << TCR_IRGN0_SHIFT) +#define TCR_IRGN0_WBnWA (UL(3) << TCR_IRGN0_SHIFT) + +#define TCR_ORGN0_SHIFT 10 +#define TCR_ORGN0_MASK (UL(3) << TCR_ORGN0_SHIFT) +#define TCR_ORGN0_NC (UL(0) << TCR_ORGN0_SHIFT) +#define TCR_ORGN0_WBWA (UL(1) << TCR_ORGN0_SHIFT) +#define TCR_ORGN0_WT (UL(2) << TCR_ORGN0_SHIFT) +#define TCR_ORGN0_WBnWA (UL(3) << TCR_ORGN0_SHIFT) + +#define TCR_SH0_SHIFT 12 +#define TCR_SH0_MASK (UL(3) << TCR_SH0_SHIFT) +#define TCR_SH0_INNER (UL(3) << TCR_SH0_SHIFT) + +#define TCR_TG0_SHIFT 14 +#define TCR_TG0_MASK (UL(3) << TCR_TG0_SHIFT) +#define TCR_TG0_4K (UL(0) << TCR_TG0_SHIFT) +#define TCR_TG0_64K (UL(1) << TCR_TG0_SHIFT) +#define TCR_TG0_16K (UL(2) << TCR_TG0_SHIFT) + +#define TCR_IPS_SHIFT 32 +#define TCR_IPS_MASK (UL(7) << TCR_IPS_SHIFT) +#define TCR_IPS_52_BITS (UL(6) << TCR_IPS_SHIFT) +#define TCR_IPS_48_BITS (UL(5) << TCR_IPS_SHIFT) +#define TCR_IPS_40_BITS (UL(2) << TCR_IPS_SHIFT) +#define TCR_IPS_36_BITS (UL(1) << TCR_IPS_SHIFT) + +#define TCR_HA (UL(1) << 39) +#define TCR_DS (UL(1) << 59) + +/* + * AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers). + */ +#define PTE_ATTRINDX(t) ((t) << 2) +#define PTE_ATTRINDX_MASK GENMASK(4, 2) +#define PTE_ATTRINDX_SHIFT 2 + +#define PTE_VALID BIT(0) +#define PGD_TYPE_TABLE BIT(1) +#define PUD_TYPE_TABLE BIT(1) +#define PMD_TYPE_TABLE BIT(1) +#define PTE_TYPE_PAGE BIT(1) + +#define PTE_SHARED (UL(3) << 8) /* SH[1:0], inner shareable */ +#define PTE_AF BIT(10) + +#define PTE_ADDR_MASK(page_shift) GENMASK(47, (page_shift)) +#define PTE_ADDR_51_48 GENMASK(15, 12) +#define PTE_ADDR_51_48_SHIFT 12 +#define PTE_ADDR_MASK_LPA2(page_shift) GENMASK(49, (page_shift)) +#define PTE_ADDR_51_50_LPA2 GENMASK(9, 8) +#define PTE_ADDR_51_50_LPA2_SHIFT 8 + void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init); struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, struct kvm_vcpu_init *init, void *guest_code); @@ -102,12 +163,6 @@ enum { (v) == VECTOR_SYNC_LOWER_64 || \ (v) == VECTOR_SYNC_LOWER_32) -/* Access flag */ -#define PTE_AF (1ULL << 10) - -/* Access flag update enable/disable */ -#define TCR_EL1_HA (1ULL << 39) - void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k, uint32_t *ipa16k, uint32_t *ipa64k); diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c index 7ba3aa3755f3..9d69904cb608 100644 --- a/tools/testing/selftests/kvm/lib/arm64/processor.c +++ b/tools/testing/selftests/kvm/lib/arm64/processor.c @@ -72,13 +72,13 @@ static uint64_t addr_pte(struct kvm_vm *vm, uint64_t pa, uint64_t attrs) uint64_t pte; if (use_lpa2_pte_format(vm)) { - pte = pa & GENMASK(49, vm->page_shift); - pte |= FIELD_GET(GENMASK(51, 50), pa) << 8; - attrs &= ~GENMASK(9, 8); + pte = pa & PTE_ADDR_MASK_LPA2(vm->page_shift); + pte |= FIELD_GET(GENMASK(51, 50), pa) << PTE_ADDR_51_50_LPA2_SHIFT; + attrs &= ~PTE_ADDR_51_50_LPA2; } else { - pte = pa & GENMASK(47, vm->page_shift); + pte = pa & PTE_ADDR_MASK(vm->page_shift); if (vm->page_shift == 16) - pte |= FIELD_GET(GENMASK(51, 48), pa) << 12; + pte |= FIELD_GET(GENMASK(51, 48), pa) << PTE_ADDR_51_48_SHIFT; } pte |= attrs; @@ -90,12 +90,12 @@ static uint64_t pte_addr(struct kvm_vm *vm, uint64_t pte) uint64_t pa; if (use_lpa2_pte_format(vm)) { - pa = pte & GENMASK(49, vm->page_shift); - pa |= FIELD_GET(GENMASK(9, 8), pte) << 50; + pa = pte & PTE_ADDR_MASK_LPA2(vm->page_shift); + pa |= FIELD_GET(PTE_ADDR_51_50_LPA2, pte) << 50; } else { - pa = pte & GENMASK(47, vm->page_shift); + pa = pte & PTE_ADDR_MASK(vm->page_shift); if (vm->page_shift == 16) - pa |= FIELD_GET(GENMASK(15, 12), pte) << 48; + pa |= FIELD_GET(PTE_ADDR_51_48, pte) << 48; } return pa; @@ -128,7 +128,8 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, uint64_t flags) { - uint8_t attr_idx = flags & 7; + uint8_t attr_idx = flags & (PTE_ATTRINDX_MASK >> PTE_ATTRINDX_SHIFT); + uint64_t pg_attr; uint64_t *ptep; TEST_ASSERT((vaddr % vm->page_size) == 0, @@ -147,18 +148,21 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8; if (!*ptep) - *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3); + *ptep = addr_pte(vm, vm_alloc_page_table(vm), + PGD_TYPE_TABLE | PTE_VALID); switch (vm->pgtable_levels) { case 4: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8; if (!*ptep) - *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3); + *ptep = addr_pte(vm, vm_alloc_page_table(vm), + PUD_TYPE_TABLE | PTE_VALID); /* fall through */ case 3: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8; if (!*ptep) - *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3); + *ptep = addr_pte(vm, vm_alloc_page_table(vm), + PMD_TYPE_TABLE | PTE_VALID); /* fall through */ case 2: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8; @@ -167,7 +171,11 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, TEST_FAIL("Page table levels must be 2, 3, or 4"); } - *ptep = addr_pte(vm, paddr, (attr_idx << 2) | (1 << 10) | 3); /* AF */ + pg_attr = PTE_AF | PTE_ATTRINDX(attr_idx) | PTE_TYPE_PAGE | PTE_VALID; + if (!use_lpa2_pte_format(vm)) + pg_attr |= PTE_SHARED; + + *ptep = addr_pte(vm, paddr, pg_attr); } void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) @@ -293,20 +301,20 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) case VM_MODE_P48V48_64K: case VM_MODE_P40V48_64K: case VM_MODE_P36V48_64K: - tcr_el1 |= 1ul << 14; /* TG0 = 64KB */ + tcr_el1 |= TCR_TG0_64K; break; case VM_MODE_P52V48_16K: case VM_MODE_P48V48_16K: case VM_MODE_P40V48_16K: case VM_MODE_P36V48_16K: case VM_MODE_P36V47_16K: - tcr_el1 |= 2ul << 14; /* TG0 = 16KB */ + tcr_el1 |= TCR_TG0_16K; break; case VM_MODE_P52V48_4K: case VM_MODE_P48V48_4K: case VM_MODE_P40V48_4K: case VM_MODE_P36V48_4K: - tcr_el1 |= 0ul << 14; /* TG0 = 4KB */ + tcr_el1 |= TCR_TG0_4K; break; default: TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); @@ -319,35 +327,35 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) case VM_MODE_P52V48_4K: case VM_MODE_P52V48_16K: case VM_MODE_P52V48_64K: - tcr_el1 |= 6ul << 32; /* IPS = 52 bits */ + tcr_el1 |= TCR_IPS_52_BITS; ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->pgd) << 2; break; case VM_MODE_P48V48_4K: case VM_MODE_P48V48_16K: case VM_MODE_P48V48_64K: - tcr_el1 |= 5ul << 32; /* IPS = 48 bits */ + tcr_el1 |= TCR_IPS_48_BITS; break; case VM_MODE_P40V48_4K: case VM_MODE_P40V48_16K: case VM_MODE_P40V48_64K: - tcr_el1 |= 2ul << 32; /* IPS = 40 bits */ + tcr_el1 |= TCR_IPS_40_BITS; break; case VM_MODE_P36V48_4K: case VM_MODE_P36V48_16K: case VM_MODE_P36V48_64K: case VM_MODE_P36V47_16K: - tcr_el1 |= 1ul << 32; /* IPS = 36 bits */ + tcr_el1 |= TCR_IPS_36_BITS; break; default: TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); } - sctlr_el1 |= (1 << 0) | (1 << 2) | (1 << 12) /* M | C | I */; - /* TCR_EL1 |= IRGN0:WBWA | ORGN0:WBWA | SH0:Inner-Shareable */; - tcr_el1 |= (1 << 8) | (1 << 10) | (3 << 12); - tcr_el1 |= (64 - vm->va_bits) /* T0SZ */; + sctlr_el1 |= SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_I; + + tcr_el1 |= TCR_IRGN0_WBWA | TCR_ORGN0_WBWA | TCR_SH0_INNER; + tcr_el1 |= TCR_T0SZ(vm->va_bits); if (use_lpa2_pte_format(vm)) - tcr_el1 |= (1ul << 59) /* DS */; + tcr_el1 |= TCR_DS; vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1); vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 279ad8946040..815bc45dd8dc 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -2019,9 +2019,8 @@ static struct exit_reason { KVM_EXIT_STRING(RISCV_SBI), KVM_EXIT_STRING(RISCV_CSR), KVM_EXIT_STRING(NOTIFY), -#ifdef KVM_EXIT_MEMORY_NOT_PRESENT - KVM_EXIT_STRING(MEMORY_NOT_PRESENT), -#endif + KVM_EXIT_STRING(LOONGARCH_IOCSR), + KVM_EXIT_STRING(MEMORY_FAULT), }; /* diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c index 8515921dfdbf..569f2d67c9b8 100644 --- a/tools/testing/selftests/kvm/riscv/get-reg-list.c +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -53,8 +53,10 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVNAPOT: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVPBMT: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVVPTC: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZAAMO: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZABHA: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZACAS: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZALRSC: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZAWRS: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBA: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBB: @@ -434,8 +436,10 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off) KVM_ISA_EXT_ARR(SVNAPOT), KVM_ISA_EXT_ARR(SVPBMT), KVM_ISA_EXT_ARR(SVVPTC), + KVM_ISA_EXT_ARR(ZAAMO), KVM_ISA_EXT_ARR(ZABHA), KVM_ISA_EXT_ARR(ZACAS), + KVM_ISA_EXT_ARR(ZALRSC), KVM_ISA_EXT_ARR(ZAWRS), KVM_ISA_EXT_ARR(ZBA), KVM_ISA_EXT_ARR(ZBB), @@ -974,8 +978,10 @@ KVM_ISA_EXT_SIMPLE_CONFIG(svinval, SVINVAL); KVM_ISA_EXT_SIMPLE_CONFIG(svnapot, SVNAPOT); KVM_ISA_EXT_SIMPLE_CONFIG(svpbmt, SVPBMT); KVM_ISA_EXT_SIMPLE_CONFIG(svvptc, SVVPTC); +KVM_ISA_EXT_SIMPLE_CONFIG(zaamo, ZAAMO); KVM_ISA_EXT_SIMPLE_CONFIG(zabha, ZABHA); KVM_ISA_EXT_SIMPLE_CONFIG(zacas, ZACAS); +KVM_ISA_EXT_SIMPLE_CONFIG(zalrsc, ZALRSC); KVM_ISA_EXT_SIMPLE_CONFIG(zawrs, ZAWRS); KVM_ISA_EXT_SIMPLE_CONFIG(zba, ZBA); KVM_ISA_EXT_SIMPLE_CONFIG(zbb, ZBB); @@ -1045,8 +1051,10 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_svnapot, &config_svpbmt, &config_svvptc, + &config_zaamo, &config_zabha, &config_zacas, + &config_zalrsc, &config_zawrs, &config_zba, &config_zbb, diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c index e5898678bfab..1375fca80bcd 100644 --- a/tools/testing/selftests/kvm/rseq_test.c +++ b/tools/testing/selftests/kvm/rseq_test.c @@ -196,25 +196,27 @@ static void calc_min_max_cpu(void) static void help(const char *name) { puts(""); - printf("usage: %s [-h] [-u]\n", name); + printf("usage: %s [-h] [-u] [-l latency]\n", name); printf(" -u: Don't sanity check the number of successful KVM_RUNs\n"); + printf(" -l: Set /dev/cpu_dma_latency to suppress deep sleep states\n"); puts(""); exit(0); } int main(int argc, char *argv[]) { + int r, i, snapshot, opt, fd = -1, latency = -1; bool skip_sanity_check = false; - int r, i, snapshot; struct kvm_vm *vm; struct kvm_vcpu *vcpu; u32 cpu, rseq_cpu; - int opt; - while ((opt = getopt(argc, argv, "hu")) != -1) { + while ((opt = getopt(argc, argv, "hl:u")) != -1) { switch (opt) { case 'u': skip_sanity_check = true; + case 'l': + latency = atoi_paranoid(optarg); break; case 'h': default: @@ -243,6 +245,20 @@ int main(int argc, char *argv[]) pthread_create(&migration_thread, NULL, migration_worker, (void *)(unsigned long)syscall(SYS_gettid)); + if (latency >= 0) { + /* + * Writes to cpu_dma_latency persist only while the file is + * open, i.e. it allows userspace to provide guaranteed latency + * while running a workload. Keep the file open until the test + * completes, otherwise writing cpu_dma_latency is meaningless. + */ + fd = open("/dev/cpu_dma_latency", O_RDWR); + TEST_ASSERT(fd >= 0, __KVM_SYSCALL_ERROR("open() /dev/cpu_dma_latency", fd)); + + r = write(fd, &latency, 4); + TEST_ASSERT(r >= 1, "Error setting /dev/cpu_dma_latency"); + } + for (i = 0; !done; i++) { vcpu_run(vcpu); TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC, @@ -278,6 +294,9 @@ int main(int argc, char *argv[]) "rseq CPU = %d, sched CPU = %d", rseq_cpu, cpu); } + if (fd > 0) + close(fd); + /* * Sanity check that the test was able to enter the guest a reasonable * number of times, e.g. didn't get stalled too often/long waiting for @@ -293,8 +312,8 @@ int main(int argc, char *argv[]) TEST_ASSERT(skip_sanity_check || i > (NR_TASK_MIGRATIONS / 2), "Only performed %d KVM_RUNs, task stalled too much?\n\n" " Try disabling deep sleep states to reduce CPU wakeup latency,\n" - " e.g. via cpuidle.off=1 or setting /dev/cpu_dma_latency to '0',\n" - " or run with -u to disable this sanity check.", i); + " e.g. via cpuidle.off=1 or via -l <latency>, or run with -u to\n" + " disable this sanity check.", i); pthread_join(migration_thread, NULL); diff --git a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c index 2b550eff35f1..390ae2d87493 100644 --- a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c +++ b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c @@ -7,6 +7,7 @@ #include "kvm_util.h" #include "processor.h" +#include "kselftest.h" #define CPUID_MWAIT (1u << 3) @@ -14,6 +15,8 @@ enum monitor_mwait_testcases { MWAIT_QUIRK_DISABLED = BIT(0), MISC_ENABLES_QUIRK_DISABLED = BIT(1), MWAIT_DISABLED = BIT(2), + CPUID_DISABLED = BIT(3), + TEST_MAX = CPUID_DISABLED * 2 - 1, }; /* @@ -35,11 +38,19 @@ do { \ testcase, vector); \ } while (0) -static void guest_monitor_wait(int testcase) +static void guest_monitor_wait(void *arg) { + int testcase = (int) (long) arg; u8 vector; - GUEST_SYNC(testcase); + u64 val = rdmsr(MSR_IA32_MISC_ENABLE) & ~MSR_IA32_MISC_ENABLE_MWAIT; + if (!(testcase & MWAIT_DISABLED)) + val |= MSR_IA32_MISC_ENABLE_MWAIT; + wrmsr(MSR_IA32_MISC_ENABLE, val); + + __GUEST_ASSERT(this_cpu_has(X86_FEATURE_MWAIT) == !(testcase & MWAIT_DISABLED), + "Expected CPUID.MWAIT %s\n", + (testcase & MWAIT_DISABLED) ? "cleared" : "set"); /* * Arbitrarily MONITOR this function, SVM performs fault checks before @@ -50,19 +61,6 @@ static void guest_monitor_wait(int testcase) vector = kvm_asm_safe("mwait", "a"(guest_monitor_wait), "c"(0), "d"(0)); GUEST_ASSERT_MONITOR_MWAIT("MWAIT", testcase, vector); -} - -static void guest_code(void) -{ - guest_monitor_wait(MWAIT_DISABLED); - - guest_monitor_wait(MWAIT_QUIRK_DISABLED | MWAIT_DISABLED); - - guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_DISABLED); - guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED); - - guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED | MWAIT_DISABLED); - guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED); GUEST_DONE(); } @@ -74,56 +72,64 @@ int main(int argc, char *argv[]) struct kvm_vm *vm; struct ucall uc; int testcase; + char test[80]; - TEST_REQUIRE(this_cpu_has(X86_FEATURE_MWAIT)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2)); - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT); + ksft_print_header(); + ksft_set_plan(12); + for (testcase = 0; testcase <= TEST_MAX; testcase++) { + vm = vm_create_with_one_vcpu(&vcpu, guest_monitor_wait); + vcpu_args_set(vcpu, 1, (void *)(long)testcase); + + disabled_quirks = 0; + if (testcase & MWAIT_QUIRK_DISABLED) { + disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS; + strcpy(test, "MWAIT can fault"); + } else { + strcpy(test, "MWAIT never faults"); + } + if (testcase & MISC_ENABLES_QUIRK_DISABLED) { + disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT; + strcat(test, ", MISC_ENABLE updates CPUID"); + } else { + strcat(test, ", no CPUID updates"); + } + + vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks); + + if (!(testcase & MISC_ENABLES_QUIRK_DISABLED) && + (!!(testcase & CPUID_DISABLED) ^ !!(testcase & MWAIT_DISABLED))) + continue; + + if (testcase & CPUID_DISABLED) { + strcat(test, ", CPUID clear"); + vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT); + } else { + strcat(test, ", CPUID set"); + vcpu_set_cpuid_feature(vcpu, X86_FEATURE_MWAIT); + } + + if (testcase & MWAIT_DISABLED) + strcat(test, ", MWAIT disabled"); - while (1) { vcpu_run(vcpu); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); switch (get_ucall(vcpu, &uc)) { - case UCALL_SYNC: - testcase = uc.args[1]; - break; case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - goto done; + /* Detected in vcpu_run */ + break; case UCALL_DONE: - goto done; + ksft_test_result_pass("%s\n", test); + break; default: TEST_FAIL("Unknown ucall %lu", uc.cmd); - goto done; - } - - disabled_quirks = 0; - if (testcase & MWAIT_QUIRK_DISABLED) - disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS; - if (testcase & MISC_ENABLES_QUIRK_DISABLED) - disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT; - vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks); - - /* - * If the MISC_ENABLES quirk (KVM neglects to update CPUID to - * enable/disable MWAIT) is disabled, toggle the ENABLE_MWAIT - * bit in MISC_ENABLES accordingly. If the quirk is enabled, - * the only valid configuration is MWAIT disabled, as CPUID - * can't be manually changed after running the vCPU. - */ - if (!(testcase & MISC_ENABLES_QUIRK_DISABLED)) { - TEST_ASSERT(testcase & MWAIT_DISABLED, - "Can't toggle CPUID features after running vCPU"); - continue; + break; } - - vcpu_set_msr(vcpu, MSR_IA32_MISC_ENABLE, - (testcase & MWAIT_DISABLED) ? 0 : MSR_IA32_MISC_ENABLE_MWAIT); + kvm_vm_free(vm); } + ksft_finished(); -done: - kvm_vm_free(vm); return 0; } diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore index 121000c28c10..c5241b193db8 100644 --- a/tools/testing/selftests/mm/.gitignore +++ b/tools/testing/selftests/mm/.gitignore @@ -57,4 +57,4 @@ droppable hugetlb_dio pkey_sighandler_tests_32 pkey_sighandler_tests_64 -guard-pages +guard-regions diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index 63ce39d024bb..8270895039d1 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -97,7 +97,7 @@ TEST_GEN_FILES += hugetlb_fault_after_madv TEST_GEN_FILES += hugetlb_madv_vs_map TEST_GEN_FILES += hugetlb_dio TEST_GEN_FILES += droppable -TEST_GEN_FILES += guard-pages +TEST_GEN_FILES += guard-regions ifneq ($(ARCH),arm64) TEST_GEN_FILES += soft-dirty diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index 9446673645eb..f0cb14ea8608 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -876,7 +876,7 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) mremap_size = thpsize / 2; mremap_mem = mmap(NULL, mremap_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (mem == MAP_FAILED) { + if (mremap_mem == MAP_FAILED) { ksft_test_result_fail("mmap() failed\n"); goto munmap; } diff --git a/tools/testing/selftests/mm/guard-pages.c b/tools/testing/selftests/mm/guard-regions.c index 525c50d3ec23..b3d0e2771096 100644 --- a/tools/testing/selftests/mm/guard-pages.c +++ b/tools/testing/selftests/mm/guard-regions.c @@ -6,6 +6,7 @@ #include <assert.h> #include <errno.h> #include <fcntl.h> +#include <linux/limits.h> #include <linux/userfaultfd.h> #include <setjmp.h> #include <signal.h> @@ -18,6 +19,7 @@ #include <sys/syscall.h> #include <sys/uio.h> #include <unistd.h> +#include "vm_util.h" #include "../pidfd/pidfd.h" @@ -39,6 +41,79 @@ static sigjmp_buf signal_jmp_buf; */ #define FORCE_READ(x) (*(volatile typeof(x) *)x) +/* + * How is the test backing the mapping being tested? + */ +enum backing_type { + ANON_BACKED, + SHMEM_BACKED, + LOCAL_FILE_BACKED, +}; + +FIXTURE(guard_regions) +{ + unsigned long page_size; + char path[PATH_MAX]; + int fd; +}; + +FIXTURE_VARIANT(guard_regions) +{ + enum backing_type backing; +}; + +FIXTURE_VARIANT_ADD(guard_regions, anon) +{ + .backing = ANON_BACKED, +}; + +FIXTURE_VARIANT_ADD(guard_regions, shmem) +{ + .backing = SHMEM_BACKED, +}; + +FIXTURE_VARIANT_ADD(guard_regions, file) +{ + .backing = LOCAL_FILE_BACKED, +}; + +static bool is_anon_backed(const FIXTURE_VARIANT(guard_regions) * variant) +{ + switch (variant->backing) { + case ANON_BACKED: + case SHMEM_BACKED: + return true; + default: + return false; + } +} + +static void *mmap_(FIXTURE_DATA(guard_regions) * self, + const FIXTURE_VARIANT(guard_regions) * variant, + void *addr, size_t length, int prot, int extra_flags, + off_t offset) +{ + int fd; + int flags = extra_flags; + + switch (variant->backing) { + case ANON_BACKED: + flags |= MAP_PRIVATE | MAP_ANON; + fd = -1; + break; + case SHMEM_BACKED: + case LOCAL_FILE_BACKED: + flags |= MAP_SHARED; + fd = self->fd; + break; + default: + ksft_exit_fail(); + break; + } + + return mmap(addr, length, prot, flags, fd, offset); +} + static int userfaultfd(int flags) { return syscall(SYS_userfaultfd, flags); @@ -104,12 +179,7 @@ static bool try_read_write_buf(char *ptr) return try_read_buf(ptr) && try_write_buf(ptr); } -FIXTURE(guard_pages) -{ - unsigned long page_size; -}; - -FIXTURE_SETUP(guard_pages) +static void setup_sighandler(void) { struct sigaction act = { .sa_handler = &handle_fatal, @@ -119,11 +189,9 @@ FIXTURE_SETUP(guard_pages) sigemptyset(&act.sa_mask); if (sigaction(SIGSEGV, &act, NULL)) ksft_exit_fail_perror("sigaction"); +} - self->page_size = (unsigned long)sysconf(_SC_PAGESIZE); -}; - -FIXTURE_TEARDOWN(guard_pages) +static void teardown_sighandler(void) { struct sigaction act = { .sa_handler = SIG_DFL, @@ -134,15 +202,109 @@ FIXTURE_TEARDOWN(guard_pages) sigaction(SIGSEGV, &act, NULL); } -TEST_F(guard_pages, basic) +static int open_file(const char *prefix, char *path) +{ + int fd; + + snprintf(path, PATH_MAX, "%sguard_regions_test_file_XXXXXX", prefix); + fd = mkstemp(path); + if (fd < 0) + ksft_exit_fail_perror("mkstemp"); + + return fd; +} + +/* Establish a varying pattern in a buffer. */ +static void set_pattern(char *ptr, size_t num_pages, size_t page_size) +{ + size_t i; + + for (i = 0; i < num_pages; i++) { + char *ptr2 = &ptr[i * page_size]; + + memset(ptr2, 'a' + (i % 26), page_size); + } +} + +/* + * Check that a buffer contains the pattern set by set_pattern(), starting at a + * page offset of pgoff within the buffer. + */ +static bool check_pattern_offset(char *ptr, size_t num_pages, size_t page_size, + size_t pgoff) +{ + size_t i; + + for (i = 0; i < num_pages * page_size; i++) { + size_t offset = pgoff * page_size + i; + char actual = ptr[offset]; + char expected = 'a' + ((offset / page_size) % 26); + + if (actual != expected) + return false; + } + + return true; +} + +/* Check that a buffer contains the pattern set by set_pattern(). */ +static bool check_pattern(char *ptr, size_t num_pages, size_t page_size) +{ + return check_pattern_offset(ptr, num_pages, page_size, 0); +} + +/* Determine if a buffer contains only repetitions of a specified char. */ +static bool is_buf_eq(char *buf, size_t size, char chr) +{ + size_t i; + + for (i = 0; i < size; i++) { + if (buf[i] != chr) + return false; + } + + return true; +} + +FIXTURE_SETUP(guard_regions) +{ + self->page_size = (unsigned long)sysconf(_SC_PAGESIZE); + setup_sighandler(); + + if (variant->backing == ANON_BACKED) + return; + + self->fd = open_file( + variant->backing == SHMEM_BACKED ? "/tmp/" : "", + self->path); + + /* We truncate file to at least 100 pages, tests can modify as needed. */ + ASSERT_EQ(ftruncate(self->fd, 100 * self->page_size), 0); +}; + +FIXTURE_TEARDOWN_PARENT(guard_regions) +{ + teardown_sighandler(); + + if (variant->backing == ANON_BACKED) + return; + + if (self->fd >= 0) + close(self->fd); + + if (self->path[0] != '\0') + unlink(self->path); +} + +TEST_F(guard_regions, basic) { const unsigned long NUM_PAGES = 10; const unsigned long page_size = self->page_size; char *ptr; int i; - ptr = mmap(NULL, NUM_PAGES * page_size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANON, -1, 0); + ptr = mmap_(self, variant, NULL, NUM_PAGES * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Trivially assert we can touch the first page. */ @@ -228,32 +390,30 @@ TEST_F(guard_pages, basic) } /* Assert that operations applied across multiple VMAs work as expected. */ -TEST_F(guard_pages, multi_vma) +TEST_F(guard_regions, multi_vma) { const unsigned long page_size = self->page_size; char *ptr_region, *ptr, *ptr1, *ptr2, *ptr3; int i; /* Reserve a 100 page region over which we can install VMAs. */ - ptr_region = mmap(NULL, 100 * page_size, PROT_NONE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr_region = mmap_(self, variant, NULL, 100 * page_size, + PROT_NONE, 0, 0); ASSERT_NE(ptr_region, MAP_FAILED); /* Place a VMA of 10 pages size at the start of the region. */ - ptr1 = mmap(ptr_region, 10 * page_size, PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ptr1 = mmap_(self, variant, ptr_region, 10 * page_size, + PROT_READ | PROT_WRITE, MAP_FIXED, 0); ASSERT_NE(ptr1, MAP_FAILED); /* Place a VMA of 5 pages size 50 pages into the region. */ - ptr2 = mmap(&ptr_region[50 * page_size], 5 * page_size, - PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ptr2 = mmap_(self, variant, &ptr_region[50 * page_size], 5 * page_size, + PROT_READ | PROT_WRITE, MAP_FIXED, 0); ASSERT_NE(ptr2, MAP_FAILED); /* Place a VMA of 20 pages size at the end of the region. */ - ptr3 = mmap(&ptr_region[80 * page_size], 20 * page_size, - PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ptr3 = mmap_(self, variant, &ptr_region[80 * page_size], 20 * page_size, + PROT_READ | PROT_WRITE, MAP_FIXED, 0); ASSERT_NE(ptr3, MAP_FAILED); /* Unmap gaps. */ @@ -323,13 +483,11 @@ TEST_F(guard_pages, multi_vma) } /* Now map incompatible VMAs in the gaps. */ - ptr = mmap(&ptr_region[10 * page_size], 40 * page_size, - PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, &ptr_region[10 * page_size], 40 * page_size, + PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED, 0); ASSERT_NE(ptr, MAP_FAILED); - ptr = mmap(&ptr_region[55 * page_size], 25 * page_size, - PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, &ptr_region[55 * page_size], 25 * page_size, + PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED, 0); ASSERT_NE(ptr, MAP_FAILED); /* @@ -364,7 +522,7 @@ TEST_F(guard_pages, multi_vma) * Assert that batched operations performed using process_madvise() work as * expected. */ -TEST_F(guard_pages, process_madvise) +TEST_F(guard_regions, process_madvise) { const unsigned long page_size = self->page_size; char *ptr_region, *ptr1, *ptr2, *ptr3; @@ -372,8 +530,8 @@ TEST_F(guard_pages, process_madvise) struct iovec vec[6]; /* Reserve region to map over. */ - ptr_region = mmap(NULL, 100 * page_size, PROT_NONE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr_region = mmap_(self, variant, NULL, 100 * page_size, + PROT_NONE, 0, 0); ASSERT_NE(ptr_region, MAP_FAILED); /* @@ -381,9 +539,8 @@ TEST_F(guard_pages, process_madvise) * overwrite existing entries and test this code path against * overwriting existing entries. */ - ptr1 = mmap(&ptr_region[page_size], 10 * page_size, - PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_ANON | MAP_PRIVATE | MAP_POPULATE, -1, 0); + ptr1 = mmap_(self, variant, &ptr_region[page_size], 10 * page_size, + PROT_READ | PROT_WRITE, MAP_FIXED | MAP_POPULATE, 0); ASSERT_NE(ptr1, MAP_FAILED); /* We want guard markers at start/end of each VMA. */ vec[0].iov_base = ptr1; @@ -392,9 +549,8 @@ TEST_F(guard_pages, process_madvise) vec[1].iov_len = page_size; /* 5 pages offset 50 pages into reserve region. */ - ptr2 = mmap(&ptr_region[50 * page_size], 5 * page_size, - PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ptr2 = mmap_(self, variant, &ptr_region[50 * page_size], 5 * page_size, + PROT_READ | PROT_WRITE, MAP_FIXED, 0); ASSERT_NE(ptr2, MAP_FAILED); vec[2].iov_base = ptr2; vec[2].iov_len = page_size; @@ -402,9 +558,8 @@ TEST_F(guard_pages, process_madvise) vec[3].iov_len = page_size; /* 20 pages offset 79 pages into reserve region. */ - ptr3 = mmap(&ptr_region[79 * page_size], 20 * page_size, - PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ptr3 = mmap_(self, variant, &ptr_region[79 * page_size], 20 * page_size, + PROT_READ | PROT_WRITE, MAP_FIXED, 0); ASSERT_NE(ptr3, MAP_FAILED); vec[4].iov_base = ptr3; vec[4].iov_len = page_size; @@ -459,13 +614,13 @@ TEST_F(guard_pages, process_madvise) } /* Assert that unmapping ranges does not leave guard markers behind. */ -TEST_F(guard_pages, munmap) +TEST_F(guard_regions, munmap) { const unsigned long page_size = self->page_size; char *ptr, *ptr_new1, *ptr_new2; - ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Guard first and last pages. */ @@ -481,11 +636,11 @@ TEST_F(guard_pages, munmap) ASSERT_EQ(munmap(&ptr[9 * page_size], page_size), 0); /* Map over them.*/ - ptr_new1 = mmap(ptr, page_size, PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ptr_new1 = mmap_(self, variant, ptr, page_size, PROT_READ | PROT_WRITE, + MAP_FIXED, 0); ASSERT_NE(ptr_new1, MAP_FAILED); - ptr_new2 = mmap(&ptr[9 * page_size], page_size, PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ptr_new2 = mmap_(self, variant, &ptr[9 * page_size], page_size, + PROT_READ | PROT_WRITE, MAP_FIXED, 0); ASSERT_NE(ptr_new2, MAP_FAILED); /* Assert that they are now not guarded. */ @@ -497,14 +652,14 @@ TEST_F(guard_pages, munmap) } /* Assert that mprotect() operations have no bearing on guard markers. */ -TEST_F(guard_pages, mprotect) +TEST_F(guard_regions, mprotect) { const unsigned long page_size = self->page_size; char *ptr; int i; - ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Guard the middle of the range. */ @@ -545,14 +700,14 @@ TEST_F(guard_pages, mprotect) } /* Split and merge VMAs and make sure guard pages still behave. */ -TEST_F(guard_pages, split_merge) +TEST_F(guard_regions, split_merge) { const unsigned long page_size = self->page_size; char *ptr, *ptr_new; int i; - ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Guard the whole range. */ @@ -593,14 +748,14 @@ TEST_F(guard_pages, split_merge) } /* Now map them again - the unmap will have cleared the guards. */ - ptr_new = mmap(&ptr[2 * page_size], page_size, PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ptr_new = mmap_(self, variant, &ptr[2 * page_size], page_size, + PROT_READ | PROT_WRITE, MAP_FIXED, 0); ASSERT_NE(ptr_new, MAP_FAILED); - ptr_new = mmap(&ptr[5 * page_size], page_size, PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ptr_new = mmap_(self, variant, &ptr[5 * page_size], page_size, + PROT_READ | PROT_WRITE, MAP_FIXED, 0); ASSERT_NE(ptr_new, MAP_FAILED); - ptr_new = mmap(&ptr[8 * page_size], page_size, PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ptr_new = mmap_(self, variant, &ptr[8 * page_size], page_size, + PROT_READ | PROT_WRITE, MAP_FIXED, 0); ASSERT_NE(ptr_new, MAP_FAILED); /* Now make sure guard pages are established. */ @@ -676,14 +831,14 @@ TEST_F(guard_pages, split_merge) } /* Assert that MADV_DONTNEED does not remove guard markers. */ -TEST_F(guard_pages, dontneed) +TEST_F(guard_regions, dontneed) { const unsigned long page_size = self->page_size; char *ptr; int i; - ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Back the whole range. */ @@ -713,8 +868,16 @@ TEST_F(guard_pages, dontneed) ASSERT_FALSE(result); } else { ASSERT_TRUE(result); - /* Make sure we really did get reset to zero page. */ - ASSERT_EQ(*curr, '\0'); + switch (variant->backing) { + case ANON_BACKED: + /* If anon, then we get a zero page. */ + ASSERT_EQ(*curr, '\0'); + break; + default: + /* Otherwise, we get the file data. */ + ASSERT_EQ(*curr, 'y'); + break; + } } /* Now write... */ @@ -729,14 +892,14 @@ TEST_F(guard_pages, dontneed) } /* Assert that mlock()'ed pages work correctly with guard markers. */ -TEST_F(guard_pages, mlock) +TEST_F(guard_regions, mlock) { const unsigned long page_size = self->page_size; char *ptr; int i; - ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Populate. */ @@ -802,14 +965,14 @@ TEST_F(guard_pages, mlock) * * - Moving a mapping alone should retain markers as they are. */ -TEST_F(guard_pages, mremap_move) +TEST_F(guard_regions, mremap_move) { const unsigned long page_size = self->page_size; char *ptr, *ptr_new; /* Map 5 pages. */ - ptr = mmap(NULL, 5 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 5 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Place guard markers at both ends of the 5 page span. */ @@ -823,8 +986,7 @@ TEST_F(guard_pages, mremap_move) /* Map a new region we will move this range into. Doing this ensures * that we have reserved a range to map into. */ - ptr_new = mmap(NULL, 5 * page_size, PROT_NONE, MAP_ANON | MAP_PRIVATE, - -1, 0); + ptr_new = mmap_(self, variant, NULL, 5 * page_size, PROT_NONE, 0, 0); ASSERT_NE(ptr_new, MAP_FAILED); ASSERT_EQ(mremap(ptr, 5 * page_size, 5 * page_size, @@ -849,14 +1011,14 @@ TEST_F(guard_pages, mremap_move) * will have to remove guard pages manually to fix up (they'd have to do the * same if it were a PROT_NONE mapping). */ -TEST_F(guard_pages, mremap_expand) +TEST_F(guard_regions, mremap_expand) { const unsigned long page_size = self->page_size; char *ptr, *ptr_new; /* Map 10 pages... */ - ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* ...But unmap the last 5 so we can ensure we can expand into them. */ ASSERT_EQ(munmap(&ptr[5 * page_size], 5 * page_size), 0); @@ -880,8 +1042,7 @@ TEST_F(guard_pages, mremap_expand) ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size])); /* Reserve a region which we can move to and expand into. */ - ptr_new = mmap(NULL, 20 * page_size, PROT_NONE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr_new = mmap_(self, variant, NULL, 20 * page_size, PROT_NONE, 0, 0); ASSERT_NE(ptr_new, MAP_FAILED); /* Now move and expand into it. */ @@ -912,15 +1073,15 @@ TEST_F(guard_pages, mremap_expand) * if the user were using a PROT_NONE mapping they'd have to manually fix this * up also so this is OK. */ -TEST_F(guard_pages, mremap_shrink) +TEST_F(guard_regions, mremap_shrink) { const unsigned long page_size = self->page_size; char *ptr; int i; /* Map 5 pages. */ - ptr = mmap(NULL, 5 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 5 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Place guard markers at both ends of the 5 page span. */ @@ -976,7 +1137,7 @@ TEST_F(guard_pages, mremap_shrink) * Assert that forking a process with VMAs that do not have VM_WIPEONFORK set * retain guard pages. */ -TEST_F(guard_pages, fork) +TEST_F(guard_regions, fork) { const unsigned long page_size = self->page_size; char *ptr; @@ -984,8 +1145,8 @@ TEST_F(guard_pages, fork) int i; /* Map 10 pages. */ - ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Establish guard pages in the first 5 pages. */ @@ -1031,16 +1192,19 @@ TEST_F(guard_pages, fork) * Assert expected behaviour after we fork populated ranges of anonymous memory * and then guard and unguard the range. */ -TEST_F(guard_pages, fork_cow) +TEST_F(guard_regions, fork_cow) { const unsigned long page_size = self->page_size; char *ptr; pid_t pid; int i; + if (variant->backing != ANON_BACKED) + SKIP(return, "CoW only supported on anon mappings"); + /* Map 10 pages. */ - ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Populate range. */ @@ -1102,16 +1266,19 @@ TEST_F(guard_pages, fork_cow) * Assert that forking a process with VMAs that do have VM_WIPEONFORK set * behave as expected. */ -TEST_F(guard_pages, fork_wipeonfork) +TEST_F(guard_regions, fork_wipeonfork) { const unsigned long page_size = self->page_size; char *ptr; pid_t pid; int i; + if (variant->backing != ANON_BACKED) + SKIP(return, "Wipe on fork only supported on anon mappings"); + /* Map 10 pages. */ - ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Mark wipe on fork. */ @@ -1152,15 +1319,18 @@ TEST_F(guard_pages, fork_wipeonfork) } /* Ensure that MADV_FREE retains guard entries as expected. */ -TEST_F(guard_pages, lazyfree) +TEST_F(guard_regions, lazyfree) { const unsigned long page_size = self->page_size; char *ptr; int i; + if (variant->backing != ANON_BACKED) + SKIP(return, "MADV_FREE only supported on anon mappings"); + /* Map 10 pages. */ - ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Guard range. */ @@ -1188,14 +1358,14 @@ TEST_F(guard_pages, lazyfree) } /* Ensure that MADV_POPULATE_READ, MADV_POPULATE_WRITE behave as expected. */ -TEST_F(guard_pages, populate) +TEST_F(guard_regions, populate) { const unsigned long page_size = self->page_size; char *ptr; /* Map 10 pages. */ - ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Guard range. */ @@ -1214,15 +1384,15 @@ TEST_F(guard_pages, populate) } /* Ensure that MADV_COLD, MADV_PAGEOUT do not remove guard markers. */ -TEST_F(guard_pages, cold_pageout) +TEST_F(guard_regions, cold_pageout) { const unsigned long page_size = self->page_size; char *ptr; int i; /* Map 10 pages. */ - ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Guard range. */ @@ -1260,7 +1430,7 @@ TEST_F(guard_pages, cold_pageout) } /* Ensure that guard pages do not break userfaultd. */ -TEST_F(guard_pages, uffd) +TEST_F(guard_regions, uffd) { const unsigned long page_size = self->page_size; int uffd; @@ -1273,6 +1443,9 @@ TEST_F(guard_pages, uffd) struct uffdio_register reg; struct uffdio_range range; + if (!is_anon_backed(variant)) + SKIP(return, "uffd only works on anon backing"); + /* Set up uffd. */ uffd = userfaultfd(0); if (uffd == -1 && errno == EPERM) @@ -1282,8 +1455,8 @@ TEST_F(guard_pages, uffd) ASSERT_EQ(ioctl(uffd, UFFDIO_API, &api), 0); /* Map 10 pages. */ - ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Register the range with uffd. */ @@ -1309,4 +1482,593 @@ TEST_F(guard_pages, uffd) ASSERT_EQ(munmap(ptr, 10 * page_size), 0); } +/* + * Mark a region within a file-backed mapping using MADV_SEQUENTIAL so we + * aggressively read-ahead, then install guard regions and assert that it + * behaves correctly. + * + * We page out using MADV_PAGEOUT before checking guard regions so we drop page + * cache folios, meaning we maximise the possibility of some broken readahead. + */ +TEST_F(guard_regions, madvise_sequential) +{ + char *ptr; + int i; + const unsigned long page_size = self->page_size; + + if (variant->backing == ANON_BACKED) + SKIP(return, "MADV_SEQUENTIAL meaningful only for file-backed"); + + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Establish a pattern of data in the file. */ + set_pattern(ptr, 10, page_size); + ASSERT_TRUE(check_pattern(ptr, 10, page_size)); + + /* Mark it as being accessed sequentially. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_SEQUENTIAL), 0); + + /* Mark every other page a guard page. */ + for (i = 0; i < 10; i += 2) { + char *ptr2 = &ptr[i * page_size]; + + ASSERT_EQ(madvise(ptr2, page_size, MADV_GUARD_INSTALL), 0); + } + + /* Now page it out. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_PAGEOUT), 0); + + /* Now make sure pages are as expected. */ + for (i = 0; i < 10; i++) { + char *chrp = &ptr[i * page_size]; + + if (i % 2 == 0) { + bool result = try_read_write_buf(chrp); + + ASSERT_FALSE(result); + } else { + ASSERT_EQ(*chrp, 'a' + i); + } + } + + /* Now remove guard pages. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0); + + /* Now make sure all data is as expected. */ + if (!check_pattern(ptr, 10, page_size)) + ASSERT_TRUE(false); + + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* + * Check that file-backed mappings implement guard regions with MAP_PRIVATE + * correctly. + */ +TEST_F(guard_regions, map_private) +{ + const unsigned long page_size = self->page_size; + char *ptr_shared, *ptr_private; + int i; + + if (variant->backing == ANON_BACKED) + SKIP(return, "MAP_PRIVATE test specific to file-backed"); + + ptr_shared = mmap_(self, variant, NULL, 10 * page_size, PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr_shared, MAP_FAILED); + + /* Manually mmap(), do not use mmap_() wrapper so we can force MAP_PRIVATE. */ + ptr_private = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, self->fd, 0); + ASSERT_NE(ptr_private, MAP_FAILED); + + /* Set pattern in shared mapping. */ + set_pattern(ptr_shared, 10, page_size); + + /* Install guard regions in every other page in the shared mapping. */ + for (i = 0; i < 10; i += 2) { + char *ptr = &ptr_shared[i * page_size]; + + ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0); + } + + for (i = 0; i < 10; i++) { + /* Every even shared page should be guarded. */ + ASSERT_EQ(try_read_buf(&ptr_shared[i * page_size]), i % 2 != 0); + /* Private mappings should always be readable. */ + ASSERT_TRUE(try_read_buf(&ptr_private[i * page_size])); + } + + /* Install guard regions in every other page in the private mapping. */ + for (i = 0; i < 10; i += 2) { + char *ptr = &ptr_private[i * page_size]; + + ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0); + } + + for (i = 0; i < 10; i++) { + /* Every even shared page should be guarded. */ + ASSERT_EQ(try_read_buf(&ptr_shared[i * page_size]), i % 2 != 0); + /* Every odd private page should be guarded. */ + ASSERT_EQ(try_read_buf(&ptr_private[i * page_size]), i % 2 != 0); + } + + /* Remove guard regions from shared mapping. */ + ASSERT_EQ(madvise(ptr_shared, 10 * page_size, MADV_GUARD_REMOVE), 0); + + for (i = 0; i < 10; i++) { + /* Shared mappings should always be readable. */ + ASSERT_TRUE(try_read_buf(&ptr_shared[i * page_size])); + /* Every even private page should be guarded. */ + ASSERT_EQ(try_read_buf(&ptr_private[i * page_size]), i % 2 != 0); + } + + /* Remove guard regions from private mapping. */ + ASSERT_EQ(madvise(ptr_private, 10 * page_size, MADV_GUARD_REMOVE), 0); + + for (i = 0; i < 10; i++) { + /* Shared mappings should always be readable. */ + ASSERT_TRUE(try_read_buf(&ptr_shared[i * page_size])); + /* Private mappings should always be readable. */ + ASSERT_TRUE(try_read_buf(&ptr_private[i * page_size])); + } + + /* Ensure patterns are intact. */ + ASSERT_TRUE(check_pattern(ptr_shared, 10, page_size)); + ASSERT_TRUE(check_pattern(ptr_private, 10, page_size)); + + /* Now write out every other page to MAP_PRIVATE. */ + for (i = 0; i < 10; i += 2) { + char *ptr = &ptr_private[i * page_size]; + + memset(ptr, 'a' + i, page_size); + } + + /* + * At this point the mapping is: + * + * 0123456789 + * SPSPSPSPSP + * + * Where S = shared, P = private mappings. + */ + + /* Now mark the beginning of the mapping guarded. */ + ASSERT_EQ(madvise(ptr_private, 5 * page_size, MADV_GUARD_INSTALL), 0); + + /* + * This renders the mapping: + * + * 0123456789 + * xxxxxPSPSP + */ + + for (i = 0; i < 10; i++) { + char *ptr = &ptr_private[i * page_size]; + + /* Ensure guard regions as expected. */ + ASSERT_EQ(try_read_buf(ptr), i >= 5); + /* The shared mapping should always succeed. */ + ASSERT_TRUE(try_read_buf(&ptr_shared[i * page_size])); + } + + /* Remove the guard regions altogether. */ + ASSERT_EQ(madvise(ptr_private, 10 * page_size, MADV_GUARD_REMOVE), 0); + + /* + * + * We now expect the mapping to be: + * + * 0123456789 + * SSSSSPSPSP + * + * As we removed guard regions, the private pages from the first 5 will + * have been zapped, so on fault will reestablish the shared mapping. + */ + + for (i = 0; i < 10; i++) { + char *ptr = &ptr_private[i * page_size]; + + /* + * Assert that shared mappings in the MAP_PRIVATE mapping match + * the shared mapping. + */ + if (i < 5 || i % 2 == 0) { + char *ptr_s = &ptr_shared[i * page_size]; + + ASSERT_EQ(memcmp(ptr, ptr_s, page_size), 0); + continue; + } + + /* Everything else is a private mapping. */ + ASSERT_TRUE(is_buf_eq(ptr, page_size, 'a' + i)); + } + + ASSERT_EQ(munmap(ptr_shared, 10 * page_size), 0); + ASSERT_EQ(munmap(ptr_private, 10 * page_size), 0); +} + +/* Test that guard regions established over a read-only mapping function correctly. */ +TEST_F(guard_regions, readonly_file) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + if (variant->backing == ANON_BACKED) + SKIP(return, "Read-only test specific to file-backed"); + + /* Map shared so we can populate with pattern, populate it, unmap. */ + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + set_pattern(ptr, 10, page_size); + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); + /* Close the fd so we can re-open read-only. */ + ASSERT_EQ(close(self->fd), 0); + + /* Re-open read-only. */ + self->fd = open(self->path, O_RDONLY); + ASSERT_NE(self->fd, -1); + /* Re-map read-only. */ + ptr = mmap_(self, variant, NULL, 10 * page_size, PROT_READ, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Mark every other page guarded. */ + for (i = 0; i < 10; i += 2) { + char *ptr_pg = &ptr[i * page_size]; + + ASSERT_EQ(madvise(ptr_pg, page_size, MADV_GUARD_INSTALL), 0); + } + + /* Assert that the guard regions are in place.*/ + for (i = 0; i < 10; i++) { + char *ptr_pg = &ptr[i * page_size]; + + ASSERT_EQ(try_read_buf(ptr_pg), i % 2 != 0); + } + + /* Remove guard regions. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0); + + /* Ensure the data is as expected. */ + ASSERT_TRUE(check_pattern(ptr, 10, page_size)); + + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +TEST_F(guard_regions, fault_around) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + if (variant->backing == ANON_BACKED) + SKIP(return, "Fault-around test specific to file-backed"); + + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Establish a pattern in the backing file. */ + set_pattern(ptr, 10, page_size); + + /* + * Now drop it from the page cache so we get major faults when next we + * map it. + */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_PAGEOUT), 0); + + /* Unmap and remap 'to be sure'. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Now make every even page guarded. */ + for (i = 0; i < 10; i += 2) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(madvise(ptr_p, page_size, MADV_GUARD_INSTALL), 0); + } + + /* Now fault in every odd page. This should trigger fault-around. */ + for (i = 1; i < 10; i += 2) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_buf(ptr_p)); + } + + /* Finally, ensure that guard regions are intact as expected. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(try_read_buf(ptr_p), i % 2 != 0); + } + + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +TEST_F(guard_regions, truncation) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + if (variant->backing == ANON_BACKED) + SKIP(return, "Truncation test specific to file-backed"); + + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* + * Establish a pattern in the backing file, just so there is data + * there. + */ + set_pattern(ptr, 10, page_size); + + /* Now make every even page guarded. */ + for (i = 0; i < 10; i += 2) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(madvise(ptr_p, page_size, MADV_GUARD_INSTALL), 0); + } + + /* Now assert things are as expected. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(try_read_write_buf(ptr_p), i % 2 != 0); + } + + /* Now truncate to actually used size (initialised to 100). */ + ASSERT_EQ(ftruncate(self->fd, 10 * page_size), 0); + + /* Here the guard regions will remain intact. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(try_read_write_buf(ptr_p), i % 2 != 0); + } + + /* Now truncate to half the size, then truncate again to the full size. */ + ASSERT_EQ(ftruncate(self->fd, 5 * page_size), 0); + ASSERT_EQ(ftruncate(self->fd, 10 * page_size), 0); + + /* Again, guard pages will remain intact. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(try_read_write_buf(ptr_p), i % 2 != 0); + } + + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +TEST_F(guard_regions, hole_punch) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + if (variant->backing == ANON_BACKED) + SKIP(return, "Truncation test specific to file-backed"); + + /* Establish pattern in mapping. */ + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + set_pattern(ptr, 10, page_size); + + /* Install a guard region in the middle of the mapping. */ + ASSERT_EQ(madvise(&ptr[3 * page_size], 4 * page_size, + MADV_GUARD_INSTALL), 0); + + /* + * The buffer will now be: + * + * 0123456789 + * ***xxxx*** + * + * Where * is data and x is the guard region. + */ + + /* Ensure established. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(try_read_buf(ptr_p), i < 3 || i >= 7); + } + + /* Now hole punch the guarded region. */ + ASSERT_EQ(madvise(&ptr[3 * page_size], 4 * page_size, + MADV_REMOVE), 0); + + /* Ensure guard regions remain. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(try_read_buf(ptr_p), i < 3 || i >= 7); + } + + /* Now remove guard region throughout. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0); + + /* Check that the pattern exists in non-hole punched region. */ + ASSERT_TRUE(check_pattern(ptr, 3, page_size)); + /* Check that hole punched region is zeroed. */ + ASSERT_TRUE(is_buf_eq(&ptr[3 * page_size], 4 * page_size, '\0')); + /* Check that the pattern exists in the remainder of the file. */ + ASSERT_TRUE(check_pattern_offset(ptr, 3, page_size, 7)); + + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* + * Ensure that a memfd works correctly with guard regions, that we can write + * seal it then open the mapping read-only and still establish guard regions + * within, remove those guard regions and have everything work correctly. + */ +TEST_F(guard_regions, memfd_write_seal) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + if (variant->backing != SHMEM_BACKED) + SKIP(return, "memfd write seal test specific to shmem"); + + /* OK, we need a memfd, so close existing one. */ + ASSERT_EQ(close(self->fd), 0); + + /* Create and truncate memfd. */ + self->fd = memfd_create("guard_regions_memfd_seals_test", + MFD_ALLOW_SEALING); + ASSERT_NE(self->fd, -1); + ASSERT_EQ(ftruncate(self->fd, 10 * page_size), 0); + + /* Map, set pattern, unmap. */ + ptr = mmap_(self, variant, NULL, 10 * page_size, PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + set_pattern(ptr, 10, page_size); + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); + + /* Write-seal the memfd. */ + ASSERT_EQ(fcntl(self->fd, F_ADD_SEALS, F_SEAL_WRITE), 0); + + /* Now map the memfd readonly. */ + ptr = mmap_(self, variant, NULL, 10 * page_size, PROT_READ, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Ensure pattern is as expected. */ + ASSERT_TRUE(check_pattern(ptr, 10, page_size)); + + /* Now make every even page guarded. */ + for (i = 0; i < 10; i += 2) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(madvise(ptr_p, page_size, MADV_GUARD_INSTALL), 0); + } + + /* Now assert things are as expected. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(try_read_buf(ptr_p), i % 2 != 0); + } + + /* Now remove guard regions. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0); + + /* Ensure pattern is as expected. */ + ASSERT_TRUE(check_pattern(ptr, 10, page_size)); + + /* Ensure write seal intact. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_FALSE(try_write_buf(ptr_p)); + } + + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + + +/* + * Since we are now permitted to establish guard regions in read-only anonymous + * mappings, for the sake of thoroughness, though it probably has no practical + * use, test that guard regions function with a mapping to the anonymous zero + * page. + */ +TEST_F(guard_regions, anon_zeropage) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + if (!is_anon_backed(variant)) + SKIP(return, "anon zero page test specific to anon/shmem"); + + /* Obtain a read-only i.e. anon zero page mapping. */ + ptr = mmap_(self, variant, NULL, 10 * page_size, PROT_READ, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Now make every even page guarded. */ + for (i = 0; i < 10; i += 2) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(madvise(ptr_p, page_size, MADV_GUARD_INSTALL), 0); + } + + /* Now assert things are as expected. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(try_read_buf(ptr_p), i % 2 != 0); + } + + /* Now remove all guard regions. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0); + + /* Now assert things are as expected. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_buf(ptr_p)); + } + + /* Ensure zero page...*/ + ASSERT_TRUE(is_buf_eq(ptr, 10 * page_size, '\0')); + + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* + * Assert that /proc/$pid/pagemap correctly identifies guard region ranges. + */ +TEST_F(guard_regions, pagemap) +{ + const unsigned long page_size = self->page_size; + int proc_fd; + char *ptr; + int i; + + proc_fd = open("/proc/self/pagemap", O_RDONLY); + ASSERT_NE(proc_fd, -1); + + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Read from pagemap, and assert no guard regions are detected. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + unsigned long entry = pagemap_get_entry(proc_fd, ptr_p); + unsigned long masked = entry & PM_GUARD_REGION; + + ASSERT_EQ(masked, 0); + } + + /* Install a guard region in every other page. */ + for (i = 0; i < 10; i += 2) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(madvise(ptr_p, page_size, MADV_GUARD_INSTALL), 0); + } + + /* Re-read from pagemap, and assert guard regions are detected. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + unsigned long entry = pagemap_get_entry(proc_fd, ptr_p); + unsigned long masked = entry & PM_GUARD_REGION; + + ASSERT_EQ(masked, i % 2 == 0 ? PM_GUARD_REGION : 0); + } + + ASSERT_EQ(close(proc_fd), 0); + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c index 9423ad439a61..21595b20bbc3 100644 --- a/tools/testing/selftests/mm/gup_longterm.c +++ b/tools/testing/selftests/mm/gup_longterm.c @@ -96,13 +96,17 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) int ret; if (ftruncate(fd, size)) { - ksft_test_result_fail("ftruncate() failed\n"); + if (errno == ENOENT) { + skip_test_dodgy_fs("ftruncate()"); + } else { + ksft_test_result_fail("ftruncate() failed (%s)\n", strerror(errno)); + } return; } if (fallocate(fd, 0, 0, size)) { if (size == pagesize) - ksft_test_result_fail("fallocate() failed\n"); + ksft_test_result_fail("fallocate() failed (%s)\n", strerror(errno)); else ksft_test_result_skip("need more free huge pages\n"); return; @@ -112,7 +116,7 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) shared ? MAP_SHARED : MAP_PRIVATE, fd, 0); if (mem == MAP_FAILED) { if (size == pagesize || shared) - ksft_test_result_fail("mmap() failed\n"); + ksft_test_result_fail("mmap() failed (%s)\n", strerror(errno)); else ksft_test_result_skip("need more free huge pages\n"); return; @@ -130,7 +134,7 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) */ ret = mprotect(mem, size, PROT_READ); if (ret) { - ksft_test_result_fail("mprotect() failed\n"); + ksft_test_result_fail("mprotect() failed (%s)\n", strerror(errno)); goto munmap; } /* FALLTHROUGH */ @@ -165,18 +169,20 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) args.flags |= rw ? PIN_LONGTERM_TEST_FLAG_USE_WRITE : 0; ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args); if (ret && errno == EINVAL) { - ksft_test_result_skip("PIN_LONGTERM_TEST_START failed\n"); + ksft_test_result_skip("PIN_LONGTERM_TEST_START failed (EINVAL)n"); break; } else if (ret && errno == EFAULT) { ksft_test_result(!should_work, "Should have failed\n"); break; } else if (ret) { - ksft_test_result_fail("PIN_LONGTERM_TEST_START failed\n"); + ksft_test_result_fail("PIN_LONGTERM_TEST_START failed (%s)\n", + strerror(errno)); break; } if (ioctl(gup_fd, PIN_LONGTERM_TEST_STOP)) - ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed\n"); + ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed (%s)\n", + strerror(errno)); /* * TODO: if the kernel ever supports long-term R/W pinning on @@ -202,7 +208,8 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) /* Skip on errors, as we might just lack kernel support. */ ret = io_uring_queue_init(1, &ring, 0); if (ret < 0) { - ksft_test_result_skip("io_uring_queue_init() failed\n"); + ksft_test_result_skip("io_uring_queue_init() failed (%s)\n", + strerror(-ret)); break; } /* @@ -215,13 +222,15 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) /* Only new kernels return EFAULT. */ if (ret && (errno == ENOSPC || errno == EOPNOTSUPP || errno == EFAULT)) { - ksft_test_result(!should_work, "Should have failed\n"); + ksft_test_result(!should_work, "Should have failed (%s)\n", + strerror(errno)); } else if (ret) { /* * We might just lack support or have insufficient * MEMLOCK limits. */ - ksft_test_result_skip("io_uring_register_buffers() failed\n"); + ksft_test_result_skip("io_uring_register_buffers() failed (%s)\n", + strerror(-ret)); } else { ksft_test_result(should_work, "Should have worked\n"); io_uring_unregister_buffers(&ring); @@ -249,7 +258,7 @@ static void run_with_memfd(test_fn fn, const char *desc) fd = memfd_create("test", 0); if (fd < 0) { - ksft_test_result_fail("memfd_create() failed\n"); + ksft_test_result_fail("memfd_create() failed (%s)\n", strerror(errno)); return; } @@ -266,13 +275,13 @@ static void run_with_tmpfile(test_fn fn, const char *desc) file = tmpfile(); if (!file) { - ksft_test_result_fail("tmpfile() failed\n"); + ksft_test_result_fail("tmpfile() failed (%s)\n", strerror(errno)); return; } fd = fileno(file); if (fd < 0) { - ksft_test_result_fail("fileno() failed\n"); + ksft_test_result_fail("fileno() failed (%s)\n", strerror(errno)); goto close; } @@ -290,12 +299,12 @@ static void run_with_local_tmpfile(test_fn fn, const char *desc) fd = mkstemp(filename); if (fd < 0) { - ksft_test_result_fail("mkstemp() failed\n"); + ksft_test_result_fail("mkstemp() failed (%s)\n", strerror(errno)); return; } if (unlink(filename)) { - ksft_test_result_fail("unlink() failed\n"); + ksft_test_result_fail("unlink() failed (%s)\n", strerror(errno)); goto close; } @@ -317,7 +326,7 @@ static void run_with_memfd_hugetlb(test_fn fn, const char *desc, fd = memfd_create("test", flags); if (fd < 0) { - ksft_test_result_skip("memfd_create() failed\n"); + ksft_test_result_skip("memfd_create() failed (%s)\n", strerror(errno)); return; } diff --git a/tools/testing/selftests/mm/map_populate.c b/tools/testing/selftests/mm/map_populate.c index 5c8a53869b1b..9df2636c829b 100644 --- a/tools/testing/selftests/mm/map_populate.c +++ b/tools/testing/selftests/mm/map_populate.c @@ -18,6 +18,8 @@ #include <unistd.h> #include "../kselftest.h" +#include "vm_util.h" + #define MMAP_SZ 4096 #define BUG_ON(condition, description) \ @@ -87,6 +89,9 @@ int main(int argc, char **argv) BUG_ON(!ftmp, "tmpfile()"); ret = ftruncate(fileno(ftmp), MMAP_SZ); + if (ret < 0 && errno == ENOENT) { + skip_test_dodgy_fs("ftruncate()"); + } BUG_ON(ret, "ftruncate()"); smap = mmap(0, MMAP_SZ, PROT_READ | PROT_WRITE, diff --git a/tools/testing/selftests/mm/mlock-random-test.c b/tools/testing/selftests/mm/mlock-random-test.c index 1cd80b0f76c3..b8d7e966f44c 100644 --- a/tools/testing/selftests/mm/mlock-random-test.c +++ b/tools/testing/selftests/mm/mlock-random-test.c @@ -161,9 +161,9 @@ static void test_mlock_within_limit(char *p, int alloc_size) MLOCK_ONFAULT); if (ret) - ksft_exit_fail_msg("%s() failure at |%p(%d)| mlock:|%p(%d)|\n", + ksft_exit_fail_msg("%s() failure (%s) at |%p(%d)| mlock:|%p(%d)|\n", is_mlock ? "mlock" : "mlock2", - p, alloc_size, + strerror(errno), p, alloc_size, p + start_offset, lock_size); } diff --git a/tools/testing/selftests/mm/mlock2.h b/tools/testing/selftests/mm/mlock2.h index 4417eaa5cfb7..81e77fa41901 100644 --- a/tools/testing/selftests/mm/mlock2.h +++ b/tools/testing/selftests/mm/mlock2.h @@ -6,7 +6,13 @@ static int mlock2_(void *start, size_t len, int flags) { - return syscall(__NR_mlock2, start, len, flags); + int ret = syscall(__NR_mlock2, start, len, flags); + + if (ret) { + errno = ret; + return -1; + } + return 0; } static FILE *seek_to_smaps_entry(unsigned long addr) diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 7cc71d942f83..9aff33b10999 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -187,9 +187,10 @@ if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then printf "Not enough huge pages available (%d < %d)\n" \ "$freepgs" "$needpgs" fi + HAVE_HUGEPAGES=1 else echo "no hugetlbfs support in kernel?" - exit 1 + HAVE_HUGEPAGES=0 fi # filter 64bit architectures @@ -218,13 +219,20 @@ pretty_name() { # Usage: run_test [test binary] [arbitrary test arguments...] run_test() { if test_selected ${CATEGORY}; then + local skip=0 + # On memory constrainted systems some tests can fail to allocate hugepages. # perform some cleanup before the test for a higher success rate. if [ ${CATEGORY} == "thp" -o ${CATEGORY} == "hugetlb" ]; then - echo 3 > /proc/sys/vm/drop_caches - sleep 2 - echo 1 > /proc/sys/vm/compact_memory - sleep 2 + if [ "${HAVE_HUGEPAGES}" = "1" ]; then + echo 3 > /proc/sys/vm/drop_caches + sleep 2 + echo 1 > /proc/sys/vm/compact_memory + sleep 2 + else + echo "hugepages not supported" | tap_prefix + skip=1 + fi fi local test=$(pretty_name "$*") @@ -232,8 +240,12 @@ run_test() { local sep=$(echo -n "$title" | tr "[:graph:][:space:]" -) printf "%s\n%s\n%s\n" "$sep" "$title" "$sep" | tap_prefix - ("$@" 2>&1) | tap_prefix - local ret=${PIPESTATUS[0]} + if [ "${skip}" != "1" ]; then + ("$@" 2>&1) | tap_prefix + local ret=${PIPESTATUS[0]} + else + local ret=$ksft_skip + fi count_total=$(( count_total + 1 )) if [ $ret -eq 0 ]; then count_pass=$(( count_pass + 1 )) @@ -271,13 +283,15 @@ CATEGORY="hugetlb" run_test ./hugepage-vmemmap CATEGORY="hugetlb" run_test ./hugetlb-madvise CATEGORY="hugetlb" run_test ./hugetlb_dio -nr_hugepages_tmp=$(cat /proc/sys/vm/nr_hugepages) -# For this test, we need one and just one huge page -echo 1 > /proc/sys/vm/nr_hugepages -CATEGORY="hugetlb" run_test ./hugetlb_fault_after_madv -CATEGORY="hugetlb" run_test ./hugetlb_madv_vs_map -# Restore the previous number of huge pages, since further tests rely on it -echo "$nr_hugepages_tmp" > /proc/sys/vm/nr_hugepages +if [ "${HAVE_HUGEPAGES}" = "1" ]; then + nr_hugepages_tmp=$(cat /proc/sys/vm/nr_hugepages) + # For this test, we need one and just one huge page + echo 1 > /proc/sys/vm/nr_hugepages + CATEGORY="hugetlb" run_test ./hugetlb_fault_after_madv + CATEGORY="hugetlb" run_test ./hugetlb_madv_vs_map + # Restore the previous number of huge pages, since further tests rely on it + echo "$nr_hugepages_tmp" > /proc/sys/vm/nr_hugepages +fi if test_selected "hugetlb"; then echo "NOTE: These hugetlb tests provide minimal coverage. Use" | tap_prefix @@ -311,14 +325,35 @@ CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb "$half_ufd_size_MB" 3 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb-private "$half_ufd_size_MB" 32 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} shmem 20 16 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} shmem-private 20 16 -CATEGORY="userfaultfd" run_test ./uffd-wp-mremap +# uffd-wp-mremap requires at least one page of each size. +have_all_size_hugepgs=true +declare -A nr_size_hugepgs +for f in /sys/kernel/mm/hugepages/**/nr_hugepages; do + old=$(cat $f) + nr_size_hugepgs["$f"]="$old" + if [ "$old" == 0 ]; then + echo 1 > "$f" + fi + if [ $(cat "$f") == 0 ]; then + have_all_size_hugepgs=false + break + fi +done +if $have_all_size_hugepgs; then + CATEGORY="userfaultfd" run_test ./uffd-wp-mremap +else + echo "# SKIP ./uffd-wp-mremap" +fi #cleanup +for f in "${!nr_size_hugepgs[@]}"; do + echo "${nr_size_hugepgs["$f"]}" > "$f" +done echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages CATEGORY="compaction" run_test ./compaction_test -if command -v sudo &> /dev/null; +if command -v sudo &> /dev/null && sudo -u nobody ls ./on-fault-limit >/dev/null; then CATEGORY="mlock" run_test sudo -u nobody ./on-fault-limit else @@ -381,19 +416,21 @@ CATEGORY="mremap" run_test ./mremap_dontunmap CATEGORY="hmm" run_test bash ./test_hmm.sh smoke # MADV_GUARD_INSTALL and MADV_GUARD_REMOVE tests -CATEGORY="madv_guard" run_test ./guard-pages +CATEGORY="madv_guard" run_test ./guard-regions # MADV_POPULATE_READ and MADV_POPULATE_WRITE tests CATEGORY="madv_populate" run_test ./madv_populate if [ -x ./memfd_secret ] then -(echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope 2>&1) | tap_prefix +(echo 0 > /proc/sys/kernel/yama/ptrace_scope 2>&1) | tap_prefix CATEGORY="memfd_secret" run_test ./memfd_secret fi # KSM KSM_MERGE_TIME_HUGE_PAGES test with size of 100 -CATEGORY="ksm" run_test ./ksm_tests -H -s 100 +if [ "${HAVE_HUGEPAGES}" = "1" ]; then + CATEGORY="ksm" run_test ./ksm_tests -H -s 100 +fi # KSM KSM_MERGE_TIME test with size of 100 CATEGORY="ksm" run_test ./ksm_tests -P -s 100 # KSM MADV_MERGEABLE test with 10 identical pages @@ -442,15 +479,17 @@ CATEGORY="thp" run_test ./transhuge-stress -d 20 # Try to create XFS if not provided if [ -z "${SPLIT_HUGE_PAGE_TEST_XFS_PATH}" ]; then - if test_selected "thp"; then - if grep xfs /proc/filesystems &>/dev/null; then - XFS_IMG=$(mktemp /tmp/xfs_img_XXXXXX) - SPLIT_HUGE_PAGE_TEST_XFS_PATH=$(mktemp -d /tmp/xfs_dir_XXXXXX) - truncate -s 314572800 ${XFS_IMG} - mkfs.xfs -q ${XFS_IMG} - mount -o loop ${XFS_IMG} ${SPLIT_HUGE_PAGE_TEST_XFS_PATH} - MOUNTED_XFS=1 - fi + if [ "${HAVE_HUGEPAGES}" = "1" ]; then + if test_selected "thp"; then + if grep xfs /proc/filesystems &>/dev/null; then + XFS_IMG=$(mktemp /tmp/xfs_img_XXXXXX) + SPLIT_HUGE_PAGE_TEST_XFS_PATH=$(mktemp -d /tmp/xfs_dir_XXXXXX) + truncate -s 314572800 ${XFS_IMG} + mkfs.xfs -q ${XFS_IMG} + mount -o loop ${XFS_IMG} ${SPLIT_HUGE_PAGE_TEST_XFS_PATH} + MOUNTED_XFS=1 + fi + fi fi fi diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c index 3f353f3d070f..aa7400ed0e99 100644 --- a/tools/testing/selftests/mm/split_huge_page_test.c +++ b/tools/testing/selftests/mm/split_huge_page_test.c @@ -5,6 +5,7 @@ */ #define _GNU_SOURCE +#include <assert.h> #include <stdio.h> #include <stdlib.h> #include <stdarg.h> @@ -14,6 +15,7 @@ #include <fcntl.h> #include <sys/mman.h> #include <sys/mount.h> +#include <sys/param.h> #include <malloc.h> #include <stdbool.h> #include <time.h> @@ -261,18 +263,32 @@ void split_pte_mapped_thp(void) close(kpageflags_fd); } -void split_file_backed_thp(void) +void split_file_backed_thp(int order) { int status; int fd; - ssize_t num_written; char tmpfs_template[] = "/tmp/thp_split_XXXXXX"; const char *tmpfs_loc = mkdtemp(tmpfs_template); char testfile[INPUT_MAX]; + ssize_t num_written, num_read; + char *file_buf1, *file_buf2; uint64_t pgoff_start = 0, pgoff_end = 1024; + int i; ksft_print_msg("Please enable pr_debug in split_huge_pages_in_file() for more info.\n"); + file_buf1 = (char *)malloc(pmd_pagesize); + file_buf2 = (char *)malloc(pmd_pagesize); + + if (!file_buf1 || !file_buf2) { + ksft_print_msg("cannot allocate file buffers\n"); + goto out; + } + + for (i = 0; i < pmd_pagesize; i++) + file_buf1[i] = (char)i; + memset(file_buf2, 0, pmd_pagesize); + status = mount("tmpfs", tmpfs_loc, "tmpfs", 0, "huge=always,size=4m"); if (status) @@ -281,26 +297,45 @@ void split_file_backed_thp(void) status = snprintf(testfile, INPUT_MAX, "%s/thp_file", tmpfs_loc); if (status >= INPUT_MAX) { ksft_exit_fail_msg("Fail to create file-backed THP split testing file\n"); + goto cleanup; } - fd = open(testfile, O_CREAT|O_WRONLY, 0664); + fd = open(testfile, O_CREAT|O_RDWR, 0664); if (fd == -1) { ksft_perror("Cannot open testing file"); goto cleanup; } - /* write something to the file, so a file-backed THP can be allocated */ - num_written = write(fd, tmpfs_loc, strlen(tmpfs_loc) + 1); - close(fd); + /* write pmd size data to the file, so a file-backed THP can be allocated */ + num_written = write(fd, file_buf1, pmd_pagesize); - if (num_written < 1) { - ksft_perror("Fail to write data to testing file"); - goto cleanup; + if (num_written == -1 || num_written != pmd_pagesize) { + ksft_perror("Failed to write data to testing file"); + goto close_file; } /* split the file-backed THP */ - write_debugfs(PATH_FMT, testfile, pgoff_start, pgoff_end, 0); + write_debugfs(PATH_FMT, testfile, pgoff_start, pgoff_end, order); + + /* check file content after split */ + status = lseek(fd, 0, SEEK_SET); + if (status == -1) { + ksft_perror("Cannot lseek file"); + goto close_file; + } + + num_read = read(fd, file_buf2, num_written); + if (num_read == -1 || num_read != num_written) { + ksft_perror("Cannot read file content back"); + goto close_file; + } + + if (strncmp(file_buf1, file_buf2, pmd_pagesize) != 0) { + ksft_print_msg("File content changed\n"); + goto close_file; + } + close(fd); status = unlink(testfile); if (status) { ksft_perror("Cannot remove testing file"); @@ -318,12 +353,15 @@ void split_file_backed_thp(void) ksft_exit_fail_msg("cannot remove tmp dir: %s\n", strerror(errno)); ksft_print_msg("Please check dmesg for more information\n"); - ksft_test_result_pass("File-backed THP split test done\n"); + ksft_test_result_pass("File-backed THP split to order %d test done\n", order); return; +close_file: + close(fd); cleanup: umount(tmpfs_loc); rmdir(tmpfs_loc); +out: ksft_exit_fail_msg("Error occurred\n"); } @@ -361,6 +399,7 @@ int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, { size_t i; int dummy = 0; + unsigned char buf[1024]; srand(time(NULL)); @@ -368,11 +407,12 @@ int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, if (*fd == -1) ksft_exit_fail_msg("Failed to create a file at %s\n", testfile); - for (i = 0; i < fd_size; i++) { - unsigned char byte = (unsigned char)i; + assert(fd_size % sizeof(buf) == 0); + for (i = 0; i < sizeof(buf); i++) + buf[i] = (unsigned char)i; + for (i = 0; i < fd_size; i += sizeof(buf)) + write(*fd, buf, sizeof(buf)); - write(*fd, &byte, sizeof(byte)); - } close(*fd); sync(); *fd = open("/proc/sys/vm/drop_caches", O_WRONLY); @@ -420,7 +460,8 @@ err_out_unlink: return -1; } -void split_thp_in_pagecache_to_order(size_t fd_size, int order, const char *fs_loc) +void split_thp_in_pagecache_to_order_at(size_t fd_size, const char *fs_loc, + int order, int offset) { int fd; char *addr; @@ -438,7 +479,12 @@ void split_thp_in_pagecache_to_order(size_t fd_size, int order, const char *fs_l return; err = 0; - write_debugfs(PID_FMT, getpid(), (uint64_t)addr, (uint64_t)addr + fd_size, order); + if (offset == -1) + write_debugfs(PID_FMT, getpid(), (uint64_t)addr, + (uint64_t)addr + fd_size, order); + else + write_debugfs(PID_FMT, getpid(), (uint64_t)addr, + (uint64_t)addr + fd_size, order, offset); for (i = 0; i < fd_size; i++) if (*(addr + i) != (char)i) { @@ -457,9 +503,15 @@ out: munmap(addr, fd_size); close(fd); unlink(testfile); - if (err) - ksft_exit_fail_msg("Split PMD-mapped pagecache folio to order %d failed\n", order); - ksft_test_result_pass("Split PMD-mapped pagecache folio to order %d passed\n", order); + if (offset == -1) { + if (err) + ksft_exit_fail_msg("Split PMD-mapped pagecache folio to order %d failed\n", order); + ksft_test_result_pass("Split PMD-mapped pagecache folio to order %d passed\n", order); + } else { + if (err) + ksft_exit_fail_msg("Split PMD-mapped pagecache folio to order %d at in-folio offset %d failed\n", order, offset); + ksft_test_result_pass("Split PMD-mapped pagecache folio to order %d at in-folio offset %d passed\n", order, offset); + } } int main(int argc, char **argv) @@ -470,6 +522,7 @@ int main(int argc, char **argv) char fs_loc_template[] = "/tmp/thp_fs_XXXXXX"; const char *fs_loc; bool created_tmp; + int offset; ksft_print_header(); @@ -481,7 +534,7 @@ int main(int argc, char **argv) if (argc > 1) optional_xfs_path = argv[1]; - ksft_set_plan(1+8+2+9); + ksft_set_plan(1+8+1+9+9+8*4+2); pagesize = getpagesize(); pageshift = ffs(pagesize) - 1; @@ -498,12 +551,19 @@ int main(int argc, char **argv) split_pmd_thp_to_order(i); split_pte_mapped_thp(); - split_file_backed_thp(); + for (i = 0; i < 9; i++) + split_file_backed_thp(i); created_tmp = prepare_thp_fs(optional_xfs_path, fs_loc_template, &fs_loc); for (i = 8; i >= 0; i--) - split_thp_in_pagecache_to_order(fd_size, i, fs_loc); + split_thp_in_pagecache_to_order_at(fd_size, fs_loc, i, -1); + + for (i = 0; i < 9; i++) + for (offset = 0; + offset < pmd_pagesize / pagesize; + offset += MAX(pmd_pagesize / pagesize / 4, 1 << i)) + split_thp_in_pagecache_to_order_at(fd_size, fs_loc, i, offset); cleanup_thp_fs(fs_loc, created_tmp); ksft_finished(); diff --git a/tools/testing/selftests/mm/thuge-gen.c b/tools/testing/selftests/mm/thuge-gen.c index e4370b79b62f..cd5174d735be 100644 --- a/tools/testing/selftests/mm/thuge-gen.c +++ b/tools/testing/selftests/mm/thuge-gen.c @@ -127,7 +127,7 @@ void test_mmap(unsigned long size, unsigned flags) show(size); ksft_test_result(size == getpagesize() || (before - after) == NUM_PAGES, - "%s mmap\n", __func__); + "%s mmap %lu\n", __func__, size); if (munmap(map, size * NUM_PAGES)) ksft_exit_fail_msg("%s: unmap %s\n", __func__, strerror(errno)); @@ -165,7 +165,7 @@ void test_shmget(unsigned long size, unsigned flags) show(size); ksft_test_result(size == getpagesize() || (before - after) == NUM_PAGES, - "%s: mmap\n", __func__); + "%s: mmap %lu\n", __func__, size); if (shmdt(map)) ksft_exit_fail_msg("%s: shmdt: %s\n", __func__, strerror(errno)); } diff --git a/tools/testing/selftests/mm/uffd-common.c b/tools/testing/selftests/mm/uffd-common.c index 7ad6ba660c7d..a37088a23ffe 100644 --- a/tools/testing/selftests/mm/uffd-common.c +++ b/tools/testing/selftests/mm/uffd-common.c @@ -10,7 +10,7 @@ #define BASE_PMD_ADDR ((void *)(1UL << 30)) volatile bool test_uffdio_copy_eexist = true; -unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size; +unsigned long nr_parallel, nr_pages, nr_pages_per_cpu, page_size; char *area_src, *area_src_alias, *area_dst, *area_dst_alias, *area_remap; int uffd = -1, uffd_flags, finished, *pipefd, test_type; bool map_shared; @@ -269,7 +269,7 @@ void uffd_test_ctx_clear(void) size_t i; if (pipefd) { - for (i = 0; i < nr_cpus * 2; ++i) { + for (i = 0; i < nr_parallel * 2; ++i) { if (close(pipefd[i])) err("close pipefd"); } @@ -323,7 +323,7 @@ int uffd_test_ctx_init(uint64_t features, const char **errmsg) ret = userfaultfd_open(&features); if (ret) { if (errmsg) - *errmsg = "possible lack of priviledge"; + *errmsg = "possible lack of privilege"; return ret; } @@ -348,7 +348,7 @@ int uffd_test_ctx_init(uint64_t features, const char **errmsg) /* * After initialization of area_src, we must explicitly release pages * for area_dst to make sure it's fully empty. Otherwise we could have - * some area_dst pages be errornously initialized with zero pages, + * some area_dst pages be erroneously initialized with zero pages, * hence we could hit memory corruption later in the test. * * One example is when THP is globally enabled, above allocate_area() @@ -365,10 +365,10 @@ int uffd_test_ctx_init(uint64_t features, const char **errmsg) */ uffd_test_ops->release_pages(area_dst); - pipefd = malloc(sizeof(int) * nr_cpus * 2); + pipefd = malloc(sizeof(int) * nr_parallel * 2); if (!pipefd) err("pipefd"); - for (cpu = 0; cpu < nr_cpus; cpu++) + for (cpu = 0; cpu < nr_parallel; cpu++) if (pipe2(&pipefd[cpu * 2], O_CLOEXEC | O_NONBLOCK)) err("pipe"); diff --git a/tools/testing/selftests/mm/uffd-common.h b/tools/testing/selftests/mm/uffd-common.h index a70ae10b5f62..7700cbfa3975 100644 --- a/tools/testing/selftests/mm/uffd-common.h +++ b/tools/testing/selftests/mm/uffd-common.h @@ -98,7 +98,7 @@ struct uffd_test_case_ops { }; typedef struct uffd_test_case_ops uffd_test_case_ops_t; -extern unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size; +extern unsigned long nr_parallel, nr_pages, nr_pages_per_cpu, page_size; extern char *area_src, *area_src_alias, *area_dst, *area_dst_alias, *area_remap; extern int uffd, uffd_flags, finished, *pipefd, test_type; extern bool map_shared; diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c index 944d559ade21..40af7f67c407 100644 --- a/tools/testing/selftests/mm/uffd-stress.c +++ b/tools/testing/selftests/mm/uffd-stress.c @@ -180,12 +180,12 @@ static void *background_thread(void *arg) static int stress(struct uffd_args *args) { unsigned long cpu; - pthread_t locking_threads[nr_cpus]; - pthread_t uffd_threads[nr_cpus]; - pthread_t background_threads[nr_cpus]; + pthread_t locking_threads[nr_parallel]; + pthread_t uffd_threads[nr_parallel]; + pthread_t background_threads[nr_parallel]; finished = 0; - for (cpu = 0; cpu < nr_cpus; cpu++) { + for (cpu = 0; cpu < nr_parallel; cpu++) { if (pthread_create(&locking_threads[cpu], &attr, locking_thread, (void *)cpu)) return 1; @@ -203,7 +203,7 @@ static int stress(struct uffd_args *args) background_thread, (void *)cpu)) return 1; } - for (cpu = 0; cpu < nr_cpus; cpu++) + for (cpu = 0; cpu < nr_parallel; cpu++) if (pthread_join(background_threads[cpu], NULL)) return 1; @@ -219,11 +219,11 @@ static int stress(struct uffd_args *args) uffd_test_ops->release_pages(area_src); finished = 1; - for (cpu = 0; cpu < nr_cpus; cpu++) + for (cpu = 0; cpu < nr_parallel; cpu++) if (pthread_join(locking_threads[cpu], NULL)) return 1; - for (cpu = 0; cpu < nr_cpus; cpu++) { + for (cpu = 0; cpu < nr_parallel; cpu++) { char c; if (bounces & BOUNCE_POLL) { if (write(pipefd[cpu*2+1], &c, 1) != 1) @@ -246,11 +246,11 @@ static int userfaultfd_stress(void) { void *area; unsigned long nr; - struct uffd_args args[nr_cpus]; + struct uffd_args args[nr_parallel]; uint64_t mem_size = nr_pages * page_size; int flags = 0; - memset(args, 0, sizeof(struct uffd_args) * nr_cpus); + memset(args, 0, sizeof(struct uffd_args) * nr_parallel); if (features & UFFD_FEATURE_WP_UNPOPULATED && test_type == TEST_ANON) flags = UFFD_FEATURE_WP_UNPOPULATED; @@ -325,7 +325,7 @@ static int userfaultfd_stress(void) */ uffd_test_ops->release_pages(area_dst); - uffd_stats_reset(args, nr_cpus); + uffd_stats_reset(args, nr_parallel); /* bounce pass */ if (stress(args)) { @@ -359,7 +359,7 @@ static int userfaultfd_stress(void) swap(area_src_alias, area_dst_alias); - uffd_stats_report(args, nr_cpus); + uffd_stats_report(args, nr_parallel); } uffd_test_ctx_clear(); @@ -412,8 +412,8 @@ static void parse_test_type_arg(const char *raw_type) * feature. */ - if (uffd_get_features(&features)) - err("failed to get available features"); + if (uffd_get_features(&features) && errno == ENOENT) + ksft_exit_skip("failed to get available features (%d)\n", errno); test_uffdio_wp = test_uffdio_wp && (features & UFFD_FEATURE_PAGEFAULT_FLAG_WP); @@ -435,6 +435,7 @@ static void sigalrm(int sig) int main(int argc, char **argv) { + unsigned long nr_cpus; size_t bytes; if (argc < 4) @@ -454,10 +455,19 @@ int main(int argc, char **argv) } nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); + if (nr_cpus > 32) { + /* Don't let calculation below go to zero. */ + ksft_print_msg("_SC_NPROCESSORS_ONLN (%lu) too large, capping nr_threads to 32\n", + nr_cpus); + nr_parallel = 32; + } else { + nr_parallel = nr_cpus; + } - nr_pages_per_cpu = bytes / page_size / nr_cpus; + nr_pages_per_cpu = bytes / page_size / nr_parallel; if (!nr_pages_per_cpu) { - _err("invalid MiB"); + _err("pages_per_cpu = 0, cannot test (%lu / %lu / %lu)", + bytes, page_size, nr_parallel); usage(); } @@ -466,7 +476,7 @@ int main(int argc, char **argv) _err("invalid bounces"); usage(); } - nr_pages = nr_pages_per_cpu * nr_cpus; + nr_pages = nr_pages_per_cpu * nr_parallel; printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n", nr_pages, nr_pages_per_cpu); diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c index 74c8bc02b506..e8fd9011c2a3 100644 --- a/tools/testing/selftests/mm/uffd-unit-tests.c +++ b/tools/testing/selftests/mm/uffd-unit-tests.c @@ -26,6 +26,8 @@ #define ALIGN_UP(x, align_to) \ ((__typeof__(x))((((unsigned long)(x)) + ((align_to)-1)) & ~((align_to)-1))) +#define MAX(a, b) (((a) > (b)) ? (a) : (b)) + struct mem_type { const char *name; unsigned int mem_flag; @@ -196,9 +198,10 @@ uffd_setup_environment(uffd_test_args_t *args, uffd_test_case_t *test, else page_size = psize(); - nr_pages = UFFD_TEST_MEM_SIZE / page_size; + /* Ensure we have at least 2 pages */ + nr_pages = MAX(UFFD_TEST_MEM_SIZE, page_size * 2) / page_size; /* TODO: remove this global var.. it's so ugly */ - nr_cpus = 1; + nr_parallel = 1; /* Initialize test arguments */ args->mem_type = mem_type; diff --git a/tools/testing/selftests/mm/uffd-wp-mremap.c b/tools/testing/selftests/mm/uffd-wp-mremap.c index 2c4f984bd73c..c2ba7d46c7b4 100644 --- a/tools/testing/selftests/mm/uffd-wp-mremap.c +++ b/tools/testing/selftests/mm/uffd-wp-mremap.c @@ -182,7 +182,10 @@ static void test_one_folio(size_t size, bool private, bool swapout, bool hugetlb /* Register range for uffd-wp. */ if (userfaultfd_open(&features)) { - ksft_test_result_fail("userfaultfd_open() failed\n"); + if (errno == ENOENT) + ksft_test_result_skip("userfaultfd not available\n"); + else + ksft_test_result_fail("userfaultfd_open() failed\n"); goto out; } if (uffd_register(uffd, mem, size, false, true, false)) { diff --git a/tools/testing/selftests/mm/va_high_addr_switch.sh b/tools/testing/selftests/mm/va_high_addr_switch.sh index 2c725773cd79..1f92e8caceac 100755 --- a/tools/testing/selftests/mm/va_high_addr_switch.sh +++ b/tools/testing/selftests/mm/va_high_addr_switch.sh @@ -41,6 +41,31 @@ check_supported_x86_64() fi } +check_supported_ppc64() +{ + local config="/proc/config.gz" + [[ -f "${config}" ]] || config="/boot/config-$(uname -r)" + [[ -f "${config}" ]] || fail "Cannot find kernel config in /proc or /boot" + + local pg_table_levels=$(gzip -dcfq "${config}" | grep PGTABLE_LEVELS | cut -d'=' -f 2) + if [[ "${pg_table_levels}" -lt 5 ]]; then + echo "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test" + exit $ksft_skip + fi + + local mmu_support=$(grep -m1 "mmu" /proc/cpuinfo | awk '{print $3}') + if [[ "$mmu_support" != "radix" ]]; then + echo "$0: System does not use Radix MMU, required for 5-level paging" + exit $ksft_skip + fi + + local hugepages_total=$(awk '/HugePages_Total/ {print $2}' /proc/meminfo) + if [[ "${hugepages_total}" -eq 0 ]]; then + echo "$0: HugePages are not enabled, required for some tests" + exit $ksft_skip + fi +} + check_test_requirements() { # The test supports x86_64 and powerpc64. We currently have no useful @@ -50,6 +75,9 @@ check_test_requirements() "x86_64") check_supported_x86_64 ;; + "ppc64le"|"ppc64") + check_supported_ppc64 + ;; *) return 0 ;; diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h index b60ac68a9dc8..6effafdc4d8a 100644 --- a/tools/testing/selftests/mm/vm_util.h +++ b/tools/testing/selftests/mm/vm_util.h @@ -5,11 +5,13 @@ #include <err.h> #include <strings.h> /* ffsl() */ #include <unistd.h> /* _SC_PAGESIZE */ +#include "../kselftest.h" #define BIT_ULL(nr) (1ULL << (nr)) #define PM_SOFT_DIRTY BIT_ULL(55) #define PM_MMAP_EXCLUSIVE BIT_ULL(56) #define PM_UFFD_WP BIT_ULL(57) +#define PM_GUARD_REGION BIT_ULL(58) #define PM_FILE BIT_ULL(61) #define PM_SWAP BIT_ULL(62) #define PM_PRESENT BIT_ULL(63) @@ -31,6 +33,23 @@ static inline unsigned int pshift(void) return __page_shift; } +/* + * Plan 9 FS has bugs (at least on QEMU) where certain operations fail with + * ENOENT on unlinked files. See + * https://gitlab.com/qemu-project/qemu/-/issues/103 for some info about such + * bugs. There are rumours of NFS implementations with similar bugs. + * + * Ideally, tests should just detect filesystems known to have such issues and + * bail early. But 9pfs has the additional "feature" that it causes fstatfs to + * pass through the f_type field from the host filesystem. To avoid having to + * scrape /proc/mounts or some other hackery, tests can call this function when + * it seems such a bug might have been encountered. + */ +static inline void skip_test_dodgy_fs(const char *op_name) +{ + ksft_test_result_skip("%s failed with ENOENT. Filesystem might be buggy (9pfs?)\n", op_name); +} + uint64_t pagemap_get_entry(int fd, char *start); bool pagemap_is_softdirty(int fd, char *start); bool pagemap_is_swapped(int fd, char *start); diff --git a/tools/testing/selftests/mseal_system_mappings/.gitignore b/tools/testing/selftests/mseal_system_mappings/.gitignore new file mode 100644 index 000000000000..319c497a595e --- /dev/null +++ b/tools/testing/selftests/mseal_system_mappings/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +sysmap_is_sealed diff --git a/tools/testing/selftests/mseal_system_mappings/Makefile b/tools/testing/selftests/mseal_system_mappings/Makefile new file mode 100644 index 000000000000..2b4504e2f52f --- /dev/null +++ b/tools/testing/selftests/mseal_system_mappings/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0-only +CFLAGS += -std=c99 -pthread -Wall $(KHDR_INCLUDES) + +TEST_GEN_PROGS := sysmap_is_sealed + +include ../lib.mk diff --git a/tools/testing/selftests/mseal_system_mappings/config b/tools/testing/selftests/mseal_system_mappings/config new file mode 100644 index 000000000000..675cb9f37b86 --- /dev/null +++ b/tools/testing/selftests/mseal_system_mappings/config @@ -0,0 +1 @@ +CONFIG_MSEAL_SYSTEM_MAPPINGS=y diff --git a/tools/testing/selftests/mseal_system_mappings/sysmap_is_sealed.c b/tools/testing/selftests/mseal_system_mappings/sysmap_is_sealed.c new file mode 100644 index 000000000000..0d2af30c3bf5 --- /dev/null +++ b/tools/testing/selftests/mseal_system_mappings/sysmap_is_sealed.c @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * test system mappings are sealed when + * KCONFIG_MSEAL_SYSTEM_MAPPINGS=y + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <errno.h> +#include <unistd.h> +#include <string.h> +#include <stdbool.h> + +#include "../kselftest.h" +#include "../kselftest_harness.h" + +#define VMFLAGS "VmFlags:" +#define MSEAL_FLAGS "sl" +#define MAX_LINE_LEN 512 + +bool has_mapping(char *name, FILE *maps) +{ + char line[MAX_LINE_LEN]; + + while (fgets(line, sizeof(line), maps)) { + if (strstr(line, name)) + return true; + } + + return false; +} + +bool mapping_is_sealed(char *name, FILE *maps) +{ + char line[MAX_LINE_LEN]; + + while (fgets(line, sizeof(line), maps)) { + if (!strncmp(line, VMFLAGS, strlen(VMFLAGS))) { + if (strstr(line, MSEAL_FLAGS)) + return true; + + return false; + } + } + + return false; +} + +FIXTURE(basic) { + FILE *maps; +}; + +FIXTURE_SETUP(basic) +{ + self->maps = fopen("/proc/self/smaps", "r"); + if (!self->maps) + SKIP(return, "Could not open /proc/self/smap, errno=%d", + errno); +}; + +FIXTURE_TEARDOWN(basic) +{ + if (self->maps) + fclose(self->maps); +}; + +FIXTURE_VARIANT(basic) +{ + char *name; + bool sealed; +}; + +FIXTURE_VARIANT_ADD(basic, vdso) { + .name = "[vdso]", + .sealed = true, +}; + +FIXTURE_VARIANT_ADD(basic, vvar) { + .name = "[vvar]", + .sealed = true, +}; + +FIXTURE_VARIANT_ADD(basic, vvar_vclock) { + .name = "[vvar_vclock]", + .sealed = true, +}; + +FIXTURE_VARIANT_ADD(basic, sigpage) { + .name = "[sigpage]", + .sealed = true, +}; + +FIXTURE_VARIANT_ADD(basic, vectors) { + .name = "[vectors]", + .sealed = true, +}; + +FIXTURE_VARIANT_ADD(basic, uprobes) { + .name = "[uprobes]", + .sealed = true, +}; + +FIXTURE_VARIANT_ADD(basic, stack) { + .name = "[stack]", + .sealed = false, +}; + +TEST_F(basic, check_sealed) +{ + if (!has_mapping(variant->name, self->maps)) { + SKIP(return, "could not find the mapping, %s", + variant->name); + } + + EXPECT_EQ(variant->sealed, + mapping_is_sealed(variant->name, self->maps)); +}; + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/amt.sh b/tools/testing/selftests/net/amt.sh index d458b45c775b..3ef209cacb8e 100755 --- a/tools/testing/selftests/net/amt.sh +++ b/tools/testing/selftests/net/amt.sh @@ -194,15 +194,21 @@ test_remote_ip() send_mcast_torture4() { - ip netns exec "${SOURCE}" bash -c \ - 'cat /dev/urandom | head -c 1G | nc -w 1 -u 239.0.0.1 4001' + for i in `seq 10`; do + ip netns exec "${SOURCE}" bash -c \ + 'cat /dev/urandom | head -c 100M | nc -w 1 -u 239.0.0.1 4001' + echo -n "." + done } send_mcast_torture6() { - ip netns exec "${SOURCE}" bash -c \ - 'cat /dev/urandom | head -c 1G | nc -w 1 -u ff0e::5:6 6001' + for i in `seq 10`; do + ip netns exec "${SOURCE}" bash -c \ + 'cat /dev/urandom | head -c 100M | nc -w 1 -u ff0e::5:6 6001' + echo -n "." + done } check_features() @@ -278,10 +284,12 @@ wait $pid || err=$? if [ $err -eq 1 ]; then ERR=1 fi +printf "TEST: %-50s" "IPv4 amt traffic forwarding torture" send_mcast_torture4 -printf "TEST: %-60s [ OK ]\n" "IPv4 amt traffic forwarding torture" +printf " [ OK ]\n" +printf "TEST: %-50s" "IPv6 amt traffic forwarding torture" send_mcast_torture6 -printf "TEST: %-60s [ OK ]\n" "IPv6 amt traffic forwarding torture" +printf " [ OK ]\n" sleep 5 if [ "${ERR}" -eq 1 ]; then echo "Some tests failed." >&2 diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index 975be4fdbcdb..701905eeff66 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -222,6 +222,31 @@ setup_ns() NS_LIST+=("${ns_list[@]}") } +# Create netdevsim with given id and net namespace. +create_netdevsim() { + local id="$1" + local ns="$2" + + modprobe netdevsim &> /dev/null + udevadm settle + + echo "$id 1" | ip netns exec $ns tee /sys/bus/netdevsim/new_device >/dev/null + local dev=$(ip netns exec $ns ls /sys/bus/netdevsim/devices/netdevsim$id/net) + ip -netns $ns link set dev $dev name nsim$id + ip -netns $ns link set dev nsim$id up + + echo nsim$id +} + +# Remove netdevsim with given id. +cleanup_netdevsim() { + local id="$1" + + if [ -d "/sys/bus/netdevsim/devices/netdevsim$id/net" ]; then + echo "$id" > /sys/bus/netdevsim/del_device + fi +} + tc_rule_stats_get() { local dev=$1; shift diff --git a/tools/testing/selftests/net/mptcp/.gitignore b/tools/testing/selftests/net/mptcp/.gitignore index 49daae73c41e..833279fb34e2 100644 --- a/tools/testing/selftests/net/mptcp/.gitignore +++ b/tools/testing/selftests/net/mptcp/.gitignore @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only mptcp_connect +mptcp_diag mptcp_inq mptcp_sockopt pm_nl_ctl diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index d240d02fa443..c83a8b47bbdf 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -1270,7 +1270,7 @@ int main_loop(void) if (cfg_input && cfg_sockopt_types.mptfo) { fd_in = open(cfg_input, O_RDONLY); - if (fd < 0) + if (fd_in < 0) xerror("can't open %s:%d", cfg_input, errno); } @@ -1293,13 +1293,13 @@ again: if (cfg_input && !cfg_sockopt_types.mptfo) { fd_in = open(cfg_input, O_RDONLY); - if (fd < 0) + if (fd_in < 0) xerror("can't open %s:%d", cfg_input, errno); } ret = copyfd_io(fd_in, fd, 1, 0, &winfo); if (ret) - return ret; + goto out; if (cfg_truncate > 0) { shutdown(fd, SHUT_WR); @@ -1320,7 +1320,10 @@ again: close(fd); } - return 0; +out: + if (cfg_input) + close(fd_in); + return ret; } int parse_proto(const char *proto) diff --git a/tools/testing/selftests/net/netns-name.sh b/tools/testing/selftests/net/netns-name.sh index 0be1905d1f2f..38871bdef67f 100755 --- a/tools/testing/selftests/net/netns-name.sh +++ b/tools/testing/selftests/net/netns-name.sh @@ -7,10 +7,12 @@ set -o pipefail DEV=dummy-dev0 DEV2=dummy-dev1 ALT_NAME=some-alt-name +NSIM_ADDR=2025 RET_CODE=0 cleanup() { + cleanup_netdevsim $NSIM_ADDR cleanup_ns $NS $test_ns } @@ -25,12 +27,15 @@ setup_ns NS test_ns # # Test basic move without a rename +# Use netdevsim because it has extra asserts for notifiers. # -ip -netns $NS link add name $DEV type dummy || fail -ip -netns $NS link set dev $DEV netns $test_ns || + +nsim=$(create_netdevsim $NSIM_ADDR $NS) +ip -netns $NS link set dev $nsim netns $test_ns || fail "Can't perform a netns move" -ip -netns $test_ns link show dev $DEV >> /dev/null || fail "Device not found after move" -ip -netns $test_ns link del $DEV || fail +ip -netns $test_ns link show dev $nsim >> /dev/null || + fail "Device not found after move" +cleanup_netdevsim $NSIM_ADDR # # Test move with a conflict diff --git a/tools/testing/selftests/net/rtnetlink.py b/tools/testing/selftests/net/rtnetlink.py index 80950888800b..e9ad5e88da97 100755 --- a/tools/testing/selftests/net/rtnetlink.py +++ b/tools/testing/selftests/net/rtnetlink.py @@ -12,10 +12,10 @@ def dump_mcaddr_check(rtnl: RtnlAddrFamily) -> None: At least the loopback interface should have this address. """ - addresses = rtnl.getmaddrs({"ifa-family": socket.AF_INET}, dump=True) + addresses = rtnl.getmulticast({"ifa-family": socket.AF_INET}, dump=True) all_host_multicasts = [ - addr for addr in addresses if addr['ifa-multicast'] == IPV4_ALL_HOSTS_MULTICAST + addr for addr in addresses if addr['multicast'] == IPV4_ALL_HOSTS_MULTICAST ] ksft_ge(len(all_host_multicasts), 1, diff --git a/tools/testing/selftests/net/tcp_ao/self-connect.c b/tools/testing/selftests/net/tcp_ao/self-connect.c index 73b2f2276f3f..2c73bea698a6 100644 --- a/tools/testing/selftests/net/tcp_ao/self-connect.c +++ b/tools/testing/selftests/net/tcp_ao/self-connect.c @@ -16,6 +16,9 @@ static void __setup_lo_intf(const char *lo_intf, if (link_set_up(lo_intf)) test_error("Failed to bring %s up", lo_intf); + + if (ip_route_add(lo_intf, TEST_FAMILY, local_addr, local_addr)) + test_error("Failed to add a local route %s", lo_intf); } static void setup_lo_intf(const char *lo_intf) diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh index c51ea90a1395..815fad8c53a8 100755 --- a/tools/testing/selftests/net/udpgro_bench.sh +++ b/tools/testing/selftests/net/udpgro_bench.sh @@ -7,7 +7,7 @@ source net_helper.sh readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" -BPF_FILE="xdp_dummy.bpf.o" +BPF_FILE="lib/xdp_dummy.bpf.o" cleanup() { local -r jobs="$(jobs -p)" diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh index 17404f49cdb6..5f3d1a110d11 100755 --- a/tools/testing/selftests/net/udpgro_frglist.sh +++ b/tools/testing/selftests/net/udpgro_frglist.sh @@ -7,7 +7,7 @@ source net_helper.sh readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" -BPF_FILE="xdp_dummy.bpf.o" +BPF_FILE="lib/xdp_dummy.bpf.o" cleanup() { local -r jobs="$(jobs -p)" diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index 550d8eb3e224..f22f6c66997e 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -3,7 +3,7 @@ source net_helper.sh -BPF_FILE="xdp_dummy.bpf.o" +BPF_FILE="lib/xdp_dummy.bpf.o" readonly BASE="ns-$(mktemp -u XXXXXX)" readonly SRC=2 readonly DST=1 diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh index 6bb7dfaa30b6..9709dd067c72 100755 --- a/tools/testing/selftests/net/veth.sh +++ b/tools/testing/selftests/net/veth.sh @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 -BPF_FILE="xdp_dummy.bpf.o" +BPF_FILE="lib/xdp_dummy.bpf.o" readonly STATS="$(mktemp -p /tmp ns-XXXXXX)" readonly BASE=`basename $STATS` readonly SRC=2 diff --git a/tools/testing/selftests/net/xdp_dummy.bpf.c b/tools/testing/selftests/net/xdp_dummy.bpf.c deleted file mode 100644 index d988b2e0cee8..000000000000 --- a/tools/testing/selftests/net/xdp_dummy.bpf.c +++ /dev/null @@ -1,13 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#define KBUILD_MODNAME "xdp_dummy" -#include <linux/bpf.h> -#include <bpf/bpf_helpers.h> - -SEC("xdp") -int xdp_dummy_prog(struct xdp_md *ctx) -{ - return XDP_PASS; -} - -char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h index cec22aa11cdf..55bcf81a2b9a 100644 --- a/tools/testing/selftests/pidfd/pidfd.h +++ b/tools/testing/selftests/pidfd/pidfd.h @@ -32,19 +32,19 @@ #endif #ifndef __NR_pidfd_open -#define __NR_pidfd_open -1 +#define __NR_pidfd_open 434 #endif #ifndef __NR_pidfd_send_signal -#define __NR_pidfd_send_signal -1 +#define __NR_pidfd_send_signal 424 #endif #ifndef __NR_clone3 -#define __NR_clone3 -1 +#define __NR_clone3 435 #endif #ifndef __NR_pidfd_getfd -#define __NR_pidfd_getfd -1 +#define __NR_pidfd_getfd 438 #endif #ifndef PIDFD_NONBLOCK diff --git a/tools/testing/selftests/riscv/hwprobe/cbo.c b/tools/testing/selftests/riscv/hwprobe/cbo.c index a40541bb7c7d..5e96ef785d0d 100644 --- a/tools/testing/selftests/riscv/hwprobe/cbo.c +++ b/tools/testing/selftests/riscv/hwprobe/cbo.c @@ -50,6 +50,14 @@ static void cbo_clean(char *base) { cbo_insn(base, 1); } static void cbo_flush(char *base) { cbo_insn(base, 2); } static void cbo_zero(char *base) { cbo_insn(base, 4); } +static void test_no_cbo_inval(void *arg) +{ + ksft_print_msg("Testing cbo.inval instruction remain privileged\n"); + illegal_insn = false; + cbo_inval(&mem[0]); + ksft_test_result(illegal_insn, "No cbo.inval\n"); +} + static void test_no_zicbom(void *arg) { ksft_print_msg("Testing Zicbom instructions remain privileged\n"); @@ -61,10 +69,6 @@ static void test_no_zicbom(void *arg) illegal_insn = false; cbo_flush(&mem[0]); ksft_test_result(illegal_insn, "No cbo.flush\n"); - - illegal_insn = false; - cbo_inval(&mem[0]); - ksft_test_result(illegal_insn, "No cbo.inval\n"); } static void test_no_zicboz(void *arg) @@ -81,6 +85,30 @@ static bool is_power_of_2(__u64 n) return n != 0 && (n & (n - 1)) == 0; } +static void test_zicbom(void *arg) +{ + struct riscv_hwprobe pair = { + .key = RISCV_HWPROBE_KEY_ZICBOM_BLOCK_SIZE, + }; + cpu_set_t *cpus = (cpu_set_t *)arg; + __u64 block_size; + long rc; + + rc = riscv_hwprobe(&pair, 1, sizeof(cpu_set_t), (unsigned long *)cpus, 0); + block_size = pair.value; + ksft_test_result(rc == 0 && pair.key == RISCV_HWPROBE_KEY_ZICBOM_BLOCK_SIZE && + is_power_of_2(block_size), "Zicbom block size\n"); + ksft_print_msg("Zicbom block size: %llu\n", block_size); + + illegal_insn = false; + cbo_clean(&mem[block_size]); + ksft_test_result(!illegal_insn, "cbo.clean\n"); + + illegal_insn = false; + cbo_flush(&mem[block_size]); + ksft_test_result(!illegal_insn, "cbo.flush\n"); +} + static void test_zicboz(void *arg) { struct riscv_hwprobe pair = { @@ -129,7 +157,7 @@ static void test_zicboz(void *arg) ksft_test_result_pass("cbo.zero check\n"); } -static void check_no_zicboz_cpus(cpu_set_t *cpus) +static void check_no_zicbo_cpus(cpu_set_t *cpus, __u64 cbo) { struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_IMA_EXT_0, @@ -137,6 +165,7 @@ static void check_no_zicboz_cpus(cpu_set_t *cpus) cpu_set_t one_cpu; int i = 0, c = 0; long rc; + char *cbostr; while (i++ < CPU_COUNT(cpus)) { while (!CPU_ISSET(c, cpus)) @@ -148,10 +177,13 @@ static void check_no_zicboz_cpus(cpu_set_t *cpus) rc = riscv_hwprobe(&pair, 1, sizeof(cpu_set_t), (unsigned long *)&one_cpu, 0); assert(rc == 0 && pair.key == RISCV_HWPROBE_KEY_IMA_EXT_0); - if (pair.value & RISCV_HWPROBE_EXT_ZICBOZ) - ksft_exit_fail_msg("Zicboz is only present on a subset of harts.\n" - "Use taskset to select a set of harts where Zicboz\n" - "presence (present or not) is consistent for each hart\n"); + cbostr = cbo == RISCV_HWPROBE_EXT_ZICBOZ ? "Zicboz" : "Zicbom"; + + if (pair.value & cbo) + ksft_exit_fail_msg("%s is only present on a subset of harts.\n" + "Use taskset to select a set of harts where %s\n" + "presence (present or not) is consistent for each hart\n", + cbostr, cbostr); ++c; } } @@ -159,7 +191,9 @@ static void check_no_zicboz_cpus(cpu_set_t *cpus) enum { TEST_ZICBOZ, TEST_NO_ZICBOZ, + TEST_ZICBOM, TEST_NO_ZICBOM, + TEST_NO_CBO_INVAL, }; static struct test_info { @@ -169,7 +203,9 @@ static struct test_info { } tests[] = { [TEST_ZICBOZ] = { .nr_tests = 3, test_zicboz }, [TEST_NO_ZICBOZ] = { .nr_tests = 1, test_no_zicboz }, - [TEST_NO_ZICBOM] = { .nr_tests = 3, test_no_zicbom }, + [TEST_ZICBOM] = { .nr_tests = 3, test_zicbom }, + [TEST_NO_ZICBOM] = { .nr_tests = 2, test_no_zicbom }, + [TEST_NO_CBO_INVAL] = { .nr_tests = 1, test_no_cbo_inval }, }; int main(int argc, char **argv) @@ -189,6 +225,7 @@ int main(int argc, char **argv) assert(rc == 0); tests[TEST_NO_ZICBOZ].enabled = true; tests[TEST_NO_ZICBOM].enabled = true; + tests[TEST_NO_CBO_INVAL].enabled = true; } rc = sched_getaffinity(0, sizeof(cpu_set_t), &cpus); @@ -206,7 +243,14 @@ int main(int argc, char **argv) tests[TEST_ZICBOZ].enabled = true; tests[TEST_NO_ZICBOZ].enabled = false; } else { - check_no_zicboz_cpus(&cpus); + check_no_zicbo_cpus(&cpus, RISCV_HWPROBE_EXT_ZICBOZ); + } + + if (pair.value & RISCV_HWPROBE_EXT_ZICBOM) { + tests[TEST_ZICBOM].enabled = true; + tests[TEST_NO_ZICBOM].enabled = false; + } else { + check_no_zicbo_cpus(&cpus, RISCV_HWPROBE_EXT_ZICBOM); } for (i = 0; i < ARRAY_SIZE(tests); ++i) diff --git a/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c b/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c index 35c0812e32de..4dde05e45a04 100644 --- a/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c +++ b/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c @@ -6,7 +6,7 @@ * the values. To further ensure consistency, this file is compiled without * libc and without auto-vectorization. * - * To be "clean" all values must be either all ones or all zeroes. + * To be "clean" all values must be all zeroes. */ #define __stringify_1(x...) #x @@ -14,9 +14,8 @@ int main(int argc, char **argv) { - char prev_value = 0, value; + char value = 0; unsigned long vl; - int first = 1; if (argc > 2 && strcmp(argv[2], "x")) asm volatile ( @@ -44,14 +43,11 @@ int main(int argc, char **argv) "vsrl.vi " __stringify(register) ", " __stringify(register) ", 8\n\t" \ ".option pop\n\t" \ : "=r" (value)); \ - if (first) { \ - first = 0; \ - } else if (value != prev_value || !(value == 0x00 || value == 0xff)) { \ + if (value != 0x00) { \ printf("Register " __stringify(register) \ " values not clean! value: %u\n", value); \ exit(-1); \ } \ - prev_value = value; \ } \ }) diff --git a/tools/testing/selftests/rtc/.gitignore b/tools/testing/selftests/rtc/.gitignore index fb2d533aa575..a2afe7994e85 100644 --- a/tools/testing/selftests/rtc/.gitignore +++ b/tools/testing/selftests/rtc/.gitignore @@ -1,3 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only rtctest -setdate diff --git a/tools/testing/selftests/rtc/Makefile b/tools/testing/selftests/rtc/Makefile index 9dbb395c5c79..547c244a2ca5 100644 --- a/tools/testing/selftests/rtc/Makefile +++ b/tools/testing/selftests/rtc/Makefile @@ -4,8 +4,6 @@ LDLIBS += -lrt -lpthread -lm TEST_GEN_PROGS = rtctest -TEST_GEN_PROGS_EXTENDED = setdate - TEST_FILES := settings include ../lib.mk diff --git a/tools/testing/selftests/rtc/rtctest.c b/tools/testing/selftests/rtc/rtctest.c index e103097d0b5b..be175c0e6ae3 100644 --- a/tools/testing/selftests/rtc/rtctest.c +++ b/tools/testing/selftests/rtc/rtctest.c @@ -29,6 +29,7 @@ enum rtc_alarm_state { RTC_ALARM_UNKNOWN, RTC_ALARM_ENABLED, RTC_ALARM_DISABLED, + RTC_ALARM_RES_MINUTE, }; FIXTURE(rtc) { @@ -88,7 +89,7 @@ static void nanosleep_with_retries(long ns) } } -static enum rtc_alarm_state get_rtc_alarm_state(int fd) +static enum rtc_alarm_state get_rtc_alarm_state(int fd, int need_seconds) { struct rtc_param param = { 0 }; int rc; @@ -103,6 +104,10 @@ static enum rtc_alarm_state get_rtc_alarm_state(int fd) if ((param.uvalue & _BITUL(RTC_FEATURE_ALARM)) == 0) return RTC_ALARM_DISABLED; + /* Check if alarm has desired granularity */ + if (need_seconds && (param.uvalue & _BITUL(RTC_FEATURE_ALARM_RES_MINUTE))) + return RTC_ALARM_RES_MINUTE; + return RTC_ALARM_ENABLED; } @@ -227,9 +232,11 @@ TEST_F(rtc, alarm_alm_set) { SKIP(return, "Skipping test since %s does not exist", rtc_file); ASSERT_NE(-1, self->fd); - alarm_state = get_rtc_alarm_state(self->fd); + alarm_state = get_rtc_alarm_state(self->fd, 1); if (alarm_state == RTC_ALARM_DISABLED) SKIP(return, "Skipping test since alarms are not supported."); + if (alarm_state == RTC_ALARM_RES_MINUTE) + SKIP(return, "Skipping test since alarms has only minute granularity."); rc = ioctl(self->fd, RTC_RD_TIME, &tm); ASSERT_NE(-1, rc); @@ -295,9 +302,11 @@ TEST_F(rtc, alarm_wkalm_set) { SKIP(return, "Skipping test since %s does not exist", rtc_file); ASSERT_NE(-1, self->fd); - alarm_state = get_rtc_alarm_state(self->fd); + alarm_state = get_rtc_alarm_state(self->fd, 1); if (alarm_state == RTC_ALARM_DISABLED) SKIP(return, "Skipping test since alarms are not supported."); + if (alarm_state == RTC_ALARM_RES_MINUTE) + SKIP(return, "Skipping test since alarms has only minute granularity."); rc = ioctl(self->fd, RTC_RD_TIME, &alarm.time); ASSERT_NE(-1, rc); @@ -357,7 +366,7 @@ TEST_F_TIMEOUT(rtc, alarm_alm_set_minute, 65) { SKIP(return, "Skipping test since %s does not exist", rtc_file); ASSERT_NE(-1, self->fd); - alarm_state = get_rtc_alarm_state(self->fd); + alarm_state = get_rtc_alarm_state(self->fd, 0); if (alarm_state == RTC_ALARM_DISABLED) SKIP(return, "Skipping test since alarms are not supported."); @@ -425,7 +434,7 @@ TEST_F_TIMEOUT(rtc, alarm_wkalm_set_minute, 65) { SKIP(return, "Skipping test since %s does not exist", rtc_file); ASSERT_NE(-1, self->fd); - alarm_state = get_rtc_alarm_state(self->fd); + alarm_state = get_rtc_alarm_state(self->fd, 0); if (alarm_state == RTC_ALARM_DISABLED) SKIP(return, "Skipping test since alarms are not supported."); diff --git a/tools/testing/selftests/rtc/setdate.c b/tools/testing/selftests/rtc/setdate.c deleted file mode 100644 index b303890b3de2..000000000000 --- a/tools/testing/selftests/rtc/setdate.c +++ /dev/null @@ -1,77 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* Real Time Clock Driver Test - * by: Benjamin Gaignard (benjamin.gaignard@linaro.org) - * - * To build - * gcc rtctest_setdate.c -o rtctest_setdate - */ - -#include <stdio.h> -#include <linux/rtc.h> -#include <sys/ioctl.h> -#include <sys/time.h> -#include <sys/types.h> -#include <fcntl.h> -#include <unistd.h> -#include <stdlib.h> -#include <errno.h> - -static const char default_time[] = "00:00:00"; - -int main(int argc, char **argv) -{ - int fd, retval; - struct rtc_time new, current; - const char *rtc, *date; - const char *time = default_time; - - switch (argc) { - case 4: - time = argv[3]; - /* FALLTHROUGH */ - case 3: - date = argv[2]; - rtc = argv[1]; - break; - default: - fprintf(stderr, "usage: rtctest_setdate <rtcdev> <DD-MM-YYYY> [HH:MM:SS]\n"); - return 1; - } - - fd = open(rtc, O_RDONLY); - if (fd == -1) { - perror(rtc); - exit(errno); - } - - sscanf(date, "%d-%d-%d", &new.tm_mday, &new.tm_mon, &new.tm_year); - new.tm_mon -= 1; - new.tm_year -= 1900; - sscanf(time, "%d:%d:%d", &new.tm_hour, &new.tm_min, &new.tm_sec); - - fprintf(stderr, "Test will set RTC date/time to %d-%d-%d, %02d:%02d:%02d.\n", - new.tm_mday, new.tm_mon + 1, new.tm_year + 1900, - new.tm_hour, new.tm_min, new.tm_sec); - - /* Write the new date in RTC */ - retval = ioctl(fd, RTC_SET_TIME, &new); - if (retval == -1) { - perror("RTC_SET_TIME ioctl"); - close(fd); - exit(errno); - } - - /* Read back */ - retval = ioctl(fd, RTC_RD_TIME, ¤t); - if (retval == -1) { - perror("RTC_RD_TIME ioctl"); - exit(errno); - } - - fprintf(stderr, "\n\nCurrent RTC date/time is %d-%d-%d, %02d:%02d:%02d.\n", - current.tm_mday, current.tm_mon + 1, current.tm_year + 1900, - current.tm_hour, current.tm_min, current.tm_sec); - - close(fd); - return 0; -} diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json b/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json index ee2792998c89..4f21aeb8a3fb 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json @@ -305,7 +305,7 @@ "cmdUnderTest": "$TC actions add action nat ingress default 10.10.10.1 index 12", "expExitCode": "0", "verifyCmd": "$TC actions get action nat index 12", - "matchPattern": "action order [0-9]+: nat ingress 0.0.0.0/32 10.10.10.1 pass.*index 12 ref", + "matchPattern": "action order [0-9]+: nat ingress 0.0.0.0/0 10.10.10.1 pass.*index 12 ref", "matchCount": "1", "teardown": [ "$TC actions flush action nat" @@ -332,7 +332,7 @@ "cmdUnderTest": "$TC actions add action nat ingress any 10.10.10.1 index 12", "expExitCode": "0", "verifyCmd": "$TC actions get action nat index 12", - "matchPattern": "action order [0-9]+: nat ingress 0.0.0.0/32 10.10.10.1 pass.*index 12 ref", + "matchPattern": "action order [0-9]+: nat ingress 0.0.0.0/0 10.10.10.1 pass.*index 12 ref", "matchCount": "1", "teardown": [ "$TC actions flush action nat" @@ -359,7 +359,7 @@ "cmdUnderTest": "$TC actions add action nat ingress all 10.10.10.1 index 12", "expExitCode": "0", "verifyCmd": "$TC actions get action nat index 12", - "matchPattern": "action order [0-9]+: nat ingress 0.0.0.0/32 10.10.10.1 pass.*index 12 ref", + "matchPattern": "action order [0-9]+: nat ingress 0.0.0.0/0 10.10.10.1 pass.*index 12 ref", "matchCount": "1", "teardown": [ "$TC actions flush action nat" @@ -548,7 +548,7 @@ "cmdUnderTest": "$TC actions add action nat egress default 20.20.20.1 pipe index 10", "expExitCode": "0", "verifyCmd": "$TC actions get action nat index 10", - "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/32 20.20.20.1 pipe.*index 10 ref", + "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/0 20.20.20.1 pipe.*index 10 ref", "matchCount": "1", "teardown": [ "$TC actions flush action nat" @@ -575,7 +575,7 @@ "cmdUnderTest": "$TC actions add action nat egress any 20.20.20.1 pipe index 10", "expExitCode": "0", "verifyCmd": "$TC actions get action nat index 10", - "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/32 20.20.20.1 pipe.*index 10 ref", + "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/0 20.20.20.1 pipe.*index 10 ref", "matchCount": "1", "teardown": [ "$TC actions flush action nat" @@ -602,7 +602,7 @@ "cmdUnderTest": "$TC actions add action nat egress all 20.20.20.1 pipe index 10", "expExitCode": "0", "verifyCmd": "$TC actions get action nat index 10", - "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/32 20.20.20.1 pipe.*index 10 ref", + "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/0 20.20.20.1 pipe.*index 10 ref", "matchCount": "1", "teardown": [ "$TC actions flush action nat" @@ -629,7 +629,7 @@ "cmdUnderTest": "$TC actions add action nat egress all 20.20.20.1 pipe index 10 cookie aa1bc2d3eeff112233445566778800a1", "expExitCode": "0", "verifyCmd": "$TC actions get action nat index 10", - "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/32 20.20.20.1 pipe.*index 10 ref.*cookie aa1bc2d3eeff112233445566778800a1", + "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/0 20.20.20.1 pipe.*index 10 ref.*cookie aa1bc2d3eeff112233445566778800a1", "matchCount": "1", "teardown": [ "$TC actions flush action nat" diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 9044ac054167..25454fd95537 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -126,5 +126,37 @@ "$TC qdisc del dev $DUMMY root handle 1: drr", "$IP addr del 10.10.10.10/24 dev $DUMMY" ] - } + }, + { + "id": "c024", + "name": "Test TBF with SKBPRIO - catch qlen corner cases", + "category": [ + "qdisc", + "tbf", + "skbprio" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1: root tbf rate 100bit burst 2000 limit 1000", + "$TC qdisc add dev $DUMMY parent 1: handle 10: skbprio limit 1", + "ping -c 1 -W 0.1 -Q 0x00 -s 1400 -I $DUMMY 10.10.10.1 > /dev/null || true", + "ping -c 1 -W 0.1 -Q 0x1c -s 1400 -I $DUMMY 10.10.10.1 > /dev/null || true", + "ping -c 1 -W 0.1 -Q 0x00 -s 1400 -I $DUMMY 10.10.10.1 > /dev/null || true", + "ping -c 1 -W 0.1 -Q 0x1c -s 1400 -I $DUMMY 10.10.10.1 > /dev/null || true", + "sleep 0.5" + ], + "cmdUnderTest": "$TC -s qdisc show dev $DUMMY", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY | grep -A 5 'qdisc skbprio'", + "matchPattern": "dropped [1-9][0-9]*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] + } ] diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile index 7817afe29005..c7781efea0f3 100644 --- a/tools/testing/selftests/ublk/Makefile +++ b/tools/testing/selftests/ublk/Makefile @@ -4,6 +4,8 @@ CFLAGS += -O3 -Wl,-no-as-needed -Wall -I $(top_srcdir) LDLIBS += -lpthread -lm -luring TEST_PROGS := test_generic_01.sh +TEST_PROGS += test_generic_02.sh +TEST_PROGS += test_generic_03.sh TEST_PROGS += test_null_01.sh TEST_PROGS += test_null_02.sh @@ -11,8 +13,11 @@ TEST_PROGS += test_loop_01.sh TEST_PROGS += test_loop_02.sh TEST_PROGS += test_loop_03.sh TEST_PROGS += test_loop_04.sh +TEST_PROGS += test_loop_05.sh TEST_PROGS += test_stripe_01.sh TEST_PROGS += test_stripe_02.sh +TEST_PROGS += test_stripe_03.sh +TEST_PROGS += test_stripe_04.sh TEST_PROGS += test_stress_01.sh TEST_PROGS += test_stress_02.sh diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index 05147b53c361..91c282bc7674 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -99,7 +99,7 @@ static int __ublk_ctrl_cmd(struct ublk_dev *dev, static int ublk_ctrl_stop_dev(struct ublk_dev *dev) { struct ublk_ctrl_cmd_data data = { - .cmd_op = UBLK_CMD_STOP_DEV, + .cmd_op = UBLK_U_CMD_STOP_DEV, }; return __ublk_ctrl_cmd(dev, &data); @@ -169,7 +169,7 @@ static int ublk_ctrl_get_params(struct ublk_dev *dev, struct ublk_params *params) { struct ublk_ctrl_cmd_data data = { - .cmd_op = UBLK_CMD_GET_PARAMS, + .cmd_op = UBLK_U_CMD_GET_PARAMS, .flags = CTRL_CMD_HAS_BUF, .addr = (__u64)params, .len = sizeof(*params), @@ -215,7 +215,7 @@ static void ublk_ctrl_dump(struct ublk_dev *dev) ret = ublk_ctrl_get_params(dev, &p); if (ret < 0) { - ublk_err("failed to get params %m\n"); + ublk_err("failed to get params %d %s\n", ret, strerror(-ret)); return; } @@ -322,7 +322,7 @@ static int ublk_queue_init(struct ublk_queue *q) cmd_buf_size = ublk_queue_cmd_buf_sz(q); off = UBLKSRV_CMD_BUF_OFFSET + q->q_id * ublk_queue_max_cmd_buf_sz(); - q->io_cmd_buf = (char *)mmap(0, cmd_buf_size, PROT_READ, + q->io_cmd_buf = mmap(0, cmd_buf_size, PROT_READ, MAP_SHARED | MAP_POPULATE, dev->fds[0], off); if (q->io_cmd_buf == MAP_FAILED) { ublk_err("ublk dev %d queue %d map io_cmd_buf failed %m\n", diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index f31a5c4d4143..760ff8ffb810 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -128,7 +128,7 @@ struct ublk_queue { unsigned int io_inflight; struct ublk_dev *dev; const struct ublk_tgt_ops *tgt_ops; - char *io_cmd_buf; + struct ublksrv_io_desc *io_cmd_buf; struct io_uring ring; struct ublk_io ios[UBLK_QUEUE_DEPTH]; #define UBLKSRV_QUEUE_STOPPING (1U << 0) @@ -302,7 +302,7 @@ static inline void ublk_mark_io_done(struct ublk_io *io, int res) static inline const struct ublksrv_io_desc *ublk_get_iod(const struct ublk_queue *q, int tag) { - return (struct ublksrv_io_desc *)&(q->io_cmd_buf[tag * sizeof(struct ublksrv_io_desc)]); + return &q->io_cmd_buf[tag]; } static inline void ublk_set_sqe_cmd_op(struct io_uring_sqe *sqe, __u32 cmd_op) diff --git a/tools/testing/selftests/ublk/null.c b/tools/testing/selftests/ublk/null.c index 899875ff50fe..91fec3690d4b 100644 --- a/tools/testing/selftests/ublk/null.c +++ b/tools/testing/selftests/ublk/null.c @@ -17,7 +17,8 @@ static int ublk_null_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev) dev->tgt.dev_size = dev_size; dev->tgt.params = (struct ublk_params) { - .types = UBLK_PARAM_TYPE_BASIC, + .types = UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DMA_ALIGN | + UBLK_PARAM_TYPE_SEGMENT, .basic = { .logical_bs_shift = 9, .physical_bs_shift = 12, @@ -26,6 +27,14 @@ static int ublk_null_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev) .max_sectors = info->max_io_buf_bytes >> 9, .dev_sectors = dev_size >> 9, }, + .dma = { + .alignment = 4095, + }, + .seg = { + .seg_boundary_mask = 4095, + .max_segment_size = 32 << 10, + .max_segments = 32, + }, }; if (info->flags & UBLK_F_SUPPORT_ZERO_COPY) diff --git a/tools/testing/selftests/ublk/stripe.c b/tools/testing/selftests/ublk/stripe.c index 98c564b12f3c..179731c3dd6f 100644 --- a/tools/testing/selftests/ublk/stripe.c +++ b/tools/testing/selftests/ublk/stripe.c @@ -111,43 +111,67 @@ static void calculate_stripe_array(const struct stripe_conf *conf, } } -static inline enum io_uring_op stripe_to_uring_op(const struct ublksrv_io_desc *iod) +static inline enum io_uring_op stripe_to_uring_op( + const struct ublksrv_io_desc *iod, int zc) { unsigned ublk_op = ublksrv_get_op(iod); if (ublk_op == UBLK_IO_OP_READ) - return IORING_OP_READV; + return zc ? IORING_OP_READV_FIXED : IORING_OP_READV; else if (ublk_op == UBLK_IO_OP_WRITE) - return IORING_OP_WRITEV; + return zc ? IORING_OP_WRITEV_FIXED : IORING_OP_WRITEV; assert(0); } static int stripe_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag) { const struct stripe_conf *conf = get_chunk_shift(q); - enum io_uring_op op = stripe_to_uring_op(iod); + int zc = !!(ublk_queue_use_zc(q) != 0); + enum io_uring_op op = stripe_to_uring_op(iod, zc); struct io_uring_sqe *sqe[NR_STRIPE]; struct stripe_array *s = alloc_stripe_array(conf, iod); struct ublk_io *io = ublk_get_io(q, tag); - int i; + int i, extra = zc ? 2 : 0; io->private_data = s; calculate_stripe_array(conf, iod, s); - ublk_queue_alloc_sqes(q, sqe, s->nr); - for (i = 0; i < s->nr; i++) { - struct stripe *t = &s->s[i]; + ublk_queue_alloc_sqes(q, sqe, s->nr + extra); + + if (zc) { + io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, tag); + sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK; + sqe[0]->user_data = build_user_data(tag, + ublk_cmd_op_nr(sqe[0]->cmd_op), 0, 1); + } + + for (i = zc; i < s->nr + extra - zc; i++) { + struct stripe *t = &s->s[i - zc]; io_uring_prep_rw(op, sqe[i], t->seq + 1, (void *)t->vec, t->nr_vec, t->start << 9); - io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE); + if (zc) { + sqe[i]->buf_index = tag; + io_uring_sqe_set_flags(sqe[i], + IOSQE_FIXED_FILE | IOSQE_IO_HARDLINK); + } else { + io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE); + } /* bit63 marks us as tgt io */ - sqe[i]->user_data = build_user_data(tag, ublksrv_get_op(iod), i, 1); + sqe[i]->user_data = build_user_data(tag, ublksrv_get_op(iod), i - zc, 1); + } + if (zc) { + struct io_uring_sqe *unreg = sqe[s->nr + 1]; + + io_uring_prep_buf_unregister(unreg, 0, tag, q->q_id, tag); + unreg->user_data = build_user_data(tag, ublk_cmd_op_nr(unreg->cmd_op), 0, 1); } - return s->nr; + + /* register buffer is skip_success */ + return s->nr + zc; } static int handle_flush(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag) @@ -208,19 +232,27 @@ static void ublk_stripe_io_done(struct ublk_queue *q, int tag, struct ublk_io *io = ublk_get_io(q, tag); int res = cqe->res; - if (res < 0) { + if (res < 0 || op != ublk_cmd_op_nr(UBLK_U_IO_UNREGISTER_IO_BUF)) { if (!io->result) io->result = res; - ublk_err("%s: io failure %d tag %u\n", __func__, res, tag); + if (res < 0) + ublk_err("%s: io failure %d tag %u\n", __func__, res, tag); } + /* buffer register op is IOSQE_CQE_SKIP_SUCCESS */ + if (op == ublk_cmd_op_nr(UBLK_U_IO_REGISTER_IO_BUF)) + io->tgt_ios += 1; + /* fail short READ/WRITE simply */ if (op == UBLK_IO_OP_READ || op == UBLK_IO_OP_WRITE) { unsigned seq = user_data_to_tgt_data(cqe->user_data); struct stripe_array *s = io->private_data; - if (res < s->s[seq].vec->iov_len) + if (res < s->s[seq].nr_sects << 9) { io->result = -EIO; + ublk_err("%s: short rw op %u res %d exp %u tag %u\n", + __func__, op, res, s->s[seq].vec->iov_len, tag); + } } if (ublk_completed_tgt_io(q, tag)) { @@ -253,7 +285,7 @@ static int ublk_stripe_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev) struct stripe_conf *conf; unsigned chunk_shift; loff_t bytes = 0; - int ret, i; + int ret, i, mul = 1; if ((chunk_size & (chunk_size - 1)) || !chunk_size) { ublk_err("invalid chunk size %u\n", chunk_size); @@ -295,8 +327,11 @@ static int ublk_stripe_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev) dev->tgt.dev_size = bytes; p.basic.dev_sectors = bytes >> 9; dev->tgt.params = p; - dev->tgt.sq_depth = dev->dev_info.queue_depth * conf->nr_files; - dev->tgt.cq_depth = dev->dev_info.queue_depth * conf->nr_files; + + if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY) + mul = 2; + dev->tgt.sq_depth = mul * dev->dev_info.queue_depth * conf->nr_files; + dev->tgt.cq_depth = mul * dev->dev_info.queue_depth * conf->nr_files; printf("%s: shift %u files %u\n", __func__, conf->shift, conf->nr_files); diff --git a/tools/testing/selftests/ublk/test_common.sh b/tools/testing/selftests/ublk/test_common.sh index 75f54ac6b1c4..a88b35943227 100755 --- a/tools/testing/selftests/ublk/test_common.sh +++ b/tools/testing/selftests/ublk/test_common.sh @@ -23,6 +23,12 @@ _get_disk_dev_t() { echo $(( (major & 0xfff) << 20 | (minor & 0xfffff) )) } +_run_fio_verify_io() { + fio --name=verify --rw=randwrite --direct=1 --ioengine=libaio \ + --bs=8k --iodepth=32 --verify=crc32c --do_verify=1 \ + --verify_state_save=0 "$@" > /dev/null +} + _create_backfile() { local my_size=$1 local my_file diff --git a/tools/testing/selftests/ublk/test_generic_02.sh b/tools/testing/selftests/ublk/test_generic_02.sh new file mode 100755 index 000000000000..3e80121e3bf5 --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_02.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_02" +ERR_CODE=0 + +if ! _have_program bpftrace; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "null" "sequential io order for MQ" + +dev_id=$(_add_ublk_dev -t null -q 2) +_check_add_dev $TID $? + +dev_t=$(_get_disk_dev_t "$dev_id") +bpftrace trace/seq_io.bt "$dev_t" "W" 1 > "$UBLK_TMP" 2>&1 & +btrace_pid=$! +sleep 2 + +if ! kill -0 "$btrace_pid" > /dev/null 2>&1; then + _cleanup_test "null" + exit "$UBLK_SKIP_CODE" +fi + +# run fio over this ublk disk +fio --name=write_seq \ + --filename=/dev/ublkb"${dev_id}" \ + --ioengine=libaio --iodepth=16 \ + --rw=write \ + --size=512M \ + --direct=1 \ + --bs=4k > /dev/null 2>&1 +ERR_CODE=$? +kill "$btrace_pid" +wait +if grep -q "io_out_of_order" "$UBLK_TMP"; then + cat "$UBLK_TMP" + ERR_CODE=255 +fi +_cleanup_test "null" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_generic_03.sh b/tools/testing/selftests/ublk/test_generic_03.sh new file mode 100755 index 000000000000..b551aa76cb0d --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_03.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_03" +ERR_CODE=0 + +_prep_test "null" "check dma & segment limits for zero copy" + +dev_id=$(_add_ublk_dev -t null -z) +_check_add_dev $TID $? + +sysfs_path=/sys/block/ublkb"${dev_id}" +dma_align=$(cat "$sysfs_path"/queue/dma_alignment) +max_segments=$(cat "$sysfs_path"/queue/max_segments) +max_segment_size=$(cat "$sysfs_path"/queue/max_segment_size) +if [ "$dma_align" != "4095" ]; then + ERR_CODE=255 +fi +if [ "$max_segments" != "32" ]; then + ERR_CODE=255 +fi +if [ "$max_segment_size" != "32768" ]; then + ERR_CODE=255 +fi +_cleanup_test "null" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_loop_01.sh b/tools/testing/selftests/ublk/test_loop_01.sh index c882d2a08e13..1ef8b6044777 100755 --- a/tools/testing/selftests/ublk/test_loop_01.sh +++ b/tools/testing/selftests/ublk/test_loop_01.sh @@ -6,6 +6,10 @@ TID="loop_01" ERR_CODE=0 +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + _prep_test "loop" "write and verify test" backfile_0=$(_create_backfile 256M) @@ -14,15 +18,7 @@ dev_id=$(_add_ublk_dev -t loop "$backfile_0") _check_add_dev $TID $? "${backfile_0}" # run fio over the ublk disk -fio --name=write_and_verify \ - --filename=/dev/ublkb"${dev_id}" \ - --ioengine=libaio --iodepth=16 \ - --rw=write \ - --size=256M \ - --direct=1 \ - --verify=crc32c \ - --do_verify=1 \ - --bs=4k > /dev/null 2>&1 +_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M ERR_CODE=$? _cleanup_test "loop" diff --git a/tools/testing/selftests/ublk/test_loop_03.sh b/tools/testing/selftests/ublk/test_loop_03.sh index 269c96787d7d..e9ca744de8b1 100755 --- a/tools/testing/selftests/ublk/test_loop_03.sh +++ b/tools/testing/selftests/ublk/test_loop_03.sh @@ -6,6 +6,10 @@ TID="loop_03" ERR_CODE=0 +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + _prep_test "loop" "write and verify over zero copy" backfile_0=$(_create_backfile 256M) @@ -13,15 +17,7 @@ dev_id=$(_add_ublk_dev -t loop -z "$backfile_0") _check_add_dev $TID $? "$backfile_0" # run fio over the ublk disk -fio --name=write_and_verify \ - --filename=/dev/ublkb"${dev_id}" \ - --ioengine=libaio --iodepth=64 \ - --rw=write \ - --size=256M \ - --direct=1 \ - --verify=crc32c \ - --do_verify=1 \ - --bs=4k > /dev/null 2>&1 +_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M ERR_CODE=$? _cleanup_test "loop" diff --git a/tools/testing/selftests/ublk/test_loop_05.sh b/tools/testing/selftests/ublk/test_loop_05.sh new file mode 100755 index 000000000000..2e6e2e6978fc --- /dev/null +++ b/tools/testing/selftests/ublk/test_loop_05.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="loop_05" +ERR_CODE=0 + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "loop" "write and verify test" + +backfile_0=$(_create_backfile 256M) + +dev_id=$(_add_ublk_dev -q 2 -t loop "$backfile_0") +_check_add_dev $TID $? "${backfile_0}" + +# run fio over the ublk disk +_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M +ERR_CODE=$? + +_cleanup_test "loop" + +_remove_backfile "$backfile_0" + +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_01.sh b/tools/testing/selftests/ublk/test_stress_01.sh index 7177f6c57bc5..a8be24532b24 100755 --- a/tools/testing/selftests/ublk/test_stress_01.sh +++ b/tools/testing/selftests/ublk/test_stress_01.sh @@ -27,20 +27,20 @@ ublk_io_and_remove() _prep_test "stress" "run IO and remove device" -ublk_io_and_remove 8G -t null +ublk_io_and_remove 8G -t null -q 4 ERR_CODE=$? if [ ${ERR_CODE} -ne 0 ]; then _show_result $TID $ERR_CODE fi BACK_FILE=$(_create_backfile 256M) -ublk_io_and_remove 256M -t loop "${BACK_FILE}" +ublk_io_and_remove 256M -t loop -q 4 "${BACK_FILE}" ERR_CODE=$? if [ ${ERR_CODE} -ne 0 ]; then _show_result $TID $ERR_CODE fi -ublk_io_and_remove 256M -t loop -z "${BACK_FILE}" +ublk_io_and_remove 256M -t loop -q 4 -z "${BACK_FILE}" ERR_CODE=$? _cleanup_test "stress" _remove_backfile "${BACK_FILE}" diff --git a/tools/testing/selftests/ublk/test_stress_02.sh b/tools/testing/selftests/ublk/test_stress_02.sh index 2a8e60579a06..2159e4cc8140 100755 --- a/tools/testing/selftests/ublk/test_stress_02.sh +++ b/tools/testing/selftests/ublk/test_stress_02.sh @@ -27,20 +27,20 @@ ublk_io_and_kill_daemon() _prep_test "stress" "run IO and kill ublk server" -ublk_io_and_kill_daemon 8G -t null +ublk_io_and_kill_daemon 8G -t null -q 4 ERR_CODE=$? if [ ${ERR_CODE} -ne 0 ]; then _show_result $TID $ERR_CODE fi BACK_FILE=$(_create_backfile 256M) -ublk_io_and_kill_daemon 256M -t loop "${BACK_FILE}" +ublk_io_and_kill_daemon 256M -t loop -q 4 "${BACK_FILE}" ERR_CODE=$? if [ ${ERR_CODE} -ne 0 ]; then _show_result $TID $ERR_CODE fi -ublk_io_and_kill_daemon 256M -t loop -z "${BACK_FILE}" +ublk_io_and_kill_daemon 256M -t loop -q 4 -z "${BACK_FILE}" ERR_CODE=$? _cleanup_test "stress" _remove_backfile "${BACK_FILE}" diff --git a/tools/testing/selftests/ublk/test_stripe_01.sh b/tools/testing/selftests/ublk/test_stripe_01.sh index c01f3dc325ab..7e387ef656ea 100755 --- a/tools/testing/selftests/ublk/test_stripe_01.sh +++ b/tools/testing/selftests/ublk/test_stripe_01.sh @@ -6,6 +6,10 @@ TID="stripe_01" ERR_CODE=0 +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + _prep_test "stripe" "write and verify test" backfile_0=$(_create_backfile 256M) @@ -15,15 +19,7 @@ dev_id=$(_add_ublk_dev -t stripe "$backfile_0" "$backfile_1") _check_add_dev $TID $? "${backfile_0}" # run fio over the ublk disk -fio --name=write_and_verify \ - --filename=/dev/ublkb"${dev_id}" \ - --ioengine=libaio --iodepth=32 \ - --rw=write \ - --size=512M \ - --direct=1 \ - --verify=crc32c \ - --do_verify=1 \ - --bs=4k > /dev/null 2>&1 +_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=512M ERR_CODE=$? _cleanup_test "stripe" diff --git a/tools/testing/selftests/ublk/test_stripe_03.sh b/tools/testing/selftests/ublk/test_stripe_03.sh new file mode 100755 index 000000000000..c1b34af36145 --- /dev/null +++ b/tools/testing/selftests/ublk/test_stripe_03.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="stripe_03" +ERR_CODE=0 + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "stripe" "write and verify test" + +backfile_0=$(_create_backfile 256M) +backfile_1=$(_create_backfile 256M) + +dev_id=$(_add_ublk_dev -q 2 -t stripe "$backfile_0" "$backfile_1") +_check_add_dev $TID $? "${backfile_0}" + +# run fio over the ublk disk +_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=512M +ERR_CODE=$? + +_cleanup_test "stripe" + +_remove_backfile "$backfile_0" +_remove_backfile "$backfile_1" + +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/x86/test_mremap_vdso.c b/tools/testing/selftests/x86/test_mremap_vdso.c index d53959e03593..94bee6e0c813 100644 --- a/tools/testing/selftests/x86/test_mremap_vdso.c +++ b/tools/testing/selftests/x86/test_mremap_vdso.c @@ -14,6 +14,7 @@ #include <errno.h> #include <unistd.h> #include <string.h> +#include <stdbool.h> #include <sys/mman.h> #include <sys/auxv.h> @@ -55,13 +56,55 @@ static int try_to_remap(void *vdso_addr, unsigned long size) } +#define VDSO_NAME "[vdso]" +#define VMFLAGS "VmFlags:" +#define MSEAL_FLAGS "sl" +#define MAX_LINE_LEN 512 + +bool vdso_sealed(FILE *maps) +{ + char line[MAX_LINE_LEN]; + bool has_vdso = false; + + while (fgets(line, sizeof(line), maps)) { + if (strstr(line, VDSO_NAME)) + has_vdso = true; + + if (has_vdso && !strncmp(line, VMFLAGS, strlen(VMFLAGS))) { + if (strstr(line, MSEAL_FLAGS)) + return true; + + return false; + } + } + + return false; +} + int main(int argc, char **argv, char **envp) { pid_t child; + FILE *maps; ksft_print_header(); ksft_set_plan(1); + maps = fopen("/proc/self/smaps", "r"); + if (!maps) { + ksft_test_result_skip( + "Could not open /proc/self/smaps, errno=%d\n", + errno); + + return 0; + } + + if (vdso_sealed(maps)) { + ksft_test_result_skip("vdso is sealed\n"); + return 0; + } + + fclose(maps); + child = fork(); if (child == -1) ksft_exit_fail_msg("failed to fork (%d): %m\n", errno); diff --git a/tools/testing/shared/interval_tree-shim.c b/tools/testing/shared/interval_tree-shim.c new file mode 100644 index 000000000000..122e74756571 --- /dev/null +++ b/tools/testing/shared/interval_tree-shim.c @@ -0,0 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Very simple shim around the interval tree. */ + +#include "../../../lib/interval_tree.c" diff --git a/tools/testing/shared/linux/interval_tree.h b/tools/testing/shared/linux/interval_tree.h new file mode 100644 index 000000000000..129faf9f1d0a --- /dev/null +++ b/tools/testing/shared/linux/interval_tree.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TEST_INTERVAL_TREE_H +#define _TEST_INTERVAL_TREE_H + +#include "../../../../include/linux/interval_tree.h" + +#endif /* _TEST_INTERVAL_TREE_H */ diff --git a/tools/testing/shared/linux/interval_tree_generic.h b/tools/testing/shared/linux/interval_tree_generic.h new file mode 100644 index 000000000000..34cd654bee61 --- /dev/null +++ b/tools/testing/shared/linux/interval_tree_generic.h @@ -0,0 +1,2 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include "../../../../include/linux/interval_tree_generic.h" diff --git a/tools/testing/shared/linux/rbtree.h b/tools/testing/shared/linux/rbtree.h new file mode 100644 index 000000000000..d644bb7360bf --- /dev/null +++ b/tools/testing/shared/linux/rbtree.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TEST_RBTREE_H +#define _TEST_RBTREE_H + +#include <linux/kernel.h> +#include "../../../../include/linux/rbtree.h" + +#endif /* _TEST_RBTREE_H */ diff --git a/tools/testing/shared/linux/rbtree_augmented.h b/tools/testing/shared/linux/rbtree_augmented.h new file mode 100644 index 000000000000..ad138fcf6652 --- /dev/null +++ b/tools/testing/shared/linux/rbtree_augmented.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TEST_RBTREE_AUGMENTED_H +#define _TEST_RBTREE_AUGMENTED_H + +#include "../../../../include/linux/rbtree_augmented.h" + +#endif /* _TEST_RBTREE_AUGMENTED_H */ diff --git a/tools/testing/shared/linux/rbtree_types.h b/tools/testing/shared/linux/rbtree_types.h new file mode 100644 index 000000000000..194194a5bf92 --- /dev/null +++ b/tools/testing/shared/linux/rbtree_types.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TEST_RBTREE_TYPES_H +#define _TEST_RBTREE_TYPES_H + +#include "../../../../include/linux/rbtree_types.h" + +#endif /* _TEST_RBTREE_TYPES_H */ + diff --git a/tools/testing/shared/rbtree-shim.c b/tools/testing/shared/rbtree-shim.c new file mode 100644 index 000000000000..7692a993e5f1 --- /dev/null +++ b/tools/testing/shared/rbtree-shim.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Very simple shim around the rbtree. */ + +#include "../../../lib/rbtree.c" + diff --git a/tools/testing/vma/linux/atomic.h b/tools/testing/vma/linux/atomic.h index 3e1b6adc027b..788c597c4fde 100644 --- a/tools/testing/vma/linux/atomic.h +++ b/tools/testing/vma/linux/atomic.h @@ -9,4 +9,9 @@ #define atomic_set(x, y) uatomic_set(x, y) #define U8_MAX UCHAR_MAX +#ifndef atomic_cmpxchg_relaxed +#define atomic_cmpxchg_relaxed uatomic_cmpxchg +#define atomic_cmpxchg_release uatomic_cmpxchg +#endif /* atomic_cmpxchg_relaxed */ + #endif /* _LINUX_ATOMIC_H */ diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/vma.c index 04ab45e27fb8..11f761769b5b 100644 --- a/tools/testing/vma/vma.c +++ b/tools/testing/vma/vma.c @@ -74,11 +74,23 @@ static struct vm_area_struct *alloc_vma(struct mm_struct *mm, ret->vm_end = end; ret->vm_pgoff = pgoff; ret->__vm_flags = flags; + vma_assert_detached(ret); return ret; } /* Helper function to allocate a VMA and link it to the tree. */ +static int attach_vma(struct mm_struct *mm, struct vm_area_struct *vma) +{ + int res; + + res = vma_link(mm, vma); + if (!res) + vma_assert_attached(vma); + return res; +} + +/* Helper function to allocate a VMA and link it to the tree. */ static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm, unsigned long start, unsigned long end, @@ -90,7 +102,7 @@ static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm, if (vma == NULL) return NULL; - if (vma_link(mm, vma)) { + if (attach_vma(mm, vma)) { vm_area_free(vma); return NULL; } @@ -108,6 +120,7 @@ static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm, /* Helper function which provides a wrapper around a merge new VMA operation. */ static struct vm_area_struct *merge_new(struct vma_merge_struct *vmg) { + struct vm_area_struct *vma; /* * For convenience, get prev and next VMAs. Which the new VMA operation * requires. @@ -116,7 +129,11 @@ static struct vm_area_struct *merge_new(struct vma_merge_struct *vmg) vmg->prev = vma_prev(vmg->vmi); vma_iter_next_range(vmg->vmi); - return vma_merge_new_range(vmg); + vma = vma_merge_new_range(vmg); + if (vma) + vma_assert_attached(vma); + + return vma; } /* @@ -125,7 +142,12 @@ static struct vm_area_struct *merge_new(struct vma_merge_struct *vmg) */ static struct vm_area_struct *merge_existing(struct vma_merge_struct *vmg) { - return vma_merge_existing_range(vmg); + struct vm_area_struct *vma; + + vma = vma_merge_existing_range(vmg); + if (vma) + vma_assert_attached(vma); + return vma; } /* @@ -147,13 +169,20 @@ static void vmg_set_range(struct vma_merge_struct *vmg, unsigned long start, vma_iter_set(vmg->vmi, start); vmg->prev = NULL; + vmg->middle = NULL; vmg->next = NULL; - vmg->vma = NULL; + vmg->target = NULL; vmg->start = start; vmg->end = end; vmg->pgoff = pgoff; vmg->flags = flags; + + vmg->just_expand = false; + vmg->__remove_middle = false; + vmg->__remove_next = false; + vmg->__adjust_middle_start = false; + vmg->__adjust_next_start = false; } /* @@ -253,8 +282,8 @@ static bool test_simple_merge(void) .pgoff = 1, }; - ASSERT_FALSE(vma_link(&mm, vma_left)); - ASSERT_FALSE(vma_link(&mm, vma_right)); + ASSERT_FALSE(attach_vma(&mm, vma_left)); + ASSERT_FALSE(attach_vma(&mm, vma_right)); vma = merge_new(&vmg); ASSERT_NE(vma, NULL); @@ -278,7 +307,7 @@ static bool test_simple_modify(void) struct vm_area_struct *init_vma = alloc_vma(&mm, 0, 0x3000, 0, flags); VMA_ITERATOR(vmi, &mm, 0x1000); - ASSERT_FALSE(vma_link(&mm, init_vma)); + ASSERT_FALSE(attach_vma(&mm, init_vma)); /* * The flags will not be changed, the vma_modify_flags() function @@ -338,13 +367,13 @@ static bool test_simple_expand(void) VMA_ITERATOR(vmi, &mm, 0); struct vma_merge_struct vmg = { .vmi = &vmi, - .vma = vma, + .middle = vma, .start = 0, .end = 0x3000, .pgoff = 0, }; - ASSERT_FALSE(vma_link(&mm, vma)); + ASSERT_FALSE(attach_vma(&mm, vma)); ASSERT_FALSE(expand_existing(&vmg)); @@ -365,7 +394,7 @@ static bool test_simple_shrink(void) struct vm_area_struct *vma = alloc_vma(&mm, 0, 0x3000, 0, flags); VMA_ITERATOR(vmi, &mm, 0); - ASSERT_FALSE(vma_link(&mm, vma)); + ASSERT_FALSE(attach_vma(&mm, vma)); ASSERT_FALSE(vma_shrink(&vmi, vma, 0, 0x1000, 0)); @@ -631,7 +660,7 @@ static bool test_vma_merge_special_flags(void) */ vma = alloc_and_link_vma(&mm, 0x3000, 0x4000, 3, flags); ASSERT_NE(vma, NULL); - vmg.vma = vma; + vmg.middle = vma; for (i = 0; i < ARRAY_SIZE(special_flags); i++) { vm_flags_t special_flag = special_flags[i]; @@ -760,7 +789,7 @@ static bool test_vma_merge_with_close(void) vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags); vmg.prev = vma_prev; - vmg.vma = vma; + vmg.middle = vma; /* * The VMA being modified in a way that would otherwise merge should @@ -787,7 +816,7 @@ static bool test_vma_merge_with_close(void) vma->vm_ops = &vm_ops; vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags); - vmg.vma = vma; + vmg.middle = vma; ASSERT_EQ(merge_existing(&vmg), NULL); /* * Initially this is misapprehended as an out of memory report, as the @@ -817,7 +846,7 @@ static bool test_vma_merge_with_close(void) vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags); vmg.prev = vma_prev; - vmg.vma = vma; + vmg.middle = vma; ASSERT_EQ(merge_existing(&vmg), NULL); ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); @@ -843,7 +872,7 @@ static bool test_vma_merge_with_close(void) vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags); vmg.prev = vma_prev; - vmg.vma = vma; + vmg.middle = vma; ASSERT_EQ(merge_existing(&vmg), vma_prev); ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); @@ -940,7 +969,7 @@ static bool test_merge_existing(void) vma_next = alloc_and_link_vma(&mm, 0x6000, 0x9000, 6, flags); vma_next->vm_ops = &vm_ops; /* This should have no impact. */ vmg_set_range(&vmg, 0x3000, 0x6000, 3, flags); - vmg.vma = vma; + vmg.middle = vma; vmg.prev = vma; vma->anon_vma = &dummy_anon_vma; ASSERT_EQ(merge_existing(&vmg), vma_next); @@ -973,7 +1002,7 @@ static bool test_merge_existing(void) vma_next = alloc_and_link_vma(&mm, 0x6000, 0x9000, 6, flags); vma_next->vm_ops = &vm_ops; /* This should have no impact. */ vmg_set_range(&vmg, 0x2000, 0x6000, 2, flags); - vmg.vma = vma; + vmg.middle = vma; vma->anon_vma = &dummy_anon_vma; ASSERT_EQ(merge_existing(&vmg), vma_next); ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); @@ -1003,7 +1032,7 @@ static bool test_merge_existing(void) vma->vm_ops = &vm_ops; /* This should have no impact. */ vmg_set_range(&vmg, 0x3000, 0x6000, 3, flags); vmg.prev = vma_prev; - vmg.vma = vma; + vmg.middle = vma; vma->anon_vma = &dummy_anon_vma; ASSERT_EQ(merge_existing(&vmg), vma_prev); @@ -1037,7 +1066,7 @@ static bool test_merge_existing(void) vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, flags); vmg_set_range(&vmg, 0x3000, 0x7000, 3, flags); vmg.prev = vma_prev; - vmg.vma = vma; + vmg.middle = vma; vma->anon_vma = &dummy_anon_vma; ASSERT_EQ(merge_existing(&vmg), vma_prev); ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); @@ -1067,7 +1096,7 @@ static bool test_merge_existing(void) vma_next = alloc_and_link_vma(&mm, 0x7000, 0x9000, 7, flags); vmg_set_range(&vmg, 0x3000, 0x7000, 3, flags); vmg.prev = vma_prev; - vmg.vma = vma; + vmg.middle = vma; vma->anon_vma = &dummy_anon_vma; ASSERT_EQ(merge_existing(&vmg), vma_prev); ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); @@ -1102,37 +1131,37 @@ static bool test_merge_existing(void) vmg_set_range(&vmg, 0x4000, 0x5000, 4, flags); vmg.prev = vma; - vmg.vma = vma; + vmg.middle = vma; ASSERT_EQ(merge_existing(&vmg), NULL); ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); vmg_set_range(&vmg, 0x5000, 0x6000, 5, flags); vmg.prev = vma; - vmg.vma = vma; + vmg.middle = vma; ASSERT_EQ(merge_existing(&vmg), NULL); ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); vmg_set_range(&vmg, 0x6000, 0x7000, 6, flags); vmg.prev = vma; - vmg.vma = vma; + vmg.middle = vma; ASSERT_EQ(merge_existing(&vmg), NULL); ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); vmg_set_range(&vmg, 0x4000, 0x7000, 4, flags); vmg.prev = vma; - vmg.vma = vma; + vmg.middle = vma; ASSERT_EQ(merge_existing(&vmg), NULL); ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); vmg_set_range(&vmg, 0x4000, 0x6000, 4, flags); vmg.prev = vma; - vmg.vma = vma; + vmg.middle = vma; ASSERT_EQ(merge_existing(&vmg), NULL); ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); vmg_set_range(&vmg, 0x5000, 0x6000, 5, flags); vmg.prev = vma; - vmg.vma = vma; + vmg.middle = vma; ASSERT_EQ(merge_existing(&vmg), NULL); ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); @@ -1197,7 +1226,7 @@ static bool test_anon_vma_non_mergeable(void) vmg_set_range(&vmg, 0x3000, 0x7000, 3, flags); vmg.prev = vma_prev; - vmg.vma = vma; + vmg.middle = vma; ASSERT_EQ(merge_existing(&vmg), vma_prev); ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); @@ -1277,7 +1306,7 @@ static bool test_dup_anon_vma(void) vma_next->anon_vma = &dummy_anon_vma; vmg_set_range(&vmg, 0, 0x5000, 0, flags); - vmg.vma = vma_prev; + vmg.middle = vma_prev; vmg.next = vma_next; ASSERT_EQ(expand_existing(&vmg), 0); @@ -1309,7 +1338,7 @@ static bool test_dup_anon_vma(void) vma_next->anon_vma = &dummy_anon_vma; vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags); vmg.prev = vma_prev; - vmg.vma = vma; + vmg.middle = vma; ASSERT_EQ(merge_existing(&vmg), vma_prev); ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); @@ -1338,7 +1367,7 @@ static bool test_dup_anon_vma(void) vma->anon_vma = &dummy_anon_vma; vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags); vmg.prev = vma_prev; - vmg.vma = vma; + vmg.middle = vma; ASSERT_EQ(merge_existing(&vmg), vma_prev); ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); @@ -1366,7 +1395,7 @@ static bool test_dup_anon_vma(void) vma->anon_vma = &dummy_anon_vma; vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags); vmg.prev = vma_prev; - vmg.vma = vma; + vmg.middle = vma; ASSERT_EQ(merge_existing(&vmg), vma_prev); ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); @@ -1394,7 +1423,7 @@ static bool test_dup_anon_vma(void) vma->anon_vma = &dummy_anon_vma; vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags); vmg.prev = vma; - vmg.vma = vma; + vmg.middle = vma; ASSERT_EQ(merge_existing(&vmg), vma_next); ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); @@ -1432,7 +1461,7 @@ static bool test_vmi_prealloc_fail(void) vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags); vmg.prev = vma_prev; - vmg.vma = vma; + vmg.middle = vma; fail_prealloc = true; @@ -1458,7 +1487,7 @@ static bool test_vmi_prealloc_fail(void) vma->anon_vma = &dummy_anon_vma; vmg_set_range(&vmg, 0, 0x5000, 3, flags); - vmg.vma = vma_prev; + vmg.middle = vma_prev; vmg.next = vma; fail_prealloc = true; @@ -1515,11 +1544,11 @@ static bool test_copy_vma(void) vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, flags); vma_new = copy_vma(&vma, 0, 0x2000, 0, &need_locks); - ASSERT_NE(vma_new, vma); ASSERT_EQ(vma_new->vm_start, 0); ASSERT_EQ(vma_new->vm_end, 0x2000); ASSERT_EQ(vma_new->vm_pgoff, 0); + vma_assert_attached(vma_new); cleanup_mm(&mm, &vmi); @@ -1528,6 +1557,7 @@ static bool test_copy_vma(void) vma = alloc_and_link_vma(&mm, 0, 0x2000, 0, flags); vma_next = alloc_and_link_vma(&mm, 0x6000, 0x8000, 6, flags); vma_new = copy_vma(&vma, 0x4000, 0x2000, 4, &need_locks); + vma_assert_attached(vma_new); ASSERT_EQ(vma_new, vma_next); @@ -1546,7 +1576,7 @@ static bool test_expand_only_mode(void) /* * Place a VMA prior to the one we're expanding so we assert that we do * not erroneously try to traverse to the previous VMA even though we - * have, through the use of VMG_FLAG_JUST_EXPAND, indicated we do not + * have, through the use of the just_expand flag, indicated we do not * need to do so. */ alloc_and_link_vma(&mm, 0, 0x2000, 0, flags); @@ -1558,7 +1588,7 @@ static bool test_expand_only_mode(void) vma_iter_set(&vmi, 0x3000); vma_prev = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, flags); vmg.prev = vma_prev; - vmg.merge_flags = VMG_FLAG_JUST_EXPAND; + vmg.just_expand = true; vma = vma_merge_new_range(&vmg); ASSERT_NE(vma, NULL); @@ -1569,6 +1599,7 @@ static bool test_expand_only_mode(void) ASSERT_EQ(vma->vm_pgoff, 3); ASSERT_TRUE(vma_write_started(vma)); ASSERT_EQ(vma_iter_addr(&vmi), 0x3000); + vma_assert_attached(vma); cleanup_mm(&mm, &vmi); return true; diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index 1eae23039854..572ab2cea763 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -25,7 +25,7 @@ #include <linux/maple_tree.h> #include <linux/mm.h> #include <linux/rbtree.h> -#include <linux/rwsem.h> +#include <linux/refcount.h> extern unsigned long stack_guard_gap; #ifdef CONFIG_MMU @@ -135,10 +135,6 @@ typedef __bitwise unsigned int vm_fault_t; */ #define pr_warn_once pr_err -typedef struct refcount_struct { - atomic_t refs; -} refcount_t; - struct kref { refcount_t refcount; }; @@ -233,15 +229,12 @@ struct mm_struct { unsigned long flags; /* Must use atomic bitops to access */ }; -struct vma_lock { - struct rw_semaphore lock; -}; - - struct file { struct address_space *f_mapping; }; +#define VMA_LOCK_OFFSET 0x40000000 + struct vm_area_struct { /* The first cache line has the info for VMA tree walking. */ @@ -269,16 +262,13 @@ struct vm_area_struct { }; #ifdef CONFIG_PER_VMA_LOCK - /* Flag to indicate areas detached from the mm->mm_mt tree */ - bool detached; - /* * Can only be written (using WRITE_ONCE()) while holding both: * - mmap_lock (in write mode) - * - vm_lock->lock (in write mode) + * - vm_refcnt bit at VMA_LOCK_OFFSET is set * Can be read reliably while holding one of: * - mmap_lock (in read or write mode) - * - vm_lock->lock (in read or write mode) + * - vm_refcnt bit at VMA_LOCK_OFFSET is set or vm_refcnt > 1 * Can be read unreliably (using READ_ONCE()) for pessimistic bailout * while holding nothing (except RCU to keep the VMA struct allocated). * @@ -287,20 +277,9 @@ struct vm_area_struct { * slowpath. */ unsigned int vm_lock_seq; - struct vma_lock *vm_lock; #endif /* - * For areas with an address space and backing store, - * linkage into the address_space->i_mmap interval tree. - * - */ - struct { - struct rb_node rb; - unsigned long rb_subtree_last; - } shared; - - /* * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma * list, after a COW of one of the file pages. A MAP_SHARED vma * can only be in the i_mmap tree. An anonymous MAP_PRIVATE, stack @@ -319,14 +298,6 @@ struct vm_area_struct { struct file * vm_file; /* File we map to (can be NULL). */ void * vm_private_data; /* was vm_pte (shared mem) */ -#ifdef CONFIG_ANON_VMA_NAME - /* - * For private and shared anonymous mappings, a pointer to a null - * terminated string containing the name given to the vma, or NULL if - * unnamed. Serialized by mmap_lock. Use anon_vma_name to access. - */ - struct anon_vma_name *anon_name; -#endif #ifdef CONFIG_SWAP atomic_long_t swap_readahead_info; #endif @@ -339,6 +310,27 @@ struct vm_area_struct { #ifdef CONFIG_NUMA_BALANCING struct vma_numab_state *numab_state; /* NUMA Balancing state */ #endif +#ifdef CONFIG_PER_VMA_LOCK + /* Unstable RCU readers are allowed to read this. */ + refcount_t vm_refcnt; +#endif + /* + * For areas with an address space and backing store, + * linkage into the address_space->i_mmap interval tree. + * + */ + struct { + struct rb_node rb; + unsigned long rb_subtree_last; + } shared; +#ifdef CONFIG_ANON_VMA_NAME + /* + * For private and shared anonymous mappings, a pointer to a null + * terminated string containing the name given to the vma, or NULL if + * unnamed. Serialized by mmap_lock. Use anon_vma_name to access. + */ + struct anon_vma_name *anon_name; +#endif struct vm_userfaultfd_ctx vm_userfaultfd_ctx; } __randomize_layout; @@ -464,26 +456,40 @@ static inline struct vm_area_struct *vma_next(struct vma_iterator *vmi) return mas_find(&vmi->mas, ULONG_MAX); } -static inline bool vma_lock_alloc(struct vm_area_struct *vma) +/* + * WARNING: to avoid racing with vma_mark_attached()/vma_mark_detached(), these + * assertions should be made either under mmap_write_lock or when the object + * has been isolated under mmap_write_lock, ensuring no competing writers. + */ +static inline void vma_assert_attached(struct vm_area_struct *vma) { - vma->vm_lock = calloc(1, sizeof(struct vma_lock)); - - if (!vma->vm_lock) - return false; - - init_rwsem(&vma->vm_lock->lock); - vma->vm_lock_seq = UINT_MAX; + WARN_ON_ONCE(!refcount_read(&vma->vm_refcnt)); +} - return true; +static inline void vma_assert_detached(struct vm_area_struct *vma) +{ + WARN_ON_ONCE(refcount_read(&vma->vm_refcnt)); } static inline void vma_assert_write_locked(struct vm_area_struct *); -static inline void vma_mark_detached(struct vm_area_struct *vma, bool detached) +static inline void vma_mark_attached(struct vm_area_struct *vma) { - /* When detaching vma should be write-locked */ - if (detached) - vma_assert_write_locked(vma); - vma->detached = detached; + vma_assert_write_locked(vma); + vma_assert_detached(vma); + refcount_set_release(&vma->vm_refcnt, 1); +} + +static inline void vma_mark_detached(struct vm_area_struct *vma) +{ + vma_assert_write_locked(vma); + vma_assert_attached(vma); + /* We are the only writer, so no need to use vma_refcount_put(). */ + if (unlikely(!refcount_dec_and_test(&vma->vm_refcnt))) { + /* + * Reader must have temporarily raised vm_refcnt but it will + * drop it without using the vma since vma is write-locked. + */ + } } extern const struct vm_operations_struct vma_dummy_vm_ops; @@ -496,7 +502,7 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) vma->vm_mm = mm; vma->vm_ops = &vma_dummy_vm_ops; INIT_LIST_HEAD(&vma->anon_vma_chain); - vma_mark_detached(vma, false); + vma->vm_lock_seq = UINT_MAX; } static inline struct vm_area_struct *vm_area_alloc(struct mm_struct *mm) @@ -507,10 +513,6 @@ static inline struct vm_area_struct *vm_area_alloc(struct mm_struct *mm) return NULL; vma_init(vma, mm); - if (!vma_lock_alloc(vma)) { - free(vma); - return NULL; - } return vma; } @@ -523,10 +525,8 @@ static inline struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig) return NULL; memcpy(new, orig, sizeof(*new)); - if (!vma_lock_alloc(new)) { - free(new); - return NULL; - } + refcount_set(&new->vm_refcnt, 0); + new->vm_lock_seq = UINT_MAX; INIT_LIST_HEAD(&new->anon_vma_chain); return new; @@ -696,20 +696,9 @@ static inline void mpol_put(struct mempolicy *) { } -static inline void vma_lock_free(struct vm_area_struct *vma) -{ - free(vma->vm_lock); -} - -static inline void __vm_area_free(struct vm_area_struct *vma) -{ - vma_lock_free(vma); - free(vma); -} - static inline void vm_area_free(struct vm_area_struct *vma) { - __vm_area_free(vma); + free(vma); } static inline void lru_add_drain(void) @@ -796,12 +785,12 @@ static inline void vma_start_write(struct vm_area_struct *vma) static inline void vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long start, unsigned long end, - long adjust_next) + struct vm_area_struct *next) { (void)vma; (void)start; (void)end; - (void)adjust_next; + (void)next; } static inline void vma_iter_free(struct vma_iterator *vmi) |